This commit is contained in:
2026-03-27 22:54:58 -03:00
parent 3d8e7291f3
commit 94c7b21ae5
8 changed files with 233 additions and 224 deletions

View File

@@ -13,22 +13,6 @@ class AssetStatus(models.TextChoices):
ERROR = "error", "Error"
class JobStatus(models.TextChoices):
PENDING = "pending", "Pending"
PROCESSING = "processing", "Processing"
COMPLETED = "completed", "Completed"
FAILED = "failed", "Failed"
CANCELLED = "cancelled", "Cancelled"
class ChunkJobStatus(models.TextChoices):
PENDING = "pending", "Pending"
CHUNKING = "chunking", "Chunking"
PROCESSING = "processing", "Processing"
COLLECTING = "collecting", "Collecting"
COMPLETED = "completed", "Completed"
FAILED = "failed", "Failed"
CANCELLED = "cancelled", "Cancelled"
class DetectJobStatus(models.TextChoices):
PENDING = "pending", "Pending"
RUNNING = "running", "Running"
PAUSED = "paused", "Paused"
@@ -111,81 +95,17 @@ class TranscodePreset(models.Model):
return self.name
class TranscodeJob(models.Model):
"""A transcoding or trimming job in the queue."""
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
source_asset_id = models.UUIDField()
preset_id = models.UUIDField(null=True, blank=True)
preset_snapshot = models.JSONField(default=dict, blank=True)
trim_start = models.FloatField(null=True, blank=True, default=None)
trim_end = models.FloatField(null=True, blank=True, default=None)
output_filename = models.CharField(max_length=500)
output_path = models.CharField(max_length=1000, null=True, blank=True)
output_asset_id = models.UUIDField(null=True, blank=True)
status = models.CharField(max_length=20, choices=JobStatus.choices, default=JobStatus.PENDING)
progress = models.FloatField(default=0.0)
current_frame = models.IntegerField(null=True, blank=True, default=None)
current_time = models.FloatField(null=True, blank=True, default=None)
speed = models.CharField(max_length=255, null=True, blank=True)
error_message = models.TextField(blank=True, default='')
celery_task_id = models.CharField(max_length=255, null=True, blank=True)
execution_arn = models.CharField(max_length=255, null=True, blank=True)
priority = models.IntegerField(default=0)
created_at = models.DateTimeField(auto_now_add=True)
started_at = models.DateTimeField(null=True, blank=True)
completed_at = models.DateTimeField(null=True, blank=True)
class Meta:
ordering = ["-created_at"]
def __str__(self):
return str(self.id)
class ChunkJob(models.Model):
"""A chunk pipeline job — splits a media file into chunks and processes them"""
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
source_asset_id = models.UUIDField()
chunk_duration = models.FloatField(default=10.0)
num_workers = models.IntegerField(default=4)
max_retries = models.IntegerField(default=3)
processor_type = models.CharField(max_length=255)
status = models.CharField(max_length=20, choices=ChunkJobStatus.choices, default=ChunkJobStatus.PENDING)
progress = models.FloatField(default=0.0)
total_chunks = models.IntegerField(default=0)
processed_chunks = models.IntegerField(default=0)
failed_chunks = models.IntegerField(default=0)
retry_count = models.IntegerField(default=0)
error_message = models.TextField(blank=True, default='')
throughput_mbps = models.FloatField(null=True, blank=True, default=None)
elapsed_seconds = models.FloatField(null=True, blank=True, default=None)
celery_task_id = models.CharField(max_length=255, null=True, blank=True)
priority = models.IntegerField(default=0)
created_at = models.DateTimeField(auto_now_add=True)
started_at = models.DateTimeField(null=True, blank=True)
completed_at = models.DateTimeField(null=True, blank=True)
class Meta:
ordering = ["-created_at"]
def __str__(self):
return str(self.id)
class DetectJob(models.Model):
"""A detection pipeline job."""
class Job(models.Model):
"""A pipeline job."""
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
source_asset_id = models.UUIDField()
video_path = models.CharField(max_length=1000)
profile_name = models.CharField(max_length=255)
parent_job_id = models.UUIDField(null=True, blank=True)
parent_id = models.UUIDField(null=True, blank=True)
run_type = models.CharField(max_length=20, choices=RunType.choices, default=RunType.INITIAL)
replay_from_stage = models.CharField(max_length=255, null=True, blank=True)
config_overrides = models.JSONField(default=dict, blank=True)
status = models.CharField(max_length=20, choices=DetectJobStatus.choices, default=DetectJobStatus.PENDING)
status = models.CharField(max_length=20, choices=JobStatus.choices, default=JobStatus.PENDING)
current_stage = models.CharField(max_length=255, null=True, blank=True)
progress = models.FloatField(default=0.0)
error_message = models.TextField(blank=True, default='')
@@ -206,26 +126,17 @@ class DetectJob(models.Model):
return str(self.id)
class StageCheckpoint(models.Model):
"""A checkpoint saved after a pipeline stage completes."""
class Timeline(models.Model):
"""The frame sequence from a source video."""
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
job_id = models.UUIDField()
stage = models.CharField(max_length=255)
stage_index = models.IntegerField()
source_asset_id = models.UUIDField(null=True, blank=True)
source_video = models.CharField(max_length=255)
profile_name = models.CharField(max_length=255)
fps = models.FloatField(default=2.0)
frames_prefix = models.CharField(max_length=255)
frames_manifest = models.JSONField(default=dict, blank=True)
frames_meta = models.JSONField(default=list, blank=True)
filtered_frame_sequences = models.JSONField(default=list, blank=True)
boxes_by_frame = models.JSONField(default=dict, blank=True)
text_candidates = models.JSONField(default=list, blank=True)
unresolved_candidates = models.JSONField(default=list, blank=True)
detections = models.JSONField(default=list, blank=True)
stats = models.JSONField(default=dict, blank=True)
config_snapshot = models.JSONField(default=dict, blank=True)
config_overrides = models.JSONField(default=dict, blank=True)
video_path = models.CharField(max_length=1000)
profile_name = models.CharField(max_length=255)
created_at = models.DateTimeField(auto_now_add=True)
class Meta:
@@ -235,15 +146,36 @@ class StageCheckpoint(models.Model):
return str(self.id)
class KnownBrand(models.Model):
class Checkpoint(models.Model):
"""A snapshot of pipeline state on a timeline."""
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
timeline_id = models.UUIDField()
parent_id = models.UUIDField(null=True, blank=True)
stage_outputs = models.JSONField(default=dict, blank=True)
config_overrides = models.JSONField(default=dict, blank=True)
stats = models.JSONField(default=dict, blank=True)
is_scenario = models.BooleanField(default=False)
scenario_label = models.CharField(max_length=255)
created_at = models.DateTimeField(auto_now_add=True)
class Meta:
ordering = ["-created_at"]
def __str__(self):
return str(self.id)
class Brand(models.Model):
"""A brand discovered or registered in the system."""
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
canonical_name = models.CharField(max_length=255)
aliases = models.JSONField(default=list, blank=True)
first_source = models.CharField(max_length=20, choices=BrandSource.choices, default=BrandSource.OCR)
total_occurrences = models.IntegerField(default=0)
source = models.CharField(max_length=20, choices=BrandSource.choices, default=BrandSource.OCR)
confirmed = models.BooleanField(default=False)
airings = models.JSONField(default=list, blank=True)
total_airings = models.IntegerField(default=0)
created_at = models.DateTimeField(auto_now_add=True)
updated_at = models.DateTimeField(auto_now=True)
@@ -253,24 +185,3 @@ class KnownBrand(models.Model):
def __str__(self):
return str(self.id)
class SourceBrandSighting(models.Model):
"""A brand seen in a specific source (video/asset)."""
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
source_asset_id = models.UUIDField()
brand_id = models.UUIDField()
brand_name = models.CharField(max_length=255)
first_seen_timestamp = models.FloatField(default=0.0)
last_seen_timestamp = models.FloatField(default=0.0)
occurrences = models.IntegerField(default=0)
detection_source = models.CharField(max_length=20, choices=BrandSource.choices, default=BrandSource.OCR)
avg_confidence = models.FloatField(default=0.0)
created_at = models.DateTimeField(auto_now_add=True)
class Meta:
ordering = ["-created_at"]
def __str__(self):
return str(self.id)

156
core/db/models.py Normal file
View File

@@ -0,0 +1,156 @@
"""
SQLModel Table Models - GENERATED FILE
Do not edit directly. Regenerate using modelgen.
"""
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional
from uuid import UUID, uuid4
from sqlmodel import SQLModel, Field, Column
from sqlalchemy import JSON
class AssetStatus(str, Enum):
    """Status vocabulary for MediaAsset.status."""

    PENDING = "pending"
    READY = "ready"
    ERROR = "error"
class JobStatus(str, Enum):
    """Status vocabulary for Job.status (merged job-status set: includes
    the paused/running states of the former DetectJobStatus)."""

    PENDING = "pending"
    RUNNING = "running"
    PAUSED = "paused"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
class RunType(str, Enum):
    """How a Job run was started (see Job.run_type)."""

    INITIAL = "initial"
    REPLAY = "replay"
    RETRY = "retry"
class BrandSource(str, Enum):
    """Where a brand detection came from (see Brand.source).

    Note the member names abbreviate the stored values:
    VLM stores "local_vlm", CLOUD stores "cloud_llm".
    """

    OCR = "ocr"
    VLM = "local_vlm"
    CLOUD = "cloud_llm"
    MANUAL = "manual"
class SourceType(str, Enum):
    """Origin of a media source.

    NOTE(review): not referenced by any model in this module; presumably
    consumed by API schemas elsewhere — confirm before removing.
    """

    CHUNK_JOB = "chunk_job"
    UPLOAD = "upload"
    DEVICE = "device"
    STREAM = "stream"
class MediaAsset(SQLModel, table=True):
    """A video/audio file registered in the system."""
    __tablename__ = "media_assets"

    id: UUID = Field(default_factory=uuid4, primary_key=True)
    filename: str
    file_path: str
    # Fixed: use the enum member, not the bare string "pending". Pydantic does
    # not validate defaults, so a raw str default would bypass the AssetStatus
    # enum at runtime and fails static type checking.
    # NOTE(review): this file is generated ("Do not edit directly") — apply the
    # same fix in the modelgen template.
    status: AssetStatus = AssetStatus.PENDING
    error_message: Optional[str] = None  # presumably set when status is ERROR — confirm
    file_size: Optional[int] = None
    duration: Optional[float] = None
    video_codec: Optional[str] = None
    audio_codec: Optional[str] = None
    width: Optional[int] = None
    height: Optional[int] = None
    framerate: Optional[float] = None
    bitrate: Optional[int] = None
    # Free-form metadata blob stored as a JSON column (non-null, defaults to {}).
    properties: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
    comments: str = ""
    tags: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12; consider
    # datetime.now(timezone.utc) in the generator — confirm columns are tz-aware first.
    # updated_at has no onupdate hook, so unlike the Django auto_now counterpart
    # it is only set at creation — confirm intended.
    created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
    updated_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
class TranscodePreset(SQLModel, table=True):
    """A reusable transcoding configuration (like Handbrake presets)."""
    __tablename__ = "transcode_presets"

    id: UUID = Field(default_factory=uuid4, primary_key=True)
    name: str
    description: str = ""
    is_builtin: bool = False
    # Defaults ("mp4", "libx264", "aac") suggest ffmpeg encoder/container
    # names — TODO confirm against the transcode worker.
    container: str = "mp4"
    video_codec: str = "libx264"
    video_bitrate: Optional[str] = None  # str, not int — presumably ffmpeg-style ("5M") — confirm
    video_crf: Optional[int] = None
    video_preset: Optional[str] = None
    resolution: Optional[str] = None
    framerate: Optional[float] = None
    audio_codec: str = "aac"
    audio_bitrate: Optional[str] = None
    audio_channels: Optional[int] = None
    audio_samplerate: Optional[int] = None
    # Extra command-line arguments, stored as a JSON array (non-null, defaults to []).
    extra_args: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12; also
    # updated_at has no onupdate hook, so unlike a Django auto_now field it is
    # only set at creation — confirm intended.
    created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
    updated_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
class Job(SQLModel, table=True):
    """A pipeline job."""
    __tablename__ = "jobs"

    id: UUID = Field(default_factory=uuid4, primary_key=True)
    source_asset_id: UUID = Field(index=True)  # MediaAsset.id, no FK constraint declared
    video_path: str
    profile_name: str = "soccer_broadcast"
    parent_id: Optional[UUID] = None  # presumably the originating job for replay/retry runs — confirm
    # Fixed: enum members instead of bare strings "initial"/"pending". Pydantic
    # does not validate defaults, so raw str defaults would bypass the enums at
    # runtime and fail static type checking.
    # NOTE(review): this file is generated ("Do not edit directly") — apply the
    # same fix in the modelgen template.
    run_type: RunType = RunType.INITIAL
    config_overrides: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
    status: JobStatus = JobStatus.PENDING
    current_stage: Optional[str] = None
    progress: float = 0.0
    error_message: Optional[str] = None
    # Aggregate pipeline counters/cost rollups.
    total_detections: int = 0
    brands_found: int = 0
    cloud_llm_calls: int = 0
    estimated_cost_usd: float = 0.0
    celery_task_id: Optional[str] = None
    priority: int = 0
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12; consider
    # datetime.now(timezone.utc) in the generator — confirm columns are tz-aware first.
    created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
class Timeline(SQLModel, table=True):
    """The frame sequence from a source video."""
    __tablename__ = "timelines"

    id: UUID = Field(default_factory=uuid4, primary_key=True)
    source_asset_id: Optional[UUID] = Field(default=None, index=True)  # MediaAsset.id, no FK constraint declared
    source_video: str = ""
    profile_name: str = ""
    fps: float = 2.0  # presumably the frame-extraction rate, not the source video's fps — confirm
    frames_prefix: str = ""  # presumably a storage path/key prefix for extracted frames — confirm
    frames_manifest: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
    # NOTE(review): typed List[str]; if entries are per-frame metadata objects
    # rather than strings, the element type should be Dict[str, Any] — confirm.
    frames_meta: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12.
    created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
class Checkpoint(SQLModel, table=True):
    """A snapshot of pipeline state on a timeline."""
    __tablename__ = "checkpoints"

    id: UUID = Field(default_factory=uuid4, primary_key=True)
    timeline_id: UUID  # owning Timeline.id, no FK constraint declared
    parent_id: Optional[UUID] = None  # presumably the previous checkpoint in the chain — confirm
    # Per-stage pipeline outputs keyed by stage — presumably; confirm against the pipeline writer.
    stage_outputs: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
    config_overrides: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
    stats: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
    is_scenario: bool = False
    scenario_label: str = ""  # presumably only meaningful when is_scenario is True — confirm
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12.
    created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
class Brand(SQLModel, table=True):
    """A brand discovered or registered in the system."""
    __tablename__ = "brands"

    id: UUID = Field(default_factory=uuid4, primary_key=True)
    canonical_name: str = Field(index=True)
    aliases: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
    # Fixed: enum member instead of bare string "ocr". Pydantic does not
    # validate defaults, so a raw str default would bypass the BrandSource
    # enum at runtime and fail static type checking.
    # NOTE(review): this file is generated ("Do not edit directly") — apply the
    # same fix in the modelgen template.
    source: BrandSource = BrandSource.OCR
    confirmed: bool = False
    airings: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
    total_airings: int = 0
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12. Also,
    # updated_at has no onupdate hook — the Django counterpart used
    # auto_now=True, so this field will NOT refresh on modification; confirm intended.
    created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
    updated_at: Optional[datetime] = Field(default_factory=datetime.utcnow)

View File

@@ -94,6 +94,9 @@ class StatsUpdate:
cloud_llm_calls: int = 0
processing_time_seconds: float = 0.0
estimated_cloud_cost_usd: float = 0.0
run_id: Optional[str] = None
parent_job_id: Optional[str] = None
run_type: str = "initial"
@dataclass

View File

@@ -63,6 +63,9 @@ class StatsUpdate(BaseModel):
cloud_llm_calls: int = 0
processing_time_seconds: float = 0.0
estimated_cloud_cost_usd: float = 0.0
run_id: Optional[str] = None
parent_job_id: Optional[str] = None
run_type: str = "initial"
class FrameUpdate(BaseModel):
"""Current frame being processed. SSE event: frame_update"""
@@ -113,6 +116,8 @@ class RunContext(BaseModel):
class CheckpointInfo(BaseModel):
"""Available checkpoint for a stage."""
stage: str
is_scenario: bool = False
scenario_label: str = ""
class ReplayRequest(BaseModel):
"""Request to replay pipeline from a specific stage."""

View File

@@ -5,9 +5,7 @@
*/
export type AssetStatus = "pending" | "ready" | "error";
export type JobStatus = "pending" | "processing" | "completed" | "failed" | "cancelled";
export type ChunkJobStatus = "pending" | "chunking" | "processing" | "collecting" | "completed" | "failed" | "cancelled";
export type DetectJobStatus = "pending" | "running" | "paused" | "completed" | "failed" | "cancelled";
export type JobStatus = "pending" | "running" | "paused" | "completed" | "failed" | "cancelled";
export type RunType = "initial" | "replay" | "retry";
export type BrandSource = "ocr" | "local_vlm" | "cloud_llm" | "manual";
export type SourceType = "chunk_job" | "upload" | "device" | "stream";
@@ -54,63 +52,15 @@ export interface TranscodePreset {
updated_at: string | null;
}
export interface TranscodeJob {
id: string;
source_asset_id: string;
preset_id: string | null;
preset_snapshot: Record<string, unknown>;
trim_start: number | null;
trim_end: number | null;
output_filename: string;
output_path: string | null;
output_asset_id: string | null;
status: JobStatus;
progress: number;
current_frame: number | null;
current_time: number | null;
speed: string | null;
error_message: string | null;
celery_task_id: string | null;
execution_arn: string | null;
priority: number;
created_at: string | null;
started_at: string | null;
completed_at: string | null;
}
export interface ChunkJob {
id: string;
source_asset_id: string;
chunk_duration: number;
num_workers: number;
max_retries: number;
processor_type: string;
status: ChunkJobStatus;
progress: number;
total_chunks: number;
processed_chunks: number;
failed_chunks: number;
retry_count: number;
error_message: string | null;
throughput_mbps: number | null;
elapsed_seconds: number | null;
celery_task_id: string | null;
priority: number;
created_at: string | null;
started_at: string | null;
completed_at: string | null;
}
export interface DetectJob {
export interface Job {
id: string;
source_asset_id: string;
video_path: string;
profile_name: string;
parent_job_id: string | null;
parent_id: string | null;
run_type: RunType;
replay_from_stage: string | null;
config_overrides: Record<string, unknown>;
status: DetectJobStatus;
status: JobStatus;
current_stage: string | null;
progress: number;
error_message: string | null;
@@ -125,51 +75,42 @@ export interface DetectJob {
completed_at: string | null;
}
export interface StageCheckpoint {
export interface Timeline {
id: string;
job_id: string;
stage: string;
stage_index: number;
source_asset_id: string | null;
source_video: string;
profile_name: string;
fps: number;
frames_prefix: string;
frames_manifest: Record<string, unknown>;
frames_meta: string[];
filtered_frame_sequences: number[];
boxes_by_frame: Record<string, unknown>;
text_candidates: string[];
unresolved_candidates: string[];
detections: string[];
stats: Record<string, unknown>;
config_snapshot: Record<string, unknown>;
config_overrides: Record<string, unknown>;
video_path: string;
profile_name: string;
created_at: string | null;
}
export interface KnownBrand {
export interface Checkpoint {
id: string;
timeline_id: string;
parent_id: string | null;
stage_outputs: Record<string, unknown>;
config_overrides: Record<string, unknown>;
stats: Record<string, unknown>;
is_scenario: boolean;
scenario_label: string;
created_at: string | null;
}
export interface Brand {
id: string;
canonical_name: string;
aliases: string[];
first_source: BrandSource;
total_occurrences: number;
source: BrandSource;
confirmed: boolean;
airings: string[];
total_airings: number;
created_at: string | null;
updated_at: string | null;
}
export interface SourceBrandSighting {
id: string;
source_asset_id: string;
brand_id: string;
brand_name: string;
first_seen_timestamp: number;
last_seen_timestamp: number;
occurrences: number;
detection_source: BrandSource;
avg_confidence: number;
created_at: string | null;
}
export interface CreateJobRequest {
source_asset_id: string;
preset_id: string | null;

View File

@@ -42,9 +42,9 @@ source.on<StatsUpdate>('stats_update', (e) => {
stats.value = e
if (!runContext.value && e.run_id) {
runContext.value = {
run_id: (e as any).run_id,
parent_job_id: (e as any).parent_job_id,
run_type: (e as any).run_type ?? 'initial',
run_id: e.run_id!,
parent_job_id: e.parent_job_id!,
run_type: e.run_type ?? 'initial',
}
}
})
@@ -267,8 +267,10 @@ const editorOverlays = ref<FrameOverlay[]>([])
// Boxes from edge detection (local or server)
const editorBoxes = ref<FrameBBox[]>([])
type RegionBox = { x: number; y: number; w: number; h: number; confidence: number; label: string }
function onReplayResult(result: {
regions_by_frame?: Record<string, unknown[]>
regions_by_frame?: Record<string, RegionBox[]>
debug?: Record<string, { edge_overlay_b64: string; lines_overlay_b64: string; horizontal_count: number; pair_count: number }>
frameWidth?: number
frameHeight?: number
@@ -281,7 +283,7 @@ function onReplayResult(result: {
// Merge incoming per-frame regions into accumulated store
if (result.regions_by_frame) {
for (const [seqStr, regions] of Object.entries(result.regions_by_frame)) {
allFrameRegions.value[Number(seqStr)] = regions as any[]
allFrameRegions.value[Number(seqStr)] = regions
}
}

View File

@@ -56,6 +56,9 @@ export interface StatsUpdate {
cloud_llm_calls: number;
processing_time_seconds: number;
estimated_cloud_cost_usd: number;
run_id: string | null;
parent_job_id: string | null;
run_type: string;
}
export interface FrameUpdate {

View File

@@ -53,19 +53,7 @@ export interface PreprocessingConfigOverrides {
contrast: boolean | null;
}
export interface RegionAnalysisConfigOverrides {
enabled: boolean | null;
edge_canny_low: number | null;
edge_canny_high: number | null;
edge_hough_threshold: number | null;
edge_hough_min_length: number | null;
edge_hough_max_gap: number | null;
edge_pair_max_distance: number | null;
edge_pair_min_distance: number | null;
}
export interface ConfigOverrides {
region_analysis: RegionAnalysisConfigOverrides | null;
detection: DetectionConfigOverrides | null;
ocr: OCRConfigOverrides | null;
resolver: ResolverConfigOverrides | null;