""" Job, Timeline, and Checkpoint Schema Definitions Source of truth for pipeline jobs, timelines, and checkpoints. Generates: SQLModel (core/db/models.py), TypeScript via modelgen. """ from dataclasses import dataclass, field from datetime import datetime from enum import Enum from typing import Any, Dict, List, Optional from uuid import UUID class JobStatus(str, Enum): PENDING = "pending" RUNNING = "running" PAUSED = "paused" COMPLETED = "completed" FAILED = "failed" CANCELLED = "cancelled" class RunType(str, Enum): INITIAL = "initial" REPLAY = "replay" RETRY = "retry" @dataclass class Job: """ A pipeline job. Each invocation (initial run, replay, retry) creates a Job. Jobs for the same source are linked via parent_id. """ id: UUID # Input source_asset_id: UUID video_path: str profile_name: str = "soccer_broadcast" # Lineage parent_id: Optional[UUID] = None run_type: RunType = RunType.INITIAL config_overrides: Dict[str, Any] = field(default_factory=dict) # Status status: JobStatus = JobStatus.PENDING current_stage: Optional[str] = None progress: float = 0.0 error_message: Optional[str] = None # Results summary total_detections: int = 0 brands_found: int = 0 cloud_llm_calls: int = 0 estimated_cost_usd: float = 0.0 # Worker tracking celery_task_id: Optional[str] = None priority: int = 0 # Timestamps created_at: Optional[datetime] = None started_at: Optional[datetime] = None completed_at: Optional[datetime] = None @dataclass class Timeline: """ The frame sequence from a source video. Independent of stages — exists before any stage runs. Frames stored in MinIO as JPEGs, metadata here. One timeline per job. """ id: UUID source_asset_id: Optional[UUID] = None source_video: str = "" profile_name: str = "" fps: float = 2.0 frames_prefix: str = "" # s3: timeline/{id}/frames/ frames_manifest: Dict[int, str] = field(default_factory=dict) # seq → s3 key frames_meta: List[Dict[str, Any]] = field(default_factory=list) created_at: Optional[datetime] = None @dataclass class Checkpoint: """ A snapshot of pipeline state on a timeline. Stage outputs stored as JSONB — each stage serializes to JSON, the checkpoint stores it without knowing the shape. parent_id forms a tree: multiple children from the same parent = different config tries from the same starting point. """ id: UUID timeline_id: UUID parent_id: Optional[UUID] = None # null = root checkpoint # Stage outputs — JSONB per stage, opaque to the checkpoint layer stage_outputs: Dict[str, Any] = field(default_factory=dict) # Config that produced this checkpoint config_overrides: Dict[str, Any] = field(default_factory=dict) # Pipeline state stats: Dict[str, Any] = field(default_factory=dict) # Scenario bookmark is_scenario: bool = False scenario_label: str = "" created_at: Optional[datetime] = None # --- Brands --- class BrandSource(str, Enum): OCR = "ocr" VLM = "local_vlm" CLOUD = "cloud_llm" MANUAL = "manual" @dataclass class Brand: """ A brand discovered or registered in the system. Airings track where/when the brand appeared — each airing references a timeline and a frame range. """ id: UUID canonical_name: str aliases: List[str] = field(default_factory=list) source: BrandSource = BrandSource.OCR # how first discovered confirmed: bool = False # Airings — JSONB array of appearances # [{timeline_id, frame_start, frame_end, confidence, source, timestamp}] airings: List[Dict[str, Any]] = field(default_factory=list) total_airings: int = 0 created_at: Optional[datetime] = None updated_at: Optional[datetime] = None