159 lines
3.9 KiB
Python
159 lines
3.9 KiB
Python
"""
|
|
Job, Timeline, Checkpoint, and Brand Schema Definitions
|
|
|
|
Source of truth for pipeline jobs, timelines, checkpoints, and brands.
|
|
Generates: SQLModel (core/db/models.py), TypeScript via modelgen.
|
|
"""
|
|
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime
|
|
from enum import Enum
|
|
from typing import Any, Dict, List, Optional
|
|
from uuid import UUID
|
|
|
|
|
|
class JobStatus(str, Enum):
    """
    Status values a pipeline job can hold.

    The ``str`` mixin makes every member compare equal to its lowercase
    string value (e.g. ``JobStatus.PENDING == "pending"``).
    """

    PENDING = "pending"
    RUNNING = "running"
    PAUSED = "paused"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
|
|
|
|
|
|
class RunType(str, Enum):
    """
    Kind of invocation a job represents: initial run, replay, or retry.

    The ``str`` mixin makes every member compare equal to its string value.
    """

    INITIAL = "initial"
    REPLAY = "replay"
    RETRY = "retry"
|
|
|
|
|
|
@dataclass
class Job:
    """
    One invocation of the pipeline.

    Every invocation (initial run, replay, or retry) produces its own Job.
    Jobs that belong to the same source are linked through ``parent_id``.
    """

    id: UUID

    # --- Input: what the job processes and under which profile ---
    source_asset_id: UUID
    video_path: str
    profile_name: str = "soccer_broadcast"

    # --- Lineage: how this job relates to other jobs ---
    parent_id: Optional[UUID] = None
    run_type: RunType = RunType.INITIAL
    # default_factory gives each Job its own dict instead of a shared one
    config_overrides: Dict[str, Any] = field(default_factory=dict)

    # --- Status ---
    status: JobStatus = JobStatus.PENDING
    current_stage: Optional[str] = None
    progress: float = 0.0
    error_message: Optional[str] = None

    # --- Results summary ---
    total_detections: int = 0
    brands_found: int = 0
    cloud_llm_calls: int = 0
    estimated_cost_usd: float = 0.0

    # --- Worker tracking ---
    celery_task_id: Optional[str] = None
    priority: int = 0

    # --- Timestamps (all unset until populated) ---
    created_at: Optional[datetime] = None
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
|
|
|
|
|
|
@dataclass
class Timeline:
    """
    Frame sequence extracted from a source video.

    A timeline does not depend on any stage; it exists before the first
    stage runs. The frames themselves live in MinIO as JPEGs, while this
    record holds their metadata. There is one timeline per job.
    """

    id: UUID
    source_asset_id: Optional[UUID] = None
    source_video: str = ""
    profile_name: str = ""
    fps: float = 2.0

    # Object-store key prefix for the frames — s3: timeline/{id}/frames/
    frames_prefix: str = ""
    # Maps frame sequence number → s3 key
    frames_manifest: Dict[int, str] = field(default_factory=dict)
    # Frame metadata entries; their shape is not fixed by this schema
    frames_meta: List[Dict[str, Any]] = field(default_factory=list)

    created_at: Optional[datetime] = None
|
|
|
|
|
|
@dataclass
class Checkpoint:
    """
    Snapshot of pipeline state taken on a timeline.

    Each stage serializes its output to JSON and the checkpoint stores it
    as JSONB without knowing its shape.

    ``parent_id`` links checkpoints into a tree: several children under
    one parent are different config tries from the same starting point.
    """

    id: UUID
    timeline_id: UUID
    # null = root checkpoint
    parent_id: Optional[UUID] = None

    # Per-stage outputs (JSONB), opaque to the checkpoint layer
    stage_outputs: Dict[str, Any] = field(default_factory=dict)

    # Config that produced this checkpoint
    config_overrides: Dict[str, Any] = field(default_factory=dict)

    # Pipeline state
    stats: Dict[str, Any] = field(default_factory=dict)

    # Scenario bookmark
    is_scenario: bool = False
    scenario_label: str = ""

    created_at: Optional[datetime] = None
|
|
|
|
|
|
# --- Brands ---
|
|
|
|
class BrandSource(str, Enum):
    """
    Where a brand came from (see ``Brand.source``).

    The ``str`` mixin makes every member compare equal to its string value.
    Note that the member names ``VLM`` and ``CLOUD`` map to the longer
    values ``"local_vlm"`` and ``"cloud_llm"``.
    """

    OCR = "ocr"
    VLM = "local_vlm"
    CLOUD = "cloud_llm"
    MANUAL = "manual"
|
|
|
|
|
|
@dataclass
class Brand:
    """
    A brand that was discovered by, or registered in, the system.

    Airings record where and when the brand appeared; each airing points
    at a timeline and a frame range.
    """

    id: UUID
    canonical_name: str
    aliases: List[str] = field(default_factory=list)
    # How the brand was first discovered
    source: BrandSource = BrandSource.OCR
    confirmed: bool = False

    # Airings — JSONB array of appearances, each entry shaped like
    # {timeline_id, frame_start, frame_end, confidence, source, timestamp}
    airings: List[Dict[str, Any]] = field(default_factory=list)
    total_airings: int = 0

    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
|