use sqlalchemy pattern

This commit is contained in:
2026-03-27 05:19:45 -03:00
parent 291ac8dd40
commit bcf6f3dc71
14 changed files with 451 additions and 669 deletions

158
core/schema/models/job.py Normal file
View File

@@ -0,0 +1,158 @@
"""
Job, Timeline, Checkpoint, and Brand Schema Definitions
Source of truth for pipeline jobs, timelines, checkpoints, and brands.
Generates: SQLModel (core/db/models.py), TypeScript via modelgen.
"""
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional
from uuid import UUID
class JobStatus(str, Enum):
    """
    Status values for a pipeline job (used by Job.status).

    Mixes in ``str`` so every member is also a plain string equal to
    its value.
    """

    PENDING = "pending"
    RUNNING = "running"
    PAUSED = "paused"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
class RunType(str, Enum):
    """
    Kind of invocation that created a job (used by Job.run_type).

    Mixes in ``str`` so every member is also a plain string equal to
    its value.
    """

    INITIAL = "initial"
    REPLAY = "replay"
    RETRY = "retry"
@dataclass
class Job:
    """
    A pipeline job.

    Each invocation (initial run, replay, retry) creates a Job.
    Jobs for the same source are linked via parent_id.
    """

    id: UUID

    # Input
    source_asset_id: UUID
    video_path: str
    profile_name: str = "soccer_broadcast"

    # Lineage
    parent_id: Optional[UUID] = None  # None when the job has no parent
    run_type: RunType = RunType.INITIAL
    # Fresh dict per instance (default_factory), never shared between jobs.
    config_overrides: Dict[str, Any] = field(default_factory=dict)

    # Status
    status: JobStatus = JobStatus.PENDING
    current_stage: Optional[str] = None
    # NOTE(review): the scale of progress (0-1 vs 0-100) is not defined
    # in this module; confirm against the code that writes it.
    progress: float = 0.0
    error_message: Optional[str] = None

    # Results summary
    total_detections: int = 0
    brands_found: int = 0
    cloud_llm_calls: int = 0
    estimated_cost_usd: float = 0.0

    # Worker tracking
    celery_task_id: Optional[str] = None
    # NOTE(review): ordering semantics of priority (higher vs lower first)
    # are not defined in this module; confirm.
    priority: int = 0

    # Timestamps (None until set by whoever persists/updates the job)
    created_at: Optional[datetime] = None
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
@dataclass
class Timeline:
    """
    The frame sequence from a source video.

    Independent of stages — exists before any stage runs.
    Frames stored in MinIO as JPEGs, metadata here.
    One timeline per job.
    """

    id: UUID
    source_asset_id: Optional[UUID] = None
    source_video: str = ""
    profile_name: str = ""
    fps: float = 2.0
    frames_prefix: str = ""  # s3: timeline/{id}/frames/
    # seq → s3 key; fresh dict per instance (default_factory)
    frames_manifest: Dict[int, str] = field(default_factory=dict)
    # List of per-frame metadata dicts; the dict shape is not defined here.
    frames_meta: List[Dict[str, Any]] = field(default_factory=list)
    created_at: Optional[datetime] = None
@dataclass
class Checkpoint:
    """
    A snapshot of pipeline state on a timeline.

    Stage outputs stored as JSONB — each stage serializes to JSON,
    the checkpoint stores it without knowing the shape.

    parent_id forms a tree: multiple children from the same parent
    = different config tries from the same starting point.
    """

    id: UUID
    timeline_id: UUID
    parent_id: Optional[UUID] = None  # null = root checkpoint

    # Stage outputs — JSONB per stage, opaque to the checkpoint layer
    stage_outputs: Dict[str, Any] = field(default_factory=dict)

    # Config that produced this checkpoint
    config_overrides: Dict[str, Any] = field(default_factory=dict)

    # Pipeline state
    stats: Dict[str, Any] = field(default_factory=dict)

    # Scenario bookmark
    is_scenario: bool = False
    scenario_label: str = ""

    created_at: Optional[datetime] = None
# --- Brands ---
class BrandSource(str, Enum):
    """
    Where a brand came from (used by Brand.source).

    Mixes in ``str`` so every member is also a plain string equal to
    its value. Note that VLM and CLOUD carry longer values
    ("local_vlm", "cloud_llm") than their member names.
    """

    OCR = "ocr"
    VLM = "local_vlm"
    CLOUD = "cloud_llm"
    MANUAL = "manual"
@dataclass
class Brand:
    """
    A brand discovered or registered in the system.

    Airings track where/when the brand appeared — each airing
    references a timeline and a frame range.
    """

    id: UUID
    canonical_name: str
    aliases: List[str] = field(default_factory=list)
    source: BrandSource = BrandSource.OCR  # how first discovered
    confirmed: bool = False

    # Airings — JSONB array of appearances
    # [{timeline_id, frame_start, frame_end, confidence, source, timestamp}]
    airings: List[Dict[str, Any]] = field(default_factory=list)
    # NOTE(review): nothing in this class keeps total_airings in sync with
    # len(airings); presumably maintained by the writer — confirm.
    total_airings: int = 0

    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None