major refactor

This commit is contained in:
2026-03-27 06:02:58 -03:00
parent bcf6f3dc71
commit 51ce14a812
18 changed files with 351 additions and 523 deletions

View File

@@ -1,14 +1,9 @@
"""
Job, Timeline, and Checkpoint Schema Definitions
Source of truth for pipeline jobs, timelines, and checkpoints.
Generates: SQLModel (core/db/models.py), TypeScript via modelgen.
"""
"""Job schema — source of truth for pipeline jobs."""
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional
from typing import Any, Dict, Optional
from uuid import UUID
@@ -68,91 +63,3 @@ class Job:
created_at: Optional[datetime] = None
started_at: Optional[datetime] = None
completed_at: Optional[datetime] = None
@dataclass
class Timeline:
    """
    The frame sequence from a source video.

    Independent of stages — exists before any stage runs.
    Frames stored in MinIO as JPEGs, metadata here.
    One timeline per job.
    """

    id: UUID
    source_asset_id: Optional[UUID] = None
    source_video: str = ""
    profile_name: str = ""
    fps: float = 2.0
    frames_prefix: str = ""  # s3: timeline/{id}/frames/
    # Mutable containers use default_factory so each instance gets its own
    # dict/list instead of sharing one object across all timelines.
    frames_manifest: Dict[int, str] = field(default_factory=dict)  # seq → s3 key
    frames_meta: List[Dict[str, Any]] = field(default_factory=list)
    created_at: Optional[datetime] = None
@dataclass
class Checkpoint:
    """
    A snapshot of pipeline state on a timeline.

    Stage outputs stored as JSONB — each stage serializes to JSON,
    the checkpoint stores it without knowing the shape.

    parent_id forms a tree: multiple children from the same parent
    = different config tries from the same starting point.
    """

    id: UUID
    timeline_id: UUID
    parent_id: Optional[UUID] = None  # null = root checkpoint

    # Stage outputs — JSONB per stage, opaque to the checkpoint layer
    stage_outputs: Dict[str, Any] = field(default_factory=dict)

    # Config that produced this checkpoint
    config_overrides: Dict[str, Any] = field(default_factory=dict)

    # Pipeline state
    stats: Dict[str, Any] = field(default_factory=dict)

    # Scenario bookmark
    is_scenario: bool = False
    scenario_label: str = ""

    created_at: Optional[datetime] = None
# --- Brands ---
class BrandSource(str, Enum):
    """
    Source recorded for a brand (used as the type of ``Brand.source``).

    Mixes in ``str`` so each member compares equal to its string value.
    """

    OCR = "ocr"
    VLM = "local_vlm"
    CLOUD = "cloud_llm"
    MANUAL = "manual"
@dataclass
class Brand:
    """
    A brand discovered or registered in the system.

    Airings track where/when the brand appeared — each airing
    references a timeline and a frame range.
    """

    id: UUID
    canonical_name: str
    aliases: List[str] = field(default_factory=list)
    source: BrandSource = BrandSource.OCR  # how first discovered
    confirmed: bool = False

    # Airings — JSONB array of appearances
    # [{timeline_id, frame_start, frame_end, confidence, source, timestamp}]
    airings: List[Dict[str, Any]] = field(default_factory=list)
    # NOTE(review): stored separately from `airings`; nothing in this class
    # keeps the two in sync — presumably maintained by the caller, confirm.
    total_airings: int = 0

    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None