phase 10

2026-03-26 04:24:32 -03:00
parent 08b67f2bb7
commit 08c58a6a9d
43 changed files with 2627 additions and 252 deletions
--- a/core/schema/models/__init__.py
+++ b/core/schema/models/__init__.py
@@ -26,13 +26,18 @@ from .grpc import (
    WorkerStatus,
 )
 from .jobs import ChunkJob, ChunkJobStatus, JobStatus, TranscodeJob
+from .detect_jobs import (
+    DetectJob, DetectJobStatus, RunType, StageCheckpoint,
+    BrandSource, KnownBrand, SourceBrandSighting,
+)
 from .media import AssetStatus, MediaAsset
 from .presets import BUILTIN_PRESETS, TranscodePreset
 from .detect import DETECT_VIEWS  # noqa: F401 — discovered by modelgen generic loader
 from .views import ChunkEvent, ChunkOutputFile, PipelineStats, WorkerEvent

 # Core domain models - generates Django, Pydantic, TypeScript
-DATACLASSES = [MediaAsset, TranscodePreset, TranscodeJob, ChunkJob]
+DATACLASSES = [MediaAsset, TranscodePreset, TranscodeJob, ChunkJob,
+               DetectJob, StageCheckpoint, KnownBrand, SourceBrandSighting]  # this row: imported from .detect_jobs

 # API request/response models - generates TypeScript only (no Django)
 # WorkerStatus from grpc.py is reused here
@@ -46,7 +51,7 @@ API_MODELS = [
 ]

 # Status enums - included in generated code
-ENUMS = [AssetStatus, JobStatus, ChunkJobStatus]
+ENUMS = [AssetStatus, JobStatus, ChunkJobStatus, DetectJobStatus, RunType, BrandSource]  # last three: imported from .detect_jobs

 # View/event models - generates TypeScript for UI consumption
 VIEWS = [ChunkEvent, WorkerEvent, PipelineStats, ChunkOutputFile]
--- a/core/schema/models/detect.py
+++ b/core/schema/models/detect.py
@@ -149,6 +149,64 @@ class JobComplete:
    report: Optional[DetectionReportSummary] = None


+@dataclass
+class RunContext:
+    """Run context injected into all SSE events for grouping."""
+
+    run_id: str
+    parent_job_id: str  # plain str here; DetectJob.parent_job_id is Optional[UUID]
+    run_type: str = "initial"  # initial | replay | retry (same strings as the RunType enum values)
+
+
+# --- Checkpoint API types ---
+
+
+@dataclass
+class CheckpointInfo:
+    """Available checkpoint for a stage."""
+
+    stage: str  # NOTE(review): presumably the same stage name stored in StageCheckpoint.stage — confirm
+
+
+@dataclass
+class ReplayRequest:
+    """Request to replay pipeline from a specific stage."""
+
+    job_id: str  # str here, whereas DetectJob.id is a UUID
+    start_stage: str  # required: the stage the replay starts from
+    config_overrides: Optional[dict] = None  # None by default; DetectJob.config_overrides defaults to {} instead
+
+
+@dataclass
+class ReplayResponse:
+    """Result of a replay invocation."""
+
+    status: str  # NOTE(review): allowed values are not defined by this type — confirm against the endpoint
+    job_id: str
+    start_stage: str
+    detections: int = 0  # NOTE(review): presumably mirrors DetectJob.total_detections — confirm
+    brands_found: int = 0  # same field name and default as DetectJob.brands_found
+
+
+@dataclass
+class RetryRequest:
+    """Request to queue async retry with different config."""
+
+    job_id: str
+    config_overrides: Optional[dict] = None
+    start_stage: str = "escalate_vlm"  # unlike ReplayRequest.start_stage, this one has a default
+    schedule_seconds: Optional[float] = None  # NOTE(review): presumably a delay before the retry runs; meaning of None not shown — confirm
+
+
+@dataclass
+class RetryResponse:
+    """Result of queueing a retry task."""
+
+    status: str
+    task_id: str  # NOTE(review): presumably the queued task's id (cf. DetectJob.celery_task_id) — confirm
+    job_id: str
+
+
 # --- Export lists for modelgen ---

 DETECT_VIEWS = [
@@ -163,4 +221,10 @@ DETECT_VIEWS = [
    LogEvent,
    DetectionReportSummary,
    JobComplete,
+    RunContext,
+    CheckpointInfo,
+    ReplayRequest,
+    ReplayResponse,
+    RetryRequest,
+    RetryResponse,
 ]
--- a/core/schema/models/detect_jobs.py
+++ b/core/schema/models/detect_jobs.py
@@ -0,0 +1,162 @@
+"""
+Detection Job and Checkpoint Schema Definitions
+
+Source of truth for detection pipeline job tracking and stage checkpoints.
+Follows the TranscodeJob/ChunkJob pattern.
+"""
+
+from dataclasses import dataclass, field
+from datetime import datetime
+from enum import Enum
+from typing import Any, Dict, List, Optional
+from uuid import UUID
+
+
+class DetectJobStatus(str, Enum):  # str mixin: each member compares equal to its string value
+    PENDING = "pending"  # default for DetectJob.status
+    RUNNING = "running"
+    PAUSED = "paused"
+    COMPLETED = "completed"
+    FAILED = "failed"
+    CANCELLED = "cancelled"
+
+
+class RunType(str, Enum):  # str mixin: each member compares equal to its string value
+    INITIAL = "initial"  # default for DetectJob.run_type
+    REPLAY = "replay"
+    RETRY = "retry"
+
+
+@dataclass
+class DetectJob:
+    """
+    A detection pipeline job.
+
+    Each invocation of the pipeline (initial run, replay, retry) creates a DetectJob.
+    Jobs for the same source video are linked via parent_job_id.
+    """
+
+    id: UUID
+
+    # Input
+    source_asset_id: UUID  # NOTE(review): presumably a MediaAsset id — confirm
+    video_path: str  # required (no default)
+    profile_name: str = "soccer_broadcast"  # default profile name
+
+    # Run lineage
+    parent_job_id: Optional[UUID] = None  # links all runs for the same source
+    run_type: RunType = RunType.INITIAL
+    replay_from_stage: Optional[str] = None  # None for initial runs
+    config_overrides: Dict[str, Any] = field(default_factory=dict)  # fresh empty dict per instance
+
+    # Status
+    status: DetectJobStatus = DetectJobStatus.PENDING
+    current_stage: Optional[str] = None
+    progress: float = 0.0  # NOTE(review): scale (0-1 vs 0-100) is not shown here — confirm
+    error_message: Optional[str] = None
+
+    # Results summary
+    total_detections: int = 0
+    brands_found: int = 0
+    cloud_llm_calls: int = 0
+    estimated_cost_usd: float = 0.0
+
+    # Worker tracking
+    celery_task_id: Optional[str] = None
+    priority: int = 0  # NOTE(review): ordering direction (higher vs lower first) is not shown here — confirm
+
+    # Timestamps
+    created_at: Optional[datetime] = None  # NOTE(review): naive vs tz-aware is not specified — confirm
+    started_at: Optional[datetime] = None
+    completed_at: Optional[datetime] = None
+
+
+@dataclass
+class StageCheckpoint:
+    """
+    A checkpoint saved after a pipeline stage completes.
+
+    Binary data (frame images, crops) goes to S3/MinIO.
+    Everything else (structured state) lives here in Postgres.
+    """
+
+    id: UUID
+    job_id: UUID
+    stage: str
+    stage_index: int  # position in NODES list (0-7)
+
+    # S3 reference for binary data only
+    frames_prefix: str = ""  # s3 prefix: checkpoints/{job_id}/frames/
+
+    # Frame metadata (non-image fields)
+    frames_manifest: Dict[int, str] = field(default_factory=dict)  # seq → s3 key; NOTE(review): int keys turn into str on a JSON round-trip (boxes_by_frame is str-keyed) — confirm
+    frames_meta: List[Dict[str, Any]] = field(default_factory=list)  # sequence, chunk_id, timestamp, hash
+    filtered_frame_sequences: List[int] = field(default_factory=list)
+
+    # Detection state (full structured data, not just summaries)
+    boxes_by_frame: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict)  # str-keyed, unlike frames_manifest
+    text_candidates: List[Dict[str, Any]] = field(default_factory=list)
+    unresolved_candidates: List[Dict[str, Any]] = field(default_factory=list)
+    detections: List[Dict[str, Any]] = field(default_factory=list)
+
+    # Pipeline state
+    stats: Dict[str, Any] = field(default_factory=dict)
+    config_snapshot: Dict[str, Any] = field(default_factory=dict)
+    config_overrides: Dict[str, Any] = field(default_factory=dict)
+
+    # Input refs (for replay)
+    video_path: str = ""  # defaults to ""; DetectJob.video_path is required
+    profile_name: str = ""  # defaults to ""; DetectJob.profile_name defaults to "soccer_broadcast"
+
+    # Timestamps
+    created_at: Optional[datetime] = None
+
+
+class BrandSource(str, Enum):  # str mixin: each member compares equal to its string value
+    """How a brand was first identified."""
+    OCR = "ocr"  # default for KnownBrand.first_source and SourceBrandSighting.detection_source
+    VLM = "local_vlm"
+    CLOUD = "cloud_llm"
+    MANUAL = "manual"  # user-added via UI
+
+
+@dataclass
+class KnownBrand:
+    """
+    A brand discovered or registered in the system.
+
+    Global — not per-source. Accumulates across all pipeline runs.
+    Aliases enable fuzzy matching without re-escalating to VLM.
+    """
+
+    id: UUID
+    canonical_name: str              # normalized display name
+    aliases: List[str] = field(default_factory=list)  # known spellings/variants
+    first_source: BrandSource = BrandSource.OCR
+    total_occurrences: int = 0  # NOTE(review): presumably accumulated across runs, per the class docstring — confirm
+    confirmed: bool = False          # manually confirmed by user
+
+    created_at: Optional[datetime] = None
+    updated_at: Optional[datetime] = None
+
+
+@dataclass
+class SourceBrandSighting:
+    """
+    A brand seen in a specific source (video/asset).
+
+    Per-source session cache — avoids re-escalating the same brand
+    on subsequent frames or re-runs of the same source.
+    """
+
+    id: UUID
+    source_asset_id: UUID            # the video this sighting belongs to
+    brand_id: UUID                   # FK to KnownBrand
+    brand_name: str                  # denormalized for fast lookup
+    first_seen_timestamp: float = 0.0  # NOTE(review): unit/origin (seconds into the video?) is not shown here — confirm
+    last_seen_timestamp: float = 0.0
+    occurrences: int = 0
+    detection_source: BrandSource = BrandSource.OCR
+    avg_confidence: float = 0.0  # NOTE(review): value range is not shown here — confirm
+
+    created_at: Optional[datetime] = None