schema clean up and refactor

2026-03-26 05:14:33 -03:00
parent 08c58a6a9d
commit d58a90157a
17 changed files with 930 additions and 287 deletions
--- a/detect/models.py
+++ b/detect/models.py
@@ -1,86 +1,26 @@
 """
-Core domain models for the detection pipeline.
+Re-export pipeline runtime models from core/schema/models/detect_pipeline.py.

-These are pipeline-internal models — the data structures that flow
-between LangGraph nodes. SSE event payloads (sse_contract.py) are
-derived from these when emitting to the UI.
+All models are defined in core/schema/ — this module exists for backward
+compatibility so existing imports (from detect.models import Frame) keep working.
 """

-from __future__ import annotations
+from core.schema.models.detect_pipeline import (
+    BoundingBox,
+    BrandDetection,
+    BrandStats,
+    DetectionReport,
+    Frame,
+    PipelineStats,
+    TextCandidate,
+)

-from dataclasses import dataclass, field
-from typing import Literal
-
-import numpy as np
-
-
-@dataclass
-class Frame:
-    sequence: int
-    chunk_id: int
-    timestamp: float  # position in video (seconds)
-    image: np.ndarray
-    perceptual_hash: str = ""
-
-
-@dataclass
-class BoundingBox:
-    x: int
-    y: int
-    w: int
-    h: int
-    confidence: float
-    label: str
-
-
-@dataclass
-class TextCandidate:
-    frame: Frame
-    bbox: BoundingBox
-    text: str
-    ocr_confidence: float
-
-
-@dataclass
-class BrandDetection:
-    brand: str
-    timestamp: float
-    duration: float
-    confidence: float
-    source: Literal["ocr", "local_vlm", "cloud_llm", "logo_match", "auxiliary"]
-    bbox: BoundingBox | None = None
-    frame_ref: int | None = None
-    content_type: str = ""
-
-
-@dataclass
-class BrandStats:
-    total_appearances: int = 0
-    total_screen_time: float = 0.0
-    avg_confidence: float = 0.0
-    first_seen: float = 0.0
-    last_seen: float = 0.0
-
-
-@dataclass
-class PipelineStats:
-    frames_extracted: int = 0
-    frames_after_scene_filter: int = 0
-    regions_detected: int = 0
-    regions_resolved_by_ocr: int = 0
-    regions_escalated_to_local_vlm: int = 0
-    regions_escalated_to_cloud_llm: int = 0
-    auxiliary_detections: int = 0
-    cloud_llm_calls: int = 0
-    processing_time_seconds: float = 0.0
-    estimated_cloud_cost_usd: float = 0.0
-
-
-@dataclass
-class DetectionReport:
-    video_source: str
-    content_type: str
-    duration_seconds: float
-    brands: dict[str, BrandStats] = field(default_factory=dict)
-    timeline: list[BrandDetection] = field(default_factory=list)
-    pipeline_stats: PipelineStats = field(default_factory=PipelineStats)
+__all__ = [  # public re-exports; mirrors the import list above
+    "BoundingBox",
+    "BrandDetection",
+    "BrandStats",
+    "DetectionReport",
+    "Frame",
+    "PipelineStats",
+    "TextCandidate",
+]