phase 4
This commit is contained in:
95
core/detect/models.py
Normal file
95
core/detect/models.py
Normal file
@@ -0,0 +1,95 @@
|
||||
"""
|
||||
Detection pipeline runtime models.
|
||||
|
||||
These are the data structures that flow between pipeline stages.
|
||||
They contain runtime types (np.ndarray) so they live here, not in
|
||||
core/schema/models/ (which is for modelgen source of truth).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Literal
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
@dataclass
class Frame:
    """A single frame sampled from the source video, as it flows through the pipeline.

    Carries the decoded pixels plus enough bookkeeping (sequence, chunk,
    timestamp) to map any downstream detection back to a point in the video.
    """

    sequence: int  # frame index — presumably monotonically increasing per run; confirm at extractor
    chunk_id: int  # id of the video chunk this frame was extracted from
    timestamp: float  # position in video (seconds)
    image: np.ndarray  # decoded pixel data; channel order/layout not fixed here — TODO confirm at extractor
    perceptual_hash: str = ""  # empty until computed; NOTE(review): presumably set by the scene/dedup filter
|
||||
|
||||
|
||||
@dataclass
class BoundingBox:
    """Axis-aligned rectangle for a detected region, with detector metadata."""

    x: int  # NOTE(review): presumably top-left corner in pixel coords — confirm against detector
    y: int
    w: int  # region width in pixels
    h: int  # region height in pixels
    confidence: float  # detector confidence for this region
    label: str  # detector-assigned class label for the region
|
||||
|
||||
|
||||
@dataclass
class TextCandidate:
    """A region of a frame where OCR produced text, pending brand resolution."""

    frame: Frame  # frame the text was found in (kept whole so later stages can re-crop)
    bbox: BoundingBox  # where in the frame the text sits
    text: str  # raw OCR output for the region
    ocr_confidence: float  # OCR engine's confidence in `text`
|
||||
|
||||
|
||||
@dataclass
class BrandDetection:
    """One confirmed brand appearance on the video timeline."""

    brand: str  # resolved brand name
    timestamp: float  # when the appearance starts (seconds into the video)
    duration: float  # how long the appearance lasts — presumably seconds, matching `timestamp`
    confidence: float  # confidence of the stage that resolved the brand
    # Which pipeline stage produced this detection.
    source: Literal["ocr", "local_vlm", "cloud_llm", "logo_match", "auxiliary"]
    bbox: BoundingBox | None = None  # region in the frame, when a spatial location is known
    frame_ref: int | None = None  # NOTE(review): presumably a Frame.sequence back-reference — confirm
    content_type: str = ""  # optional content-type tag; empty when not classified
|
||||
|
||||
|
||||
@dataclass
class BrandStats:
    """Aggregated statistics for a single brand across the whole video."""

    total_appearances: int = 0  # number of distinct detections for the brand
    total_screen_time: float = 0.0  # summed duration — presumably seconds; confirm with aggregator
    avg_confidence: float = 0.0  # mean confidence across the brand's detections
    first_seen: float = 0.0  # timestamp of the earliest detection (0.0 until populated)
    last_seen: float = 0.0  # timestamp of the latest detection (0.0 until populated)
|
||||
|
||||
|
||||
@dataclass
class PipelineStats:
    """Counters and cost accounting for one end-to-end pipeline run.

    Every field defaults to zero so a fresh instance can be incremented
    in place as stages complete.
    """

    frames_extracted: int = 0  # frames pulled from the video before any filtering
    frames_after_scene_filter: int = 0  # frames surviving the scene-change/dedup filter
    cv_regions_detected: int = 0  # regions found by the CV detector stage
    regions_detected: int = 0  # total candidate regions entering resolution
    regions_resolved_by_ocr: int = 0  # resolved without escalation
    regions_escalated_to_local_vlm: int = 0  # OCR inconclusive -> local VLM
    regions_escalated_to_cloud_llm: int = 0  # local VLM inconclusive -> cloud LLM
    auxiliary_detections: int = 0  # detections from the auxiliary source (see BrandDetection.source)
    cloud_llm_calls: int = 0  # billable cloud calls actually made
    processing_time_seconds: float = 0.0  # wall-clock time for the run
    estimated_cloud_cost_usd: float = 0.0  # estimated spend for cloud_llm_calls
|
||||
|
||||
|
||||
@dataclass
class DetectionReport:
    """Final output of the detection pipeline for one video."""

    video_source: str  # identifier of the analyzed video — presumably a path or URL; confirm at caller
    content_type: str  # classified content type of the video
    duration_seconds: float  # total video length in seconds
    brands: dict[str, BrandStats] = field(default_factory=dict)  # per-brand aggregates, keyed by brand name
    timeline: list[BrandDetection] = field(default_factory=list)  # individual appearances in timeline order — TODO confirm ordering
    pipeline_stats: PipelineStats = field(default_factory=PipelineStats)  # run-level counters/costs
|
||||
|
||||
|
||||
@dataclass
class CropContext:
    """Runtime type — holds image bytes for VLM prompts.

    Bundles a cropped region with textual hints so a VLM prompt can be
    built from it without going back to the source frame.
    """

    image: bytes  # encoded crop bytes; encoding (PNG/JPEG/raw) not fixed here — TODO confirm at producer
    surrounding_text: str = ""  # OCR text found near the crop, if any
    position_hint: str = ""  # free-form description of where the crop sits in the frame
|
||||
Reference in New Issue
Block a user