# Source-viewer header (page chrome, not part of the module):
# Files
# mediaproc/core/schema/models/event.py
# 2026-03-30 07:22:14 -03:00
#
# 263 lines
# 5.4 KiB
# Python

"""
Detection Pipeline Schema Definitions
Source of truth for all detection SSE events and wire-format models.
Generates: Pydantic (detect/sse_contract.py), TypeScript (ui/detection-app/src/types/sse-contract.ts)
Pipeline-internal models that never cross the wire (e.g. Frame with np.ndarray)
live in detect/models.py and are NOT generated.
"""
from dataclasses import dataclass, field
from typing import List, Literal, Optional
# --- Enums as Literal unions (wire format, not Python Enum) ---
# Values expected in GraphNode.status.
NodeStatus = Literal[
    "idle",
    "processing",
    "completed",
    "error",
]

# Values expected in Detection.source.
DetectionSource = Literal[
    "ocr",
    "local_vlm",
    "cloud_llm",
    "logo_match",
    "auxiliary",
]

# Values expected in LogEvent.level.
LogLevel = Literal[
    "DEBUG",
    "INFO",
    "WARNING",
    "ERROR",
]
# --- Nested components ---
@dataclass
class GraphNode:
    """A pipeline stage node."""

    # Node identifier (required). GraphEdge.source / GraphEdge.target are
    # also plain strings, presumably holding these ids - confirm with emitter.
    id: str
    # Expected to hold a NodeStatus value ("idle", "processing", "completed",
    # "error"); annotated as plain str.
    status: str = "idle"  # NodeStatus
    # Integer counters, both starting at 0.
    items_in: int = 0
    items_out: int = 0
@dataclass
class GraphEdge:
    """An edge between pipeline stages."""

    # Edge endpoints as strings (both required); presumably GraphNode.id
    # values - confirm against the code that builds the graph.
    source: str
    target: str
    # Defaults to 0; the unit is not defined in this module.
    throughput: int = 0
@dataclass
class BoundingBoxEvent:
    """Bounding box in SSE event payloads."""

    # Box geometry as integers (all required). Presumably origin x/y plus
    # width/height in pixels - TODO confirm coordinate convention.
    x: int
    y: int
    w: int
    h: int
    # Required score and label for the box.
    confidence: float
    label: str
    # Optional metadata; each is None when not supplied.
    resolved_brand: Optional[str] = None
    source: Optional[str] = None
    stage: Optional[str] = None
@dataclass
class BrandSummary:
    """Per-brand stats in the final report."""

    # Brand name (required); every statistic below defaults to zero.
    brand: str
    total_appearances: int = 0
    # Float fields; units are not stated in this module (presumably seconds
    # for the time-related ones - confirm).
    total_screen_time: float = 0.0
    avg_confidence: float = 0.0
    first_seen: float = 0.0
    last_seen: float = 0.0
# --- SSE event payloads ---
@dataclass
class GraphUpdate:
    """Pipeline node state transition. SSE event: graph_update"""

    # Each list uses default_factory so every instance gets its own fresh,
    # empty list rather than sharing one mutable default.
    nodes: List[GraphNode] = field(default_factory=list)
    edges: List[GraphEdge] = field(default_factory=list)
    # List of strings; presumably node ids along the active path - confirm.
    active_path: List[str] = field(default_factory=list)
@dataclass
class StatsUpdate:
    """Funnel statistics snapshot. SSE event: stats_update"""

    # Integer counters; every one defaults to 0.
    frames_extracted: int = 0
    frames_after_scene_filter: int = 0
    cv_regions_detected: int = 0
    regions_detected: int = 0
    regions_resolved_by_ocr: int = 0
    regions_escalated_to_local_vlm: int = 0
    regions_escalated_to_cloud_llm: int = 0
    cloud_llm_calls: int = 0
    # Float totals; both default to 0.0.
    processing_time_seconds: float = 0.0
    estimated_cloud_cost_usd: float = 0.0
    # Same field names as RunContext, but the two ids are optional here
    # (None when unset).
    run_id: Optional[str] = None
    parent_job_id: Optional[str] = None
    run_type: str = "initial"
@dataclass
class FrameUpdate:
    """Current frame being processed. SSE event: frame_update"""

    # Required frame reference (int) and timestamp (float; unit not stated
    # in this module).
    frame_ref: int
    timestamp: float
    # String payload; presumably a base64-encoded JPEG, judging by the
    # name - confirm against the producer.
    jpeg_b64: str
    # default_factory gives each instance its own empty list.
    boxes: List[BoundingBoxEvent] = field(default_factory=list)
@dataclass
class Detection:
    """A confirmed brand detection. SSE event: detection"""

    brand: str
    # Floats; units are not stated in this module (presumably seconds for
    # timestamp and duration - confirm).
    timestamp: float
    duration: float
    confidence: float
    # Expected to hold a DetectionSource value ("ocr", "local_vlm",
    # "cloud_llm", "logo_match", "auxiliary"); annotated as plain str.
    source: str  # DetectionSource
    content_type: str
    # Optional extras; None when not supplied.
    bbox: Optional[BoundingBoxEvent] = None
    frame_ref: Optional[int] = None
@dataclass
class LogEvent:
    """Pipeline log line. SSE event: log"""

    # Expected to hold a LogLevel value ("DEBUG", "INFO", "WARNING",
    # "ERROR"); annotated as plain str.
    level: str  # LogLevel
    stage: str
    msg: str
    # Timestamp carried as a string; its format is not specified here.
    ts: str
    # Optional; None when not supplied.
    trace_id: Optional[str] = None
@dataclass
class DetectionReportSummary:
    """Final detection report summary."""

    # Required descriptors of the processed video.
    video_source: str
    content_type: str
    duration_seconds: float
    total_detections: int = 0
    # default_factory gives each instance its own empty list.
    brands: List[BrandSummary] = field(default_factory=list)
    # Optional statistics snapshot; None when not attached.
    stats: Optional[StatsUpdate] = None
@dataclass
class JobComplete:
    """Final report when pipeline finishes. SSE event: job_complete"""

    job_id: str
    # Optional report; None when no report is attached.
    report: Optional[DetectionReportSummary] = None
@dataclass
class RunContext:
    """Run context injected into all SSE events for grouping."""

    # Both identifiers are required strings.
    run_id: str
    parent_job_id: str
    # Plain str; the documented values are listed in the trailing comment.
    run_type: str = "initial"  # initial | replay | retry
# --- Checkpoint API types ---
@dataclass
class CheckpointInfo:
    """Available checkpoint for a stage."""

    # Stage name (required).
    stage: str
    # Scenario flag and label; default to False and the empty string.
    is_scenario: bool = False
    scenario_label: str = ""
@dataclass
class ReplayRequest:
    """Request to replay pipeline from a specific stage."""

    job_id: str
    start_stage: str
    # Optional untyped dict; its key schema is not defined in this module.
    # None when no overrides are given.
    config_overrides: Optional[dict] = None
@dataclass
class ReplayResponse:
    """Result of a replay invocation."""

    # Required strings; the set of status values is not defined here.
    status: str
    job_id: str
    start_stage: str
    # Integer counts, both defaulting to 0.
    detections: int = 0
    brands_found: int = 0
@dataclass
class RetryRequest:
    """Request to queue async retry with different config."""

    job_id: str
    # Optional untyped dict; its key schema is not defined in this module.
    config_overrides: Optional[dict] = None
    # Stage to start from; defaults to "escalate_vlm".
    start_stage: str = "escalate_vlm"
    # Optional float; None when unset. Exact scheduling semantics are not
    # visible in this module.
    schedule_seconds: Optional[float] = None
@dataclass
class RetryResponse:
    """Result of queueing a retry task."""

    # All three fields are required strings; the set of status values is
    # not defined in this module.
    status: str
    task_id: str
    job_id: str
# --- API request/response ---
@dataclass
class RunRequest:
    """Request body for launching a detection pipeline run."""

    # Only required field.
    video_path: str
    profile_name: str = "soccer_broadcast"
    source_asset_id: str = ""
    # Boolean switches: checkpoint defaults to True, the skip flags to False.
    checkpoint: bool = True
    skip_vlm: bool = False
    skip_cloud: bool = False
    # Plain str defaulting to "INFO"; presumably one of the LogLevel
    # values - confirm.
    log_level: str = "INFO"
@dataclass
class RunResponse:
    """Response after starting a pipeline run."""

    # All three fields are required strings; the set of status values is
    # not defined in this module.
    status: str
    job_id: str
    video_path: str
# --- Export lists for modelgen ---
# Every dataclass defined in this module, listed in definition order.
DETECT_VIEWS = [
    # Nested components
    GraphNode,
    GraphEdge,
    BoundingBoxEvent,
    BrandSummary,
    # SSE event payloads
    GraphUpdate,
    StatsUpdate,
    FrameUpdate,
    Detection,
    LogEvent,
    DetectionReportSummary,
    JobComplete,
    RunContext,
    # Checkpoint API types
    CheckpointInfo,
    ReplayRequest,
    ReplayResponse,
    RetryRequest,
    RetryResponse,
    # API request/response
    RunRequest,
    RunResponse,
]