This commit is contained in:
2026-03-30 07:22:14 -03:00
parent d0707333fd
commit 4220b0418e
182 changed files with 3668 additions and 5231 deletions

View File

@@ -30,19 +30,18 @@ from .timeline import Timeline
from .checkpoint import Checkpoint
from .brand import BrandSource, Brand
from .media import AssetStatus, MediaAsset
from .presets import BUILTIN_PRESETS, TranscodePreset
from .detect import DETECT_VIEWS # noqa: F401 — discovered by modelgen generic loader
from .inference import INFERENCE_VIEWS # noqa: F401 — GPU inference server API types
from .ui_state import UI_STATE_VIEWS # noqa: F401 — UI store state types
from .stages import StageConfigField, StageIO, StageDefinition, STAGE_VIEWS # noqa: F401
from .pipeline_config import StageRef, Edge, PipelineConfig, PIPELINE_CONFIG_VIEWS # noqa: F401
from .detect_api import RunRequest, RunResponse, DETECT_API_VIEWS # noqa: F401
from .views import ChunkEvent, ChunkOutputFile, PipelineStats, WorkerEvent
from .sources import ChunkInfo, SourceJob, SourceType
from .profile import Profile
from .preset import BUILTIN_PRESETS, TranscodePreset
from .event import DETECT_VIEWS # noqa: F401
from .inference import INFERENCE_VIEWS # noqa: F401
from .ui_state import UI_STATE_VIEWS # noqa: F401
from .stage import STAGE_VIEWS # noqa: F401
from .view import ChunkEvent, ChunkOutputFile, PipelineStats, WorkerEvent
from .source import ChunkInfo, SourceJob, SourceType
# Core domain models - generates SQLModel, TypeScript
DATACLASSES = [MediaAsset, TranscodePreset,
Job, Timeline, Checkpoint, Brand]
Job, Timeline, Checkpoint, Brand, Profile]
# API request/response models
API_MODELS = [
@@ -75,55 +74,3 @@ GRPC_MESSAGES = [
ChunkStreamRequest,
ChunkPipelineEvent,
]
# Explicit public API of the schema package (PEP 8 export list).
# NOTE(review): "VIEWS" and "ENUMS" are exported but not defined in the
# visible portion of this module — confirm they still exist after the refactor.
__all__ = [
    # Models
    "MediaAsset",
    "TranscodePreset",
    "Job",
    "Timeline",
    "Checkpoint",
    # Enums
    "AssetStatus",
    "JobStatus",
    "RunType",
    "BrandSource",
    "SourceType",
    # Stages
    "StageConfigField",
    "StageIO",
    "StageDefinition",
    # API
    "CreateJobRequest",
    "UpdateAssetRequest",
    "DeleteResult",
    "ScanResult",
    "SystemStatus",
    # gRPC
    "GRPC_SERVICE",
    "JobRequest",
    "JobResponse",
    "ProgressRequest",
    "ProgressUpdate",
    "CancelRequest",
    "CancelResponse",
    "WorkerStatus",
    "Empty",
    "ChunkStreamRequest",
    "ChunkPipelineEvent",
    # Views
    "ChunkEvent",
    "WorkerEvent",
    "PipelineStats",
    "ChunkOutputFile",
    # Sources
    "SourceJob",
    "ChunkInfo",
    # For generator
    "DATACLASSES",
    "API_MODELS",
    "ENUMS",
    "VIEWS",
    "GRPC_MESSAGES",
    "BUILTIN_PRESETS",
]

View File

@@ -20,6 +20,7 @@ class Checkpoint:
id: UUID
timeline_id: UUID
job_id: Optional[UUID] = None # which job created this checkpoint
parent_id: Optional[UUID] = None # null = root checkpoint
# Stage outputs — JSONB per stage, opaque to the checkpoint layer

View File

@@ -1,31 +0,0 @@
"""
Detection API request/response models.
Source of truth for detection pipeline API shapes.
Generated to Pydantic via modelgen.
"""
from dataclasses import dataclass
@dataclass
class RunRequest:
    """Request body for launching a detection pipeline run."""
    video_path: str  # storage key
    profile_name: str = "soccer_broadcast"  # content-type profile to run under
    source_asset_id: str = ""  # originating asset id; empty when launched ad hoc — TODO confirm
    checkpoint: bool = True  # persist checkpoints during the run — presumably; verify against runner
    skip_vlm: bool = False  # skip the local-VLM escalation path — presumably; verify
    skip_cloud: bool = False  # skip the cloud-LLM escalation path — presumably; verify
    log_level: str = "INFO"  # INFO | DEBUG


@dataclass
class RunResponse:
    """Response after starting a pipeline run."""
    status: str  # launch status string
    job_id: str  # id of the created job
    video_path: str  # echo of the requested storage key


# Export list consumed by modelgen (generated to Pydantic).
DETECT_API_VIEWS = [RunRequest, RunResponse]

View File

@@ -214,6 +214,29 @@ class RetryResponse:
job_id: str
# --- API request/response ---
@dataclass
class RunRequest:
    """Request body for launching a detection pipeline run."""
    video_path: str  # storage key — presumably; the old detect_api.py annotated it so
    profile_name: str = "soccer_broadcast"  # content-type profile to run under
    source_asset_id: str = ""  # originating asset id; empty when launched ad hoc — TODO confirm
    checkpoint: bool = True  # persist checkpoints during the run — presumably; verify against runner
    skip_vlm: bool = False  # skip the local-VLM escalation path — presumably; verify
    skip_cloud: bool = False  # skip the cloud-LLM escalation path — presumably; verify
    log_level: str = "INFO"  # "INFO" or "DEBUG" — confirm accepted values


@dataclass
class RunResponse:
    """Response after starting a pipeline run."""
    status: str  # launch status string
    job_id: str  # id of the created job
    video_path: str  # echo of the requested path
# --- Export lists for modelgen ---
DETECT_VIEWS = [
@@ -234,4 +257,6 @@ DETECT_VIEWS = [
ReplayResponse,
RetryRequest,
RetryResponse,
RunRequest,
RunResponse,
]

View File

@@ -160,6 +160,39 @@ class AnalyzeRegionsDebugResponse:
pair_count: int = 0
# --- Field Segmentation ---
@dataclass
class SegmentFieldRequest:
    """Request body for field segmentation."""
    image: str  # base64 JPEG
    # HSV thresholds for the pitch-green mask (defaults mirror FieldSegmentationConfig)
    hue_low: int = 30
    hue_high: int = 85
    sat_low: int = 30
    sat_high: int = 255
    val_low: int = 30
    val_high: int = 255
    morph_kernel: int = 15  # morphology kernel size
    min_area_ratio: float = 0.05  # minimum contour area as fraction of frame


@dataclass
class SegmentFieldResponse:
    """Response from field segmentation."""
    boundary: List[List[int]] = field(default_factory=list)  # boundary points — presumably [x, y] pairs; confirm
    coverage: float = 0.0  # field coverage — presumably a 0..1 fraction; confirm
    mask_b64: str = ""  # binary mask as base64 PNG (for downstream stages)


@dataclass
class SegmentFieldDebugResponse:
    """Response from field segmentation with debug overlay."""
    boundary: List[List[int]] = field(default_factory=list)  # same shape as SegmentFieldResponse.boundary
    coverage: float = 0.0
    mask_overlay_b64: str = ""  # debug overlay image, base64 — encoding not shown here; confirm
# --- Server Config ---
@@ -193,5 +226,8 @@ INFERENCE_VIEWS = [
RegionBox,
AnalyzeRegionsResponse,
AnalyzeRegionsDebugResponse,
SegmentFieldRequest,
SegmentFieldResponse,
SegmentFieldDebugResponse,
ConfigUpdate,
]

View File

@@ -38,6 +38,9 @@ class Job:
video_path: str
profile_name: str = "soccer_broadcast"
# Timeline — set after frame extraction, or upfront for replay jobs
timeline_id: Optional[UUID] = None
# Lineage
parent_id: Optional[UUID] = None
run_type: RunType = RunType.INITIAL

View File

@@ -1,87 +0,0 @@
"""
Detection pipeline runtime models.
These are the data structures that flow between pipeline stages.
They contain runtime types (np.ndarray) so modelgen skips them —
not generated to SQLModel or TypeScript.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Literal
import numpy as np
@dataclass
class Frame:
    """One extracted video frame with its position metadata."""
    sequence: int  # frame index — ordering semantics not shown here; confirm
    chunk_id: int  # which source chunk this frame came from — presumably; verify
    timestamp: float  # position in video (seconds)
    image: np.ndarray  # decoded pixels; runtime type, so modelgen skips these models
    perceptual_hash: str = ""  # empty until computed — consumer not visible here; confirm


@dataclass
class BoundingBox:
    """A labelled detection rectangle with confidence."""
    x: int
    y: int
    w: int
    h: int
    confidence: float
    label: str


@dataclass
class TextCandidate:
    """An OCR hit: a text string found in a bbox of a frame."""
    frame: Frame
    bbox: BoundingBox
    text: str
    ocr_confidence: float


@dataclass
class BrandDetection:
    """A single brand sighting on the timeline, tagged with how it was found."""
    brand: str
    timestamp: float  # when the sighting starts (seconds) — presumably; confirm units
    duration: float  # how long it stays visible (seconds) — presumably; confirm
    confidence: float
    source: Literal["ocr", "local_vlm", "cloud_llm", "logo_match", "auxiliary"]
    bbox: BoundingBox | None = None  # None when the source has no spatial extent
    frame_ref: int | None = None  # reference to a frame — by Frame.sequence? confirm
    content_type: str = ""


@dataclass
class BrandStats:
    """Aggregate per-brand statistics over a whole video."""
    total_appearances: int = 0
    total_screen_time: float = 0.0  # seconds — presumably; confirm
    avg_confidence: float = 0.0
    first_seen: float = 0.0
    last_seen: float = 0.0


@dataclass
class PipelineStats:
    """Counters describing one pipeline run, including escalation funnel."""
    frames_extracted: int = 0
    frames_after_scene_filter: int = 0
    cv_regions_detected: int = 0
    regions_detected: int = 0
    # Escalation funnel: OCR first, then local VLM, then cloud LLM
    regions_resolved_by_ocr: int = 0
    regions_escalated_to_local_vlm: int = 0
    regions_escalated_to_cloud_llm: int = 0
    auxiliary_detections: int = 0
    cloud_llm_calls: int = 0
    processing_time_seconds: float = 0.0
    estimated_cloud_cost_usd: float = 0.0


@dataclass
class DetectionReport:
    """Final output of a run: per-brand stats, the timeline, and run stats."""
    video_source: str
    content_type: str
    duration_seconds: float
    brands: dict[str, BrandStats] = field(default_factory=dict)  # keyed by brand name — presumably; confirm
    timeline: list[BrandDetection] = field(default_factory=list)
    pipeline_stats: PipelineStats = field(default_factory=PipelineStats)

View File

@@ -1,46 +0,0 @@
"""
Pipeline composition config — source of truth for graph topology.
Defines what stages run, in what order, with what branching.
Belongs to a profile. Persisted as JSONB.
The execution strategy (serial, parallel, distributed) is separate —
the runner reads this config and flattens it into a sequence for now.
"""
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
@dataclass
class StageRef:
    """Reference to a stage in the pipeline graph."""
    name: str  # stage name (matches StageDefinition.name)
    branch: str = "trunk"  # which branch this belongs to
    execution_target: str = "local"  # local | gpu | lambda | gcp


@dataclass
class Edge:
    """Connection between stages in the graph."""
    source: str  # stage name
    target: str  # stage name
    condition: str = ""  # empty = unconditional, otherwise a routing rule key


@dataclass
class PipelineConfig:
    """
    Pipeline graph topology + routing rules.
    Holder model — stages/edges define the graph shape,
    routing_rules is a JSONB blob for decision tree logic.
    """
    name: str
    profile_name: str  # owning profile — presumably matches Profile.name; confirm
    stages: List[StageRef] = field(default_factory=list)
    edges: List[Edge] = field(default_factory=list)
    routing_rules: Dict[str, Any] = field(default_factory=dict)  # opaque to this layer


# Export list consumed by modelgen.
PIPELINE_CONFIG_VIEWS = [StageRef, Edge, PipelineConfig]

View File

@@ -0,0 +1,30 @@
"""
Profile schema — source of truth for content type profiles.
A profile has two JSONB fields:
- pipeline: graph topology (stages, edges, routing rules)
- configs: per-stage config values keyed by stage name
Validated at read time using generated contracts (StageConfigField, PipelineConfig).
"""
from dataclasses import dataclass, field
from typing import Any, Dict
from uuid import UUID
@dataclass
class Profile:
    """
    A content type profile.
    Defines what pipeline to run and how each stage is configured.
    Seed data inserted via JSON fixtures on startup.
    """
    id: UUID
    name: str
    # JSONB: graph topology (stages, edges, routing rules) — validated at read
    # time against the generated PipelineConfig contract per the module docstring
    pipeline: Dict[str, Any] = field(default_factory=dict)
    # JSONB: per-stage config values keyed by stage name
    configs: Dict[str, Any] = field(default_factory=dict)


# Export list consumed by modelgen.
PROFILE_VIEWS = [Profile]

153
core/schema/models/stage.py Normal file
View File

@@ -0,0 +1,153 @@
"""
Stage & Pipeline Schema Definitions
Source of truth for:
- Stage metadata (StageDefinition, config fields, IO)
- Stage config shapes (FrameExtractionConfig, etc.)
- Pipeline topology (StageRef, Edge, PipelineConfig)
Generates: Pydantic (detect/contract.py), TypeScript via modelgen.
"""
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
# --- Stage metadata ---
@dataclass
class StageConfigField:
    """A single tunable config parameter for the editor UI."""
    name: str
    type: str  # "float", "int", "str", "bool"
    default: Any
    description: str = ""
    min: Optional[float] = None  # numeric lower bound; None = unbounded
    max: Optional[float] = None  # numeric upper bound; None = unbounded
    options: Optional[List[str]] = None  # enumerated choices — presumably for "str" fields; confirm


@dataclass
class StageIO:
    """Declares what a stage reads and writes."""
    reads: List[str] = field(default_factory=list)
    writes: List[str] = field(default_factory=list)
    optional_reads: List[str] = field(default_factory=list)


@dataclass
class StageDefinition:
    """Complete metadata for a pipeline stage."""
    name: str
    label: str
    description: str
    category: str = "detection"
    io: StageIO = field(default_factory=StageIO)
    config_fields: List[StageConfigField] = field(default_factory=list)
    tracks_element: Optional[str] = None  # label to time-track in the editor; None = no overlay


# --- Stage config shapes ---
@dataclass
class FrameExtractionConfig:
    """Config for the frame extraction stage."""
    fps: float = 2.0
    max_frames: int = 500


@dataclass
class SceneFilterConfig:
    """Config for the perceptual-hash scene filter stage."""
    hamming_threshold: int = 8
    enabled: bool = True


@dataclass
class DetectionConfig:
    """Config for the object-detection stage."""
    model_name: str = "yolov8n.pt"
    confidence_threshold: float = 0.3
    target_classes: List[str] = field(default_factory=lambda: ["logo", "text"])


@dataclass
class OCRConfig:
    """Config for the OCR stage."""
    languages: List[str] = field(default_factory=lambda: ["en"])
    min_confidence: float = 0.5


@dataclass
class ResolverConfig:
    """Config for the brand-name resolver stage."""
    fuzzy_threshold: int = 75  # presumably a 0-100 fuzzy-match score cutoff; confirm


@dataclass
class RegionAnalysisConfig:
    """Config for the edge-based region analysis stage."""
    enabled: bool = True
    # Canny / Hough parameters for edge and line detection
    edge_canny_low: int = 50
    edge_canny_high: int = 150
    edge_hough_threshold: int = 80
    edge_hough_min_length: int = 100
    edge_hough_max_gap: int = 10
    # Pairing constraints between detected edges (pixels — presumably; confirm)
    edge_pair_max_distance: int = 200
    edge_pair_min_distance: int = 15


@dataclass
class FieldSegmentationConfig:
    """Config for the pitch/field segmentation stage."""
    enabled: bool = True
    # HSV green range for pitch detection
    hue_low: int = 30
    hue_high: int = 85
    sat_low: int = 30
    sat_high: int = 255
    val_low: int = 30
    val_high: int = 255
    # Morphology
    morph_kernel: int = 15  # kernel size for close/open
    min_area_ratio: float = 0.05  # minimum contour area as fraction of frame


# --- Pipeline topology ---
@dataclass
class StageRef:
    """Reference to a stage in the pipeline graph."""
    name: str  # matches StageDefinition.name
    branch: str = "trunk"
    execution_target: str = "local"


@dataclass
class Edge:
    """Connection between stages in the graph."""
    source: str  # stage name
    target: str  # stage name
    condition: str = ""  # empty = unconditional


@dataclass
class PipelineConfig:
    """Pipeline graph topology + routing rules."""
    name: str
    profile_name: str
    stages: List[StageRef] = field(default_factory=list)
    edges: List[Edge] = field(default_factory=list)
    routing_rules: Dict[str, Any] = field(default_factory=dict)


# --- Export for modelgen ---
STAGE_VIEWS = [
    StageConfigField,
    StageIO,
    StageDefinition,
    FrameExtractionConfig,
    SceneFilterConfig,
    DetectionConfig,
    OCRConfig,
    ResolverConfig,
    RegionAnalysisConfig,
    FieldSegmentationConfig,
    StageRef,
    Edge,
    PipelineConfig,
]

View File

@@ -1,69 +0,0 @@
"""
Stage Schema Definitions
Source of truth for pipeline stage metadata.
Generates: Pydantic, TypeScript via modelgen.
Each stage is defined by its config fields. The implementation
lives in detect/stages/<name>.py as a Stage subclass.
"""
from dataclasses import dataclass, field
from typing import Any, List, Optional
@dataclass
class StageConfigField:
    """A single tunable config parameter for the editor UI."""
    name: str
    type: str  # "float", "int", "str", "bool"
    default: Any
    description: str = ""
    min: Optional[float] = None  # numeric lower bound; None = unbounded
    max: Optional[float] = None  # numeric upper bound; None = unbounded
    options: Optional[List[str]] = None  # enumerated choices — presumably for "str" fields; confirm


@dataclass
class StageIO:
    """Declares what a stage reads and writes."""
    reads: List[str] = field(default_factory=list)
    writes: List[str] = field(default_factory=list)
    optional_reads: List[str] = field(default_factory=list)


@dataclass
class StageDefinition:
    """
    Complete metadata for a pipeline stage.
    Lives in schema as the source of truth. Each stage implementation
    references a StageDefinition. The editor, graph, and checkpoint
    system all consume this.
    """
    name: str
    label: str
    description: str
    category: str = "detection"
    io: StageIO = field(default_factory=StageIO)
    config_fields: List[StageConfigField] = field(default_factory=list)
    # The box label this stage produces that should be time-tracked in the editor.
    # Set to the label string (e.g. "edge_region") for stages that have a
    # meaningful temporal element. None means no motion tracker overlay.
    tracks_element: Optional[str] = None
    # Legacy fields — used by old registry pattern during migration.
    # New stages use Stage subclass instead.
    fn: Any = None
    serialize_fn: Any = None
    deserialize_fn: Any = None


# --- Export for modelgen ---
STAGE_VIEWS = [
    StageConfigField,
    StageIO,
    StageDefinition,
]