This commit is contained in:
2026-03-30 07:22:14 -03:00
parent d0707333fd
commit 4220b0418e
182 changed files with 3668 additions and 5231 deletions

View File

@@ -30,19 +30,18 @@ from .timeline import Timeline
from .checkpoint import Checkpoint
from .brand import BrandSource, Brand
from .media import AssetStatus, MediaAsset
from .presets import BUILTIN_PRESETS, TranscodePreset
from .detect import DETECT_VIEWS # noqa: F401 — discovered by modelgen generic loader
from .inference import INFERENCE_VIEWS # noqa: F401 — GPU inference server API types
from .ui_state import UI_STATE_VIEWS # noqa: F401 — UI store state types
from .stages import StageConfigField, StageIO, StageDefinition, STAGE_VIEWS # noqa: F401
from .pipeline_config import StageRef, Edge, PipelineConfig, PIPELINE_CONFIG_VIEWS # noqa: F401
from .detect_api import RunRequest, RunResponse, DETECT_API_VIEWS # noqa: F401
from .views import ChunkEvent, ChunkOutputFile, PipelineStats, WorkerEvent
from .sources import ChunkInfo, SourceJob, SourceType
from .profile import Profile
from .preset import BUILTIN_PRESETS, TranscodePreset
from .event import DETECT_VIEWS # noqa: F401
from .inference import INFERENCE_VIEWS # noqa: F401
from .ui_state import UI_STATE_VIEWS # noqa: F401
from .stage import STAGE_VIEWS # noqa: F401
from .view import ChunkEvent, ChunkOutputFile, PipelineStats, WorkerEvent
from .source import ChunkInfo, SourceJob, SourceType
# Core domain models - generates SQLModel, TypeScript
DATACLASSES = [MediaAsset, TranscodePreset,
Job, Timeline, Checkpoint, Brand]
Job, Timeline, Checkpoint, Brand, Profile]
# API request/response models
API_MODELS = [
@@ -75,55 +74,3 @@ GRPC_MESSAGES = [
ChunkStreamRequest,
ChunkPipelineEvent,
]
# Explicit public API of the schema package (PEP 8 export list).
# NOTE(review): "VIEWS" and "ENUMS" are exported but not defined in the
# visible portion of this module — confirm they still exist after the refactor.
__all__ = [
    # Models
    "MediaAsset",
    "TranscodePreset",
    "Job",
    "Timeline",
    "Checkpoint",
    # Enums
    "AssetStatus",
    "JobStatus",
    "RunType",
    "BrandSource",
    "SourceType",
    # Stages
    "StageConfigField",
    "StageIO",
    "StageDefinition",
    # API
    "CreateJobRequest",
    "UpdateAssetRequest",
    "DeleteResult",
    "ScanResult",
    "SystemStatus",
    # gRPC
    "GRPC_SERVICE",
    "JobRequest",
    "JobResponse",
    "ProgressRequest",
    "ProgressUpdate",
    "CancelRequest",
    "CancelResponse",
    "WorkerStatus",
    "Empty",
    "ChunkStreamRequest",
    "ChunkPipelineEvent",
    # Views
    "ChunkEvent",
    "WorkerEvent",
    "PipelineStats",
    "ChunkOutputFile",
    # Sources
    "SourceJob",
    "ChunkInfo",
    # For generator
    "DATACLASSES",
    "API_MODELS",
    "ENUMS",
    "VIEWS",
    "GRPC_MESSAGES",
    "BUILTIN_PRESETS",
]

View File

@@ -20,6 +20,7 @@ class Checkpoint:
id: UUID
timeline_id: UUID
job_id: Optional[UUID] = None # which job created this checkpoint
parent_id: Optional[UUID] = None # null = root checkpoint
# Stage outputs — JSONB per stage, opaque to the checkpoint layer

View File

@@ -1,31 +0,0 @@
"""
Detection API request/response models.
Source of truth for detection pipeline API shapes.
Generated to Pydantic via modelgen.
"""
from dataclasses import dataclass
@dataclass
class RunRequest:
    """Request body for launching a detection pipeline run."""
    video_path: str  # storage key
    profile_name: str = "soccer_broadcast"  # content-type profile to run under
    source_asset_id: str = ""  # originating asset id; empty when launched ad hoc — TODO confirm
    checkpoint: bool = True  # persist checkpoints during the run — presumably; verify against runner
    skip_vlm: bool = False  # skip the local-VLM escalation path — presumably; verify
    skip_cloud: bool = False  # skip the cloud-LLM escalation path — presumably; verify
    log_level: str = "INFO"  # INFO | DEBUG


@dataclass
class RunResponse:
    """Response after starting a pipeline run."""
    status: str  # launch status string
    job_id: str  # id of the created job
    video_path: str  # echo of the requested storage key


# Export list consumed by modelgen (generated to Pydantic).
DETECT_API_VIEWS = [RunRequest, RunResponse]

View File

@@ -214,6 +214,29 @@ class RetryResponse:
job_id: str
# --- API request/response ---
@dataclass
class RunRequest:
    """Request body for launching a detection pipeline run."""
    video_path: str  # storage key — presumably; the old detect_api.py annotated it so
    profile_name: str = "soccer_broadcast"  # content-type profile to run under
    source_asset_id: str = ""  # originating asset id; empty when launched ad hoc — TODO confirm
    checkpoint: bool = True  # persist checkpoints during the run — presumably; verify against runner
    skip_vlm: bool = False  # skip the local-VLM escalation path — presumably; verify
    skip_cloud: bool = False  # skip the cloud-LLM escalation path — presumably; verify
    log_level: str = "INFO"  # "INFO" or "DEBUG" — confirm accepted values


@dataclass
class RunResponse:
    """Response after starting a pipeline run."""
    status: str  # launch status string
    job_id: str  # id of the created job
    video_path: str  # echo of the requested path
# --- Export lists for modelgen ---
DETECT_VIEWS = [
@@ -234,4 +257,6 @@ DETECT_VIEWS = [
ReplayResponse,
RetryRequest,
RetryResponse,
RunRequest,
RunResponse,
]

View File

@@ -160,6 +160,39 @@ class AnalyzeRegionsDebugResponse:
pair_count: int = 0
# --- Field Segmentation ---
@dataclass
class SegmentFieldRequest:
    """Request body for field segmentation."""
    image: str  # base64 JPEG
    # HSV thresholds for the pitch-green mask (defaults mirror FieldSegmentationConfig)
    hue_low: int = 30
    hue_high: int = 85
    sat_low: int = 30
    sat_high: int = 255
    val_low: int = 30
    val_high: int = 255
    morph_kernel: int = 15  # morphology kernel size
    min_area_ratio: float = 0.05  # minimum contour area as fraction of frame


@dataclass
class SegmentFieldResponse:
    """Response from field segmentation."""
    boundary: List[List[int]] = field(default_factory=list)  # boundary points — presumably [x, y] pairs; confirm
    coverage: float = 0.0  # field coverage — presumably a 0..1 fraction; confirm
    mask_b64: str = ""  # binary mask as base64 PNG (for downstream stages)


@dataclass
class SegmentFieldDebugResponse:
    """Response from field segmentation with debug overlay."""
    boundary: List[List[int]] = field(default_factory=list)  # same shape as SegmentFieldResponse.boundary
    coverage: float = 0.0
    mask_overlay_b64: str = ""  # debug overlay image, base64 — encoding not shown here; confirm
# --- Server Config ---
@@ -193,5 +226,8 @@ INFERENCE_VIEWS = [
RegionBox,
AnalyzeRegionsResponse,
AnalyzeRegionsDebugResponse,
SegmentFieldRequest,
SegmentFieldResponse,
SegmentFieldDebugResponse,
ConfigUpdate,
]

View File

@@ -38,6 +38,9 @@ class Job:
video_path: str
profile_name: str = "soccer_broadcast"
# Timeline — set after frame extraction, or upfront for replay jobs
timeline_id: Optional[UUID] = None
# Lineage
parent_id: Optional[UUID] = None
run_type: RunType = RunType.INITIAL

View File

@@ -1,87 +0,0 @@
"""
Detection pipeline runtime models.
These are the data structures that flow between pipeline stages.
They contain runtime types (np.ndarray) so modelgen skips them —
not generated to SQLModel or TypeScript.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Literal
import numpy as np
@dataclass
class Frame:
    """One extracted video frame with its position metadata."""
    sequence: int  # frame index — ordering semantics not shown here; confirm
    chunk_id: int  # which source chunk this frame came from — presumably; verify
    timestamp: float  # position in video (seconds)
    image: np.ndarray  # decoded pixels; runtime type, so modelgen skips these models
    perceptual_hash: str = ""  # empty until computed — consumer not visible here; confirm


@dataclass
class BoundingBox:
    """A labelled detection rectangle with confidence."""
    x: int
    y: int
    w: int
    h: int
    confidence: float
    label: str


@dataclass
class TextCandidate:
    """An OCR hit: a text string found in a bbox of a frame."""
    frame: Frame
    bbox: BoundingBox
    text: str
    ocr_confidence: float


@dataclass
class BrandDetection:
    """A single brand sighting on the timeline, tagged with how it was found."""
    brand: str
    timestamp: float  # when the sighting starts (seconds) — presumably; confirm units
    duration: float  # how long it stays visible (seconds) — presumably; confirm
    confidence: float
    source: Literal["ocr", "local_vlm", "cloud_llm", "logo_match", "auxiliary"]
    bbox: BoundingBox | None = None  # None when the source has no spatial extent
    frame_ref: int | None = None  # reference to a frame — by Frame.sequence? confirm
    content_type: str = ""


@dataclass
class BrandStats:
    """Aggregate per-brand statistics over a whole video."""
    total_appearances: int = 0
    total_screen_time: float = 0.0  # seconds — presumably; confirm
    avg_confidence: float = 0.0
    first_seen: float = 0.0
    last_seen: float = 0.0


@dataclass
class PipelineStats:
    """Counters describing one pipeline run, including escalation funnel."""
    frames_extracted: int = 0
    frames_after_scene_filter: int = 0
    cv_regions_detected: int = 0
    regions_detected: int = 0
    # Escalation funnel: OCR first, then local VLM, then cloud LLM
    regions_resolved_by_ocr: int = 0
    regions_escalated_to_local_vlm: int = 0
    regions_escalated_to_cloud_llm: int = 0
    auxiliary_detections: int = 0
    cloud_llm_calls: int = 0
    processing_time_seconds: float = 0.0
    estimated_cloud_cost_usd: float = 0.0


@dataclass
class DetectionReport:
    """Final output of a run: per-brand stats, the timeline, and run stats."""
    video_source: str
    content_type: str
    duration_seconds: float
    brands: dict[str, BrandStats] = field(default_factory=dict)  # keyed by brand name — presumably; confirm
    timeline: list[BrandDetection] = field(default_factory=list)
    pipeline_stats: PipelineStats = field(default_factory=PipelineStats)

View File

@@ -1,46 +0,0 @@
"""
Pipeline composition config — source of truth for graph topology.
Defines what stages run, in what order, with what branching.
Belongs to a profile. Persisted as JSONB.
The execution strategy (serial, parallel, distributed) is separate —
the runner reads this config and flattens it into a sequence for now.
"""
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
@dataclass
class StageRef:
    """Reference to a stage in the pipeline graph."""
    name: str  # stage name (matches StageDefinition.name)
    branch: str = "trunk"  # which branch this belongs to
    execution_target: str = "local"  # local | gpu | lambda | gcp


@dataclass
class Edge:
    """Connection between stages in the graph."""
    source: str  # stage name
    target: str  # stage name
    condition: str = ""  # empty = unconditional, otherwise a routing rule key


@dataclass
class PipelineConfig:
    """
    Pipeline graph topology + routing rules.
    Holder model — stages/edges define the graph shape,
    routing_rules is a JSONB blob for decision tree logic.
    """
    name: str
    profile_name: str  # owning profile — presumably matches Profile.name; confirm
    stages: List[StageRef] = field(default_factory=list)
    edges: List[Edge] = field(default_factory=list)
    routing_rules: Dict[str, Any] = field(default_factory=dict)  # opaque to this layer


# Export list consumed by modelgen.
PIPELINE_CONFIG_VIEWS = [StageRef, Edge, PipelineConfig]

View File

@@ -0,0 +1,30 @@
"""
Profile schema — source of truth for content type profiles.
A profile has two JSONB fields:
- pipeline: graph topology (stages, edges, routing rules)
- configs: per-stage config values keyed by stage name
Validated at read time using generated contracts (StageConfigField, PipelineConfig).
"""
from dataclasses import dataclass, field
from typing import Any, Dict
from uuid import UUID
@dataclass
class Profile:
    """
    A content type profile.
    Defines what pipeline to run and how each stage is configured.
    Seed data inserted via JSON fixtures on startup.
    """
    id: UUID
    name: str
    # JSONB: graph topology (stages, edges, routing rules) — validated at read
    # time against the generated PipelineConfig contract per the module docstring
    pipeline: Dict[str, Any] = field(default_factory=dict)
    # JSONB: per-stage config values keyed by stage name
    configs: Dict[str, Any] = field(default_factory=dict)


# Export list consumed by modelgen.
PROFILE_VIEWS = [Profile]

153
core/schema/models/stage.py Normal file
View File

@@ -0,0 +1,153 @@
"""
Stage & Pipeline Schema Definitions
Source of truth for:
- Stage metadata (StageDefinition, config fields, IO)
- Stage config shapes (FrameExtractionConfig, etc.)
- Pipeline topology (StageRef, Edge, PipelineConfig)
Generates: Pydantic (detect/contract.py), TypeScript via modelgen.
"""
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
# --- Stage metadata ---
@dataclass
class StageConfigField:
    """A single tunable config parameter for the editor UI."""
    name: str
    type: str  # "float", "int", "str", "bool"
    default: Any
    description: str = ""
    min: Optional[float] = None  # numeric lower bound; None = unbounded
    max: Optional[float] = None  # numeric upper bound; None = unbounded
    options: Optional[List[str]] = None  # enumerated choices — presumably for "str" fields; confirm


@dataclass
class StageIO:
    """Declares what a stage reads and writes."""
    reads: List[str] = field(default_factory=list)
    writes: List[str] = field(default_factory=list)
    optional_reads: List[str] = field(default_factory=list)


@dataclass
class StageDefinition:
    """Complete metadata for a pipeline stage."""
    name: str
    label: str
    description: str
    category: str = "detection"
    io: StageIO = field(default_factory=StageIO)
    config_fields: List[StageConfigField] = field(default_factory=list)
    tracks_element: Optional[str] = None  # label to time-track in the editor; None = no overlay


# --- Stage config shapes ---
@dataclass
class FrameExtractionConfig:
    """Config for the frame extraction stage."""
    fps: float = 2.0
    max_frames: int = 500


@dataclass
class SceneFilterConfig:
    """Config for the perceptual-hash scene filter stage."""
    hamming_threshold: int = 8
    enabled: bool = True


@dataclass
class DetectionConfig:
    """Config for the object-detection stage."""
    model_name: str = "yolov8n.pt"
    confidence_threshold: float = 0.3
    target_classes: List[str] = field(default_factory=lambda: ["logo", "text"])


@dataclass
class OCRConfig:
    """Config for the OCR stage."""
    languages: List[str] = field(default_factory=lambda: ["en"])
    min_confidence: float = 0.5


@dataclass
class ResolverConfig:
    """Config for the brand-name resolver stage."""
    fuzzy_threshold: int = 75  # presumably a 0-100 fuzzy-match score cutoff; confirm


@dataclass
class RegionAnalysisConfig:
    """Config for the edge-based region analysis stage."""
    enabled: bool = True
    # Canny / Hough parameters for edge and line detection
    edge_canny_low: int = 50
    edge_canny_high: int = 150
    edge_hough_threshold: int = 80
    edge_hough_min_length: int = 100
    edge_hough_max_gap: int = 10
    # Pairing constraints between detected edges (pixels — presumably; confirm)
    edge_pair_max_distance: int = 200
    edge_pair_min_distance: int = 15


@dataclass
class FieldSegmentationConfig:
    """Config for the pitch/field segmentation stage."""
    enabled: bool = True
    # HSV green range for pitch detection
    hue_low: int = 30
    hue_high: int = 85
    sat_low: int = 30
    sat_high: int = 255
    val_low: int = 30
    val_high: int = 255
    # Morphology
    morph_kernel: int = 15  # kernel size for close/open
    min_area_ratio: float = 0.05  # minimum contour area as fraction of frame


# --- Pipeline topology ---
@dataclass
class StageRef:
    """Reference to a stage in the pipeline graph."""
    name: str  # matches StageDefinition.name
    branch: str = "trunk"
    execution_target: str = "local"


@dataclass
class Edge:
    """Connection between stages in the graph."""
    source: str  # stage name
    target: str  # stage name
    condition: str = ""  # empty = unconditional


@dataclass
class PipelineConfig:
    """Pipeline graph topology + routing rules."""
    name: str
    profile_name: str
    stages: List[StageRef] = field(default_factory=list)
    edges: List[Edge] = field(default_factory=list)
    routing_rules: Dict[str, Any] = field(default_factory=dict)


# --- Export for modelgen ---
STAGE_VIEWS = [
    StageConfigField,
    StageIO,
    StageDefinition,
    FrameExtractionConfig,
    SceneFilterConfig,
    DetectionConfig,
    OCRConfig,
    ResolverConfig,
    RegionAnalysisConfig,
    FieldSegmentationConfig,
    StageRef,
    Edge,
    PipelineConfig,
]

View File

@@ -1,69 +0,0 @@
"""
Stage Schema Definitions
Source of truth for pipeline stage metadata.
Generates: Pydantic, TypeScript via modelgen.
Each stage is defined by its config fields. The implementation
lives in detect/stages/<name>.py as a Stage subclass.
"""
from dataclasses import dataclass, field
from typing import Any, List, Optional
@dataclass
class StageConfigField:
    """A single tunable config parameter for the editor UI."""
    name: str
    type: str  # "float", "int", "str", "bool"
    default: Any
    description: str = ""
    min: Optional[float] = None  # numeric lower bound; None = unbounded
    max: Optional[float] = None  # numeric upper bound; None = unbounded
    options: Optional[List[str]] = None  # enumerated choices — presumably for "str" fields; confirm


@dataclass
class StageIO:
    """Declares what a stage reads and writes."""
    reads: List[str] = field(default_factory=list)
    writes: List[str] = field(default_factory=list)
    optional_reads: List[str] = field(default_factory=list)


@dataclass
class StageDefinition:
    """
    Complete metadata for a pipeline stage.
    Lives in schema as the source of truth. Each stage implementation
    references a StageDefinition. The editor, graph, and checkpoint
    system all consume this.
    """
    name: str
    label: str
    description: str
    category: str = "detection"
    io: StageIO = field(default_factory=StageIO)
    config_fields: List[StageConfigField] = field(default_factory=list)
    # The box label this stage produces that should be time-tracked in the editor.
    # Set to the label string (e.g. "edge_region") for stages that have a
    # meaningful temporal element. None means no motion tracker overlay.
    tracks_element: Optional[str] = None
    # Legacy fields — used by old registry pattern during migration.
    # New stages use Stage subclass instead.
    fn: Any = None
    serialize_fn: Any = None
    deserialize_fn: Any = None


# --- Export for modelgen ---
STAGE_VIEWS = [
    StageConfigField,
    StageIO,
    StageDefinition,
]