phase 4
This commit is contained in:
@@ -30,19 +30,18 @@ from .timeline import Timeline
|
||||
from .checkpoint import Checkpoint
|
||||
from .brand import BrandSource, Brand
|
||||
from .media import AssetStatus, MediaAsset
|
||||
from .presets import BUILTIN_PRESETS, TranscodePreset
|
||||
from .detect import DETECT_VIEWS # noqa: F401 — discovered by modelgen generic loader
|
||||
from .inference import INFERENCE_VIEWS # noqa: F401 — GPU inference server API types
|
||||
from .ui_state import UI_STATE_VIEWS # noqa: F401 — UI store state types
|
||||
from .stages import StageConfigField, StageIO, StageDefinition, STAGE_VIEWS # noqa: F401
|
||||
from .pipeline_config import StageRef, Edge, PipelineConfig, PIPELINE_CONFIG_VIEWS # noqa: F401
|
||||
from .detect_api import RunRequest, RunResponse, DETECT_API_VIEWS # noqa: F401
|
||||
from .views import ChunkEvent, ChunkOutputFile, PipelineStats, WorkerEvent
|
||||
from .sources import ChunkInfo, SourceJob, SourceType
|
||||
from .profile import Profile
|
||||
from .preset import BUILTIN_PRESETS, TranscodePreset
|
||||
from .event import DETECT_VIEWS # noqa: F401
|
||||
from .inference import INFERENCE_VIEWS # noqa: F401
|
||||
from .ui_state import UI_STATE_VIEWS # noqa: F401
|
||||
from .stage import STAGE_VIEWS # noqa: F401
|
||||
from .view import ChunkEvent, ChunkOutputFile, PipelineStats, WorkerEvent
|
||||
from .source import ChunkInfo, SourceJob, SourceType
|
||||
|
||||
# Core domain models - generates SQLModel, TypeScript
|
||||
DATACLASSES = [MediaAsset, TranscodePreset,
|
||||
Job, Timeline, Checkpoint, Brand]
|
||||
Job, Timeline, Checkpoint, Brand, Profile]
|
||||
|
||||
# API request/response models
|
||||
API_MODELS = [
|
||||
@@ -75,55 +74,3 @@ GRPC_MESSAGES = [
|
||||
ChunkStreamRequest,
|
||||
ChunkPipelineEvent,
|
||||
]
|
||||
|
||||
__all__ = [
|
||||
# Models
|
||||
"MediaAsset",
|
||||
"TranscodePreset",
|
||||
"Job",
|
||||
"Timeline",
|
||||
"Checkpoint",
|
||||
# Enums
|
||||
"AssetStatus",
|
||||
"JobStatus",
|
||||
"RunType",
|
||||
"BrandSource",
|
||||
"SourceType",
|
||||
# Stages
|
||||
"StageConfigField",
|
||||
"StageIO",
|
||||
"StageDefinition",
|
||||
# API
|
||||
"CreateJobRequest",
|
||||
"UpdateAssetRequest",
|
||||
"DeleteResult",
|
||||
"ScanResult",
|
||||
"SystemStatus",
|
||||
# gRPC
|
||||
"GRPC_SERVICE",
|
||||
"JobRequest",
|
||||
"JobResponse",
|
||||
"ProgressRequest",
|
||||
"ProgressUpdate",
|
||||
"CancelRequest",
|
||||
"CancelResponse",
|
||||
"WorkerStatus",
|
||||
"Empty",
|
||||
"ChunkStreamRequest",
|
||||
"ChunkPipelineEvent",
|
||||
# Views
|
||||
"ChunkEvent",
|
||||
"WorkerEvent",
|
||||
"PipelineStats",
|
||||
"ChunkOutputFile",
|
||||
# Sources
|
||||
"SourceJob",
|
||||
"ChunkInfo",
|
||||
# For generator
|
||||
"DATACLASSES",
|
||||
"API_MODELS",
|
||||
"ENUMS",
|
||||
"VIEWS",
|
||||
"GRPC_MESSAGES",
|
||||
"BUILTIN_PRESETS",
|
||||
]
|
||||
|
||||
@@ -20,6 +20,7 @@ class Checkpoint:
|
||||
|
||||
id: UUID
|
||||
timeline_id: UUID
|
||||
job_id: Optional[UUID] = None # which job created this checkpoint
|
||||
parent_id: Optional[UUID] = None # null = root checkpoint
|
||||
|
||||
# Stage outputs — JSONB per stage, opaque to the checkpoint layer
|
||||
|
||||
@@ -1,31 +0,0 @@
|
||||
"""
|
||||
Detection API request/response models.
|
||||
|
||||
Source of truth for detection pipeline API shapes.
|
||||
Generated to Pydantic via modelgen.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
|
||||
class RunRequest:
|
||||
"""Request body for launching a detection pipeline run."""
|
||||
video_path: str # storage key
|
||||
profile_name: str = "soccer_broadcast"
|
||||
source_asset_id: str = ""
|
||||
checkpoint: bool = True
|
||||
skip_vlm: bool = False
|
||||
skip_cloud: bool = False
|
||||
log_level: str = "INFO" # INFO | DEBUG
|
||||
|
||||
|
||||
@dataclass
|
||||
class RunResponse:
|
||||
"""Response after starting a pipeline run."""
|
||||
status: str
|
||||
job_id: str
|
||||
video_path: str
|
||||
|
||||
|
||||
DETECT_API_VIEWS = [RunRequest, RunResponse]
|
||||
@@ -214,6 +214,29 @@ class RetryResponse:
|
||||
job_id: str
|
||||
|
||||
|
||||
# --- API request/response ---
|
||||
|
||||
|
||||
@dataclass
class RunRequest:
    """Request body for launching a detection pipeline run."""
    video_path: str  # storage key for the input video
    profile_name: str = "soccer_broadcast"  # content-type profile to run
    source_asset_id: str = ""  # originating asset id; "" presumably means "none" — confirm
    checkpoint: bool = True  # NOTE(review): presumably enables checkpoint persistence — confirm
    skip_vlm: bool = False  # NOTE(review): presumably bypasses the local-VLM stage — confirm
    skip_cloud: bool = False  # NOTE(review): presumably bypasses the cloud-LLM stage — confirm
    log_level: str = "INFO"  # "INFO" | "DEBUG"
|
||||
|
||||
|
||||
@dataclass
class RunResponse:
    """Response after starting a pipeline run."""
    status: str  # launch status string
    job_id: str  # id of the created job
    video_path: str  # the requested video path (presumably echoed back — confirm)
|
||||
|
||||
|
||||
# --- Export lists for modelgen ---
|
||||
|
||||
DETECT_VIEWS = [
|
||||
@@ -234,4 +257,6 @@ DETECT_VIEWS = [
|
||||
ReplayResponse,
|
||||
RetryRequest,
|
||||
RetryResponse,
|
||||
RunRequest,
|
||||
RunResponse,
|
||||
]
|
||||
@@ -160,6 +160,39 @@ class AnalyzeRegionsDebugResponse:
|
||||
pair_count: int = 0
|
||||
|
||||
|
||||
# --- Field Segmentation ---
|
||||
|
||||
|
||||
@dataclass
class SegmentFieldRequest:
    """Request body for field segmentation."""
    image: str  # base64 JPEG
    # HSV threshold range selecting field pixels (defaults match
    # FieldSegmentationConfig's green range for pitch detection).
    hue_low: int = 30
    hue_high: int = 85
    sat_low: int = 30
    sat_high: int = 255
    val_low: int = 30
    val_high: int = 255
    # Morphology / contour filtering (defaults match FieldSegmentationConfig).
    morph_kernel: int = 15  # kernel size for close/open
    min_area_ratio: float = 0.05  # minimum contour area as fraction of frame
|
||||
|
||||
|
||||
@dataclass
class SegmentFieldResponse:
    """Response from field segmentation."""
    boundary: List[List[int]] = field(default_factory=list)  # presumably [x, y] boundary points — confirm
    coverage: float = 0.0  # presumably fraction of frame classified as field — confirm
    mask_b64: str = ""  # binary mask as base64 PNG (for downstream stages)
|
||||
|
||||
|
||||
@dataclass
class SegmentFieldDebugResponse:
    """Response from field segmentation with debug overlay."""
    boundary: List[List[int]] = field(default_factory=list)  # presumably [x, y] boundary points — confirm
    coverage: float = 0.0  # presumably fraction of frame classified as field — confirm
    mask_overlay_b64: str = ""  # presumably overlay image as base64 — confirm encoding
|
||||
|
||||
|
||||
# --- Server Config ---
|
||||
|
||||
|
||||
@@ -193,5 +226,8 @@ INFERENCE_VIEWS = [
|
||||
RegionBox,
|
||||
AnalyzeRegionsResponse,
|
||||
AnalyzeRegionsDebugResponse,
|
||||
SegmentFieldRequest,
|
||||
SegmentFieldResponse,
|
||||
SegmentFieldDebugResponse,
|
||||
ConfigUpdate,
|
||||
]
|
||||
|
||||
@@ -38,6 +38,9 @@ class Job:
|
||||
video_path: str
|
||||
profile_name: str = "soccer_broadcast"
|
||||
|
||||
# Timeline — set after frame extraction, or upfront for replay jobs
|
||||
timeline_id: Optional[UUID] = None
|
||||
|
||||
# Lineage
|
||||
parent_id: Optional[UUID] = None
|
||||
run_type: RunType = RunType.INITIAL
|
||||
|
||||
@@ -1,87 +0,0 @@
|
||||
"""
|
||||
Detection pipeline runtime models.
|
||||
|
||||
These are the data structures that flow between pipeline stages.
|
||||
They contain runtime types (np.ndarray) so modelgen skips them —
|
||||
not generated to SQLModel or TypeScript.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Literal
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
@dataclass
|
||||
class Frame:
|
||||
sequence: int
|
||||
chunk_id: int
|
||||
timestamp: float # position in video (seconds)
|
||||
image: np.ndarray
|
||||
perceptual_hash: str = ""
|
||||
|
||||
|
||||
@dataclass
|
||||
class BoundingBox:
|
||||
x: int
|
||||
y: int
|
||||
w: int
|
||||
h: int
|
||||
confidence: float
|
||||
label: str
|
||||
|
||||
|
||||
@dataclass
|
||||
class TextCandidate:
|
||||
frame: Frame
|
||||
bbox: BoundingBox
|
||||
text: str
|
||||
ocr_confidence: float
|
||||
|
||||
|
||||
@dataclass
|
||||
class BrandDetection:
|
||||
brand: str
|
||||
timestamp: float
|
||||
duration: float
|
||||
confidence: float
|
||||
source: Literal["ocr", "local_vlm", "cloud_llm", "logo_match", "auxiliary"]
|
||||
bbox: BoundingBox | None = None
|
||||
frame_ref: int | None = None
|
||||
content_type: str = ""
|
||||
|
||||
|
||||
@dataclass
|
||||
class BrandStats:
|
||||
total_appearances: int = 0
|
||||
total_screen_time: float = 0.0
|
||||
avg_confidence: float = 0.0
|
||||
first_seen: float = 0.0
|
||||
last_seen: float = 0.0
|
||||
|
||||
|
||||
@dataclass
|
||||
class PipelineStats:
|
||||
frames_extracted: int = 0
|
||||
frames_after_scene_filter: int = 0
|
||||
cv_regions_detected: int = 0
|
||||
regions_detected: int = 0
|
||||
regions_resolved_by_ocr: int = 0
|
||||
regions_escalated_to_local_vlm: int = 0
|
||||
regions_escalated_to_cloud_llm: int = 0
|
||||
auxiliary_detections: int = 0
|
||||
cloud_llm_calls: int = 0
|
||||
processing_time_seconds: float = 0.0
|
||||
estimated_cloud_cost_usd: float = 0.0
|
||||
|
||||
|
||||
@dataclass
|
||||
class DetectionReport:
|
||||
video_source: str
|
||||
content_type: str
|
||||
duration_seconds: float
|
||||
brands: dict[str, BrandStats] = field(default_factory=dict)
|
||||
timeline: list[BrandDetection] = field(default_factory=list)
|
||||
pipeline_stats: PipelineStats = field(default_factory=PipelineStats)
|
||||
@@ -1,46 +0,0 @@
|
||||
"""
|
||||
Pipeline composition config — source of truth for graph topology.
|
||||
|
||||
Defines what stages run, in what order, with what branching.
|
||||
Belongs to a profile. Persisted as JSONB.
|
||||
|
||||
The execution strategy (serial, parallel, distributed) is separate —
|
||||
the runner reads this config and flattens it into a sequence for now.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
@dataclass
|
||||
class StageRef:
|
||||
"""Reference to a stage in the pipeline graph."""
|
||||
name: str # stage name (matches StageDefinition.name)
|
||||
branch: str = "trunk" # which branch this belongs to
|
||||
execution_target: str = "local" # local | gpu | lambda | gcp
|
||||
|
||||
|
||||
@dataclass
|
||||
class Edge:
|
||||
"""Connection between stages in the graph."""
|
||||
source: str # stage name
|
||||
target: str # stage name
|
||||
condition: str = "" # empty = unconditional, otherwise a routing rule key
|
||||
|
||||
|
||||
@dataclass
|
||||
class PipelineConfig:
|
||||
"""
|
||||
Pipeline graph topology + routing rules.
|
||||
|
||||
Holder model — stages/edges define the graph shape,
|
||||
routing_rules is a JSONB blob for decision tree logic.
|
||||
"""
|
||||
name: str
|
||||
profile_name: str
|
||||
stages: List[StageRef] = field(default_factory=list)
|
||||
edges: List[Edge] = field(default_factory=list)
|
||||
routing_rules: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
PIPELINE_CONFIG_VIEWS = [StageRef, Edge, PipelineConfig]
|
||||
30
core/schema/models/profile.py
Normal file
30
core/schema/models/profile.py
Normal file
@@ -0,0 +1,30 @@
|
||||
"""
|
||||
Profile schema — source of truth for content type profiles.
|
||||
|
||||
A profile has two JSONB fields:
|
||||
- pipeline: graph topology (stages, edges, routing rules)
|
||||
- configs: per-stage config values keyed by stage name
|
||||
|
||||
Validated at read time using generated contracts (StageConfigField, PipelineConfig).
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict
|
||||
from uuid import UUID
|
||||
|
||||
|
||||
@dataclass
|
||||
class Profile:
|
||||
"""
|
||||
A content type profile.
|
||||
|
||||
Defines what pipeline to run and how each stage is configured.
|
||||
Seed data inserted via JSON fixtures on startup.
|
||||
"""
|
||||
id: UUID
|
||||
name: str
|
||||
pipeline: Dict[str, Any] = field(default_factory=dict)
|
||||
configs: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
PROFILE_VIEWS = [Profile]
|
||||
153
core/schema/models/stage.py
Normal file
153
core/schema/models/stage.py
Normal file
@@ -0,0 +1,153 @@
|
||||
"""
|
||||
Stage & Pipeline Schema Definitions
|
||||
|
||||
Source of truth for:
|
||||
- Stage metadata (StageDefinition, config fields, IO)
|
||||
- Stage config shapes (FrameExtractionConfig, etc.)
|
||||
- Pipeline topology (StageRef, Edge, PipelineConfig)
|
||||
|
||||
Generates: Pydantic (detect/contract.py), TypeScript via modelgen.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
# --- Stage metadata ---
|
||||
|
||||
@dataclass
|
||||
class StageConfigField:
|
||||
"""A single tunable config parameter for the editor UI."""
|
||||
name: str
|
||||
type: str # "float", "int", "str", "bool"
|
||||
default: Any
|
||||
description: str = ""
|
||||
min: Optional[float] = None
|
||||
max: Optional[float] = None
|
||||
options: Optional[List[str]] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class StageIO:
|
||||
"""Declares what a stage reads and writes."""
|
||||
reads: List[str] = field(default_factory=list)
|
||||
writes: List[str] = field(default_factory=list)
|
||||
optional_reads: List[str] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
|
||||
class StageDefinition:
|
||||
"""Complete metadata for a pipeline stage."""
|
||||
name: str
|
||||
label: str
|
||||
description: str
|
||||
category: str = "detection"
|
||||
io: StageIO = field(default_factory=StageIO)
|
||||
config_fields: List[StageConfigField] = field(default_factory=list)
|
||||
tracks_element: Optional[str] = None
|
||||
|
||||
|
||||
# --- Stage config shapes ---
|
||||
|
||||
@dataclass
|
||||
class FrameExtractionConfig:
|
||||
fps: float = 2.0
|
||||
max_frames: int = 500
|
||||
|
||||
|
||||
@dataclass
|
||||
class SceneFilterConfig:
|
||||
hamming_threshold: int = 8
|
||||
enabled: bool = True
|
||||
|
||||
|
||||
@dataclass
|
||||
class DetectionConfig:
|
||||
model_name: str = "yolov8n.pt"
|
||||
confidence_threshold: float = 0.3
|
||||
target_classes: List[str] = field(default_factory=lambda: ["logo", "text"])
|
||||
|
||||
|
||||
@dataclass
|
||||
class OCRConfig:
|
||||
languages: List[str] = field(default_factory=lambda: ["en"])
|
||||
min_confidence: float = 0.5
|
||||
|
||||
|
||||
@dataclass
|
||||
class ResolverConfig:
|
||||
fuzzy_threshold: int = 75
|
||||
|
||||
|
||||
@dataclass
|
||||
class RegionAnalysisConfig:
|
||||
enabled: bool = True
|
||||
edge_canny_low: int = 50
|
||||
edge_canny_high: int = 150
|
||||
edge_hough_threshold: int = 80
|
||||
edge_hough_min_length: int = 100
|
||||
edge_hough_max_gap: int = 10
|
||||
edge_pair_max_distance: int = 200
|
||||
edge_pair_min_distance: int = 15
|
||||
|
||||
|
||||
@dataclass
|
||||
class FieldSegmentationConfig:
|
||||
enabled: bool = True
|
||||
# HSV green range for pitch detection
|
||||
hue_low: int = 30
|
||||
hue_high: int = 85
|
||||
sat_low: int = 30
|
||||
sat_high: int = 255
|
||||
val_low: int = 30
|
||||
val_high: int = 255
|
||||
# Morphology
|
||||
morph_kernel: int = 15 # kernel size for close/open
|
||||
min_area_ratio: float = 0.05 # minimum contour area as fraction of frame
|
||||
|
||||
|
||||
# --- Pipeline topology ---
|
||||
|
||||
@dataclass
|
||||
class StageRef:
|
||||
"""Reference to a stage in the pipeline graph."""
|
||||
name: str
|
||||
branch: str = "trunk"
|
||||
execution_target: str = "local"
|
||||
|
||||
|
||||
@dataclass
|
||||
class Edge:
|
||||
"""Connection between stages in the graph."""
|
||||
source: str
|
||||
target: str
|
||||
condition: str = ""
|
||||
|
||||
|
||||
@dataclass
|
||||
class PipelineConfig:
|
||||
"""Pipeline graph topology + routing rules."""
|
||||
name: str
|
||||
profile_name: str
|
||||
stages: List[StageRef] = field(default_factory=list)
|
||||
edges: List[Edge] = field(default_factory=list)
|
||||
routing_rules: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
# --- Export for modelgen ---
|
||||
|
||||
STAGE_VIEWS = [
|
||||
StageConfigField,
|
||||
StageIO,
|
||||
StageDefinition,
|
||||
FrameExtractionConfig,
|
||||
SceneFilterConfig,
|
||||
DetectionConfig,
|
||||
OCRConfig,
|
||||
ResolverConfig,
|
||||
RegionAnalysisConfig,
|
||||
FieldSegmentationConfig,
|
||||
StageRef,
|
||||
Edge,
|
||||
PipelineConfig,
|
||||
]
|
||||
@@ -1,69 +0,0 @@
|
||||
"""
|
||||
Stage Schema Definitions
|
||||
|
||||
Source of truth for pipeline stage metadata.
|
||||
Generates: Pydantic, TypeScript via modelgen.
|
||||
|
||||
Each stage is defined by its config fields. The implementation
|
||||
lives in detect/stages/<name>.py as a Stage subclass.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, List, Optional
|
||||
|
||||
|
||||
@dataclass
|
||||
class StageConfigField:
|
||||
"""A single tunable config parameter for the editor UI."""
|
||||
name: str
|
||||
type: str # "float", "int", "str", "bool"
|
||||
default: Any
|
||||
description: str = ""
|
||||
min: Optional[float] = None
|
||||
max: Optional[float] = None
|
||||
options: Optional[List[str]] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class StageIO:
|
||||
"""Declares what a stage reads and writes."""
|
||||
reads: List[str] = field(default_factory=list)
|
||||
writes: List[str] = field(default_factory=list)
|
||||
optional_reads: List[str] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
|
||||
class StageDefinition:
|
||||
"""
|
||||
Complete metadata for a pipeline stage.
|
||||
|
||||
Lives in schema as the source of truth. Each stage implementation
|
||||
references a StageDefinition. The editor, graph, and checkpoint
|
||||
system all consume this.
|
||||
"""
|
||||
name: str
|
||||
label: str
|
||||
description: str
|
||||
category: str = "detection"
|
||||
io: StageIO = field(default_factory=StageIO)
|
||||
config_fields: List[StageConfigField] = field(default_factory=list)
|
||||
|
||||
# The box label this stage produces that should be time-tracked in the editor.
|
||||
# Set to the label string (e.g. "edge_region") for stages that have a
|
||||
# meaningful temporal element. None means no motion tracker overlay.
|
||||
tracks_element: Optional[str] = None
|
||||
|
||||
# Legacy fields — used by old registry pattern during migration.
|
||||
# New stages use Stage subclass instead.
|
||||
fn: Any = None
|
||||
serialize_fn: Any = None
|
||||
deserialize_fn: Any = None
|
||||
|
||||
|
||||
# --- Export for modelgen ---
|
||||
|
||||
STAGE_VIEWS = [
|
||||
StageConfigField,
|
||||
StageIO,
|
||||
StageDefinition,
|
||||
]
|
||||
Reference in New Issue
Block a user