refactor stage 1

This commit is contained in:
2026-03-27 04:23:21 -03:00
parent df6bcb01e8
commit 291ac8dd40
14 changed files with 688 additions and 450 deletions

View File

@@ -27,9 +27,11 @@ from .grpc import (
)
from .jobs import ChunkJob, ChunkJobStatus, JobStatus, TranscodeJob
from .detect_jobs import (
DetectJob, DetectJobStatus, RunType, StageCheckpoint,
DetectJob, DetectJobStatus, RunType,
Timeline, Checkpoint,
BrandSource, KnownBrand, SourceBrandSighting,
)
from .stages import StageConfigField, StageIO, StageDefinition, STAGE_VIEWS
from .media import AssetStatus, MediaAsset
from .presets import BUILTIN_PRESETS, TranscodePreset
from .detect import DETECT_VIEWS # noqa: F401 — discovered by modelgen generic loader
@@ -40,7 +42,8 @@ from .sources import ChunkInfo, SourceJob, SourceType
# Core domain models - generates Django, SQLModel, TypeScript
DATACLASSES = [MediaAsset, TranscodePreset, TranscodeJob, ChunkJob,
DetectJob, StageCheckpoint, KnownBrand, SourceBrandSighting]
DetectJob, Timeline, Checkpoint,
KnownBrand, SourceBrandSighting]
# API request/response models - generates TypeScript only (no Django)
# WorkerStatus from grpc.py is reused here

View File

@@ -72,49 +72,58 @@ class DetectJob:
@dataclass
class StageCheckpoint:
class Timeline:
"""
A checkpoint saved after a pipeline stage completes.
The frame sequence from a source video.
Binary data (frame images, crops) goes to S3/MinIO.
Everything else (structured state) lives here in Postgres.
Independent of stages — exists before any stage runs.
Stages annotate the timeline, they don't own it.
Frames are stored in MinIO as JPEGs.
"""
id: UUID
job_id: UUID
stage: str
stage_index: int # position in NODES list (0-7)
source_asset_id: Optional[UUID] = None
source_video: str = ""
profile_name: str = ""
fps: float = 2.0
# S3 reference for binary data only
frames_prefix: str = "" # s3 prefix: checkpoints/{job_id}/frames/
# Frame metadata (non-image fields)
# Frame metadata (images in MinIO, metadata here)
frames_prefix: str = "" # s3: timelines/{id}/frames/
frames_manifest: Dict[int, str] = field(default_factory=dict) # seq → s3 key
frames_meta: List[Dict[str, Any]] = field(default_factory=list) # sequence, chunk_id, timestamp, hash
filtered_frame_sequences: List[int] = field(default_factory=list)
frames_meta: List[Dict[str, Any]] = field(default_factory=list)
# Stage output — stored as blob in MinIO: checkpoints/{job_id}/stages/{stage}.bson
# Each stage's serialize_fn/deserialize_fn owns the format.
# Postgres only stores the S3 key, not the data itself.
stage_output_key: str = "" # s3 key to the serialized stage output
created_at: Optional[datetime] = None
# Pipeline state (small, stays in Postgres)
stats: Dict[str, Any] = field(default_factory=dict)
config_snapshot: Dict[str, Any] = field(default_factory=dict)
@dataclass
class Checkpoint:
"""
A snapshot of pipeline state on a timeline.
Stage outputs stored as JSONB — each stage serializes to JSON,
the checkpoint stores it without knowing the shape.
parent_id forms a tree: multiple children from the same parent
= different config tries from the same starting point.
"""
id: UUID
timeline_id: UUID
parent_id: Optional[UUID] = None # null = root checkpoint
# Stage outputs — JSONB per stage, opaque to the checkpoint layer
stage_outputs: Dict[str, Any] = field(default_factory=dict)
# Config that produced this checkpoint
config_overrides: Dict[str, Any] = field(default_factory=dict)
# Input refs (for replay)
video_path: str = ""
profile_name: str = ""
# Pipeline state
stats: Dict[str, Any] = field(default_factory=dict)
# Scenario — a checkpoint bookmarked for the editor workflow.
# Created by seeders (manual scripts that populate state from real footage)
# or captured from a running pipeline. Loaded via URL:
# /detection/?job=<job_id>#/editor/<stage>
# Scenario bookmark
is_scenario: bool = False
scenario_label: str = "" # human-readable name, e.g. "chelsea_edges_lowcanny"
scenario_label: str = ""
# Timestamps
created_at: Optional[datetime] = None

View File

@@ -0,0 +1,64 @@
"""
Stage Schema Definitions
Source of truth for pipeline stage metadata.
Generates: Pydantic, TypeScript via modelgen.
Each stage is defined by its config fields. The implementation
lives in detect/stages/<name>.py as a Stage subclass.
"""
from dataclasses import dataclass, field
from typing import Any, List, Optional
@dataclass
class StageConfigField:
    """A single tunable config parameter for the editor UI.

    ``min``/``max`` presumably bound numeric fields and ``options``
    enumerates allowed values — TODO confirm against the editor code.
    """
    # NOTE(review): `type`, `min`, `max` shadow builtins, but the names are
    # part of the generated schema (Pydantic/TypeScript), so they stay.
    name: str
    type: str  # one of: "float", "int", "str", "bool"
    default: Any  # default value; concrete Python type depends on `type`
    description: str = ""
    min: Optional[float] = None
    max: Optional[float] = None
    options: Optional[List[str]] = None
@dataclass
class StageIO:
    """Declares what a stage reads and writes.

    Entries are string keys — presumably names of timeline/checkpoint
    fields produced or consumed by stages; verify against the pipeline.
    """
    reads: List[str] = field(default_factory=list)  # required inputs
    writes: List[str] = field(default_factory=list)  # outputs produced
    optional_reads: List[str] = field(default_factory=list)  # inputs the stage can run without
@dataclass
class StageDefinition:
    """
    Complete metadata for a pipeline stage.

    Lives in schema as the source of truth. Each stage implementation
    references a StageDefinition. The editor, graph, and checkpoint
    system all consume this.
    """
    name: str  # machine name; implementation lives in detect/stages/<name>.py
    label: str  # human-readable display name
    description: str
    category: str = "detection"
    io: StageIO = field(default_factory=StageIO)  # declared reads/writes
    config_fields: List[StageConfigField] = field(default_factory=list)  # tunables shown in the editor
    # Legacy fields — used by old registry pattern during migration.
    # New stages use Stage subclass instead.
    fn: Any = None  # legacy: the stage callable
    serialize_fn: Any = None  # legacy: serializer for the stage's output
    deserialize_fn: Any = None  # legacy: matching deserializer
# --- Export for modelgen ---
# Flat list of the schema dataclasses above; the modelgen generic loader
# consumes this to emit the Pydantic/TypeScript models.
STAGE_VIEWS = [StageConfigField, StageIO, StageDefinition]