refactor stage 1
This commit is contained in:
@@ -9,7 +9,8 @@ from sqlmodel import select
|
||||
|
||||
from .connection import get_session
|
||||
from .models import (
|
||||
DetectJob, StageCheckpoint, KnownBrand, SourceBrandSighting,
|
||||
DetectJob, Timeline, Checkpoint,
|
||||
KnownBrand, SourceBrandSighting,
|
||||
)
|
||||
|
||||
|
||||
@@ -55,72 +56,86 @@ def list_detect_jobs(
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# StageCheckpoint
|
||||
# Timeline
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def save_stage_checkpoint(**fields) -> StageCheckpoint:
|
||||
def create_timeline(**fields) -> Timeline:
|
||||
timeline = Timeline(**fields)
|
||||
with get_session() as session:
|
||||
# Upsert: replace if same job_id + stage
|
||||
job_id = fields.get("job_id")
|
||||
stage = fields.get("stage")
|
||||
if job_id and stage:
|
||||
stmt = select(StageCheckpoint).where(
|
||||
StageCheckpoint.job_id == job_id,
|
||||
StageCheckpoint.stage == stage,
|
||||
)
|
||||
existing = session.exec(stmt).first()
|
||||
if existing:
|
||||
for k, v in fields.items():
|
||||
setattr(existing, k, v)
|
||||
session.commit()
|
||||
session.refresh(existing)
|
||||
return existing
|
||||
session.add(timeline)
|
||||
session.commit()
|
||||
session.refresh(timeline)
|
||||
return timeline
|
||||
|
||||
checkpoint = StageCheckpoint(**fields)
|
||||
|
||||
def get_timeline(timeline_id: UUID) -> Timeline | None:
    """Look up one timeline by its primary key.

    Returns the result of ``session.get(Timeline, timeline_id)``; the
    return annotation allows ``None`` for a missing row.
    """
    with get_session() as db:
        timeline = db.get(Timeline, timeline_id)
        return timeline
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Checkpoint
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def save_checkpoint(**fields) -> Checkpoint:
    """Create a checkpoint row from ``fields`` and return it.

    The keyword arguments are forwarded unchanged to ``Checkpoint(...)``.
    The new instance is added, committed, and refreshed before it is
    returned.
    """
    record = Checkpoint(**fields)
    with get_session() as db:
        db.add(record)
        db.commit()
        # Refresh so the returned object reflects the committed row.
        db.refresh(record)
        return record
|
||||
|
||||
|
||||
def get_stage_checkpoint(job_id: UUID, stage: str) -> StageCheckpoint | None:
|
||||
def get_checkpoint(checkpoint_id: UUID) -> Checkpoint | None:
|
||||
with get_session() as session:
|
||||
stmt = select(StageCheckpoint).where(
|
||||
StageCheckpoint.job_id == job_id,
|
||||
StageCheckpoint.stage == stage,
|
||||
return session.get(Checkpoint, checkpoint_id)
|
||||
|
||||
|
||||
def get_latest_checkpoint(timeline_id: UUID, parent_id: UUID | None = None) -> Checkpoint | None:
    """Return the newest checkpoint of a timeline.

    Rows are ordered by ``created_at`` descending and the first one is
    returned. When ``parent_id`` is given, only checkpoints with that
    parent are considered; when it is ``None`` no parent filter is applied.
    """
    # Collect the filters first; the parent filter is optional.
    conditions = [Checkpoint.timeline_id == timeline_id]
    if parent_id is not None:
        conditions.append(Checkpoint.parent_id == parent_id)

    with get_session() as db:
        query = (
            select(Checkpoint)
            .where(*conditions)
            .order_by(Checkpoint.created_at.desc())
        )
        return db.exec(query).first()
|
||||
|
||||
|
||||
def list_checkpoints(timeline_id: UUID) -> list[Checkpoint]:
    """Return every checkpoint of a timeline, ordered by ``created_at``."""
    with get_session() as db:
        query = select(Checkpoint).where(Checkpoint.timeline_id == timeline_id)
        query = query.order_by(Checkpoint.created_at)
        rows = db.exec(query).all()
        return list(rows)
|
||||
|
||||
|
||||
def get_root_checkpoint(timeline_id: UUID) -> Checkpoint | None:
    """Get the root checkpoint (no parent) for a timeline.

    Args:
        timeline_id: Timeline whose root checkpoint is wanted.

    Returns:
        The first checkpoint on the timeline whose ``parent_id`` is NULL,
        or ``None`` if the query yields no row. No ordering is applied, so
        if several parentless rows exist the one returned is whichever the
        database yields first.
    """
    with get_session() as session:
        stmt = select(Checkpoint).where(
            Checkpoint.timeline_id == timeline_id,
            # Explicit IS NULL test: same SQL as ``== None`` but states the
            # intent and avoids the "comparison to None" lint.
            Checkpoint.parent_id.is_(None),
        )
        return session.exec(stmt).first()
|
||||
|
||||
|
||||
def list_stage_checkpoints(job_id: UUID) -> list[str]:
    """Return the ``stage`` values stored for a job, ordered by ``stage_index``."""
    with get_session() as db:
        query = select(StageCheckpoint.stage).where(StageCheckpoint.job_id == job_id)
        query = query.order_by(StageCheckpoint.stage_index)
        stages = db.exec(query).all()
        return list(stages)
|
||||
|
||||
|
||||
def list_scenarios() -> list[StageCheckpoint]:
|
||||
def list_scenarios() -> list[Checkpoint]:
|
||||
"""List all checkpoints marked as scenarios."""
|
||||
with get_session() as session:
|
||||
stmt = (
|
||||
select(StageCheckpoint)
|
||||
.where(StageCheckpoint.is_scenario == True)
|
||||
.order_by(StageCheckpoint.created_at.desc())
|
||||
select(Checkpoint)
|
||||
.where(Checkpoint.is_scenario == True)
|
||||
.order_by(Checkpoint.created_at.desc())
|
||||
)
|
||||
return list(session.exec(stmt).all())
|
||||
|
||||
|
||||
def delete_stage_checkpoints(job_id: UUID) -> None:
    """Delete every ``StageCheckpoint`` row belonging to ``job_id``.

    Rows are loaded and deleted one by one through the session, then the
    deletions are committed together.
    """
    with get_session() as db:
        matching = select(StageCheckpoint).where(StageCheckpoint.job_id == job_id)
        rows = db.exec(matching).all()
        for row in rows:
            db.delete(row)
        db.commit()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# KnownBrand
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -181,24 +181,30 @@ class DetectJob(SQLModel, table=True):
|
||||
started_at: Optional[datetime] = None
|
||||
completed_at: Optional[datetime] = None
|
||||
|
||||
class StageCheckpoint(SQLModel, table=True):
|
||||
"""A checkpoint saved after a pipeline stage completes."""
|
||||
__tablename__ = "stage_checkpoints"
|
||||
class Timeline(SQLModel, table=True):
|
||||
"""Frame sequence from a source video. Independent of stages."""
|
||||
__tablename__ = "timelines"
|
||||
|
||||
id: UUID = Field(default_factory=uuid4, primary_key=True)
|
||||
job_id: UUID = Field(index=True)
|
||||
stage: str
|
||||
stage_index: int
|
||||
source_asset_id: Optional[UUID] = Field(default=None, index=True)
|
||||
source_video: str = ""
|
||||
profile_name: str = ""
|
||||
fps: float = 2.0
|
||||
frames_prefix: str = ""
|
||||
frames_manifest: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
|
||||
frames_meta: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
|
||||
filtered_frame_sequences: List[int] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
|
||||
stage_output_key: str = "" # s3 key: checkpoints/{job_id}/stages/{stage}.bson
|
||||
stats: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
|
||||
config_snapshot: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
|
||||
created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
|
||||
|
||||
class Checkpoint(SQLModel, table=True):
    """Snapshot of pipeline state. parent_id forms a tree."""
    __tablename__ = "checkpoints"

    # Primary key, generated client-side with uuid4.
    id: UUID = Field(default_factory=uuid4, primary_key=True)
    # Indexed reference to the owning timeline (no foreign-key constraint is declared here).
    timeline_id: UUID = Field(index=True)
    # Indexed reference to the parent checkpoint; defaults to None.
    parent_id: Optional[UUID] = Field(default=None, index=True)
    # JSON columns below are NOT NULL, with a server-side default of an empty object.
    stage_outputs: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
    config_overrides: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
    video_path: str = ""
    profile_name: str = ""
    stats: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
    # Scenario flag and label; list_scenarios-style queries filter on is_scenario.
    is_scenario: bool = False
    scenario_label: str = ""
    # Set at construction time via datetime.utcnow (a naive datetime expressed in UTC).
    created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
|
||||
|
||||
@@ -27,9 +27,11 @@ from .grpc import (
|
||||
)
|
||||
from .jobs import ChunkJob, ChunkJobStatus, JobStatus, TranscodeJob
|
||||
from .detect_jobs import (
|
||||
DetectJob, DetectJobStatus, RunType, StageCheckpoint,
|
||||
DetectJob, DetectJobStatus, RunType,
|
||||
Timeline, Checkpoint,
|
||||
BrandSource, KnownBrand, SourceBrandSighting,
|
||||
)
|
||||
from .stages import StageConfigField, StageIO, StageDefinition, STAGE_VIEWS
|
||||
from .media import AssetStatus, MediaAsset
|
||||
from .presets import BUILTIN_PRESETS, TranscodePreset
|
||||
from .detect import DETECT_VIEWS # noqa: F401 — discovered by modelgen generic loader
|
||||
@@ -40,7 +42,8 @@ from .sources import ChunkInfo, SourceJob, SourceType
|
||||
|
||||
# Core domain models - generates Django, SQLModel, TypeScript
|
||||
DATACLASSES = [MediaAsset, TranscodePreset, TranscodeJob, ChunkJob,
|
||||
DetectJob, StageCheckpoint, KnownBrand, SourceBrandSighting]
|
||||
DetectJob, Timeline, Checkpoint,
|
||||
KnownBrand, SourceBrandSighting]
|
||||
|
||||
# API request/response models - generates TypeScript only (no Django)
|
||||
# WorkerStatus from grpc.py is reused here
|
||||
|
||||
@@ -72,49 +72,58 @@ class DetectJob:
|
||||
|
||||
|
||||
@dataclass
|
||||
class StageCheckpoint:
|
||||
class Timeline:
|
||||
"""
|
||||
A checkpoint saved after a pipeline stage completes.
|
||||
The frame sequence from a source video.
|
||||
|
||||
Binary data (frame images, crops) goes to S3/MinIO.
|
||||
Everything else (structured state) lives here in Postgres.
|
||||
Independent of stages — exists before any stage runs.
|
||||
Stages annotate the timeline, they don't own it.
|
||||
Frames are stored in MinIO as JPEGs.
|
||||
"""
|
||||
|
||||
id: UUID
|
||||
job_id: UUID
|
||||
stage: str
|
||||
stage_index: int # position in NODES list (0-7)
|
||||
source_asset_id: Optional[UUID] = None
|
||||
source_video: str = ""
|
||||
profile_name: str = ""
|
||||
fps: float = 2.0
|
||||
|
||||
# S3 reference for binary data only
|
||||
frames_prefix: str = "" # s3 prefix: checkpoints/{job_id}/frames/
|
||||
|
||||
# Frame metadata (non-image fields)
|
||||
# Frame metadata (images in MinIO, metadata here)
|
||||
frames_prefix: str = "" # s3: timelines/{id}/frames/
|
||||
frames_manifest: Dict[int, str] = field(default_factory=dict) # seq → s3 key
|
||||
frames_meta: List[Dict[str, Any]] = field(default_factory=list) # sequence, chunk_id, timestamp, hash
|
||||
filtered_frame_sequences: List[int] = field(default_factory=list)
|
||||
frames_meta: List[Dict[str, Any]] = field(default_factory=list)
|
||||
|
||||
# Stage output — stored as blob in MinIO: checkpoints/{job_id}/stages/{stage}.bson
|
||||
# Each stage's serialize_fn/deserialize_fn owns the format.
|
||||
# Postgres only stores the S3 key, not the data itself.
|
||||
stage_output_key: str = "" # s3 key to the serialized stage output
|
||||
created_at: Optional[datetime] = None
|
||||
|
||||
# Pipeline state (small, stays in Postgres)
|
||||
stats: Dict[str, Any] = field(default_factory=dict)
|
||||
config_snapshot: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
@dataclass
|
||||
class Checkpoint:
|
||||
"""
|
||||
A snapshot of pipeline state on a timeline.
|
||||
|
||||
Stage outputs stored as JSONB — each stage serializes to JSON,
|
||||
the checkpoint stores it without knowing the shape.
|
||||
|
||||
parent_id forms a tree: multiple children from the same parent
|
||||
= different config tries from the same starting point.
|
||||
"""
|
||||
|
||||
id: UUID
|
||||
timeline_id: UUID
|
||||
parent_id: Optional[UUID] = None # null = root checkpoint
|
||||
|
||||
# Stage outputs — JSONB per stage, opaque to the checkpoint layer
|
||||
stage_outputs: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
# Config that produced this checkpoint
|
||||
config_overrides: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
# Input refs (for replay)
|
||||
video_path: str = ""
|
||||
profile_name: str = ""
|
||||
# Pipeline state
|
||||
stats: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
# Scenario — a checkpoint bookmarked for the editor workflow.
|
||||
# Created by seeders (manual scripts that populate state from real footage)
|
||||
# or captured from a running pipeline. Loaded via URL:
|
||||
# /detection/?job=<job_id>#/editor/<stage>
|
||||
# Scenario bookmark
|
||||
is_scenario: bool = False
|
||||
scenario_label: str = "" # human-readable name, e.g. "chelsea_edges_lowcanny"
|
||||
scenario_label: str = ""
|
||||
|
||||
# Timestamps
|
||||
created_at: Optional[datetime] = None
|
||||
|
||||
|
||||
|
||||
64
core/schema/models/stages.py
Normal file
64
core/schema/models/stages.py
Normal file
@@ -0,0 +1,64 @@
|
||||
"""
|
||||
Stage Schema Definitions
|
||||
|
||||
Source of truth for pipeline stage metadata.
|
||||
Generates: Pydantic, TypeScript via modelgen.
|
||||
|
||||
Each stage is defined by its config fields. The implementation
|
||||
lives in detect/stages/<name>.py as a Stage subclass.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, List, Optional
|
||||
|
||||
|
||||
@dataclass
class StageConfigField:
    """A single tunable config parameter for the editor UI."""
    # Required fields come first: name, type and default have no defaults.
    name: str
    type: str  # "float", "int", "str", "bool"
    default: Any
    description: str = ""
    # Optional numeric bounds; None when not set.
    min: Optional[float] = None
    max: Optional[float] = None
    # Optional list of strings; presumably the allowed choices for the field — confirm against the editor UI.
    options: Optional[List[str]] = None
|
||||
|
||||
|
||||
@dataclass
class StageIO:
    """Declares what a stage reads and writes."""
    # Each list uses default_factory so instances never share a list object.
    reads: List[str] = field(default_factory=list)
    writes: List[str] = field(default_factory=list)
    optional_reads: List[str] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class StageDefinition:
    """
    Complete metadata for a pipeline stage.

    Lives in schema as the source of truth. Each stage implementation
    references a StageDefinition. The editor, graph, and checkpoint
    system all consume this.
    """
    # name, label and description are required; everything else has a default.
    name: str
    label: str
    description: str
    category: str = "detection"
    # A fresh StageIO / empty list is created per instance via default_factory.
    io: StageIO = field(default_factory=StageIO)
    config_fields: List[StageConfigField] = field(default_factory=list)

    # Legacy fields — used by old registry pattern during migration.
    # New stages use Stage subclass instead.
    # Typed as Any and defaulting to None; presumably callables — confirm with the old registry.
    fn: Any = None
    serialize_fn: Any = None
    deserialize_fn: Any = None
|
||||
|
||||
|
||||
# --- Export for modelgen ---
|
||||
|
||||
# Stage schema dataclasses exported from this module for modelgen.
STAGE_VIEWS = [
    StageConfigField,
    StageIO,
    StageDefinition,
]
|
||||
Reference in New Issue
Block a user