commit aac27b8504
parent 4220b0418e
2026-03-30 09:53:10 -03:00
32 changed files with 1068 additions and 329 deletions

View File

@@ -28,6 +28,7 @@ from .grpc import (
 from .job import Job, JobStatus, RunType
 from .timeline import Timeline
 from .checkpoint import Checkpoint
+from .stage_output import StageOutput
 from .brand import BrandSource, Brand
 from .media import AssetStatus, MediaAsset
 from .profile import Profile
@@ -41,7 +42,7 @@ from .source import ChunkInfo, SourceJob, SourceType
 # Core domain models - generates SQLModel, TypeScript
 DATACLASSES = [MediaAsset, TranscodePreset,
-               Job, Timeline, Checkpoint, Brand, Profile]
+               Job, Timeline, Checkpoint, StageOutput, Brand, Profile]
 # API request/response models
 API_MODELS = [

View File

@@ -11,25 +11,24 @@ class Checkpoint:
     """
     A snapshot of pipeline state on a timeline.
-    Stage outputs stored as JSONB — each stage serializes to JSON,
-    the checkpoint stores it without knowing the shape.
     parent_id forms a tree: multiple children from the same parent
     = different config tries from the same starting point.
+    Stage outputs are stored separately in the StageOutput table,
+    not carried in the checkpoint itself.
     """
     id: UUID
     timeline_id: UUID
-    job_id: Optional[UUID] = None  # which job created this checkpoint
-    parent_id: Optional[UUID] = None  # null = root checkpoint
-    # Stage outputs — JSONB per stage, opaque to the checkpoint layer
-    stage_outputs: Dict[str, Any] = field(default_factory=dict)
+    job_id: Optional[UUID] = None
+    parent_id: Optional[UUID] = None
     stage_name: str = ""  # which stage produced this checkpoint
     # Config that produced this checkpoint
     config_overrides: Dict[str, Any] = field(default_factory=dict)
-    # Pipeline state
+    # Pipeline stats at this point
     stats: Dict[str, Any] = field(default_factory=dict)
     # Scenario bookmark
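With stage outputs moved out of the checkpoint row, resuming from a checkpoint
means joining against StageOutput instead of reading a JSONB blob on the
checkpoint. A minimal read-path sketch, assuming a generated StageOutputRow
table and a SQLAlchemy-style session (both names are illustrative, not from
this commit):

    from typing import Any, Dict
    from uuid import UUID

    def load_checkpoint_outputs(session, checkpoint_id: UUID) -> Dict[str, Any]:
        """Collect {stage_name: output} for one checkpoint (sketch)."""
        rows = (
            session.query(StageOutputRow)  # assumed generated table
            .filter(StageOutputRow.checkpoint_id == checkpoint_id)
            .all()
        )
        return {row.stage_name: row.output for row in rows}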

View File

@@ -38,7 +38,7 @@ class Job:
     video_path: str
     profile_name: str = "soccer_broadcast"
-    # Timeline — set after frame extraction, or upfront for replay jobs
+    # Timeline — set at job creation (timeline exists before any job)
     timeline_id: Optional[UUID] = None
     # Lineage

View File

@@ -0,0 +1,27 @@
+"""StageOutput schema — per-stage result storage."""
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import Any, Dict, Optional
+from uuid import UUID
+
+
+@dataclass
+class StageOutput:
+    """
+    Output of a single stage within a job.
+    Flat table with a composite unique key on (job_id, stage_name).
+    Upserted on each stage completion. Independently queryable —
+    "give me all edge detection outputs for this timeline."
+    """
+    id: UUID
+    job_id: UUID
+    timeline_id: UUID
+    stage_name: str
+    checkpoint_id: Optional[UUID] = None
+    output: Dict[str, Any] = field(default_factory=dict)
+    created_at: Optional[datetime] = None
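The composite unique key makes the write path a plain upsert. A sketch of what
that can look like with SQLAlchemy's PostgreSQL insert helper; the table object
and session are assumptions here, since the commit only defines the dataclass
and the SQLModel table is generated elsewhere:

    from sqlalchemy.dialects.postgresql import insert

    def upsert_stage_output(session, table, row: dict) -> None:
        """Insert-or-update one stage result (sketch, assumed table)."""
        stmt = insert(table).values(**row)
        stmt = stmt.on_conflict_do_update(
            index_elements=["job_id", "stage_name"],  # the composite unique key
            set_={
                "output": stmt.excluded.output,  # latest run wins
                "checkpoint_id": stmt.excluded.checkpoint_id,
            },
        )
        session.execute(stmt)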

View File

@@ -1,4 +1,4 @@
-"""Timeline schema — source of truth for frame sequences."""
+"""Timeline schema — source of truth for source material sequences."""
 from dataclasses import dataclass, field
 from datetime import datetime
@@ -9,21 +9,27 @@ from uuid import UUID
 @dataclass
 class Timeline:
     """
-    The frame sequence from a source video.
-    Independent of stages — exists before any stage runs.
-    Frames stored in MinIO as JPEGs, metadata here.
-    One timeline per job.
+    A user-created selection of source material.
+    Exists before any job runs. Holds source references (chunk paths,
+    asset IDs) and extraction config.
+    Frame cache: extracted frames live at media/timelines/{id}/frames/
+    as JPEGs. Any job on this timeline reads from the cache. Cache is
+    rebuildable from chunks (clear + re-extract). For ephemeral sources
+    (streams), the cache is the only record.
+    Many jobs can work on the same timeline.
     """
     id: UUID
     name: str = ""
     source_asset_id: Optional[UUID] = None
-    source_video: str = ""
+    chunk_paths: List[str] = field(default_factory=list)
     profile_name: str = ""
+    status: str = "created"  # created | cached | ready
     fps: float = 2.0
     frames_prefix: str = ""  # s3: timeline/{id}/frames/
-    frames_manifest: Dict[int, str] = field(default_factory=dict)  # seq → s3 key
+    frames_meta: List[Dict[str, Any]] = field(default_factory=list)
     frame_count: int = 0
+    source_ephemeral: bool = False  # True for streams — cache can't be rebuilt
     created_at: Optional[datetime] = None
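The docstring fixes the cache layout, so frame object keys are derivable from
the timeline id alone. A minimal sketch of that convention (helper name and
zero-padding are illustrative, not from this commit):

    from uuid import UUID

    def frame_key(timeline_id: UUID, seq: int) -> str:
        """Object key for one cached frame, under the media bucket (sketch)."""
        return f"timelines/{timeline_id}/frames/{seq:06d}.jpg"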

View File

@@ -2,7 +2,7 @@
 Serializers for detection pipeline runtime models.
 Special handling:
-- Frame.image (np.ndarray → S3, excluded from JSON)
+- Frame.image (np.ndarray, ephemeral — only metadata serialized)
 - TextCandidate.frame (object ref → frame_sequence integer)
 Everything else uses dataclasses.asdict() via safe_construct.
 """
@@ -24,7 +24,7 @@ from ._common import safe_construct, serialize_dataclass, serialize_dataclass_li
 # ---------------------------------------------------------------------------
-# Frame — image goes to S3 separately
+# Frame — metadata only (image is ephemeral, re-extracted from chunks)
 # ---------------------------------------------------------------------------


 def serialize_frame_meta(frame: Frame) -> dict:
@@ -34,21 +34,9 @@ def serialize_frame_meta(frame: Frame) -> dict:
     return result


-def serialize_frames_with_upload(frames: list[Frame], job_id: str) -> tuple[list[dict], dict[int, str]]:
-    """Upload frame images to S3, return metadata + manifest."""
-    from core.detect.checkpoint.frames import save_frames
-    manifest = save_frames(job_id, frames)
-    meta = [serialize_frame_meta(f) for f in frames]
-    return meta, manifest
-
-
-def deserialize_frames_with_download(meta: list[dict], manifest: dict, job_id: str) -> list[Frame]:
-    """Load frames from S3 + metadata."""
-    from core.detect.checkpoint.frames import load_frames
-    int_manifest = {int(k): v for k, v in manifest.items()}
-    return load_frames(int_manifest, meta)
+def serialize_frames_meta(frames: list[Frame]) -> list[dict]:
+    """Serialize frame metadata for all frames."""
+    return [serialize_frame_meta(f) for f in frames]


 # ---------------------------------------------------------------------------
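With the upload/download pair gone, persisting frames reduces to metadata
only. A sketch of the intended round trip under the new model; the storage
call and the re-extraction step are assumed names, not functions from this
commit:

    # Persist: JSON-safe metadata, no np.ndarray payloads.
    meta = serialize_frames_meta(frames)
    store_stage_output(job_id, "frames", {"frames": meta})  # assumed helper

    # Resume: pixel data is rebuilt from the timeline's chunk cache,
    # then re-associated with the stored metadata.
    frames = extract_frames(timeline.chunk_paths, fps=timeline.fps)  # assumed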