This commit is contained in:
2026-03-30 09:53:10 -03:00
parent 4220b0418e
commit aac27b8504
32 changed files with 1068 additions and 329 deletions

View File

@@ -127,23 +127,24 @@ class Job(models.Model):
class Timeline(models.Model):
"""The frame sequence from a source video."""
"""A user-created selection of source material."""
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
name = models.CharField(max_length=255)
source_asset_id = models.UUIDField(null=True, blank=True)
source_video = models.CharField(max_length=255)
chunk_paths = models.JSONField(default=list, blank=True)
profile_name = models.CharField(max_length=255)
status = models.CharField(max_length=255)
fps = models.FloatField(default=2.0)
frames_prefix = models.CharField(max_length=255)
frames_manifest = models.JSONField(default=dict, blank=True)
frames_meta = models.JSONField(default=list, blank=True)
frame_count = models.IntegerField(default=0)
source_ephemeral = models.BooleanField(default=False)
created_at = models.DateTimeField(auto_now_add=True)
class Meta:
ordering = ["-created_at"]
def __str__(self):
return str(self.id)
return self.name
class Checkpoint(models.Model):
@@ -153,7 +154,7 @@ class Checkpoint(models.Model):
timeline_id = models.UUIDField()
job_id = models.UUIDField(null=True, blank=True)
parent_id = models.UUIDField(null=True, blank=True)
stage_outputs = models.JSONField(default=dict, blank=True)
stage_name = models.CharField(max_length=255)
config_overrides = models.JSONField(default=dict, blank=True)
stats = models.JSONField(default=dict, blank=True)
is_scenario = models.BooleanField(default=False)
@@ -167,6 +168,24 @@ class Checkpoint(models.Model):
return str(self.id)
class StageOutput(models.Model):
    """Output of a single stage within a job."""
    # Job, timeline and checkpoint are referenced through plain UUID columns
    # (no ForeignKey fields are declared on this model).
    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    job_id = models.UUIDField()
    timeline_id = models.UUIDField()
    stage_name = models.CharField(max_length=255)
    # Optional: the column is nullable.
    checkpoint_id = models.UUIDField(null=True, blank=True)
    # JSON payload; defaults to an empty dict.
    output = models.JSONField(default=dict, blank=True)
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        # Default query ordering: newest rows first.
        ordering = ["-created_at"]

    def __str__(self):
        return str(self.id)
class Brand(models.Model):
"""A brand discovered or registered in the system."""

View File

@@ -11,6 +11,7 @@ from .run import router as run_router
from .sse import router as sse_router
from .replay import router as replay_router
from .config import router as config_router
from .timeline import router as timeline_router
router = APIRouter()
router.include_router(sources_router)
@@ -18,3 +19,4 @@ router.include_router(run_router)
router.include_router(sse_router)
router.include_router(replay_router)
router.include_router(config_router)
router.include_router(timeline_router)

View File

@@ -137,12 +137,15 @@ class CheckpointData(BaseModel):
@router.get("/checkpoints/{timeline_id}/{stage}", response_model=CheckpointData)
def get_checkpoint_data(timeline_id: str, stage: str):
"""Load checkpoint frames + metadata for the editor UI."""
"""Load checkpoint frames + metadata for the editor UI.
Reads from the timeline's frame cache (blob storage, S3/MinIO).
"""
from uuid import UUID
from core.db.models import Timeline, Checkpoint
from core.db.connection import get_session
from core.db.checkpoint import list_checkpoints
from core.detect.checkpoint.frames import load_frames_b64
from core.detect.checkpoint.frames import load_cached_frames_b64
with get_session() as session:
timeline = session.get(Timeline, UUID(timeline_id))
@@ -152,16 +155,14 @@ def get_checkpoint_data(timeline_id: str, stage: str):
checkpoints = list_checkpoints(session, UUID(timeline_id))
if not checkpoints:
raise HTTPException(status_code=404, detail=f"No checkpoints for timeline {timeline_id}")
# Prefer a checkpoint that has this stage's output; fall back to latest
# Prefer a checkpoint for this stage; fall back to latest
checkpoint = next(
(c for c in reversed(checkpoints) if stage in (c.stage_outputs or {})),
(c for c in reversed(checkpoints) if c.stage_name == stage),
checkpoints[-1],
)
raw_manifest = timeline.frames_manifest or {}
manifest = {int(k): v for k, v in raw_manifest.items()}
frames_b64 = load_frames_b64(manifest, timeline.frames_meta or [])
# Read from timeline's frame cache
frames_b64 = load_cached_frames_b64(timeline_id)
frame_list = [
CheckpointFrameInfo(seq=f["seq"], timestamp=f["timestamp"], jpeg_b64=f["jpeg_b64"])
for f in frames_b64
@@ -171,7 +172,7 @@ def get_checkpoint_data(timeline_id: str, stage: str):
timeline_id=timeline_id,
stage=stage,
profile_name=timeline.profile_name,
video_path=timeline.source_video,
video_path=timeline.chunk_paths[0] if timeline.chunk_paths else "",
is_scenario=checkpoint.is_scenario,
scenario_label=checkpoint.scenario_label,
frames=frame_list,
@@ -195,14 +196,12 @@ def list_scenarios_endpoint():
timeline = session.get(Timeline, s.timeline_id)
if not timeline:
continue
last_stage = next(reversed(s.stage_outputs), "") if s.stage_outputs else ""
info = ScenarioInfo(
timeline_id=str(s.timeline_id),
stage=last_stage,
stage=s.stage_name,
scenario_label=s.scenario_label,
profile_name=timeline.profile_name,
video_path=timeline.source_video,
frame_count=len(timeline.frames_manifest or {}),
video_path=timeline.chunk_paths[0] if timeline.chunk_paths else "",
created_at=str(s.created_at) if s.created_at else "",
)
result.append(info)

View File

@@ -1,7 +1,7 @@
"""
Pipeline run endpoints.
POST /detect/run — launch pipeline on selected source
POST /detect/run — launch pipeline on a timeline
POST /detect/stop/{job_id} — cancel a running pipeline
POST /detect/pause/{job_id} — pause after current stage
POST /detect/resume/{job_id} — resume a paused pipeline
@@ -30,20 +30,20 @@ _cancelled_jobs: set[str] = set()
class RunRequest(BaseModel):
video_path: str # storage key
timeline_id: str
profile_name: str = "soccer_broadcast"
source_asset_id: str = ""
checkpoint: bool = True
skip_vlm: bool = False
skip_cloud: bool = False
log_level: str = "INFO" # INFO | DEBUG
pause_after_stage: bool = False
config_overrides: dict | None = None
class RunResponse(BaseModel):
status: str
job_id: str
video_path: str
timeline_id: str
def _resolve_video_path(video_path: str) -> str:
@@ -59,13 +59,41 @@ def _resolve_video_path(video_path: str) -> str:
@router.post("/run", response_model=RunResponse)
def run_pipeline(req: RunRequest):
"""Launch a detection pipeline run on a source chunk."""
"""Launch a detection pipeline run on a timeline."""
from core.detect import emit
from core.detect.graph import get_pipeline
from core.detect.state import DetectState
from core.detect.checkpoint.storage import get_timeline
from core.db.connection import get_session
from core.db.job import create_job, update_job_status
local_path = _resolve_video_path(req.video_path)
job_id = str(uuid.uuid4())
# Load timeline
try:
timeline = get_timeline(req.timeline_id)
except ValueError:
raise HTTPException(status_code=404, detail=f"Timeline not found: {req.timeline_id}")
chunk_paths = timeline["chunk_paths"]
if not chunk_paths:
raise HTTPException(status_code=400, detail="Timeline has no chunk paths")
# Resolve first chunk to local path for the pipeline
local_path = _resolve_video_path(chunk_paths[0])
# Create job in DB
source_asset_id_str = timeline.get("source_asset_id", "")
with get_session() as session:
from uuid import UUID as _UUID
source_asset_id = _UUID(source_asset_id_str) if source_asset_id_str else uuid.uuid4()
job = create_job(
session,
source_asset_id=source_asset_id,
video_path=chunk_paths[0],
timeline_id=_UUID(req.timeline_id),
profile_name=req.profile_name,
config_overrides=req.config_overrides,
)
job_id = str(job.id)
if req.skip_vlm:
os.environ["SKIP_VLM"] = "1"
@@ -77,7 +105,7 @@ def run_pipeline(req: RunRequest):
elif "SKIP_CLOUD" in os.environ:
del os.environ["SKIP_CLOUD"]
# Clear any stale events from a previous run with same job_id
# Clear any stale events
from core.events import _get_redis
from core.detect.events import DETECT_EVENTS_PREFIX
r = _get_redis()
@@ -94,7 +122,9 @@ def run_pipeline(req: RunRequest):
video_path=local_path,
job_id=job_id,
profile_name=req.profile_name,
source_asset_id=req.source_asset_id,
source_asset_id=source_asset_id_str or str(source_asset_id),
timeline_id=req.timeline_id,
config_overrides=req.config_overrides or {},
)
from core.detect.graph import (
@@ -105,18 +135,29 @@ def run_pipeline(req: RunRequest):
set_cancel_check(job_id, lambda: job_id in _cancelled_jobs)
init_pause(job_id, pause_after_stage=req.pause_after_stage)
def _update_job(status, stage=None, error=None):
from core.db.connection import get_session
from core.db.job import update_job_status
with get_session() as session:
update_job_status(session, _UUID(job_id), status,
current_stage=stage, error_message=error)
def _run():
try:
_update_job("running")
emit.log(job_id, "Pipeline", "INFO",
f"Starting pipeline: {req.video_path} (profile={req.profile_name})")
f"Starting pipeline: {chunk_paths[0]} (profile={req.profile_name})")
pipeline.invoke(initial_state)
_update_job("completed")
emit.log(job_id, "Pipeline", "INFO", "Pipeline completed successfully")
emit.job_complete(job_id, {"status": "completed"})
except PipelineCancelled:
_update_job("cancelled")
emit.log(job_id, "Pipeline", "INFO", "Pipeline cancelled")
emit.job_complete(job_id, {"status": "cancelled"})
except Exception as e:
logger.exception("Pipeline run %s failed: %s", job_id, e)
_update_job("failed", error=str(e))
from core.detect.graph import _node_states, NODES
if job_id in _node_states:
states = _node_states[job_id]
@@ -134,12 +175,14 @@ def run_pipeline(req: RunRequest):
clear_cancel_check(job_id)
clear_pause(job_id)
emit.clear_run_context()
from core.detect.checkpoint.runner_bridge import reset_checkpoint_state
reset_checkpoint_state(job_id)
thread = threading.Thread(target=_run, daemon=True, name=f"pipeline-{job_id}")
_running_jobs[job_id] = thread
thread.start()
return RunResponse(status="started", job_id=job_id, video_path=req.video_path)
return RunResponse(status="started", job_id=job_id, timeline_id=req.timeline_id)
@router.post("/stop/{job_id}")
@@ -224,18 +267,6 @@ def pipeline_status(job_id: str):
return {"status": status, "job_id": job_id}
@router.get("/timeline/{job_id}")
def get_timeline_for_job(job_id: str):
"""Get the timeline_id for a running or completed job."""
from core.detect.checkpoint.runner_bridge import get_timeline_id
tid = get_timeline_id(job_id)
if tid is None:
raise HTTPException(status_code=404, detail=f"No timeline for job: {job_id}")
return {"timeline_id": tid, "job_id": job_id}
@router.post("/clear/{job_id}")
def clear_pipeline(job_id: str):
"""Clear events for a job from Redis."""

226
core/api/detect/timeline.py Normal file
View File

@@ -0,0 +1,226 @@
"""
Timeline + Job management endpoints.
POST /detect/timeline — create timeline from chunk selection
GET /detect/timeline — list timelines
GET /detect/timeline/{id} — timeline detail
DELETE /detect/timeline/{id}/cache — clear frame cache
GET /detect/jobs — list jobs (optionally by timeline)
GET /detect/jobs/{id} — job detail + checkpoints + stage outputs
"""
from __future__ import annotations
import logging
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/detect", tags=["detect"])
# --- Request/Response models ---
class CreateTimelineRequest(BaseModel):
    # Request body for POST /detect/timeline.
    # Only chunk_paths is required; every other field has a default.
    chunk_paths: list[str]
    profile_name: str = "soccer_broadcast"
    name: str = ""
    # UUID in string form; an empty string is passed on to storage as None.
    source_asset_id: str = ""
    fps: float = 2.0
class TimelineResponse(BaseModel):
    # Response shape shared by the create, list and detail timeline endpoints.
    id: str
    name: str
    chunk_paths: list[str]
    profile_name: str
    status: str
    fps: float
    frame_count: int
    source_ephemeral: bool
    # Stringified timestamp; the list endpoint sends None when the row has none.
    created_at: str | None = None
class JobResponse(BaseModel):
    # Job summary returned by the /detect/jobs endpoints.
    # UUID and timestamp values are carried as strings (see _job_to_response).
    id: str
    timeline_id: str | None
    source_asset_id: str
    video_path: str
    profile_name: str
    run_type: str
    status: str
    current_stage: str | None
    config_overrides: dict
    error_message: str | None
    created_at: str | None
    started_at: str | None
    completed_at: str | None
class JobDetailResponse(JobResponse):
    # JobResponse plus the checkpoint and stage-output data loaded for the job.
    checkpoints: list[dict]
    # presumably keyed by stage name — confirm against load_stage_outputs_for_job
    stage_outputs: dict[str, dict]
# --- Timeline endpoints ---
@router.post("/timeline", response_model=TimelineResponse)
def create_timeline_endpoint(req: CreateTimelineRequest):
    """Create a timeline from a chunk selection.

    Args:
        req: Chunk paths plus optional profile, name, source asset id and fps.

    Returns:
        The stored timeline, reported with ``frame_count=0`` and
        ``source_ephemeral=False``.

    Raises:
        HTTPException: 400 when ``source_asset_id`` is non-empty but is not
            a valid UUID string.
    """
    from uuid import UUID
    from core.detect.checkpoint.storage import create_timeline, get_timeline

    source_asset_id = None
    if req.source_asset_id:
        try:
            source_asset_id = UUID(req.source_asset_id)
        except ValueError:
            # Bad client input must surface as a 4xx, not an unhandled 500.
            raise HTTPException(
                status_code=400,
                detail=f"Invalid source_asset_id: {req.source_asset_id}",
            ) from None

    tid = create_timeline(
        chunk_paths=req.chunk_paths,
        profile_name=req.profile_name,
        name=req.name,
        source_asset_id=source_asset_id,
        fps=req.fps,
    )

    # Re-read the timeline so the response carries the stored values.
    tl = get_timeline(tid)
    return TimelineResponse(
        id=tl["id"],
        name=tl["name"],
        chunk_paths=tl["chunk_paths"],
        profile_name=tl["profile_name"],
        status=tl["status"],
        fps=tl["fps"],
        frame_count=0,
        source_ephemeral=False,
        created_at=tl["created_at"],
    )
@router.get("/timeline", response_model=list[TimelineResponse])
def list_timelines():
    """Return every timeline, most recently created first."""
    from sqlmodel import select
    from core.db.models import Timeline
    from core.db.connection import get_session

    newest_first = select(Timeline).order_by(Timeline.created_at.desc())

    with get_session() as session:
        responses = []
        for row in session.exec(newest_first).all():
            # Timestamps are sent as strings; a missing one stays None.
            created = str(row.created_at) if row.created_at else None
            responses.append(
                TimelineResponse(
                    id=str(row.id),
                    name=row.name,
                    chunk_paths=row.chunk_paths or [],
                    profile_name=row.profile_name,
                    status=row.status,
                    fps=row.fps,
                    frame_count=row.frame_count,
                    source_ephemeral=row.source_ephemeral,
                    created_at=created,
                )
            )
        return responses
@router.get("/timeline/{timeline_id}", response_model=TimelineResponse)
def get_timeline_endpoint(timeline_id: str):
    """Get timeline detail.

    Args:
        timeline_id: Timeline UUID in string form.

    Returns:
        The timeline's stored fields plus ``frame_count`` and
        ``source_ephemeral`` read from the ``Timeline`` row (0 / False when
        the row is missing).

    Raises:
        HTTPException: 404 when the id is malformed or the timeline lookup
            raises ``ValueError``.
    """
    from uuid import UUID
    from core.db.connection import get_session
    from core.db.models import Timeline
    from core.detect.checkpoint.storage import get_timeline

    try:
        # Parse inside the try so a malformed id yields 404 rather than a 500.
        timeline_uuid = UUID(timeline_id)
        tl = get_timeline(timeline_id)
    except ValueError:
        raise HTTPException(
            status_code=404, detail=f"Timeline not found: {timeline_id}"
        ) from None

    with get_session() as session:
        timeline = session.get(Timeline, timeline_uuid)
        # Read the attributes while the session is still open.
        frame_count = timeline.frame_count if timeline else 0
        source_ephemeral = timeline.source_ephemeral if timeline else False

    return TimelineResponse(
        id=tl["id"],
        name=tl["name"],
        chunk_paths=tl["chunk_paths"],
        profile_name=tl["profile_name"],
        status=tl["status"],
        fps=tl["fps"],
        frame_count=frame_count,
        source_ephemeral=source_ephemeral,
        created_at=tl["created_at"],
    )
@router.delete("/timeline/{timeline_id}/cache")
def clear_timeline_cache(timeline_id: str):
    """Clear the frame cache for a timeline and set its status to "created".

    Args:
        timeline_id: Timeline UUID in string form.

    Returns:
        ``{"status": "cleared", "timeline_id": <id>}``.

    Raises:
        HTTPException: 404 when ``timeline_id`` is not a valid UUID string.
    """
    from uuid import UUID
    from core.detect.checkpoint.frames import clear_cache
    from core.detect.checkpoint.storage import update_timeline_status

    try:
        # The id becomes part of a storage delete prefix, so only accept
        # UUID-shaped values from the request path.
        UUID(timeline_id)
    except ValueError:
        raise HTTPException(
            status_code=404, detail=f"Timeline not found: {timeline_id}"
        ) from None

    clear_cache(timeline_id)
    update_timeline_status(timeline_id, "created")
    return {"status": "cleared", "timeline_id": timeline_id}
# --- Job endpoints ---
def _job_to_response(job) -> JobResponse:
    """Map a job record onto the JobResponse wire model.

    UUID and timestamp attributes are stringified; falsy optional values
    become None and a falsy ``config_overrides`` becomes an empty dict.
    """

    def _text_or_none(value):
        return str(value) if value else None

    overrides = job.config_overrides or {}
    return JobResponse(
        id=str(job.id),
        timeline_id=_text_or_none(job.timeline_id),
        source_asset_id=str(job.source_asset_id),
        video_path=job.video_path,
        profile_name=job.profile_name,
        run_type=job.run_type,
        status=job.status,
        current_stage=job.current_stage,
        config_overrides=overrides,
        error_message=job.error_message,
        created_at=_text_or_none(job.created_at),
        started_at=_text_or_none(job.started_at),
        completed_at=_text_or_none(job.completed_at),
    )
@router.get("/jobs", response_model=list[JobResponse])
def list_jobs_endpoint(timeline_id: str | None = Query(None)):
    """List jobs, optionally filtered by timeline.

    Args:
        timeline_id: Optional timeline UUID in string form; when omitted or
            empty, no timeline filter is applied.

    Raises:
        HTTPException: 400 when ``timeline_id`` is given but is not a valid
            UUID string.
    """
    from uuid import UUID
    from core.db.connection import get_session
    from core.db.job import list_jobs

    tid = None
    if timeline_id:
        try:
            tid = UUID(timeline_id)
        except ValueError:
            # A bad query value is a client error, not a server fault.
            raise HTTPException(
                status_code=400, detail=f"Invalid timeline_id: {timeline_id}"
            ) from None

    with get_session() as session:
        jobs = list_jobs(session, timeline_id=tid)
        # Convert while the session is open so no detached row is read.
        return [_job_to_response(j) for j in jobs]
@router.get("/jobs/{job_id}", response_model=JobDetailResponse)
def get_job_endpoint(job_id: str):
    """Get job detail with checkpoints and stage outputs.

    Args:
        job_id: Job UUID in string form.

    Raises:
        HTTPException: 404 when ``job_id`` is malformed or matches no job.
    """
    from uuid import UUID
    from core.db.connection import get_session
    from core.db.job import get_job
    from core.detect.checkpoint.storage import (
        get_checkpoints_for_job,
        load_stage_outputs_for_job,
    )

    try:
        job_uuid = UUID(job_id)
    except ValueError:
        # A malformed id cannot match any row; answer 404 instead of a 500.
        raise HTTPException(
            status_code=404, detail=f"Job not found: {job_id}"
        ) from None

    with get_session() as session:
        job = get_job(session, job_uuid)
        if not job:
            raise HTTPException(status_code=404, detail=f"Job not found: {job_id}")
        # Convert while the session is open so no detached row is read.
        base = _job_to_response(job)

    checkpoints = get_checkpoints_for_job(job_id)
    stage_outputs = load_stage_outputs_for_job(job_id)

    return JobDetailResponse(
        **base.model_dump(),
        checkpoints=checkpoints,
        stage_outputs=stage_outputs,
    )

View File

@@ -2,6 +2,7 @@
from __future__ import annotations
from datetime import datetime
from typing import Optional
from uuid import UUID
@@ -10,10 +11,70 @@ from sqlmodel import Session, select
from .models import Job
def list_jobs(session: Session, parent_id: Optional[UUID] = None, status: Optional[str] = None) -> list[Job]:
def create_job(
    session: Session,
    source_asset_id: UUID,
    video_path: str,
    timeline_id: UUID,
    profile_name: str = "soccer_broadcast",
    run_type: str = "initial",
    parent_id: UUID | None = None,
    config_overrides: dict | None = None,
) -> Job:
    """Insert a new Job row with status "pending" and return it.

    The row is committed and refreshed before being returned. A missing or
    empty ``config_overrides`` is stored as an empty dict.
    """
    overrides = config_overrides or {}
    record = Job(
        source_asset_id=source_asset_id,
        video_path=video_path,
        timeline_id=timeline_id,
        profile_name=profile_name,
        run_type=run_type,
        parent_id=parent_id,
        config_overrides=overrides,
        status="pending",
    )

    session.add(record)
    session.commit()
    session.refresh(record)
    return record
def update_job_status(
    session: Session,
    job_id: UUID,
    status: str,
    current_stage: str | None = None,
    error_message: str | None = None,
):
    """Set a job's status and commit; do nothing if the job does not exist.

    ``current_stage`` and ``error_message`` are written only when not None.
    ``started_at`` is stamped on the first move to "running";
    ``completed_at`` is stamped on "completed", "failed" or "cancelled".
    """
    record = session.get(Job, job_id)
    if not record:
        return

    record.status = status

    # Optional fields: None means "leave the stored value untouched".
    if current_stage is not None:
        record.current_stage = current_stage
    if error_message is not None:
        record.error_message = error_message

    first_start = status == "running" and not record.started_at
    if first_start:
        record.started_at = datetime.utcnow()

    finished = status in ("completed", "failed", "cancelled")
    if finished:
        record.completed_at = datetime.utcnow()

    session.commit()
def get_job(session: Session, job_id: UUID) -> Job | None:
    """Look up a Job by primary key; returns whatever ``session.get`` yields."""
    found = session.get(Job, job_id)
    return found
def list_jobs(
    session: Session,
    timeline_id: UUID | None = None,
    parent_id: UUID | None = None,
    status: str | None = None,
) -> list[Job]:
    """Return jobs, newest first, narrowed by whichever filters are truthy."""
    # Column/value pairs, applied in the same order as the parameters.
    candidates = (
        (Job.timeline_id, timeline_id),
        (Job.parent_id, parent_id),
        (Job.status, status),
    )

    query = select(Job)
    for column, wanted in candidates:
        if wanted:
            query = query.where(column == wanted)

    query = query.order_by(Job.created_at.desc())
    rows = session.exec(query).all()
    return list(rows)

View File

@@ -114,17 +114,18 @@ class Job(SQLModel, table=True):
completed_at: Optional[datetime] = None
class Timeline(SQLModel, table=True):
"""The frame sequence from a source video."""
"""A user-created selection of source material."""
__tablename__ = "timeline"
id: UUID = Field(default_factory=uuid4, primary_key=True)
name: str = ""
source_asset_id: Optional[UUID] = Field(default=None, index=True)
source_video: str = ""
chunk_paths: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
profile_name: str = ""
status: str = "created"
fps: float = 2.0
frames_prefix: str = ""
frames_manifest: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
frames_meta: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
frame_count: int = 0
source_ephemeral: bool = False
created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
class Checkpoint(SQLModel, table=True):
@@ -135,13 +136,25 @@ class Checkpoint(SQLModel, table=True):
timeline_id: UUID
job_id: Optional[UUID] = Field(default=None, index=True)
parent_id: Optional[UUID] = None
stage_outputs: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
stage_name: str = ""
config_overrides: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
stats: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
is_scenario: bool = False
scenario_label: str = ""
created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
class StageOutput(SQLModel, table=True):
    """Output of a single stage within a job."""
    __tablename__ = "stage_output"

    id: UUID = Field(default_factory=uuid4, primary_key=True)
    # Indexed column.
    job_id: UUID = Field(index=True)
    timeline_id: UUID
    stage_name: str
    # Optional; defaults to None.
    checkpoint_id: Optional[UUID] = None
    # JSON column, non-null, with '{}' as the server-side default.
    output: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
    # Filled by datetime.utcnow at construction time (naive timestamp).
    created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
class Brand(SQLModel, table=True):
"""A brand discovered or registered in the system."""
__tablename__ = "brand"

View File

@@ -1,19 +1,31 @@
"""
Checkpoint system — Timeline + Checkpoint tree.
Checkpoint system — Timeline + Checkpoint tree + StageOutput.
detect/checkpoint/
frames.py — frame image S3 upload/download
storage.py — Timeline + Checkpoint (Postgres + MinIO)
replay.py — replay (TODO: migrate to new model)
frames.py — per-timeline frame cache (blob storage, S3/MinIO)
storage.py — Timeline, Checkpoint, StageOutput persistence
replay.py — replay from checkpoint (TODO: rework in 5d)
runner_bridge.py — checkpoint hook for PipelineRunner
"""
from .storage import (
create_timeline,
get_timeline_frames,
get_timeline_frames_b64,
get_timeline,
update_timeline_status,
save_checkpoint,
get_checkpoints_for_job,
get_checkpoints_for_timeline,
save_stage_output,
load_stage_output,
load_stage_outputs_for_job,
load_stage_outputs_for_timeline,
)
from .frames import save_frames, load_frames
from .runner_bridge import checkpoint_after_stage, reset_checkpoint_state, get_timeline_id
from .frames import (
cache_exists,
cache_frames,
load_cached_frames,
load_cached_frames_b64,
clear_cache,
frames_to_b64,
)
from .runner_bridge import checkpoint_after_stage, reset_checkpoint_state, get_latest_checkpoint

View File

@@ -1,7 +1,19 @@
"""Frame image storage — save/load to S3/MinIO as JPEGs."""
"""
Frame cache — per-timeline frame storage in blob storage (S3/MinIO).
Frames are extracted from chunks once, cached as JPEGs at
cache/timelines/{timeline_id}/frames/{seq}.jpg in the app's
blob storage. Any job on the timeline reads from the cache.
Cache is clearable and rebuildable from chunks.
Uses the same storage backend as the rest of the app, so it
works across lambdas, GPU boxes, and local dev.
"""
from __future__ import annotations
import base64
import io
import logging
import os
import tempfile
@@ -14,25 +26,39 @@ from core.detect.models import Frame
logger = logging.getLogger(__name__)
BUCKET = os.environ.get("S3_BUCKET", "mpr")
CHECKPOINT_PREFIX = "checkpoints"
CACHE_PREFIX = "cache/timelines"
def save_frames(job_id: str, frames: list[Frame]) -> dict[int, str]:
def _frame_key(timeline_id: str, seq: int) -> str:
    """Build the storage key of one cached frame JPEG."""
    frames_dir = _list_prefix(timeline_id)
    return frames_dir + f"{seq}.jpg"
def _list_prefix(timeline_id: str) -> str:
    """Build the key prefix under which a timeline's frames are stored."""
    return "{}/{}/frames/".format(CACHE_PREFIX, timeline_id)
def cache_exists(timeline_id: str) -> bool:
    """Report whether any object is stored under the timeline's frame prefix."""
    from core.storage.s3 import list_objects

    cached = list_objects(BUCKET, _list_prefix(timeline_id))
    return len(cached) != 0
def cache_frames(timeline_id: str, frames: list[Frame], quality: int = 85) -> int:
"""
Save frame images to S3 as JPEGs.
Write frames to blob storage as JPEGs.
Returns manifest: {sequence: s3_key}
Returns number of frames cached.
"""
from core.storage.s3 import upload_file
manifest = {}
for frame in frames:
key = f"{CHECKPOINT_PREFIX}/{job_id}/frames/{frame.sequence}.jpg"
key = _frame_key(timeline_id, frame.sequence)
with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
img = Image.fromarray(frame.image)
img.save(tmp, format="JPEG", quality=85)
img.save(tmp, format="JPEG", quality=quality)
tmp_path = tmp.name
try:
@@ -40,25 +66,30 @@ def save_frames(job_id: str, frames: list[Frame]) -> dict[int, str]:
finally:
os.unlink(tmp_path)
manifest[frame.sequence] = key
logger.info("Saved %d frames to s3://%s/%s/%s/frames/",
len(frames), BUCKET, CHECKPOINT_PREFIX, job_id)
return manifest
logger.info("Cached %d frames for timeline %s", len(frames), timeline_id)
return len(frames)
def load_frames(manifest: dict[int, str], frame_metadata: list[dict]) -> list[Frame]:
def load_cached_frames(timeline_id: str) -> list[Frame]:
"""
Load frame images from S3 and reconstitute Frame objects.
Load all cached frames as Frame objects with numpy arrays.
frame_metadata: list of dicts with sequence, chunk_id, timestamp, perceptual_hash.
Returns empty list if cache doesn't exist.
"""
from core.storage.s3 import download_to_temp
from core.storage.s3 import list_objects, download_to_temp
objects = list_objects(BUCKET, _list_prefix(timeline_id))
if not objects:
return []
meta_map = {m["sequence"]: m for m in frame_metadata}
frames = []
for obj in objects:
key = obj["key"]
filename = key.rsplit("/", 1)[-1]
if not filename.endswith(".jpg"):
continue
seq = int(filename.replace(".jpg", ""))
for seq, key in manifest.items():
tmp_path = download_to_temp(BUCKET, key)
try:
img = Image.open(tmp_path).convert("RGB")
@@ -66,13 +97,12 @@ def load_frames(manifest: dict[int, str], frame_metadata: list[dict]) -> list[Fr
finally:
os.unlink(tmp_path)
meta = meta_map.get(seq, {})
frame = Frame(
sequence=seq,
chunk_id=meta.get("chunk_id", 0),
timestamp=meta.get("timestamp", 0.0),
chunk_id=0,
timestamp=0.0,
image=image_array,
perceptual_hash=meta.get("perceptual_hash", ""),
perceptual_hash="",
)
frames.append(frame)
@@ -80,32 +110,70 @@ def load_frames(manifest: dict[int, str], frame_metadata: list[dict]) -> list[Fr
return frames
def load_frames_b64(manifest: dict[int, str], frame_metadata: list[dict]) -> list[dict]:
def load_cached_frames_b64(timeline_id: str) -> list[dict]:
"""
Load frame images from S3 as base64 JPEG — lightweight, no numpy.
Load cached frames as base64 JPEGs for the UI.
Returns list of dicts: {seq, timestamp, jpeg_b64}
Returns list of {seq, timestamp, jpeg_b64}.
"""
import base64
from core.storage.s3 import download_to_temp
from core.storage.s3 import list_objects, download_to_temp
meta_map = {m["sequence"]: m for m in frame_metadata}
frames = []
objects = list_objects(BUCKET, _list_prefix(timeline_id))
if not objects:
return []
result = []
for obj in objects:
key = obj["key"]
filename = key.rsplit("/", 1)[-1]
if not filename.endswith(".jpg"):
continue
seq = int(filename.replace(".jpg", ""))
for seq, key in manifest.items():
tmp_path = download_to_temp(BUCKET, key)
try:
with open(tmp_path, "rb") as f:
jpeg_bytes = f.read()
jpeg_b64 = base64.b64encode(f.read()).decode()
finally:
os.unlink(tmp_path)
meta = meta_map.get(seq, {})
frames.append({
result.append({
"seq": seq,
"timestamp": meta.get("timestamp", 0.0),
"jpeg_b64": base64.b64encode(jpeg_bytes).decode(),
"timestamp": 0.0,
"jpeg_b64": jpeg_b64,
})
frames.sort(key=lambda f: f["seq"])
return frames
result.sort(key=lambda f: f["seq"])
return result
def clear_cache(timeline_id: str):
    """Remove the objects under a timeline's frame prefix and log it."""
    from core.storage.s3 import delete_objects

    delete_objects(BUCKET, _list_prefix(timeline_id))
    logger.info("Cleared frame cache for timeline %s", timeline_id)
def frames_to_b64(frames: list[Frame], quality: int = 75) -> list[dict]:
    """
    Encode in-memory Frame objects as base64 JPEG dicts.

    Each entry is {seq, timestamp, jpeg_b64}; the list is sorted by seq.
    Intended for API responses when frames are already loaded.
    """

    def _encode(frame) -> dict:
        # Render the frame's image to JPEG bytes in memory, then base64 them.
        buffer = io.BytesIO()
        Image.fromarray(frame.image).save(buffer, format="JPEG", quality=quality)
        payload = base64.b64encode(buffer.getvalue()).decode()
        return {
            "seq": frame.sequence,
            "timestamp": frame.timestamp,
            "jpeg_b64": payload,
        }

    encoded = [_encode(frame) for frame in frames]
    return sorted(encoded, key=lambda item: item["seq"])

View File

@@ -1,13 +1,9 @@
"""
Runner bridge — checkpoint hook called by PipelineRunner after each stage.
Owns the per-job state (timeline, frame manifest, checkpoint chain) that
the runner shouldn't know about.
Timeline and Job are independent entities:
- One Timeline can serve multiple Jobs (re-run with different params)
- One Job operates on one Timeline (set after frame extraction)
- Checkpoints belong to Timeline, tagged with the Job that created them
Saves a checkpoint + stage output after each stage completes.
Timeline and Job are independent: timeline_id and job_id come from
the pipeline state (set at job creation time).
"""
from __future__ import annotations
@@ -16,63 +12,37 @@ import logging
logger = logging.getLogger(__name__)
# Per-job state
_timeline_id: dict[str, str] = {}
_frames_manifest: dict[str, dict[int, str]] = {}
# Per-job state: tracks the latest checkpoint so we can chain parent → child
_latest_checkpoint: dict[str, str] = {}
def reset_checkpoint_state(job_id: str):
"""Clean up per-job checkpoint state. Called when pipeline finishes."""
_timeline_id.pop(job_id, None)
_frames_manifest.pop(job_id, None)
_latest_checkpoint.pop(job_id, None)
def checkpoint_after_stage(job_id: str, stage_name: str, state: dict, result: dict):
"""
Save a checkpoint after a stage completes.
Save a checkpoint + stage output after a stage completes.
Called by the runner. Handles:
- Timeline creation (once, on extract_frames)
- Frame upload (via create_timeline)
- Stage output serialization (via stage registry)
- Checkpoint chain (parent → child)
- Stage output as separate row in StageOutput table
"""
if not job_id:
return
from .storage import create_timeline, save_stage_output
timeline_id = state.get("timeline_id", "")
if not timeline_id:
logger.warning("No timeline_id in state for job %s, skipping checkpoint", job_id)
return
from .storage import save_checkpoint, save_stage_output
from core.detect.stages.base import _REGISTRY
merged = {**state, **result}
# On extract_frames: create Timeline + upload frames + root checkpoint
if stage_name == "extract_frames" and job_id not in _timeline_id:
frames = merged.get("frames", [])
video_path = merged.get("video_path", "")
profile_name = merged.get("profile_name", "")
tid, cid = create_timeline(
source_video=video_path,
profile_name=profile_name,
frames=frames,
)
_timeline_id[job_id] = tid
_latest_checkpoint[job_id] = cid
logger.info("Job %s → Timeline %s (root checkpoint %s)", job_id, tid, cid)
# Emit timeline_id via SSE so the UI can use it for checkpoint loads
from core.detect import emit
emit.log(job_id, "Checkpoint", "INFO", f"timeline_id={tid}")
return
# For subsequent stages: save checkpoint on the timeline
tid = _timeline_id.get(job_id)
if not tid:
logger.warning("No timeline for job %s, skipping checkpoint", job_id)
return
# Serialize stage output using the stage's serialize_fn if available
stage_cls = _REGISTRY.get(stage_name)
serialize_fn = getattr(getattr(stage_cls, "definition", None), "serialize_fn", None)
@@ -81,17 +51,41 @@ def checkpoint_after_stage(job_id: str, stage_name: str, state: dict, result: di
else:
output_json = {}
# Convert stats dataclass to dict for JSONB storage
import dataclasses
raw_stats = state.get("stats", {})
if dataclasses.is_dataclass(raw_stats):
stats_dict = dataclasses.asdict(raw_stats)
elif isinstance(raw_stats, dict):
stats_dict = raw_stats
else:
stats_dict = {}
# Save checkpoint (lightweight tree node)
parent_id = _latest_checkpoint.get(job_id)
new_checkpoint_id = save_stage_output(
timeline_id=tid,
parent_checkpoint_id=parent_id,
checkpoint_id = save_checkpoint(
timeline_id=timeline_id,
stage_name=stage_name,
output_json=output_json,
parent_checkpoint_id=parent_id,
config_overrides=state.get("config_overrides"),
stats=stats_dict,
job_id=job_id,
)
_latest_checkpoint[job_id] = new_checkpoint_id
_latest_checkpoint[job_id] = checkpoint_id
# Save stage output (separate row, upsert by job+stage)
if output_json:
save_stage_output(
job_id=job_id,
timeline_id=timeline_id,
stage_name=stage_name,
output=output_json,
checkpoint_id=checkpoint_id,
)
logger.info("Checkpoint %s + output for stage %s (job %s)", checkpoint_id, stage_name, job_id)
def get_timeline_id(job_id: str) -> str | None:
    """
    Look up the timeline id registered for a running job.

    Reads the module-level ``_timeline_id`` mapping and returns ``None``
    when ``job_id`` has no entry. The UI uses this id to load checkpoints.
    """
    timeline_id = _timeline_id.get(job_id)
    return timeline_id
def get_latest_checkpoint(job_id: str) -> str | None:
    """
    Look up the most recent checkpoint id recorded for a running job.

    Reads the module-level ``_latest_checkpoint`` mapping and returns
    ``None`` when ``job_id`` has no entry.
    """
    checkpoint_id = _latest_checkpoint.get(job_id)
    return checkpoint_id

View File

@@ -6,6 +6,9 @@ This file has no model-specific knowledge — stages own their data format.
The only things serialized here are the "envelope" fields (job_id, video_path, etc.)
that don't belong to any stage.
Frames are ephemeral (in-memory during a run). Serialization stores
metadata only; frames are re-extracted from chunks when needed.
"""
from __future__ import annotations
@@ -18,10 +21,10 @@ from core.schema.serializers.pipeline import (
# Envelope fields — not owned by any stage, always present
ENVELOPE_KEYS = ["job_id", "video_path", "profile_name", "config_overrides"]
ENVELOPE_KEYS = ["job_id", "video_path", "profile_name", "timeline_id", "config_overrides"]
def serialize_state(state: dict, frames_manifest: dict[int, str]) -> dict:
def serialize_state(state: dict) -> dict:
"""
Serialize DetectState to a JSON-compatible dict.
@@ -37,9 +40,6 @@ def serialize_state(state: dict, frames_manifest: dict[int, str]) -> dict:
default = {} if key == "config_overrides" else ""
checkpoint[key] = state.get(key, default)
# Frames manifest (needed by frame-loading stages)
checkpoint["frames_manifest"] = {str(k): v for k, v in frames_manifest.items()}
# Stats (shared across stages, not owned by one)
stats = state.get("stats")
if stats is not None:
@@ -60,8 +60,9 @@ def serialize_state(state: dict, frames_manifest: dict[int, str]) -> dict:
def deserialize_state(checkpoint: dict, frames: list) -> dict:
"""
Reconstitute DetectState from a checkpoint dict + loaded frames.
Reconstitute DetectState from a checkpoint dict + frames.
Frames are provided by the caller (re-extracted from chunks).
Calls each stage's deserialize_fn to restore stage-owned data.
"""
from core.detect.stages.base import _REGISTRY
@@ -75,7 +76,7 @@ def deserialize_state(checkpoint: dict, frames: list) -> dict:
default = {} if key == "config_overrides" else ""
state[key] = checkpoint.get(key, default)
# Frames (always present, loaded externally)
# Frames (provided externally, ephemeral)
state["frames"] = frames
# Stats

View File

@@ -1,9 +1,9 @@
"""
Checkpoint storage — Timeline + Checkpoint (tree of snapshots).
Checkpoint storage — Timeline, Checkpoint, StageOutput persistence.
Timeline: frame sequence from source video (frames in MinIO)
Checkpoint: snapshot of pipeline state (stage outputs as JSONB in Postgres)
parent_id forms a tree — multiple children = different config tries
Timeline: user-created source selection (chunk paths)
Checkpoint: lightweight tree node (parent_id, stage_name, config, stats)
StageOutput: per-stage result (flat table, one row per job+stage)
"""
from __future__ import annotations
@@ -11,8 +11,6 @@ from __future__ import annotations
import logging
from uuid import UUID
from .frames import save_frames, load_frames, CHECKPOINT_PREFIX
logger = logging.getLogger(__name__)
@@ -21,104 +19,83 @@ logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
def create_timeline(
source_video: str,
profile_name: str,
frames: list,
fps: float = 2.0,
chunk_paths: list[str],
profile_name: str = "",
name: str = "",
source_asset_id: UUID | None = None,
) -> tuple[str, str]:
fps: float = 2.0,
) -> str:
"""
Create a timeline from frames. Uploads frame images to MinIO,
creates Timeline + root Checkpoint in Postgres.
Create a timeline from a chunk selection.
Returns (timeline_id, checkpoint_id).
Called by the user (via API) before any pipeline runs.
Returns timeline_id.
"""
from core.db.models import Timeline, Checkpoint
from core.db.models import Timeline
from core.db.connection import get_session
with get_session() as session:
timeline = Timeline(
source_video=source_video,
name=name,
chunk_paths=chunk_paths,
profile_name=profile_name,
source_asset_id=source_asset_id,
fps=fps,
status="created",
)
session.add(timeline)
session.flush()
session.commit()
session.refresh(timeline)
tid = str(timeline.id)
# Upload frames to MinIO
manifest = save_frames(tid, frames)
logger.info("Timeline created: %s (%d chunks)", tid, len(chunk_paths))
return tid
frames_meta = [
{
"sequence": f.sequence,
"chunk_id": getattr(f, "chunk_id", 0),
"timestamp": f.timestamp,
"perceptual_hash": getattr(f, "perceptual_hash", ""),
def get_timeline(timeline_id: str) -> dict:
    """
    Fetch a single timeline and return it as a plain dict.

    The ``id`` is stringified; ``source_asset_id`` and ``created_at`` are
    stringified when set and ``None`` otherwise. All other fields are
    passed through unchanged.

    Raises:
        ValueError: if no timeline exists for ``timeline_id``.
    """
    from core.db.models import Timeline
    from core.db.connection import get_session

    with get_session() as session:
        row = session.get(Timeline, UUID(timeline_id))
        if not row:
            raise ValueError(f"Timeline not found: {timeline_id}")

        # Optional columns: only stringify when a value is present.
        asset_id = str(row.source_asset_id) if row.source_asset_id else None
        created = str(row.created_at) if row.created_at else None

        return {
            "id": str(row.id),
            "name": row.name,
            "chunk_paths": row.chunk_paths,
            "profile_name": row.profile_name,
            "status": row.status,
            "fps": row.fps,
            "source_asset_id": asset_id,
            "created_at": created,
        }
for f in frames
]
timeline.frames_prefix = f"{CHECKPOINT_PREFIX}/{tid}/frames/"
timeline.frames_manifest = {str(k): v for k, v in manifest.items()}
timeline.frames_meta = frames_meta
checkpoint = Checkpoint(
timeline_id=timeline.id,
parent_id=None,
stage_outputs={},
stats={"frames_extracted": len(frames)},
)
session.add(checkpoint)
def update_timeline_status(timeline_id: str, status: str, frame_count: int | None = None):
    """
    Update a timeline's status and, optionally, its frame count.

    Args:
        timeline_id: UUID string of the timeline to update.
        status: New status value to store on the timeline.
        frame_count: When not ``None``, also stored as ``frame_count``.

    A missing timeline is not an error for callers (the update is
    best-effort), but it is logged so a stale or wrong id does not go
    unnoticed.
    """
    from core.db.models import Timeline
    from core.db.connection import get_session

    with get_session() as session:
        timeline = session.get(Timeline, UUID(timeline_id))
        if not timeline:
            # Previously a silent no-op; keep it non-fatal but make it visible.
            logger.warning(
                "Timeline %s not found, status %r not applied", timeline_id, status
            )
            return

        timeline.status = status
        if frame_count is not None:
            timeline.frame_count = frame_count
        session.commit()
session.refresh(checkpoint)
cid = str(checkpoint.id)
logger.info("Timeline created: %s (%d frames, root checkpoint %s)", tid, len(frames), cid)
return tid, cid
def get_timeline_frames(timeline_id: str) -> list:
"""Load frames from a timeline (from MinIO) as Frame objects."""
from core.db.models import Timeline
from core.db.connection import get_session
with get_session() as session:
timeline = session.get(Timeline, UUID(timeline_id))
if not timeline:
raise ValueError(f"Timeline not found: {timeline_id}")
raw_manifest = timeline.frames_manifest or {}
manifest = {int(k): v for k, v in raw_manifest.items()}
return load_frames(manifest, timeline.frames_meta or [])
def get_timeline_frames_b64(timeline_id: str) -> list[dict]:
"""Load frames as base64 JPEG (lightweight, no numpy)."""
from core.db.models import Timeline
from core.db.connection import get_session
from .frames import load_frames_b64
with get_session() as session:
timeline = session.get(Timeline, UUID(timeline_id))
if not timeline:
raise ValueError(f"Timeline not found: {timeline_id}")
raw_manifest = timeline.frames_manifest or {}
manifest = {int(k): v for k, v in raw_manifest.items()}
return load_frames_b64(manifest, timeline.frames_meta or [])
# ---------------------------------------------------------------------------
# Checkpoint
# ---------------------------------------------------------------------------
def save_stage_output(
def save_checkpoint(
timeline_id: str,
parent_checkpoint_id: str | None,
stage_name: str,
output_json: dict,
parent_checkpoint_id: str | None = None,
config_overrides: dict | None = None,
stats: dict | None = None,
is_scenario: bool = False,
@@ -126,32 +103,22 @@ def save_stage_output(
job_id: str | None = None,
) -> str:
"""
Save a stage's output as a new checkpoint (child of parent).
Save a checkpoint (lightweight tree node).
Carries forward stage outputs from parent + adds the new one.
No stage outputs — those go in StageOutput table separately.
Returns the new checkpoint ID.
"""
from core.db.models import Checkpoint
from core.db.connection import get_session
with get_session() as session:
parent_outputs = {}
parent_stats = {}
parent_config = {}
if parent_checkpoint_id:
parent = session.get(Checkpoint, UUID(parent_checkpoint_id))
if parent:
parent_outputs = dict(parent.stage_outputs or {})
parent_stats = dict(parent.stats or {})
parent_config = dict(parent.config_overrides or {})
checkpoint = Checkpoint(
timeline_id=UUID(timeline_id),
job_id=UUID(job_id) if job_id else None,
parent_id=UUID(parent_checkpoint_id) if parent_checkpoint_id else None,
stage_outputs={**parent_outputs, stage_name: output_json},
config_overrides={**parent_config, **(config_overrides or {})},
stats={**parent_stats, **(stats or {})},
stage_name=stage_name,
config_overrides=config_overrides or {},
stats=stats or {},
is_scenario=is_scenario,
scenario_label=scenario_label,
)
@@ -165,13 +132,172 @@ def save_stage_output(
return cid
def load_stage_output(checkpoint_id: str, stage_name: str) -> dict | None:
"""Load a stage's output from a checkpoint."""
def get_checkpoints_for_job(job_id: str) -> list[dict]:
"""List checkpoints for a job, ordered by creation time."""
from sqlmodel import select
from core.db.models import Checkpoint
from core.db.connection import get_session
with get_session() as session:
checkpoint = session.get(Checkpoint, UUID(checkpoint_id))
if not checkpoint:
stmt = (
select(Checkpoint)
.where(Checkpoint.job_id == UUID(job_id))
.order_by(Checkpoint.created_at)
)
checkpoints = session.exec(stmt).all()
return [
{
"id": str(c.id),
"timeline_id": str(c.timeline_id),
"job_id": str(c.job_id) if c.job_id else None,
"parent_id": str(c.parent_id) if c.parent_id else None,
"stage_name": c.stage_name,
"config_overrides": c.config_overrides or {},
"stats": c.stats or {},
"is_scenario": c.is_scenario,
"scenario_label": c.scenario_label,
"created_at": str(c.created_at) if c.created_at else None,
}
for c in checkpoints
]
def get_checkpoints_for_timeline(timeline_id: str) -> list[dict]:
    """
    Return every checkpoint on a timeline as plain dicts, oldest first.

    Rows are ordered by ``created_at``. UUID fields are stringified;
    ``job_id``, ``parent_id`` and ``created_at`` become ``None`` when unset,
    and empty ``config_overrides`` / ``stats`` are normalised to ``{}``.
    """
    from sqlmodel import select
    from core.db.models import Checkpoint
    from core.db.connection import get_session

    def _as_dict(node) -> dict:
        # Flatten one Checkpoint row into JSON-friendly values.
        return {
            "id": str(node.id),
            "timeline_id": str(node.timeline_id),
            "job_id": str(node.job_id) if node.job_id else None,
            "parent_id": str(node.parent_id) if node.parent_id else None,
            "stage_name": node.stage_name,
            "config_overrides": node.config_overrides or {},
            "stats": node.stats or {},
            "is_scenario": node.is_scenario,
            "scenario_label": node.scenario_label,
            "created_at": str(node.created_at) if node.created_at else None,
        }

    with get_session() as session:
        query = select(Checkpoint).where(Checkpoint.timeline_id == UUID(timeline_id))
        query = query.order_by(Checkpoint.created_at)
        nodes = session.exec(query).all()
        return [_as_dict(node) for node in nodes]
# ---------------------------------------------------------------------------
# StageOutput
# ---------------------------------------------------------------------------
def save_stage_output(
    job_id: str,
    timeline_id: str,
    stage_name: str,
    output: dict,
    checkpoint_id: str | None = None,
) -> str:
    """
    Insert or update the output row for a (job_id, stage_name) pair.

    If a row already exists for the job and stage, its ``output`` and
    ``checkpoint_id`` are overwritten; otherwise a new row is created.

    Returns the id of the stored StageOutput row as a string.
    """
    from sqlmodel import select
    from core.db.models import StageOutput
    from core.db.connection import get_session

    with get_session() as session:
        # Select-then-write upsert keyed on job + stage.
        lookup = (
            select(StageOutput)
            .where(StageOutput.job_id == UUID(job_id))
            .where(StageOutput.stage_name == stage_name)
        )
        record = session.exec(lookup).first()

        if record:
            record.output = output
            record.checkpoint_id = UUID(checkpoint_id) if checkpoint_id else None
        else:
            record = StageOutput(
                job_id=UUID(job_id),
                timeline_id=UUID(timeline_id),
                stage_name=stage_name,
                checkpoint_id=UUID(checkpoint_id) if checkpoint_id else None,
                output=output,
            )
            session.add(record)

        session.commit()
        session.refresh(record)
        return str(record.id)
def load_stage_output(job_id: str, stage_name: str) -> dict | None:
"""Load a stage's output by job + stage name."""
from sqlmodel import select
from core.db.models import StageOutput
from core.db.connection import get_session
with get_session() as session:
stmt = (
select(StageOutput)
.where(StageOutput.job_id == UUID(job_id))
.where(StageOutput.stage_name == stage_name)
)
row = session.exec(stmt).first()
if not row:
return None
return (checkpoint.stage_outputs or {}).get(stage_name)
return row.output
def load_stage_outputs_for_job(job_id: str) -> dict[str, dict]:
    """
    Collect every stored stage output for a job.

    Returns a mapping of ``stage_name`` to that stage's ``output``; the
    mapping is empty when the job has no stored outputs.
    """
    from sqlmodel import select
    from core.db.models import StageOutput
    from core.db.connection import get_session

    with get_session() as session:
        query = select(StageOutput).where(StageOutput.job_id == UUID(job_id))
        outputs_by_stage = {}
        for record in session.exec(query).all():
            outputs_by_stage[record.stage_name] = record.output
        return outputs_by_stage
def load_stage_outputs_for_timeline(timeline_id: str, stage_name: str | None = None) -> list[dict]:
    """
    List stage outputs recorded on a timeline, across all of its jobs.

    When ``stage_name`` is given (and non-empty) only rows for that stage
    are returned. Each row is flattened to a dict with stringified ids;
    ``checkpoint_id`` and ``created_at`` are ``None`` when unset.
    """
    from sqlmodel import select
    from core.db.models import StageOutput
    from core.db.connection import get_session

    with get_session() as session:
        query = select(StageOutput).where(StageOutput.timeline_id == UUID(timeline_id))
        if stage_name:
            query = query.where(StageOutput.stage_name == stage_name)

        results = []
        for record in session.exec(query).all():
            results.append(
                {
                    "id": str(record.id),
                    "job_id": str(record.job_id),
                    "stage_name": record.stage_name,
                    "checkpoint_id": str(record.checkpoint_id) if record.checkpoint_id else None,
                    "output": record.output,
                    "created_at": str(record.created_at) if record.created_at else None,
                }
            )
        return results

View File

@@ -98,6 +98,15 @@ def node_extract_frames(state: DetectState) -> dict:
frames = extract_frames(state["video_path"], config, job_id=job_id)
span.set_output({"frames_extracted": len(frames)})
# Cache frames on the timeline for reuse across jobs and UI
timeline_id = state.get("timeline_id")
if timeline_id:
from core.detect.checkpoint.frames import cache_frames, cache_exists
if not cache_exists(timeline_id):
cache_frames(timeline_id, frames)
from core.detect.checkpoint.storage import update_timeline_status
update_timeline_status(timeline_id, "cached", frame_count=len(frames))
_emit(state, "extract_frames", "done")
return {"frames": frames, "stats": PipelineStats(frames_extracted=len(frames))}

View File

@@ -12,8 +12,7 @@ from core.schema.serializers._common import (
)
from core.schema.serializers.pipeline import (
serialize_frame_meta,
serialize_frames_with_upload as serialize_frames,
deserialize_frames_with_download as deserialize_frames,
serialize_frames_meta,
serialize_text_candidate,
serialize_text_candidates,
deserialize_text_candidate,

View File

@@ -2,18 +2,19 @@
from core.detect.stages.models import StageDefinition, StageIO, StageConfigField
from core.detect.stages.base import register_stage
from ._serializers import serialize_frames, deserialize_frames
from ._serializers import serialize_frame_meta
def _ser_extract(state: dict, job_id: str) -> dict:
frames = state.get("frames", [])
meta, manifest = serialize_frames(frames, job_id)
return {"frames_meta": meta, "frames_manifest": manifest}
meta = [serialize_frame_meta(f) for f in frames]
return {"frames_meta": meta, "frame_count": len(frames)}
def _deser_extract(data: dict, job_id: str) -> dict:
frames = deserialize_frames(data["frames_meta"], data["frames_manifest"], job_id)
return {"frames": frames}
# Frames are ephemeral — re-extract from chunks on demand.
# Store metadata so we know what was extracted.
return {"_frames_meta": data.get("frames_meta", [])}
def _ser_filter(state: dict, job_id: str) -> dict:

View File

@@ -16,6 +16,7 @@ class DetectState(TypedDict, total=False):
# Input
video_path: str
job_id: str
timeline_id: str
profile_name: str
source_asset_id: str # UUID of the source MediaAsset

View File

@@ -115,13 +115,13 @@ class SegmentFieldRequest(BaseModel):
class SegmentFieldResponse(BaseModel):
"""Response from field segmentation."""
boundary: List[List[int]] = Field(default_factory=list)
boundary: List[str] = Field(default_factory=list)
coverage: float = 0.0
mask_b64: str = ""
class SegmentFieldDebugResponse(BaseModel):
"""Response from field segmentation with debug overlay."""
boundary: List[List[int]] = Field(default_factory=list)
boundary: List[str] = Field(default_factory=list)
coverage: float = 0.0
mask_overlay_b64: str = ""

View File

@@ -28,6 +28,7 @@ from .grpc import (
from .job import Job, JobStatus, RunType
from .timeline import Timeline
from .checkpoint import Checkpoint
from .stage_output import StageOutput
from .brand import BrandSource, Brand
from .media import AssetStatus, MediaAsset
from .profile import Profile
@@ -41,7 +42,7 @@ from .source import ChunkInfo, SourceJob, SourceType
# Core domain models - generates SQLModel, TypeScript
DATACLASSES = [MediaAsset, TranscodePreset,
Job, Timeline, Checkpoint, Brand, Profile]
Job, Timeline, Checkpoint, StageOutput, Brand, Profile]
# API request/response models
API_MODELS = [

View File

@@ -11,25 +11,24 @@ class Checkpoint:
"""
A snapshot of pipeline state on a timeline.
Stage outputs stored as JSONB — each stage serializes to JSON,
the checkpoint stores it without knowing the shape.
parent_id forms a tree: multiple children from the same parent
= different config tries from the same starting point.
Stage outputs are stored separately in StageOutput table,
not carried in the checkpoint itself.
"""
id: UUID
timeline_id: UUID
job_id: Optional[UUID] = None # which job created this checkpoint
parent_id: Optional[UUID] = None # null = root checkpoint
job_id: Optional[UUID] = None
parent_id: Optional[UUID] = None
# Stage outputs — JSONB per stage, opaque to the checkpoint layer
stage_outputs: Dict[str, Any] = field(default_factory=dict)
stage_name: str = "" # which stage produced this checkpoint
# Config that produced this checkpoint
config_overrides: Dict[str, Any] = field(default_factory=dict)
# Pipeline state
# Pipeline stats at this point
stats: Dict[str, Any] = field(default_factory=dict)
# Scenario bookmark

View File

@@ -38,7 +38,7 @@ class Job:
video_path: str
profile_name: str = "soccer_broadcast"
# Timeline — set after frame extraction, or upfront for replay jobs
# Timeline — set at job creation (timeline exists before any job)
timeline_id: Optional[UUID] = None
# Lineage

View File

@@ -0,0 +1,27 @@
"""StageOutput schema — per-stage result storage."""
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, Optional
from uuid import UUID
@dataclass
class StageOutput:
    """
    Result produced by one stage of one job.

    Intended to live in a flat table with a composite unique key on
    (job_id, stage_name) and to be upserted each time the stage completes.
    Rows can be queried on their own, e.g. "all edge detection outputs
    for this timeline".
    """

    # Identity and ownership
    id: UUID
    job_id: UUID
    timeline_id: UUID
    stage_name: str

    # Checkpoint linked to this output, if any
    checkpoint_id: Optional[UUID] = None

    # Stage-defined payload; each instance gets its own empty dict by default
    output: Dict[str, Any] = field(default_factory=dict)

    created_at: Optional[datetime] = None

View File

@@ -1,4 +1,4 @@
"""Timeline schema — source of truth for frame sequences."""
"""Timeline schema — source of truth for source material sequences."""
from dataclasses import dataclass, field
from datetime import datetime
@@ -9,21 +9,27 @@ from uuid import UUID
@dataclass
class Timeline:
"""
The frame sequence from a source video.
A user-created selection of source material.
Independent of stages — exists before any stage runs.
Frames stored in MinIO as JPEGs, metadata here.
One timeline per job.
Exists before any job runs. Holds source references (chunk paths,
asset IDs) and extraction config.
Frame cache: extracted frames live at media/timelines/{id}/frames/
as JPEGs. Any job on this timeline reads from the cache. Cache is
rebuildable from chunks (clear + re-extract). For ephemeral sources
(streams), the cache is the only record.
Many jobs can work on the same timeline.
"""
id: UUID
name: str = ""
source_asset_id: Optional[UUID] = None
source_video: str = ""
chunk_paths: List[str] = field(default_factory=list)
profile_name: str = ""
status: str = "created" # created | cached | ready
fps: float = 2.0
frames_prefix: str = "" # s3: timeline/{id}/frames/
frames_manifest: Dict[int, str] = field(default_factory=dict) # seq → s3 key
frames_meta: List[Dict[str, Any]] = field(default_factory=list)
frame_count: int = 0
source_ephemeral: bool = False # True for streams — cache can't be rebuilt
created_at: Optional[datetime] = None

View File

@@ -2,7 +2,7 @@
Serializers for detection pipeline runtime models.
Special handling:
- Frame.image (np.ndarray → S3, excluded from JSON)
- Frame.image (np.ndarray, ephemeral — only metadata serialized)
- TextCandidate.frame (object ref → frame_sequence integer)
Everything else uses dataclasses.asdict() via safe_construct.
"""
@@ -24,7 +24,7 @@ from ._common import safe_construct, serialize_dataclass, serialize_dataclass_li
# ---------------------------------------------------------------------------
# Frame — image goes to S3 separately
# Frame — metadata only (image is ephemeral, re-extracted from chunks)
# ---------------------------------------------------------------------------
def serialize_frame_meta(frame: Frame) -> dict:
@@ -34,21 +34,9 @@ def serialize_frame_meta(frame: Frame) -> dict:
return result
def serialize_frames_with_upload(frames: list[Frame], job_id: str) -> tuple[list[dict], dict[int, str]]:
"""Upload frame images to S3, return metadata + manifest."""
from core.detect.checkpoint.frames import save_frames
manifest = save_frames(job_id, frames)
meta = [serialize_frame_meta(f) for f in frames]
return meta, manifest
def deserialize_frames_with_download(meta: list[dict], manifest: dict, job_id: str) -> list[Frame]:
"""Load frames from S3 + metadata."""
from core.detect.checkpoint.frames import load_frames
int_manifest = {int(k): v for k, v in manifest.items()}
return load_frames(int_manifest, meta)
def serialize_frames_meta(frames: list[Frame]) -> list[dict]:
    """Return one metadata dict per frame, in input order, via ``serialize_frame_meta``."""
    return list(map(serialize_frame_meta, frames))
# ---------------------------------------------------------------------------

View File

@@ -80,6 +80,18 @@ def upload_file(local_path: str, bucket: str, key: str) -> None:
s3.upload_file(local_path, bucket, key)
def delete_objects(bucket: str, prefix: str) -> int:
    """
    Delete all objects under a prefix.

    S3's DeleteObjects request accepts at most 1000 keys, so the keys are
    submitted in batches; a single oversized request would be rejected.

    Args:
        bucket: Bucket to delete from.
        prefix: Key prefix; every object listed under it is deleted.

    Returns:
        The number of keys submitted for deletion (0 when nothing matched).
    """
    s3 = get_s3_client()
    objects = list_objects(bucket, prefix)
    if not objects:
        return 0

    delete_keys = [{"Key": obj["key"]} for obj in objects]

    batch_size = 1000  # per-request key limit of the DeleteObjects API
    for start in range(0, len(delete_keys), batch_size):
        s3.delete_objects(
            Bucket=bucket,
            Delete={"Objects": delete_keys[start:start + batch_size]},
        )
    return len(delete_keys)
def get_presigned_url(bucket: str, key: str, expires: int = 3600) -> str:
"""Generate a presigned URL for an S3 object."""
s3 = get_s3_client()

View File

@@ -77,13 +77,14 @@ export interface Job {
export interface Timeline {
id: string;
name: string;
source_asset_id: string | null;
source_video: string;
chunk_paths: string[];
profile_name: string;
status: string;
fps: number;
frames_prefix: string;
frames_manifest: Record<string, unknown>;
frames_meta: string[];
frame_count: number;
source_ephemeral: boolean;
created_at: string | null;
}
@@ -92,7 +93,7 @@ export interface Checkpoint {
timeline_id: string;
job_id: string | null;
parent_id: string | null;
stage_outputs: Record<string, unknown>;
stage_name: string;
config_overrides: Record<string, unknown>;
stats: Record<string, unknown>;
is_scenario: boolean;
@@ -100,6 +101,16 @@ export interface Checkpoint {
created_at: string | null;
}
/**
 * Output of a single pipeline stage within a job.
 * `checkpoint_id` and `created_at` may be null; `output` is an untyped,
 * stage-defined object.
 */
export interface StageOutput {
id: string;
job_id: string;
timeline_id: string;
stage_name: string;
checkpoint_id: string | null;
output: Record<string, unknown>;
created_at: string | null;
}
export interface Brand {
id: string;
canonical_name: string;

View File

@@ -29,35 +29,15 @@ export function useCheckpointLoader(
stripSelEndOverride.value ?? Math.max(0, checkpointFrames.value.length - 1),
)
// Cache job_id → timeline_id mappings
const timelineCache = new Map<string, string>()
// Track current frame from SSE
source.on<{ frame_ref: number; jpeg_b64: string }>('frame_update', (e) => {
currentFrameImage.value = e.jpeg_b64
currentFrameRef.value = e.frame_ref
})
async function resolveTimelineId(job: string): Promise<string | null> {
if (timelineCache.has(job)) return timelineCache.get(job)!
try {
const resp = await fetch(`/api/detect/timeline/${job}`)
if (!resp.ok) return null
const data = await resp.json()
const tid = data.timeline_id
if (tid) timelineCache.set(job, tid)
return tid
} catch {
return null
}
}
async function loadCheckpoint(job: string, stage: string) {
try {
// Resolve timeline_id from job_id
const timelineId = await resolveTimelineId(job)
const lookupId = timelineId ?? job
const lookupId = pipeline.timelineId || job
const resp = await fetch(`/api/detect/checkpoints/${lookupId}/${stage}`)
if (!resp.ok) return

View File

@@ -56,10 +56,13 @@ export function useStageRegistry() {
}
/**
* Stages that have config fields (and thus can open a parameter editor).
* Stages that have a visual stage editor (canvas + overlays + sliders).
* Add stage names here when a visual editor is implemented for them.
*/
const STAGE_EDITORS = new Set(['detect_edges', 'field_segmentation'])
const editableStages = computed(() =>
stages.value.filter(s => s.config_fields.length > 0).map(s => s.name)
stages.value.filter(s => STAGE_EDITORS.has(s.name)).map(s => s.name)
)
function getStage(name: string): StageInfo | undefined {

View File

@@ -2,6 +2,7 @@
import { ref, onMounted } from 'vue'
import { Panel } from 'mpr-ui-framework'
import { usePipelineStore } from '../stores/pipeline'
import type { Timeline, Job } from '@common/types/generated'
const pipeline = usePipelineStore()
@@ -32,6 +33,9 @@ interface ProfileInfo {
const sources = ref<SourceInfo[]>([])
const chunks = ref<ChunkInfo[]>([])
const profiles = ref<ProfileInfo[]>([])
const timelines = ref<Timeline[]>([])
const timelineJobs = ref<Job[]>([])
const selectedTimeline = ref<string | null>(null)
const selectedSource = ref<string | null>(null)
const selectedChunks = ref<Set<string>>(new Set())
const selectedProfile = ref('soccer_broadcast')
@@ -48,9 +52,10 @@ async function loadSources() {
loading.value = true
error.value = null
try {
const [srcResp, profResp] = await Promise.all([
const [srcResp, profResp, tlResp] = await Promise.all([
fetch('/api/detect/sources'),
fetch('/api/detect/config/profiles'),
fetch('/api/detect/timeline'),
])
if (!srcResp.ok) throw new Error(`${srcResp.status} ${srcResp.statusText}`)
sources.value = await srcResp.json()
@@ -61,6 +66,10 @@ async function loadSources() {
selectedProfile.value = profiles.value[0].name
}
}
if (tlResp.ok) {
timelines.value = await tlResp.json()
}
} catch (e: any) {
error.value = `Failed to load sources: ${e.message}`
} finally {
@@ -81,6 +90,32 @@ async function loadChunks(jobId: string) {
}
}
/**
 * Select a timeline and load the jobs that ran on it.
 *
 * The job list is cleared immediately, then filled from
 * `/api/detect/jobs?timeline_id=…`. If the user selects another timeline
 * while the request is in flight, the late result (or error) is discarded
 * so it cannot overwrite the state of the current selection.
 */
async function selectTimeline(tl: Timeline) {
  selectedTimeline.value = tl.id
  timelineJobs.value = []
  try {
    const resp = await fetch(`/api/detect/jobs?timeline_id=${tl.id}`)
    if (!resp.ok) throw new Error(`${resp.status}`)
    const jobs = await resp.json()
    // Stale response: a different timeline was selected meanwhile.
    if (selectedTimeline.value !== tl.id) return
    timelineJobs.value = jobs
  } catch (e: any) {
    // Do not surface an error that belongs to a selection the user already left.
    if (selectedTimeline.value !== tl.id) return
    error.value = `Failed to load jobs: ${e.message}`
  }
}
/**
 * Open a job by reloading the page with `?job=<id>` and no hash.
 * A full page load is used so every panel initializes with the job context.
 */
function loadJob(job: Job) {
  const target = new URL(window.location.href)
  target.hash = ''
  target.searchParams.set('job', job.id)
  window.location.href = target.toString()
}
/**
 * Format a timestamp string as a short, locale-aware label
 * (abbreviated month, day, two-digit hour and minute).
 *
 * Returns '' for null or empty input and for strings that do not parse to a
 * valid date, so the UI never shows the literal text "Invalid Date".
 */
function formatDate(dateStr: string | null): string {
  if (!dateStr) return ''
  const d = new Date(dateStr)
  // An unparseable string yields a Date whose time value is NaN.
  if (Number.isNaN(d.getTime())) return ''
  return d.toLocaleString(undefined, { month: 'short', day: 'numeric', hour: '2-digit', minute: '2-digit' })
}
function toggleChunk(chunk: ChunkInfo) {
const s = new Set(selectedChunks.value)
if (s.has(chunk.key)) {
@@ -125,15 +160,34 @@ async function runPipeline() {
running.value = true
error.value = null
// Run first selected chunk (multi-run queuing is future work)
const videoPath = [...selectedChunks.value][0]
const chunkPaths = [...selectedChunks.value]
try {
const resp = await fetch('/api/detect/run', {
// 1. Create timeline from chunk selection
const tlResp = await fetch('/api/detect/timeline', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
video_path: videoPath,
chunk_paths: chunkPaths,
profile_name: selectedProfile.value,
source_asset_id: selectedSource.value || '',
name: chunkPaths.length === 1
? chunkPaths[0].split('/').pop() ?? ''
: `${chunkPaths.length} chunks`,
}),
})
if (!tlResp.ok) {
const detail = await tlResp.text()
throw new Error(`Timeline creation failed: ${tlResp.status}: ${detail}`)
}
const timeline = await tlResp.json()
// 2. Run pipeline on the timeline
const runResp = await fetch('/api/detect/run', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
timeline_id: timeline.id,
profile_name: selectedProfile.value,
checkpoint: checkpoint.value,
skip_vlm: skipVlm.value,
@@ -142,12 +196,13 @@ async function runPipeline() {
pause_after_stage: pauseAfterStage.value,
}),
})
if (!resp.ok) {
const detail = await resp.text()
throw new Error(`${resp.status}: ${detail}`)
if (!runResp.ok) {
const detail = await runResp.text()
throw new Error(`Pipeline start failed: ${runResp.status}: ${detail}`)
}
const data = await resp.json()
const data = await runResp.json()
pipeline.setTimelineId(timeline.id)
emit('job-started', data.job_id, { pauseAfterStage: pauseAfterStage.value })
} catch (e: any) {
error.value = `Failed to start pipeline: ${e.message}`
@@ -255,6 +310,42 @@ onMounted(loadSources)
</button>
</div>
<!-- Recent timelines + jobs -->
<div class="source-section" v-if="timelines.length > 0">
<h3>Recent Timelines</h3>
<div class="source-list">
<div
v-for="tl in timelines"
:key="tl.id"
:class="['source-item', { selected: selectedTimeline === tl.id }]"
@click="selectTimeline(tl)"
>
<span class="source-id">{{ tl.name || tl.id.slice(0, 12) }}</span>
<span class="source-meta">
<span class="source-type-badge">{{ tl.status.toUpperCase() }}</span>
<span v-if="tl.frame_count" class="source-count">{{ tl.frame_count }} frames</span>
<span class="source-size">{{ formatDate(tl.created_at) }}</span>
</span>
</div>
</div>
<!-- Jobs for selected timeline -->
<div v-if="selectedTimeline && timelineJobs.length > 0" class="job-list">
<div
v-for="job in timelineJobs"
:key="job.id"
class="job-item"
@click="loadJob(job)"
>
<span class="job-id">{{ job.id.slice(0, 8) }}</span>
<span :class="['job-status', job.status]">{{ job.status }}</span>
<span v-if="job.current_stage" class="job-stage">{{ job.current_stage.replace(/_/g, ' ') }}</span>
<span class="job-date">{{ formatDate(job.created_at) }}</span>
</div>
</div>
<div v-else-if="selectedTimeline" class="source-empty">No jobs yet</div>
</div>
<div class="source-actions">
<button class="editor-close" @click="pipeline.closeEditor()"> Close</button>
</div>
@@ -465,4 +556,56 @@ onMounted(loadSources)
background: var(--status-error);
color: #000;
}
/* Job list */
.job-list {
background: var(--surface-2);
border-radius: var(--panel-radius);
padding: var(--space-1);
margin-top: var(--space-1);
}
.job-item {
display: flex;
align-items: center;
gap: var(--space-2);
padding: var(--space-1) var(--space-2);
border-radius: 3px;
cursor: pointer;
font-size: var(--font-size-sm);
color: var(--text-secondary);
}
.job-item:hover {
background: var(--surface-3);
}
.job-id {
font-family: var(--font-mono);
color: var(--text-dim);
}
.job-status {
font-size: 10px;
font-weight: 700;
padding: 1px 4px;
border-radius: 2px;
}
.job-status.completed { color: var(--status-live); background: rgba(0, 255, 128, 0.1); }
.job-status.failed { color: var(--status-error); background: rgba(224, 82, 82, 0.1); }
.job-status.running { color: var(--status-processing); background: rgba(255, 213, 79, 0.1); }
.job-status.cancelled { color: var(--text-dim); background: var(--surface-3); }
.job-status.pending { color: var(--text-dim); }
.job-stage {
color: var(--text-dim);
font-size: 10px;
}
.job-date {
color: var(--text-dim);
font-size: 10px;
margin-left: auto;
}
</style>

View File

@@ -12,6 +12,7 @@ import type { CheckpointInfo } from '../types/sse-contract'
export const usePipelineStore = defineStore('pipeline', () => {
const jobId = ref('')
const timelineId = ref('')
const status = ref<string>('idle')
const nodes = ref<NodeState[]>([])
const currentStage = ref<string | null>(null)
@@ -35,6 +36,10 @@ export const usePipelineStore = defineStore('pipeline', () => {
jobId.value = id
}
/** Store the given timeline id in the `timelineId` ref. */
function setTimelineId(id: string) {
timelineId.value = id
}
/** Overwrite the `status` ref with `s`; any string is accepted, no validation is done here. */
function setStatus(s: string) {
status.value = s
}
@@ -92,13 +97,14 @@ export const usePipelineStore = defineStore('pipeline', () => {
parentJobId.value = null
runType.value = 'initial'
error.value = null
timelineId.value = ''
}
return {
jobId, status, nodes, currentStage, runId, parentJobId, runType,
jobId, timelineId, status, nodes, currentStage, runId, parentJobId, runType,
checkpoints, error, layoutMode, editorStage, sourceHasSelection,
isRunning, isPaused, canReplay, isEditing,
setJob, setStatus, updateNodes, setRunContext, setCheckpoints, setError,
setJob, setTimelineId, setStatus, updateNodes, setRunContext, setCheckpoints, setError,
openSourceSelector, openBBoxEditor, openStageEditor, closeEditor, reset,
}
})

View File

@@ -11,7 +11,8 @@
"skipLibCheck": true,
"baseUrl": ".",
"paths": {
"@/*": ["src/*"]
"@/*": ["src/*"],
"@common/*": ["../common/*"]
}
},
"include": ["src/**/*.ts", "src/**/*.vue"]

View File

@@ -8,6 +8,7 @@ export default defineConfig({
resolve: {
alias: {
'@': resolve(__dirname, 'src'),
'@common': resolve(__dirname, '../common'),
},
},
server: {

View File

@@ -116,8 +116,7 @@ const flowNodes = computed(() =>
status: n.status,
...appearance,
hasCheckpoint: n.hasCheckpoint ?? false,
hasRegionEditor: regionStageSet.value.has(n.id),
hasEditors: (n.availableEditors?.length ?? 0) > 0,
hasStageEditor: regionStageSet.value.has(n.id),
isRunning: n.status === 'running',
isActive: n.id === props.activeStage,
},
@@ -190,9 +189,9 @@ function onNodeClick(id: string) {
<span class="stage-actions">
<button
v-if="data.hasRegionEditor"
class="stage-btn region-btn"
title="Region editor"
v-if="data.hasStageEditor"
class="stage-btn editor-btn"
title="Stage editor"
@click.stop="emit('open-region-editor', id)"
>
<svg width="12" height="12" viewBox="0 0 12 12" fill="none" stroke="currentColor" stroke-width="1.5">