a
This commit is contained in:
@@ -1,9 +1,9 @@
|
||||
"""
|
||||
Checkpoint storage — Timeline + Checkpoint (tree of snapshots).
|
||||
Checkpoint storage — Timeline, Checkpoint, StageOutput persistence.
|
||||
|
||||
Timeline: frame sequence from source video (frames in MinIO)
|
||||
Checkpoint: snapshot of pipeline state (stage outputs as JSONB in Postgres)
|
||||
parent_id forms a tree — multiple children = different config tries
|
||||
Timeline: user-created source selection (chunk paths)
|
||||
Checkpoint: lightweight tree node (parent_id, stage_name, config, stats)
|
||||
StageOutput: per-stage result (flat table, one row per job+stage)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -11,8 +11,6 @@ from __future__ import annotations
|
||||
import logging
|
||||
from uuid import UUID
|
||||
|
||||
from .frames import save_frames, load_frames, CHECKPOINT_PREFIX
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -21,66 +19,41 @@ logger = logging.getLogger(__name__)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def create_timeline(
    chunk_paths: list[str],
    profile_name: str = "",
    name: str = "",
    source_asset_id: UUID | None = None,
    fps: float = 2.0,
) -> str:
    """
    Persist a new Timeline row describing a chunk selection.

    Meant to be called by the user (via the API) before any pipeline runs.
    The row is stored with status "created".

    Returns the new timeline's id as a string.
    """
    from core.db.models import Timeline
    from core.db.connection import get_session

    columns = {
        "name": name,
        "chunk_paths": chunk_paths,
        "profile_name": profile_name,
        "source_asset_id": source_asset_id,
        "fps": fps,
        "status": "created",
    }

    with get_session() as session:
        row = Timeline(**columns)
        session.add(row)
        session.commit()
        # Reload the row after the commit before reading its id.
        session.refresh(row)
        tid = str(row.id)

    logger.info("Timeline created: %s (%d chunks)", tid, len(chunk_paths))
    return tid
|
||||
|
||||
|
||||
def get_timeline_frames(timeline_id: str) -> list:
|
||||
"""Load frames from a timeline (from MinIO) as Frame objects."""
|
||||
def get_timeline(timeline_id: str) -> dict:
|
||||
"""Load a timeline as a dict."""
|
||||
from core.db.models import Timeline
|
||||
from core.db.connection import get_session
|
||||
|
||||
@@ -89,36 +62,40 @@ def get_timeline_frames(timeline_id: str) -> list:
|
||||
if not timeline:
|
||||
raise ValueError(f"Timeline not found: {timeline_id}")
|
||||
|
||||
raw_manifest = timeline.frames_manifest or {}
|
||||
manifest = {int(k): v for k, v in raw_manifest.items()}
|
||||
return load_frames(manifest, timeline.frames_meta or [])
|
||||
return {
|
||||
"id": str(timeline.id),
|
||||
"name": timeline.name,
|
||||
"chunk_paths": timeline.chunk_paths,
|
||||
"profile_name": timeline.profile_name,
|
||||
"status": timeline.status,
|
||||
"fps": timeline.fps,
|
||||
"source_asset_id": str(timeline.source_asset_id) if timeline.source_asset_id else None,
|
||||
"created_at": str(timeline.created_at) if timeline.created_at else None,
|
||||
}
|
||||
|
||||
|
||||
def get_timeline_frames_b64(timeline_id: str) -> list[dict]:
|
||||
"""Load frames as base64 JPEG (lightweight, no numpy)."""
|
||||
def update_timeline_status(timeline_id: str, status: str, frame_count: int | None = None) -> None:
    """
    Update a timeline's status and, optionally, its frame count.

    A timeline id with no matching row is not treated as an error: nothing
    is written, and a warning is logged so the skipped update is visible.

    Args:
        timeline_id: Timeline UUID in string form.
        status: New status value to store on the timeline.
        frame_count: New frame count; the stored value is left untouched
            when this is None.

    Raises:
        ValueError: If timeline_id is not a valid UUID string.
    """
    from core.db.models import Timeline
    from core.db.connection import get_session

    with get_session() as session:
        timeline = session.get(Timeline, UUID(timeline_id))
        if not timeline:
            # Best-effort update: keep going, but leave a trace for operators.
            logger.warning("Timeline not found, status not updated: %s", timeline_id)
            return

        timeline.status = status
        if frame_count is not None:
            timeline.frame_count = frame_count
        session.commit()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Checkpoint
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def save_stage_output(
|
||||
def save_checkpoint(
|
||||
timeline_id: str,
|
||||
parent_checkpoint_id: str | None,
|
||||
stage_name: str,
|
||||
output_json: dict,
|
||||
parent_checkpoint_id: str | None = None,
|
||||
config_overrides: dict | None = None,
|
||||
stats: dict | None = None,
|
||||
is_scenario: bool = False,
|
||||
@@ -126,32 +103,22 @@ def save_stage_output(
|
||||
job_id: str | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
Save a stage's output as a new checkpoint (child of parent).
|
||||
Save a checkpoint (lightweight tree node).
|
||||
|
||||
Carries forward stage outputs from parent + adds the new one.
|
||||
No stage outputs — those go in StageOutput table separately.
|
||||
Returns the new checkpoint ID.
|
||||
"""
|
||||
from core.db.models import Checkpoint
|
||||
from core.db.connection import get_session
|
||||
|
||||
with get_session() as session:
|
||||
parent_outputs = {}
|
||||
parent_stats = {}
|
||||
parent_config = {}
|
||||
if parent_checkpoint_id:
|
||||
parent = session.get(Checkpoint, UUID(parent_checkpoint_id))
|
||||
if parent:
|
||||
parent_outputs = dict(parent.stage_outputs or {})
|
||||
parent_stats = dict(parent.stats or {})
|
||||
parent_config = dict(parent.config_overrides or {})
|
||||
|
||||
checkpoint = Checkpoint(
|
||||
timeline_id=UUID(timeline_id),
|
||||
job_id=UUID(job_id) if job_id else None,
|
||||
parent_id=UUID(parent_checkpoint_id) if parent_checkpoint_id else None,
|
||||
stage_outputs={**parent_outputs, stage_name: output_json},
|
||||
config_overrides={**parent_config, **(config_overrides or {})},
|
||||
stats={**parent_stats, **(stats or {})},
|
||||
stage_name=stage_name,
|
||||
config_overrides=config_overrides or {},
|
||||
stats=stats or {},
|
||||
is_scenario=is_scenario,
|
||||
scenario_label=scenario_label,
|
||||
)
|
||||
@@ -165,13 +132,172 @@ def save_stage_output(
|
||||
return cid
|
||||
|
||||
|
||||
def load_stage_output(checkpoint_id: str, stage_name: str) -> dict | None:
|
||||
"""Load a stage's output from a checkpoint."""
|
||||
def get_checkpoints_for_job(job_id: str) -> list[dict]:
    """Return the checkpoints of a job as plain dicts, ordered by created_at."""
    from sqlmodel import select
    from core.db.models import Checkpoint
    from core.db.connection import get_session

    with get_session() as session:
        query = select(Checkpoint)
        query = query.where(Checkpoint.job_id == UUID(job_id))
        query = query.order_by(Checkpoint.created_at)

        serialized = []
        for node in session.exec(query).all():
            # Optional columns are emitted as None instead of the string "None".
            job_ref = str(node.job_id) if node.job_id else None
            parent_ref = str(node.parent_id) if node.parent_id else None
            created = str(node.created_at) if node.created_at else None
            serialized.append(
                {
                    "id": str(node.id),
                    "timeline_id": str(node.timeline_id),
                    "job_id": job_ref,
                    "parent_id": parent_ref,
                    "stage_name": node.stage_name,
                    "config_overrides": node.config_overrides or {},
                    "stats": node.stats or {},
                    "is_scenario": node.is_scenario,
                    "scenario_label": node.scenario_label,
                    "created_at": created,
                }
            )
        return serialized
|
||||
|
||||
|
||||
def get_checkpoints_for_timeline(timeline_id: str) -> list[dict]:
    """Return every checkpoint on a timeline as plain dicts, ordered by created_at."""
    from sqlmodel import select
    from core.db.models import Checkpoint
    from core.db.connection import get_session

    with get_session() as session:
        query = select(Checkpoint)
        query = query.where(Checkpoint.timeline_id == UUID(timeline_id))
        query = query.order_by(Checkpoint.created_at)

        serialized = []
        for node in session.exec(query).all():
            # Optional columns are emitted as None instead of the string "None".
            job_ref = str(node.job_id) if node.job_id else None
            parent_ref = str(node.parent_id) if node.parent_id else None
            created = str(node.created_at) if node.created_at else None
            serialized.append(
                {
                    "id": str(node.id),
                    "timeline_id": str(node.timeline_id),
                    "job_id": job_ref,
                    "parent_id": parent_ref,
                    "stage_name": node.stage_name,
                    "config_overrides": node.config_overrides or {},
                    "stats": node.stats or {},
                    "is_scenario": node.is_scenario,
                    "scenario_label": node.scenario_label,
                    "created_at": created,
                }
            )
        return serialized
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# StageOutput
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def save_stage_output(
    job_id: str,
    timeline_id: str,
    stage_name: str,
    output: dict,
    checkpoint_id: str | None = None,
) -> str:
    """
    Insert or overwrite the stored output of one stage of one job.

    The first row matching (job_id, stage_name) is looked up. If one is
    found, its output and checkpoint link are replaced; otherwise a new
    row is inserted.

    Returns the id of the affected stage_output row as a string.
    """
    from sqlmodel import select
    from core.db.models import StageOutput
    from core.db.connection import get_session

    with get_session() as session:
        lookup = select(StageOutput)
        lookup = lookup.where(StageOutput.job_id == UUID(job_id))
        lookup = lookup.where(StageOutput.stage_name == stage_name)
        record = session.exec(lookup).first()

        if not record:
            # No row yet for this job + stage: insert a fresh one.
            record = StageOutput(
                job_id=UUID(job_id),
                timeline_id=UUID(timeline_id),
                stage_name=stage_name,
                checkpoint_id=UUID(checkpoint_id) if checkpoint_id else None,
                output=output,
            )
            session.add(record)
        else:
            # Overwrite in place; a missing checkpoint_id clears the link.
            record.output = output
            record.checkpoint_id = UUID(checkpoint_id) if checkpoint_id else None

        session.commit()
        session.refresh(record)
        return str(record.id)
|
||||
|
||||
|
||||
def load_stage_output(job_id: str, stage_name: str) -> dict | None:
    """Fetch the stored output for one stage of a job, or None when no row matches."""
    from sqlmodel import select
    from core.db.models import StageOutput
    from core.db.connection import get_session

    with get_session() as session:
        query = select(StageOutput)
        query = query.where(StageOutput.job_id == UUID(job_id))
        query = query.where(StageOutput.stage_name == stage_name)
        match = session.exec(query).first()

        return match.output if match else None
|
||||
|
||||
|
||||
def load_stage_outputs_for_job(job_id: str) -> dict[str, dict]:
    """Collect all stage outputs of a job into a {stage_name: output} mapping."""
    from sqlmodel import select
    from core.db.models import StageOutput
    from core.db.connection import get_session

    with get_session() as session:
        query = select(StageOutput).where(StageOutput.job_id == UUID(job_id))

        by_stage = {}
        for record in session.exec(query).all():
            # If a stage name repeats, the later row replaces the earlier one.
            by_stage[record.stage_name] = record.output
        return by_stage
|
||||
|
||||
|
||||
def load_stage_outputs_for_timeline(timeline_id: str, stage_name: str | None = None) -> list[dict]:
    """
    Return the stage outputs recorded for a timeline as plain dicts.

    When stage_name is truthy, only rows for that stage are returned;
    otherwise every stage output on the timeline is included.
    """
    from sqlmodel import select
    from core.db.models import StageOutput
    from core.db.connection import get_session

    with get_session() as session:
        query = select(StageOutput).where(StageOutput.timeline_id == UUID(timeline_id))
        if stage_name:
            query = query.where(StageOutput.stage_name == stage_name)

        serialized = []
        for record in session.exec(query).all():
            checkpoint_ref = str(record.checkpoint_id) if record.checkpoint_id else None
            created = str(record.created_at) if record.created_at else None
            serialized.append(
                {
                    "id": str(record.id),
                    "job_id": str(record.job_id),
                    "stage_name": record.stage_name,
                    "checkpoint_id": checkpoint_ref,
                    "output": record.output,
                    "created_at": created,
                }
            )
        return serialized
|
||||
|
||||
Reference in New Issue
Block a user