refactor stage 1

This commit is contained in:
2026-03-27 04:23:21 -03:00
parent df6bcb01e8
commit 291ac8dd40
14 changed files with 688 additions and 450 deletions

View File

@@ -45,15 +45,15 @@ def main():
return
logger.info("")
logger.info("%3s %-35s %-12s %-18s %6s %s", "#", "Label", "Job ID", "Stage", "Frames", "Created")
logger.info("" * 100)
logger.info("%3s %-35s %-12s %6s %s", "#", "Label", "Timeline", "Stages", "Created")
logger.info("" * 80)
for i, s in enumerate(scenarios, 1):
manifest = s.frames_manifest or {}
created = str(s.created_at)[:19] if s.created_at else ""
job_short = str(s.job_id)[:8]
logger.info("%3d %-35s %-12s %-18s %6d %s",
i, s.scenario_label, job_short, s.stage, len(manifest), created)
tid_short = str(s.timeline_id)[:8]
stage_count = len(s.stage_outputs or {})
logger.info("%3d %-35s %-12s %6d %s",
i, s.scenario_label, tid_short, stage_count, created)
logger.info("")
@@ -73,7 +73,7 @@ def main():
logger.error("Scenario not found: %s", args.open)
return
url = f"{args.base_url}?job={target.job_id}#/editor/detect_edges"
url = f"{args.base_url}?job={target.timeline_id}#/editor/detect_edges"
logger.info("Opening: %s", url)
webbrowser.open(url)
else:

View File

@@ -1,26 +1,20 @@
#!/usr/bin/env python3
"""
Seed a scenario checkpoint from a video chunk.
Seed a scenario from a video chunk.
Extracts frames via ffmpeg, uploads to MinIO, creates a StageCheckpoint
in Postgres marked as a scenario. No pipeline, no Redis, no SSE.
Creates a Timeline (frames in MinIO) + Branch + Checkpoint marked
as a scenario. No pipeline, no Redis, no SSE.
Prerequisites:
- Postgres reachable (port-forward or local)
- MinIO reachable (port-forward or local)
- Postgres reachable (Kind NodePort or local)
- MinIO reachable (Kind NodePort or local)
Usage:
# With K8s port-forwards:
kubectl port-forward svc/postgres 5432:5432 &
kubectl port-forward svc/minio 9000:9000 &
python tests/detect/manual/seed_scenario.py
# Custom video:
python tests/detect/manual/seed_scenario.py --video media/mpr/out/chunks/.../chunk_0001.mp4
Then open:
http://mpr.local.ar/detection/?job=<JOB_ID>#/editor/detect_edges
http://mpr.local.ar/detection/?job=<TIMELINE_ID>#/editor/detect_edges
"""
from __future__ import annotations
@@ -31,7 +25,7 @@ import os
import sys
import uuid
parser = argparse.ArgumentParser(description="Seed a scenario checkpoint")
parser = argparse.ArgumentParser(description="Seed a scenario")
parser.add_argument("--video",
default="media/mpr/out/chunks/95043d50-4df6-4ac8-bbd5-2ba873117c6e/chunk_0000.mp4")
parser.add_argument("--label", default="chelsea_edges_default",
@@ -44,7 +38,6 @@ parser.add_argument("--s3-url",
default=os.environ.get("S3_ENDPOINT_URL", "http://localhost:9000"))
args = parser.parse_args()
# Set env before imports
os.environ["DATABASE_URL"] = args.db_url
os.environ["S3_ENDPOINT_URL"] = args.s3_url
os.environ.setdefault("AWS_ACCESS_KEY_ID", "minioadmin")
@@ -57,7 +50,7 @@ logger = logging.getLogger(__name__)
def extract_frames_ffmpeg(video_path: str, fps: float, max_frames: int):
"""Extract frames using ffmpeg subprocess — no pipeline dependencies."""
"""Extract frames using ffmpeg — no pipeline dependencies."""
import subprocess
import tempfile
from pathlib import Path
@@ -82,7 +75,7 @@ def extract_frames_ffmpeg(video_path: str, fps: float, max_frames: int):
frames = []
for jpg in sorted(Path(tmpdir).glob("frame_*.jpg")):
seq = int(jpg.stem.split("_")[1]) - 1 # 0-indexed
seq = int(jpg.stem.split("_")[1]) - 1
img = Image.open(jpg).convert("RGB")
image_array = np.array(img)
frame = Frame(
@@ -99,7 +92,6 @@ def extract_frames_ffmpeg(video_path: str, fps: float, max_frames: int):
def main():
job_id = str(uuid.uuid4())
video_path = args.video
if not os.path.exists(video_path):
@@ -107,7 +99,6 @@ def main():
sys.exit(1)
logger.info("Video: %s", video_path)
logger.info("Job ID: %s", job_id)
logger.info("Label: %s", args.label)
# Ensure DB tables exist
@@ -119,57 +110,37 @@ def main():
frames = extract_frames_ffmpeg(video_path, args.fps, args.max_frames)
logger.info("Extracted %d frames", len(frames))
# Upload frames to MinIO
from detect.checkpoint.frames import save_frames
logger.info("Uploading frames to MinIO...")
manifest = save_frames(job_id, frames)
logger.info("Uploaded %d frames", len(manifest))
# Create timeline + branch + checkpoint
from detect.checkpoint.storage import create_timeline, save_stage_output
# Build frame metadata
frames_meta = [
{
"sequence": f.sequence,
"chunk_id": f.chunk_id,
"timestamp": f.timestamp,
"perceptual_hash": "",
}
for f in frames
]
# All frames are "filtered" (no scene filter ran)
filtered_sequences = [f.sequence for f in frames]
# Save checkpoint as scenario
from core.db.detect import save_stage_checkpoint
from detect.checkpoint.frames import CHECKPOINT_PREFIX
checkpoint = save_stage_checkpoint(
job_id=job_id,
stage="filter_scenes",
stage_index=1,
frames_prefix=f"{CHECKPOINT_PREFIX}/{job_id}/frames/",
frames_manifest={str(k): v for k, v in manifest.items()},
frames_meta=frames_meta,
filtered_frame_sequences=filtered_sequences,
stage_output_key="",
stats={"frames_extracted": len(frames), "frames_after_scene_filter": len(frames)},
config_snapshot={},
config_overrides={},
video_path=video_path,
timeline_id, branch_id = create_timeline(
source_video=video_path,
profile_name="soccer_broadcast",
is_scenario=True,
scenario_label=args.label,
frames=frames,
fps=args.fps,
)
# Mark as scenario
from core.db.detect import get_latest_checkpoint
from core.db.connection import get_session
checkpoint = get_latest_checkpoint(branch_id)
if checkpoint:
checkpoint.is_scenario = True
checkpoint.scenario_label = args.label
with get_session() as session:
session.add(checkpoint)
session.commit()
logger.info("")
logger.info("Scenario created:")
logger.info(" ID: %s", checkpoint.id)
logger.info(" Job: %s", job_id)
logger.info(" Timeline: %s", timeline_id)
logger.info(" Branch: %s", branch_id)
logger.info(" Label: %s", args.label)
logger.info(" Frames: %d", len(frames))
logger.info("")
logger.info("Open in editor:")
logger.info(" http://mpr.local.ar/detection/?job=%s#/editor/detect_edges", job_id)
logger.info(" http://mpr.local.ar/detection/?job=%s#/editor/detect_edges", timeline_id)
if __name__ == "__main__":

View File

@@ -1,6 +1,7 @@
"""Tests for the stage registry."""
from detect.stages import list_stages, get_stage, get_palette
from detect.stages.base import get_stage_class
EXPECTED_STAGES = [
@@ -26,9 +27,17 @@ def test_stage_has_io():
def test_stage_has_serialization():
for name in EXPECTED_STAGES:
stage = get_stage(name)
assert stage.serialize_fn is not None, f"{name} has no serialize_fn"
assert stage.deserialize_fn is not None, f"{name} has no deserialize_fn"
defn = get_stage(name)
stage_cls = get_stage_class(name)
if stage_cls is not None:
# New-style: serialization on the class
instance = stage_cls()
assert hasattr(instance, 'serialize'), f"{name} has no serialize method"
assert hasattr(instance, 'deserialize'), f"{name} has no deserialize method"
else:
# Legacy: serialization on the definition
assert defn.serialize_fn is not None, f"{name} has no serialize_fn"
assert defn.deserialize_fn is not None, f"{name} has no deserialize_fn"
def test_palette_groups():