refactor stage 1

2026-03-27 04:23:21 -03:00
parent df6bcb01e8
commit 291ac8dd40
14 changed files with 688 additions and 450 deletions
--- a/tests/detect/manual/list_scenarios.py
+++ b/tests/detect/manual/list_scenarios.py
@@ -45,15 +45,15 @@ def main():
        return

    logger.info("")
-    logger.info("%3s  %-35s %-12s %-18s %6s  %s", "#", "Label", "Job ID", "Stage", "Frames", "Created")
-    logger.info("─" * 100)
+    logger.info("%3s  %-35s %-12s %6s  %s", "#", "Label", "Timeline", "Stages", "Created")
+    logger.info("─" * 80)

    for i, s in enumerate(scenarios, 1):
-        manifest = s.frames_manifest or {}
        created = str(s.created_at)[:19] if s.created_at else "—"
-        job_short = str(s.job_id)[:8]
-        logger.info("%3d  %-35s %-12s %-18s %6d  %s",
-                    i, s.scenario_label, job_short, s.stage, len(manifest), created)
+        tid_short = str(s.timeline_id)[:8]
+        stage_count = len(s.stage_outputs or {})
+        logger.info("%3d  %-35s %-12s %6d  %s",
+                    i, s.scenario_label, tid_short, stage_count, created)

    logger.info("")

@@ -73,7 +73,7 @@ def main():
            logger.error("Scenario not found: %s", args.open)
            return

-        url = f"{args.base_url}?job={target.job_id}#/editor/detect_edges"
+        url = f"{args.base_url}?job={target.timeline_id}#/editor/detect_edges"
        logger.info("Opening: %s", url)
        webbrowser.open(url)
    else:
--- a/tests/detect/manual/seed_scenario.py
+++ b/tests/detect/manual/seed_scenario.py
@@ -1,26 +1,20 @@
 #!/usr/bin/env python3
 """
-Seed a scenario checkpoint from a video chunk.
+Seed a scenario from a video chunk.

-Extracts frames via ffmpeg, uploads to MinIO, creates a StageCheckpoint
-in Postgres marked as a scenario. No pipeline, no Redis, no SSE.
+Creates a Timeline (frames in MinIO) + Branch + Checkpoint marked
+as a scenario. No pipeline, no Redis, no SSE.

 Prerequisites:
-  - Postgres reachable (port-forward or local)
-  - MinIO reachable (port-forward or local)
+  - Postgres reachable (Kind NodePort or local)
+  - MinIO reachable (Kind NodePort or local)

 Usage:
-    # With K8s port-forwards:
-    kubectl port-forward svc/postgres 5432:5432 &
-    kubectl port-forward svc/minio 9000:9000 &
-
    python tests/detect/manual/seed_scenario.py
-
-    # Custom video:
    python tests/detect/manual/seed_scenario.py --video media/mpr/out/chunks/.../chunk_0001.mp4

 Then open:
-    http://mpr.local.ar/detection/?job=<JOB_ID>#/editor/detect_edges
+    http://mpr.local.ar/detection/?job=<TIMELINE_ID>#/editor/detect_edges
 """

 from __future__ import annotations
@@ -31,7 +25,7 @@ import os
 import sys
 import uuid

-parser = argparse.ArgumentParser(description="Seed a scenario checkpoint")
+parser = argparse.ArgumentParser(description="Seed a scenario")
 parser.add_argument("--video",
                    default="media/mpr/out/chunks/95043d50-4df6-4ac8-bbd5-2ba873117c6e/chunk_0000.mp4")
 parser.add_argument("--label", default="chelsea_edges_default",
@@ -44,7 +38,6 @@ parser.add_argument("--s3-url",
                    default=os.environ.get("S3_ENDPOINT_URL", "http://localhost:9000"))
 args = parser.parse_args()

-# Set env before imports
 os.environ["DATABASE_URL"] = args.db_url
 os.environ["S3_ENDPOINT_URL"] = args.s3_url
 os.environ.setdefault("AWS_ACCESS_KEY_ID", "minioadmin")
@@ -57,7 +50,7 @@ logger = logging.getLogger(__name__)


 def extract_frames_ffmpeg(video_path: str, fps: float, max_frames: int):
-    """Extract frames using ffmpeg subprocess — no pipeline dependencies."""
+    """Extract frames using ffmpeg — no pipeline dependencies."""
    import subprocess
    import tempfile
    from pathlib import Path
@@ -82,7 +75,7 @@ def extract_frames_ffmpeg(video_path: str, fps: float, max_frames: int):

    frames = []
    for jpg in sorted(Path(tmpdir).glob("frame_*.jpg")):
-        seq = int(jpg.stem.split("_")[1]) - 1  # 0-indexed
+        seq = int(jpg.stem.split("_")[1]) - 1
        img = Image.open(jpg).convert("RGB")
        image_array = np.array(img)
        frame = Frame(
@@ -99,7 +92,6 @@ def extract_frames_ffmpeg(video_path: str, fps: float, max_frames: int):


 def main():
-    job_id = str(uuid.uuid4())
    video_path = args.video

    if not os.path.exists(video_path):
@@ -107,7 +99,6 @@ def main():
        sys.exit(1)

    logger.info("Video: %s", video_path)
-    logger.info("Job ID: %s", job_id)
    logger.info("Label: %s", args.label)

    # Ensure DB tables exist
@@ -119,57 +110,37 @@ def main():
    frames = extract_frames_ffmpeg(video_path, args.fps, args.max_frames)
    logger.info("Extracted %d frames", len(frames))

-    # Upload frames to MinIO
-    from detect.checkpoint.frames import save_frames
-    logger.info("Uploading frames to MinIO...")
-    manifest = save_frames(job_id, frames)
-    logger.info("Uploaded %d frames", len(manifest))
+    # Create timeline + branch + checkpoint
+    from detect.checkpoint.storage import create_timeline, save_stage_output

-    # Build frame metadata
-    frames_meta = [
-        {
-            "sequence": f.sequence,
-            "chunk_id": f.chunk_id,
-            "timestamp": f.timestamp,
-            "perceptual_hash": "",
-        }
-        for f in frames
-    ]
-
-    # All frames are "filtered" (no scene filter ran)
-    filtered_sequences = [f.sequence for f in frames]
-
-    # Save checkpoint as scenario
-    from core.db.detect import save_stage_checkpoint
-    from detect.checkpoint.frames import CHECKPOINT_PREFIX
-
-    checkpoint = save_stage_checkpoint(
-        job_id=job_id,
-        stage="filter_scenes",
-        stage_index=1,
-        frames_prefix=f"{CHECKPOINT_PREFIX}/{job_id}/frames/",
-        frames_manifest={str(k): v for k, v in manifest.items()},
-        frames_meta=frames_meta,
-        filtered_frame_sequences=filtered_sequences,
-        stage_output_key="",
-        stats={"frames_extracted": len(frames), "frames_after_scene_filter": len(frames)},
-        config_snapshot={},
-        config_overrides={},
-        video_path=video_path,
+    timeline_id, branch_id = create_timeline(
+        source_video=video_path,
        profile_name="soccer_broadcast",
-        is_scenario=True,
-        scenario_label=args.label,
+        frames=frames,
+        fps=args.fps,
    )

+    # Mark the branch's latest checkpoint as a scenario (silently skipped when none is found)
+    from core.db.detect import get_latest_checkpoint
+    from core.db.connection import get_session
+
+    checkpoint = get_latest_checkpoint(branch_id)
+    if checkpoint:
+        checkpoint.is_scenario = True
+        checkpoint.scenario_label = args.label
+        with get_session() as session:
+            session.add(checkpoint)
+            session.commit()
+
    logger.info("")
    logger.info("Scenario created:")
-    logger.info("  ID: %s", checkpoint.id)
-    logger.info("  Job: %s", job_id)
+    logger.info("  Timeline: %s", timeline_id)
+    logger.info("  Branch: %s", branch_id)
    logger.info("  Label: %s", args.label)
    logger.info("  Frames: %d", len(frames))
    logger.info("")
    logger.info("Open in editor:")
-    logger.info("  http://mpr.local.ar/detection/?job=%s#/editor/detect_edges", job_id)
+    logger.info("  http://mpr.local.ar/detection/?job=%s#/editor/detect_edges", timeline_id)


 if __name__ == "__main__":
--- a/tests/detect/test_stage_registry.py
+++ b/tests/detect/test_stage_registry.py
@@ -1,6 +1,7 @@
 """Tests for the stage registry."""

 from detect.stages import list_stages, get_stage, get_palette
+from detect.stages.base import get_stage_class


 EXPECTED_STAGES = [
@@ -26,9 +27,17 @@ def test_stage_has_io():

 def test_stage_has_serialization():
    for name in EXPECTED_STAGES:
-        stage = get_stage(name)
-        assert stage.serialize_fn is not None, f"{name} has no serialize_fn"
-        assert stage.deserialize_fn is not None, f"{name} has no deserialize_fn"
+        defn = get_stage(name)
+        stage_cls = get_stage_class(name)
+        if stage_cls is not None:
+            # New-style: serialization on the class
+            instance = stage_cls()
+            assert hasattr(instance, 'serialize'), f"{name} has no serialize method"
+            assert hasattr(instance, 'deserialize'), f"{name} has no deserialize method"
+        else:
+            # Legacy: serialization on the definition
+            assert defn.serialize_fn is not None, f"{name} has no serialize_fn"
+            assert defn.deserialize_fn is not None, f"{name} has no deserialize_fn"


 def test_palette_groups():