phase cv 0

This commit is contained in:
2026-03-26 22:22:35 -03:00
parent beb0416280
commit 65814b5b9e
46 changed files with 2962 additions and 268 deletions

View File

@@ -0,0 +1,86 @@
#!/usr/bin/env python3
"""
List available scenarios and open one in the browser.
Usage:
python tests/detect/manual/list_scenarios.py # list all
python tests/detect/manual/list_scenarios.py --open 1 # open scenario #1
python tests/detect/manual/list_scenarios.py --open chelsea_edges_default # by label
Prerequisites:
kubectl port-forward svc/postgres 5432:5432 &
"""
from __future__ import annotations
import argparse
import logging
import os
import sys
import webbrowser
# CLI arguments are parsed at import time so `args` is visible module-wide.
parser = argparse.ArgumentParser(description="List and open scenarios")
parser.add_argument("--open", type=str, default=None,
                    help="Open scenario by number (1-based) or label")
parser.add_argument("--db-url",
                    default=os.environ.get("DATABASE_URL", "postgresql://mpr:mpr@localhost:5432/mpr"))
parser.add_argument("--base-url", default="http://mpr.local.ar/detection/")
args = parser.parse_args()

# DATABASE_URL must be set before core.db is imported (the import happens
# inside main() for exactly this reason).
os.environ["DATABASE_URL"] = args.db_url
sys.path.insert(0, ".")

# BUG FIX: the format previously ran the logger name straight into the
# message ("%(name)s%(message)s"), producing e.g. "INFO    __main__Opening: ...".
# A separator was presumably lost; restore a single space.
logging.basicConfig(level=logging.INFO, format="%(levelname)-7s %(name)s %(message)s")
logger = logging.getLogger(__name__)
def main():
    """List scenarios from the database; optionally open one in the browser.

    Reads module-level `args`: if --open is given it is interpreted first as
    a 1-based index into the listing, then as a scenario label.
    """
    # Imported here so DATABASE_URL (set above from --db-url) is already in
    # the environment before any DB connection is created.
    from core.db.detect import list_scenarios

    scenarios = list_scenarios()
    if not scenarios:
        logger.info("No scenarios found. Create one with:")
        logger.info(" python tests/detect/manual/seed_scenario.py")
        return

    logger.info("")
    logger.info("%3s %-35s %-12s %-18s %6s %s", "#", "Label", "Job ID", "Stage", "Frames", "Created")
    # BUG FIX: '"" * 100' is always the empty string; a dashed rule under the
    # header row was clearly intended.
    logger.info("-" * 100)
    for i, s in enumerate(scenarios, 1):
        manifest = s.frames_manifest or {}
        created = str(s.created_at)[:19] if s.created_at else ""
        job_short = str(s.job_id)[:8]
        logger.info("%3d %-35s %-12s %-18s %6d %s",
                    i, s.scenario_label, job_short, s.stage, len(manifest), created)
    logger.info("")

    if args.open:
        target = None
        try:
            # First interpret --open as a 1-based index.
            idx = int(args.open) - 1
            if 0 <= idx < len(scenarios):
                target = scenarios[idx]
        except ValueError:
            # Not numeric: fall back to an exact label match.
            for s in scenarios:
                if s.scenario_label == args.open:
                    target = s
                    break
        if not target:
            logger.error("Scenario not found: %s", args.open)
            return
        url = f"{args.base_url}?job={target.job_id}#/editor/detect_edges"
        logger.info("Opening: %s", url)
        webbrowser.open(url)
    else:
        logger.info("To open a scenario:")
        logger.info(" python tests/detect/manual/list_scenarios.py --open 1")
        logger.info(" python tests/detect/manual/list_scenarios.py --open chelsea_edges_default")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,189 @@
#!/usr/bin/env python3
"""
Run edge detection on test video frames — visual verification.
Uses a minimal 3-stage pipeline: extract_frames → filter_scenes → detect_edges.
No YOLO, OCR, or downstream stages.
Usage:
python tests/detect/manual/run_region_analysis.py [--job JOB_ID] [--port PORT] [--local]
Opens: http://mpr.local.ar/detection/?job=<JOB_ID>
What to look for in the frame viewer:
- "Edges" toggle appears (cyan)
- Cyan boxes around horizontal line pairs (hoarding edges)
- No boxes on players, ball, or sky
- Boxes concentrated in the lower third of the frame
"""
import argparse
import logging
import os
import sys
import time as _time
parser = argparse.ArgumentParser()
parser.add_argument("--job", default=f"cv-{int(_time.time()) % 100000}")
parser.add_argument("--port", type=int, default=6379)
parser.add_argument("--local", action="store_true", help="Run CV locally (no inference server)")
args = parser.parse_args()

# Environment is configured BEFORE the detect/* imports below — presumably
# because those modules read REDIS_URL / INFERENCE_URL at import time
# (keep this ordering; verify against the detect package).
os.environ["REDIS_URL"] = f"redis://localhost:{args.port}/0"
if args.local:
    os.environ.pop("INFERENCE_URL", None)

# BUG FIX: the format previously ran the logger name straight into the
# message ("%(name)s%(message)s"); a separator was presumably lost.
logging.basicConfig(level=logging.DEBUG, format="%(levelname)-7s %(name)s %(message)s")
sys.path.insert(0, ".")

from langgraph.graph import END, StateGraph

from detect import emit
from detect.models import PipelineStats
from detect.profiles.soccer import SoccerBroadcastProfile
from detect.stages.frame_extractor import extract_frames
from detect.stages.scene_filter import scene_filter
from detect.stages.edge_detector import detect_edge_regions
from detect.state import DetectState

logger = logging.getLogger(__name__)

# Single test chunk used for visual verification.
VIDEO = "media/mpr/out/chunks/95043d50-4df6-4ac8-bbd5-2ba873117c6e/chunk_0000.mp4"
INFERENCE_URL = os.environ.get("INFERENCE_URL")

# --- 3-stage pipeline ---
NODES = ["extract_frames", "filter_scenes", "detect_edges"]
def _emit_transition(job_id: str, node: str, status: str, node_states: dict):
    """Record *node*'s new status and broadcast the full graph snapshot."""
    node_states[node] = status
    snapshot = [
        {"id": name, "status": node_states.get(name, "pending")}
        for name in NODES
    ]
    emit.graph_update(job_id, snapshot)
def node_extract(state: DetectState) -> dict:
    """Stage 1: pull frames out of the source video."""
    job_id = state.get("job_id", "")
    statuses = state.get("_node_states", {n: "pending" for n in NODES})
    _emit_transition(job_id, "extract_frames", "running", statuses)

    extraction_config = SoccerBroadcastProfile().frame_extraction_config()
    frames = extract_frames(state["video_path"], extraction_config, job_id=job_id)

    _emit_transition(job_id, "extract_frames", "done", statuses)
    return {
        "frames": frames,
        "stats": PipelineStats(frames_extracted=len(frames)),
        "_node_states": statuses,
    }
def node_filter(state: DetectState) -> dict:
    """Stage 2: drop frames rejected by the scene filter."""
    job_id = state.get("job_id", "")
    statuses = state.get("_node_states", {})
    _emit_transition(job_id, "filter_scenes", "running", statuses)

    filter_config = SoccerBroadcastProfile().scene_filter_config()
    kept = scene_filter(state.get("frames", []), filter_config, job_id=job_id)

    stats = state.get("stats", PipelineStats())
    stats.frames_after_scene_filter = len(kept)

    _emit_transition(job_id, "filter_scenes", "done", statuses)
    return {"filtered_frames": kept, "stats": stats, "_node_states": statuses}
def node_edges(state: DetectState) -> dict:
    """Stage 3: detect candidate edge regions on the surviving frames."""
    job_id = state.get("job_id", "")
    statuses = state.get("_node_states", {})
    _emit_transition(job_id, "detect_edges", "running", statuses)

    region_config = SoccerBroadcastProfile().region_analysis_config()
    regions = detect_edge_regions(
        state.get("filtered_frames", []), region_config,
        inference_url=INFERENCE_URL, job_id=job_id,
    )

    stats = state.get("stats", PipelineStats())
    stats.cv_regions_detected = sum(len(boxes) for boxes in regions.values())

    _emit_transition(job_id, "detect_edges", "done", statuses)
    return {"edge_regions_by_frame": regions, "stats": stats, "_node_states": statuses}
def build_3stage_graph() -> StateGraph:
    """Wire the three stage handlers into a linear LangGraph pipeline."""
    handlers = {
        "extract_frames": node_extract,
        "filter_scenes": node_filter,
        "detect_edges": node_edges,
    }
    graph = StateGraph(DetectState)
    for name in NODES:
        graph.add_node(name, handlers[name])
    graph.set_entry_point(NODES[0])
    # Chain each stage to its successor, then terminate.
    for src, dst in zip(NODES, NODES[1:]):
        graph.add_edge(src, dst)
    graph.add_edge(NODES[-1], END)
    return graph
def main():
    """Run the 3-stage pipeline, print results, then a canny_low sweep.

    Blocks on stdin before starting so the dashboard can be opened first.
    """
    logger.info("Job: %s", args.job)
    logger.info("Mode: %s", "remote" if INFERENCE_URL else "local")
    logger.info("Pipeline: extract_frames → filter_scenes → detect_edges")
    logger.info("Open: http://mpr.local.ar/detection/?job=%s", args.job)
    input("\nPress Enter to start...")

    emit.set_run_context(run_id=args.job, parent_job_id=args.job, run_type="initial", log_level="DEBUG")
    graph = build_3stage_graph()
    pipeline = graph.compile()
    initial_state = {
        "video_path": VIDEO,
        "job_id": args.job,
        "profile_name": "soccer_broadcast",
    }
    result = pipeline.invoke(initial_state)

    # Print results
    regions = result.get("edge_regions_by_frame", {})
    total = sum(len(boxes) for boxes in regions.values())
    frames_with_regions = sum(1 for boxes in regions.values() if boxes)
    logger.info("Results:")
    logger.info(" Total edge regions: %d", total)
    logger.info(" Frames with regions: %d / %d",
                frames_with_regions, len(result.get("filtered_frames", [])))
    for seq, boxes in sorted(regions.items()):
        if boxes:
            labels = [f"{b.label}({b.confidence:.2f})" for b in boxes]
            logger.info(" Frame %d: %s", seq, ", ".join(labels))
    logger.info("Done. Check the frame viewer for cyan boxes.")
    logger.info("")

    # --- Parameter sensitivity ---
    logger.info("=== Parameter sensitivity (local debug) ===")
    from detect.stages.edge_detector import _load_cv_edges
    edges_mod = _load_cv_edges()
    filtered = result.get("filtered_frames", [])
    if filtered:
        sample = filtered[0]
        for canny_low in [20, 50, 80, 120]:
            dbg = edges_mod.detect_edges_debug(sample.image, canny_low=canny_low)
            # BUG FIX: the format was "canny_low=%d%d horizontals ...", mashing
            # the parameter value into the horizontal count (e.g. "canny_low=2045
            # horizontals"). A separator was presumably lost; restore one.
            logger.info(
                " canny_low=%d: %d horizontals, %d pairs, %d regions",
                canny_low, dbg["horizontal_count"], dbg["pair_count"], len(dbg["regions"]),
            )
    logger.info("")
    logger.info("=== Editor test ===")
    logger.info(" Dashboard: http://mpr.local.ar/detection/?job=%s", args.job)
    logger.info(" Editor: http://mpr.local.ar/detection/?job=%s#/editor/detect_edges", args.job)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,176 @@
#!/usr/bin/env python3
"""
Seed a scenario checkpoint from a video chunk.
Extracts frames via ffmpeg, uploads to MinIO, creates a StageCheckpoint
in Postgres marked as a scenario. No pipeline, no Redis, no SSE.
Prerequisites:
- Postgres reachable (port-forward or local)
- MinIO reachable (port-forward or local)
Usage:
# With K8s port-forwards:
kubectl port-forward svc/postgres 5432:5432 &
kubectl port-forward svc/minio 9000:9000 &
python tests/detect/manual/seed_scenario.py
# Custom video:
python tests/detect/manual/seed_scenario.py --video media/mpr/out/chunks/.../chunk_0001.mp4
Then open:
http://mpr.local.ar/detection/?job=<JOB_ID>&stage=filter_scenes&editor=true
"""
from __future__ import annotations
import argparse
import logging
import os
import sys
import uuid
parser = argparse.ArgumentParser(description="Seed a scenario checkpoint")
parser.add_argument("--video",
                    default="media/mpr/out/chunks/95043d50-4df6-4ac8-bbd5-2ba873117c6e/chunk_0000.mp4")
parser.add_argument("--label", default="chelsea_edges_default",
                    help="Scenario label for bookmarking")
parser.add_argument("--fps", type=float, default=2.0, help="Frames per second to extract")
parser.add_argument("--max-frames", type=int, default=20, help="Max frames to extract")
parser.add_argument("--db-url",
                    default=os.environ.get("DATABASE_URL", "postgresql://mpr:mpr@localhost:5432/mpr"))
parser.add_argument("--s3-url",
                    default=os.environ.get("S3_ENDPOINT_URL", "http://localhost:9000"))
args = parser.parse_args()

# Set env before imports — the core.db / detect imports in main() and
# extract_frames_ffmpeg() pick these values up.
os.environ["DATABASE_URL"] = args.db_url
os.environ["S3_ENDPOINT_URL"] = args.s3_url
os.environ.setdefault("AWS_ACCESS_KEY_ID", "minioadmin")
os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "minioadmin")
sys.path.insert(0, ".")

# BUG FIX: the format previously ran the logger name straight into the
# message ("%(name)s%(message)s"); a separator was presumably lost.
logging.basicConfig(level=logging.INFO, format="%(levelname)-7s %(name)s %(message)s")
logger = logging.getLogger(__name__)
def extract_frames_ffmpeg(video_path: str, fps: float, max_frames: int):
    """Extract up to *max_frames* frames from *video_path* via ffmpeg.

    Samples at *fps* frames per second and returns a list of
    ``detect.models.Frame`` with 0-indexed sequence numbers and timestamps
    derived from the sampling rate. No pipeline dependencies.

    Raises subprocess.CalledProcessError if ffmpeg fails.
    """
    import subprocess
    import tempfile
    from pathlib import Path
    import numpy as np
    from PIL import Image
    from detect.models import Frame

    # BUG FIX: the original mkdtemp + per-file unlink + rmdir leaked the temp
    # directory whenever ffmpeg or image decoding raised; the context manager
    # guarantees cleanup on every exit path.
    with tempfile.TemporaryDirectory(prefix="scenario_") as tmpdir:
        pattern = os.path.join(tmpdir, "frame_%04d.jpg")
        cmd = [
            "ffmpeg", "-i", video_path,
            "-vf", f"fps={fps}",
            "-frames:v", str(max_frames),
            "-q:v", "2",
            pattern,
            "-y", "-loglevel", "error",
        ]
        subprocess.run(cmd, check=True)

        frames = []
        for jpg in sorted(Path(tmpdir).glob("frame_*.jpg")):
            # ffmpeg numbers output files from 1; convert to 0-indexed.
            seq = int(jpg.stem.split("_")[1]) - 1
            img = Image.open(jpg).convert("RGB")
            frames.append(Frame(
                sequence=seq,
                chunk_id=0,
                timestamp=seq / fps,
                image=np.array(img),
            ))
    return frames
def main():
    """Seed one scenario: extract frames, upload them, persist a checkpoint."""
    new_job = str(uuid.uuid4())
    source = args.video
    if not os.path.exists(source):
        logger.error("Video not found: %s", source)
        sys.exit(1)

    logger.info("Video: %s", source)
    logger.info("Job ID: %s", new_job)
    logger.info("Label: %s", args.label)

    # Make sure the schema exists before we write anything.
    from core.db.connection import create_tables
    create_tables()

    # Pull frames straight out of the chunk with ffmpeg.
    logger.info("Extracting frames (fps=%.1f, max=%d)...", args.fps, args.max_frames)
    extracted = extract_frames_ffmpeg(source, args.fps, args.max_frames)
    logger.info("Extracted %d frames", len(extracted))

    # Push the frame images to object storage.
    from detect.checkpoint.frames import save_frames
    logger.info("Uploading frames to MinIO...")
    manifest = save_frames(new_job, extracted)
    logger.info("Uploaded %d frames", len(manifest))

    # Per-frame metadata rows for the checkpoint.
    frames_meta = []
    for frame in extracted:
        frames_meta.append({
            "sequence": frame.sequence,
            "chunk_id": frame.chunk_id,
            "timestamp": frame.timestamp,
            "perceptual_hash": "",
        })

    # No scene filter ran, so every frame counts as kept.
    kept_sequences = [frame.sequence for frame in extracted]

    # Persist the checkpoint flagged as a bookmarkable scenario.
    from core.db.detect import save_stage_checkpoint
    from detect.checkpoint.frames import CHECKPOINT_PREFIX
    checkpoint = save_stage_checkpoint(
        job_id=new_job,
        stage="filter_scenes",
        stage_index=1,
        frames_prefix=f"{CHECKPOINT_PREFIX}/{new_job}/frames/",
        frames_manifest={str(key): value for key, value in manifest.items()},
        frames_meta=frames_meta,
        filtered_frame_sequences=kept_sequences,
        stage_output_key="",
        stats={"frames_extracted": len(extracted), "frames_after_scene_filter": len(extracted)},
        config_snapshot={},
        config_overrides={},
        video_path=source,
        profile_name="soccer_broadcast",
        is_scenario=True,
        scenario_label=args.label,
    )

    logger.info("")
    logger.info("Scenario created:")
    logger.info(" ID: %s", checkpoint.id)
    logger.info(" Job: %s", new_job)
    logger.info(" Label: %s", args.label)
    logger.info(" Frames: %d", len(extracted))
    logger.info("")
    logger.info("Open in editor:")
    logger.info(" http://mpr.local.ar/detection/?job=%s#/editor/detect_edges", new_job)


if __name__ == "__main__":
    main()