This commit is contained in:
2026-03-23 15:18:23 -03:00
parent 5ed876d694
commit b57da622cb
17 changed files with 554 additions and 103 deletions

68
detect/emit.py Normal file
View File

@@ -0,0 +1,68 @@
"""
Event emission helpers for detection pipeline stages.
Single place that knows how to build event payloads.
Stages call these instead of constructing dicts or dataclasses directly.
"""
from __future__ import annotations
import dataclasses
from datetime import datetime, timezone
from detect.events import push_detect_event
from detect.models import PipelineStats
def log(job_id: str | None, stage: str, level: str, msg: str) -> None:
    """Publish one log line on the job's event stream.

    Silently does nothing when *job_id* is falsy (stage running outside
    a tracked job). The payload carries a UTC ISO-8601 timestamp.
    """
    if not job_id:
        return
    push_detect_event(
        job_id,
        "log",
        {
            "level": level,
            "stage": stage,
            "msg": msg,
            "ts": datetime.now(timezone.utc).isoformat(),
        },
    )
def stats(job_id: str | None, **kwargs) -> None:
    """Emit a stats_update event. Pass only the fields that changed.

    No-op when *job_id* is falsy. NOTE(review): the full dataclass is
    serialized, so fields not passed go out with their PipelineStats
    defaults rather than being omitted — consumers presumably merge or
    tolerate that; verify before relying on partial updates.
    """
    if not job_id:
        return
    snapshot = dataclasses.asdict(PipelineStats(**kwargs))
    push_detect_event(job_id, "stats_update", snapshot)
def detection(
    job_id: str | None,
    brand: str,
    confidence: float,
    source: str,
    timestamp: float,
    duration: float = 0.0,
    content_type: str = "",
    frame_ref: int | None = None,
) -> None:
    """Publish a brand detection event for *job_id*.

    No-op when *job_id* is falsy. All keyword fields are forwarded
    verbatim into the event payload.
    """
    if not job_id:
        return
    push_detect_event(
        job_id,
        "detection",
        dict(
            brand=brand,
            confidence=confidence,
            source=source,
            timestamp=timestamp,
            duration=duration,
            content_type=content_type,
            frame_ref=frame_ref,
        ),
    )
def job_complete(job_id: str | None, report: dict) -> None:
    """Publish the terminal job_complete event carrying the final report.

    No-op when *job_id* is falsy.
    """
    if not job_id:
        return
    push_detect_event(job_id, "job_complete", {"job_id": job_id, "report": report})

View File

@@ -1,25 +1,41 @@
"""
Stage 1 — Frame Extraction
Extracts frames from a video at a configurable FPS using FFmpeg.
Extracts frames from a video at a configurable FPS using the core ffmpeg module.
Emits log + stats_update SSE events as it works.
"""
from __future__ import annotations
import subprocess
import tempfile
from pathlib import Path
import ffmpeg
import numpy as np
from PIL import Image
from core.ffmpeg.probe import probe_file
from detect.events import push_detect_event
from detect import emit
from detect.models import Frame
from detect.profiles.base import FrameExtractionConfig
def _load_frames(tmpdir: Path, fps: float) -> list[Frame]:
    """Load extracted JPEG files into Frame objects.

    Files are named frame_%06d.jpg, so the lexical sort recovers
    extraction order; timestamps are reconstructed as index / fps.

    Args:
        tmpdir: directory holding the frame_*.jpg files.
        fps: extraction rate used to derive each frame's timestamp.

    Returns:
        Frames in temporal order (chunk_id fixed at 0 for this stage).
    """
    frames: list[Frame] = []
    for i, fpath in enumerate(sorted(tmpdir.glob("frame_*.jpg"))):
        # Use the context manager so the file handle is closed promptly
        # (the original leaked one open handle per frame). np.array()
        # forces a full decode, so the pixel data outlives the close.
        with Image.open(fpath) as img:
            pixels = np.array(img)
        frames.append(Frame(
            sequence=i,
            chunk_id=0,
            timestamp=i / fps,
            image=pixels,
        ))
    return frames
def extract_frames(
    video_path: str,
    config: FrameExtractionConfig,
    job_id: str | None = None,
) -> list[Frame]:
    """
    Extract frames from video at the configured FPS.

    Uses ffmpeg-python to build the extraction pipeline, outputs JPEG
    files to a temp dir, then loads them as numpy arrays. Output is
    capped at config.max_frames. Emits log + stats_update events when
    a job_id is supplied.

    Raises:
        RuntimeError: if the FFmpeg invocation fails.
    """
    probe = probe_file(video_path)
    duration = probe.duration or 0.0

    emit.log(job_id, "FrameExtractor", "INFO",
             f"Starting extraction: {Path(video_path).name} "
             f"({duration:.1f}s, {probe.width}x{probe.height}, fps={config.fps})")

    with tempfile.TemporaryDirectory() as tmpdir:
        pattern = str(Path(tmpdir) / "frame_%06d.jpg")
        # fps filter resamples the stream; qscale=2 keeps high-quality JPEGs;
        # frames= caps output so a long video cannot flood the temp dir.
        stream = (
            ffmpeg
            .input(video_path)
            .filter("fps", fps=config.fps)
            .output(pattern, qscale=2, frames=config.max_frames)
            .overwrite_output()
        )
        try:
            stream.run(capture_stdout=True, capture_stderr=True, quiet=True)
        except ffmpeg.Error as e:
            stderr = e.stderr.decode() if e.stderr else "unknown error"
            emit.log(job_id, "FrameExtractor", "ERROR", f"FFmpeg failed: {stderr[:200]}")
            raise RuntimeError(f"FFmpeg failed: {stderr}") from e
        # Load while still inside the with-block: the temp dir (and the
        # JPEGs in it) are deleted on exit.
        frames = _load_frames(Path(tmpdir), config.fps)

    emit.log(job_id, "FrameExtractor", "INFO", f"Extracted {len(frames)} frames")
    emit.stats(job_id, frames_extracted=len(frames))
    return frames

View File

@@ -0,0 +1,76 @@
"""
Stage 2 — Scene Filter
Removes near-duplicate frames using perceptual hashing (pHash).
Frames with a hamming distance below the threshold are considered
duplicates and dropped. This dramatically reduces work for downstream
CV stages without losing unique visual content.
"""
from __future__ import annotations
import imagehash
from PIL import Image
from detect import emit
from detect.models import Frame
from detect.profiles.base import SceneFilterConfig
def _compute_hashes(frames: list[Frame]) -> list[imagehash.ImageHash]:
"""Compute perceptual hashes for all frames."""
hashes = []
for f in frames:
img = Image.fromarray(f.image)
h = imagehash.phash(img)
f.perceptual_hash = str(h)
hashes.append(h)
return hashes
def _dedup(frames: list[Frame], hashes: list[imagehash.ImageHash], threshold: int) -> list[Frame]:
"""Greedy dedup: keep a frame if it's sufficiently different from all kept frames."""
kept = [frames[0]]
kept_hashes = [hashes[0]]
for i in range(1, len(frames)):
is_duplicate = any(hashes[i] - kh < threshold for kh in kept_hashes)
if not is_duplicate:
kept.append(frames[i])
kept_hashes.append(hashes[i])
return kept
def scene_filter(
    frames: list[Frame],
    config: SceneFilterConfig,
    job_id: str | None = None,
) -> list[Frame]:
    """
    Filter near-duplicate frames based on perceptual hash distance.

    Keeps the first frame in each group of similar frames. Returns a new
    list — the input list is not mutated (though _compute_hashes does
    record each frame's hash on the frame object).
    """
    if not config.enabled:
        emit.log(job_id, "SceneFilter", "INFO", "Scene filter disabled, passing all frames through")
        return frames
    if not frames:
        return []

    emit.log(job_id, "SceneFilter", "INFO",
             f"Filtering {len(frames)} frames (hamming_threshold={config.hamming_threshold})")

    survivors = _dedup(frames, _compute_hashes(frames), config.hamming_threshold)

    n_dropped = len(frames) - len(survivors)
    reduction = n_dropped / len(frames) * 100  # frames is non-empty here
    emit.log(job_id, "SceneFilter", "INFO",
             f"Kept {len(survivors)} frames, dropped {n_dropped} ({reduction:.0f}% reduction)")
    emit.stats(job_id, frames_extracted=len(frames), frames_after_scene_filter=len(survivors))
    return survivors