This commit is contained in:
2026-03-23 15:18:23 -03:00
parent 5ed876d694
commit b57da622cb
17 changed files with 554 additions and 103 deletions

68
detect/emit.py Normal file
View File

@@ -0,0 +1,68 @@
"""
Event emission helpers for detection pipeline stages.
Single place that knows how to build event payloads.
Stages call these instead of constructing dicts or dataclasses directly.
"""
from __future__ import annotations
import dataclasses
from datetime import datetime, timezone
from detect.events import push_detect_event
from detect.models import PipelineStats
def log(job_id: str | None, stage: str, level: str, msg: str) -> None:
    """Publish one log line on the job's event stream.

    Silently does nothing when *job_id* is falsy (stage running outside
    a tracked job). The payload carries a UTC ISO-8601 timestamp.
    """
    if not job_id:
        return
    push_detect_event(
        job_id,
        "log",
        {
            "level": level,
            "stage": stage,
            "msg": msg,
            "ts": datetime.now(timezone.utc).isoformat(),
        },
    )
def stats(job_id: str | None, **kwargs) -> None:
    """Emit a stats_update event. Pass only the fields that changed.

    No-op when *job_id* is falsy. NOTE(review): the full dataclass is
    serialized, so fields not passed go out with their PipelineStats
    defaults rather than being omitted — consumers presumably merge or
    tolerate that; verify before relying on partial updates.
    """
    if not job_id:
        return
    snapshot = dataclasses.asdict(PipelineStats(**kwargs))
    push_detect_event(job_id, "stats_update", snapshot)
def detection(
    job_id: str | None,
    brand: str,
    confidence: float,
    source: str,
    timestamp: float,
    duration: float = 0.0,
    content_type: str = "",
    frame_ref: int | None = None,
) -> None:
    """Publish a brand detection event for *job_id*.

    No-op when *job_id* is falsy. All keyword fields are forwarded
    verbatim into the event payload.
    """
    if not job_id:
        return
    push_detect_event(
        job_id,
        "detection",
        dict(
            brand=brand,
            confidence=confidence,
            source=source,
            timestamp=timestamp,
            duration=duration,
            content_type=content_type,
            frame_ref=frame_ref,
        ),
    )
def job_complete(job_id: str | None, report: dict) -> None:
    """Publish the terminal job_complete event carrying the final report.

    No-op when *job_id* is falsy.
    """
    if not job_id:
        return
    push_detect_event(job_id, "job_complete", {"job_id": job_id, "report": report})

View File

@@ -1,25 +1,41 @@
"""
Stage 1 — Frame Extraction
Extracts frames from a video at a configurable FPS using FFmpeg.
Extracts frames from a video at a configurable FPS using the core ffmpeg module.
Emits log + stats_update SSE events as it works.
"""
from __future__ import annotations
import subprocess
import tempfile
from pathlib import Path
import ffmpeg
import numpy as np
from PIL import Image
from core.ffmpeg.probe import probe_file
from detect.events import push_detect_event
from detect import emit
from detect.models import Frame
from detect.profiles.base import FrameExtractionConfig
def _load_frames(tmpdir: Path, fps: float) -> list[Frame]:
    """Load extracted JPEG files into Frame objects.

    Files are named frame_%06d.jpg, so the lexical sort recovers
    extraction order; timestamps are reconstructed as index / fps.

    Args:
        tmpdir: directory holding the frame_*.jpg files.
        fps: extraction rate used to derive each frame's timestamp.

    Returns:
        Frames in temporal order (chunk_id fixed at 0 for this stage).
    """
    frames: list[Frame] = []
    for i, fpath in enumerate(sorted(tmpdir.glob("frame_*.jpg"))):
        # Use the context manager so the file handle is closed promptly
        # (the original leaked one open handle per frame). np.array()
        # forces a full decode, so the pixel data outlives the close.
        with Image.open(fpath) as img:
            pixels = np.array(img)
        frames.append(Frame(
            sequence=i,
            chunk_id=0,
            timestamp=i / fps,
            image=pixels,
        ))
    return frames
def extract_frames(
    video_path: str,
    config: FrameExtractionConfig,
    job_id: str | None = None,
) -> list[Frame]:
    """
    Extract frames from video at the configured FPS.

    Uses ffmpeg-python to build the extraction pipeline, outputs JPEG
    files to a temp dir, then loads them as numpy arrays. Output is
    capped at config.max_frames. Emits log + stats_update events when
    a job_id is supplied.

    Raises:
        RuntimeError: if the FFmpeg invocation fails.
    """
    probe = probe_file(video_path)
    duration = probe.duration or 0.0

    emit.log(job_id, "FrameExtractor", "INFO",
             f"Starting extraction: {Path(video_path).name} "
             f"({duration:.1f}s, {probe.width}x{probe.height}, fps={config.fps})")

    with tempfile.TemporaryDirectory() as tmpdir:
        pattern = str(Path(tmpdir) / "frame_%06d.jpg")
        # fps filter resamples the stream; qscale=2 keeps high-quality JPEGs;
        # frames= caps output so a long video cannot flood the temp dir.
        stream = (
            ffmpeg
            .input(video_path)
            .filter("fps", fps=config.fps)
            .output(pattern, qscale=2, frames=config.max_frames)
            .overwrite_output()
        )
        try:
            stream.run(capture_stdout=True, capture_stderr=True, quiet=True)
        except ffmpeg.Error as e:
            stderr = e.stderr.decode() if e.stderr else "unknown error"
            emit.log(job_id, "FrameExtractor", "ERROR", f"FFmpeg failed: {stderr[:200]}")
            raise RuntimeError(f"FFmpeg failed: {stderr}") from e
        # Load while still inside the with-block: the temp dir (and the
        # JPEGs in it) are deleted on exit.
        frames = _load_frames(Path(tmpdir), config.fps)

    emit.log(job_id, "FrameExtractor", "INFO", f"Extracted {len(frames)} frames")
    emit.stats(job_id, frames_extracted=len(frames))
    return frames

View File

@@ -0,0 +1,76 @@
"""
Stage 2 — Scene Filter
Removes near-duplicate frames using perceptual hashing (pHash).
Frames with a hamming distance below the threshold are considered
duplicates and dropped. This dramatically reduces work for downstream
CV stages without losing unique visual content.
"""
from __future__ import annotations
import imagehash
from PIL import Image
from detect import emit
from detect.models import Frame
from detect.profiles.base import SceneFilterConfig
def _compute_hashes(frames: list[Frame]) -> list[imagehash.ImageHash]:
"""Compute perceptual hashes for all frames."""
hashes = []
for f in frames:
img = Image.fromarray(f.image)
h = imagehash.phash(img)
f.perceptual_hash = str(h)
hashes.append(h)
return hashes
def _dedup(frames: list[Frame], hashes: list[imagehash.ImageHash], threshold: int) -> list[Frame]:
"""Greedy dedup: keep a frame if it's sufficiently different from all kept frames."""
kept = [frames[0]]
kept_hashes = [hashes[0]]
for i in range(1, len(frames)):
is_duplicate = any(hashes[i] - kh < threshold for kh in kept_hashes)
if not is_duplicate:
kept.append(frames[i])
kept_hashes.append(hashes[i])
return kept
def scene_filter(
    frames: list[Frame],
    config: SceneFilterConfig,
    job_id: str | None = None,
) -> list[Frame]:
    """
    Filter near-duplicate frames based on perceptual hash distance.

    Keeps the first frame in each group of similar frames. Returns a new
    list — the input list is not mutated (though _compute_hashes does
    record each frame's hash on the frame object).
    """
    if not config.enabled:
        emit.log(job_id, "SceneFilter", "INFO", "Scene filter disabled, passing all frames through")
        return frames
    if not frames:
        return []

    emit.log(job_id, "SceneFilter", "INFO",
             f"Filtering {len(frames)} frames (hamming_threshold={config.hamming_threshold})")

    survivors = _dedup(frames, _compute_hashes(frames), config.hamming_threshold)

    n_dropped = len(frames) - len(survivors)
    reduction = n_dropped / len(frames) * 100  # frames is non-empty here
    emit.log(job_id, "SceneFilter", "INFO",
             f"Kept {len(survivors)} frames, dropped {n_dropped} ({reduction:.0f}% reduction)")
    emit.stats(job_id, frames_extracted=len(frames), frames_after_scene_filter=len(survivors))
    return survivors