phase 4
detect/emit.py (Normal file, 68 lines)
@@ -0,0 +1,68 @@
"""
Event emission helpers for detection pipeline stages.

Single place that knows how to build event payloads.
Stages call these instead of constructing dicts or dataclasses directly.
"""

from __future__ import annotations

import dataclasses
from datetime import datetime, timezone

from detect.events import push_detect_event
from detect.models import PipelineStats


def log(job_id: str | None, stage: str, level: str, msg: str) -> None:
    """Emit a log event."""
    if not job_id:
        return
    payload = {
        "level": level,
        "stage": stage,
        "msg": msg,
        "ts": datetime.now(timezone.utc).isoformat(),
    }
    push_detect_event(job_id, "log", payload)


def stats(job_id: str | None, **kwargs) -> None:
    """Emit a stats_update event. Pass only the fields that changed;
    unset fields are serialized with their PipelineStats defaults."""
    if not job_id:
        return
    s = PipelineStats(**kwargs)
    push_detect_event(job_id, "stats_update", dataclasses.asdict(s))


def detection(
    job_id: str | None,
    brand: str,
    confidence: float,
    source: str,
    timestamp: float,
    duration: float = 0.0,
    content_type: str = "",
    frame_ref: int | None = None,
) -> None:
    """Emit a brand detection event."""
    if not job_id:
        return
    payload = {
        "brand": brand,
        "confidence": confidence,
        "source": source,
        "timestamp": timestamp,
        "duration": duration,
        "content_type": content_type,
        "frame_ref": frame_ref,
    }
    push_detect_event(job_id, "detection", payload)


def job_complete(job_id: str | None, report: dict) -> None:
    """Emit a job_complete event with the final report."""
    if not job_id:
        return
    payload = {"job_id": job_id, "report": report}
    push_detect_event(job_id, "job_complete", payload)
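Note: these helpers collapse the inline push_detect_event boilerplate previously repeated in each stage into one-liners. A minimal sketch of a caller, with a hypothetical stage name (nothing below is part of this commit):

    from detect import emit

    def my_stage(frames, job_id: str | None = None):
        # Hypothetical stage: log progress, then push a partial stats update.
        emit.log(job_id, "MyStage", "INFO", f"Processing {len(frames)} frames")
        emit.stats(job_id, frames_extracted=len(frames))
        return frames

Because stats() round-trips its kwargs through PipelineStats, a misspelled field name raises TypeError immediately rather than silently emitting a malformed payload.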
@@ -1,25 +1,41 @@
 """
 Stage 1 — Frame Extraction
 
-Extracts frames from a video at a configurable FPS using FFmpeg.
+Extracts frames from a video at a configurable FPS using the core ffmpeg module.
 Emits log + stats_update SSE events as it works.
 """
 
 from __future__ import annotations
 
-import subprocess
 import tempfile
 from pathlib import Path
 
+import ffmpeg
 import numpy as np
 from PIL import Image
 
 from core.ffmpeg.probe import probe_file
-from detect.events import push_detect_event
+from detect import emit
 from detect.models import Frame
 from detect.profiles.base import FrameExtractionConfig
 
 
+def _load_frames(tmpdir: Path, fps: float) -> list[Frame]:
+    """Load extracted JPEG files into Frame objects."""
+    frame_files = sorted(tmpdir.glob("frame_*.jpg"))
+    frames = []
+    for i, fpath in enumerate(frame_files):
+        img = Image.open(fpath)
+        frame = Frame(
+            sequence=i,
+            chunk_id=0,
+            timestamp=i / fps,
+            image=np.array(img),
+        )
+        frames.append(frame)
+    return frames
+
+
 def extract_frames(
     video_path: str,
     config: FrameExtractionConfig,
@@ -28,75 +44,37 @@ def extract_frames(
     """
     Extract frames from video at the configured FPS.
 
-    Uses FFmpeg to decode frames as raw images, then loads them
-    as numpy arrays. Caps at config.max_frames.
+    Uses ffmpeg-python to build the extraction pipeline,
+    outputs JPEG files to a temp dir, then loads as numpy arrays.
     """
     probe = probe_file(video_path)
     duration = probe.duration or 0.0
 
-    if job_id:
-        push_detect_event(job_id, "log", {
-            "level": "INFO",
-            "stage": "FrameExtractor",
-            "msg": f"Starting extraction: {Path(video_path).name} "
-                   f"({duration:.1f}s, {probe.width}x{probe.height}, fps={config.fps})",
-        })
-
-    frames: list[Frame] = []
+    emit.log(job_id, "FrameExtractor", "INFO",
+             f"Starting extraction: {Path(video_path).name} "
+             f"({duration:.1f}s, {probe.width}x{probe.height}, fps={config.fps})")
 
     with tempfile.TemporaryDirectory() as tmpdir:
         pattern = str(Path(tmpdir) / "frame_%06d.jpg")
 
-        cmd = [
-            "ffmpeg", "-i", video_path,
-            "-vf", f"fps={config.fps}",
-            "-q:v", "2",
-            "-frames:v", str(config.max_frames),
-            pattern,
-            "-y", "-loglevel", "warning",
-        ]
+        stream = (
+            ffmpeg
+            .input(video_path)
+            .filter("fps", fps=config.fps)
+            .output(pattern, qscale=2, frames=config.max_frames)
+            .overwrite_output()
+        )
 
-        result = subprocess.run(cmd, capture_output=True, text=True)
+        try:
+            stream.run(capture_stdout=True, capture_stderr=True, quiet=True)
+        except ffmpeg.Error as e:
+            stderr = e.stderr.decode() if e.stderr else "unknown error"
+            emit.log(job_id, "FrameExtractor", "ERROR", f"FFmpeg failed: {stderr[:200]}")
+            raise RuntimeError(f"FFmpeg failed: {stderr}") from e
 
-        if result.returncode != 0:
-            if job_id:
-                push_detect_event(job_id, "log", {
-                    "level": "ERROR",
-                    "stage": "FrameExtractor",
-                    "msg": f"FFmpeg failed: {result.stderr[:200]}",
-                })
-            raise RuntimeError(f"FFmpeg failed: {result.stderr}")
+        frames = _load_frames(Path(tmpdir), config.fps)
 
-        frame_files = sorted(Path(tmpdir).glob("frame_*.jpg"))
-
-        for i, fpath in enumerate(frame_files):
-            img = Image.open(fpath)
-            arr = np.array(img)
-            timestamp = i / config.fps
-
-            frames.append(Frame(
-                sequence=i,
-                chunk_id=0,
-                timestamp=timestamp,
-                image=arr,
-            ))
-
-    if job_id:
-        push_detect_event(job_id, "log", {
-            "level": "INFO",
-            "stage": "FrameExtractor",
-            "msg": f"Extracted {len(frames)} frames",
-        })
-        push_detect_event(job_id, "stats_update", {
-            "frames_extracted": len(frames),
-            "frames_after_scene_filter": 0,
-            "regions_detected": 0,
-            "regions_resolved_by_ocr": 0,
-            "regions_escalated_to_local_vlm": 0,
-            "regions_escalated_to_cloud_llm": 0,
-            "cloud_llm_calls": 0,
-            "processing_time_seconds": 0.0,
-            "estimated_cloud_cost_usd": 0.0,
-        })
+    emit.log(job_id, "FrameExtractor", "INFO", f"Extracted {len(frames)} frames")
+    emit.stats(job_id, frames_extracted=len(frames))
 
     return frames
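One way to sanity-check the migration above: an ffmpeg-python stream can be compiled to the argv it would execute, which should correspond to the old hand-built cmd list. A standalone sketch with hypothetical paths and values (the exact filter syntax and argument order shown are an assumption; ffmpeg-python emits -filter_complex where the old code used -vf, and -qscale/-frames where it used -q:v/-frames:v, which are effectively equivalent for a single video stream):

    import ffmpeg

    # Same pipeline shape as extract_frames, with made-up values.
    stream = (
        ffmpeg
        .input("sample.mp4")
        .filter("fps", fps=1.0)
        .output("/tmp/frames/frame_%06d.jpg", qscale=2, frames=300)
        .overwrite_output()
    )

    # .compile() returns the argv list without running anything.
    # Expected shape (assumption):
    # ['ffmpeg', '-i', 'sample.mp4', '-filter_complex', '[0]fps=1.0[s0]',
    #  '-map', '[s0]', '-qscale', '2', '-frames', '300',
    #  '/tmp/frames/frame_%06d.jpg', '-y']
    print(stream.compile())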
detect/stages/scene_filter.py (Normal file, 76 lines)
@@ -0,0 +1,76 @@
"""
Stage 2 — Scene Filter

Removes near-duplicate frames using perceptual hashing (pHash).
Frames with a hamming distance below the threshold are considered
duplicates and dropped. This dramatically reduces work for downstream
CV stages without losing unique visual content.
"""

from __future__ import annotations

import imagehash
from PIL import Image

from detect import emit
from detect.models import Frame
from detect.profiles.base import SceneFilterConfig


def _compute_hashes(frames: list[Frame]) -> list[imagehash.ImageHash]:
    """Compute perceptual hashes for all frames."""
    hashes = []
    for f in frames:
        img = Image.fromarray(f.image)
        h = imagehash.phash(img)
        f.perceptual_hash = str(h)
        hashes.append(h)
    return hashes


def _dedup(frames: list[Frame], hashes: list[imagehash.ImageHash], threshold: int) -> list[Frame]:
    """Greedy dedup: keep a frame if it's sufficiently different from all kept frames."""
    kept = [frames[0]]
    kept_hashes = [hashes[0]]

    for i in range(1, len(frames)):
        is_duplicate = any(hashes[i] - kh < threshold for kh in kept_hashes)
        if not is_duplicate:
            kept.append(frames[i])
            kept_hashes.append(hashes[i])

    return kept


def scene_filter(
    frames: list[Frame],
    config: SceneFilterConfig,
    job_id: str | None = None,
) -> list[Frame]:
    """
    Filter near-duplicate frames based on perceptual hash distance.

    Keeps the first frame in each group of similar frames.
    Returns a new list — does not mutate the input list, though each
    frame's perceptual_hash is set as a side effect.
    """
    if not config.enabled:
        emit.log(job_id, "SceneFilter", "INFO", "Scene filter disabled, passing all frames through")
        return frames

    if not frames:
        return []

    emit.log(job_id, "SceneFilter", "INFO",
             f"Filtering {len(frames)} frames (hamming_threshold={config.hamming_threshold})")

    hashes = _compute_hashes(frames)
    kept = _dedup(frames, hashes, config.hamming_threshold)

    dropped = len(frames) - len(kept)
    pct = (dropped / len(frames) * 100) if frames else 0

    emit.log(job_id, "SceneFilter", "INFO",
             f"Kept {len(kept)} frames, dropped {dropped} ({pct:.0f}% reduction)")
    emit.stats(job_id, frames_extracted=len(frames), frames_after_scene_filter=len(kept))

    return kept
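For intuition about the hamming_threshold test in _dedup: subtracting two imagehash.ImageHash values yields the hamming distance between the 64-bit hashes, so hashes[i] - kh < threshold reads as "fewer than threshold bits differ". A standalone sketch (the synthetic gradient images and the example threshold of 10 are illustrative assumptions, not values from this commit):

    import numpy as np
    import imagehash
    from PIL import Image

    # A horizontal gradient, a slightly brightened copy, and its transpose.
    base = np.tile(np.linspace(0, 250, 64, dtype=np.uint8), (64, 1))
    img_a = Image.fromarray(base)
    img_b = Image.fromarray(base + 2)        # near-duplicate: tiny brightness shift
    img_c = Image.fromarray(base.T.copy())   # different content: vertical gradient

    d_dup = imagehash.phash(img_a) - imagehash.phash(img_b)
    d_diff = imagehash.phash(img_a) - imagehash.phash(img_c)
    print(d_dup, d_diff)  # expect d_dup near 0 and d_diff well above a threshold like 10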