phase cv 0

2026-03-26 22:22:35 -03:00
parent beb0416280
commit 65814b5b9e
46 changed files with 2962 additions and 268 deletions
--- a/detect/stages/edge_detector.py
+++ b/detect/stages/edge_detector.py
@@ -0,0 +1,174 @@
+"""
+Stage — Edge Detection
+
+Canny + HoughLinesP to find horizontal line pairs that bound
+advertising hoardings. Pure OpenCV, no ML models.
+
+Two modes:
+  - Remote: calls GPU inference server over HTTP
+  - Local: imports cv2 directly (OpenCV on same machine)
+
+Emits frame_update events with bounding boxes for the frame viewer.
+"""
+
+from __future__ import annotations
+
+import base64
+import io
+import logging
+import time
+
+from PIL import Image
+
+from detect import emit
+from detect.models import BoundingBox, Frame
+from detect.profiles.base import RegionAnalysisConfig
+
+logger = logging.getLogger(__name__)
+
+
+def _frame_to_b64(frame: Frame) -> str:
+    """Return the frame image as a base64-encoded JPEG string.
+
+    The image is compressed at quality 70 into an in-memory buffer and the
+    resulting bytes are base64-encoded, ready to be attached to SSE
+    frame_update events.
+    """
+    jpeg_buffer = io.BytesIO()
+    Image.fromarray(frame.image).save(jpeg_buffer, format="JPEG", quality=70)
+    encoded = base64.b64encode(jpeg_buffer.getvalue())
+    return encoded.decode()
+
+
+def _detect_remote(
+    frame: Frame,
+    config: RegionAnalysisConfig,
+    inference_url: str,
+    job_id: str = "",
+    log_level: str = "INFO",
+) -> list[BoundingBox]:
+    """Run edge detection for one frame on the inference server over HTTP.
+
+    A new ``InferenceClient`` is created for ``inference_url`` on every call.
+    The frame image and all ``edge_*`` settings from ``config`` are passed to
+    ``client.detect_edges``; each result it returns is copied into a
+    ``BoundingBox``.
+    """
+    # Local import: the client is only needed on the remote path.
+    from detect.inference import InferenceClient
+
+    client = InferenceClient(
+        base_url=inference_url, job_id=job_id, log_level=log_level,
+    )
+    edge_params = {
+        "edge_canny_low": config.edge_canny_low,
+        "edge_canny_high": config.edge_canny_high,
+        "edge_hough_threshold": config.edge_hough_threshold,
+        "edge_hough_min_length": config.edge_hough_min_length,
+        "edge_hough_max_gap": config.edge_hough_max_gap,
+        "edge_pair_max_distance": config.edge_pair_max_distance,
+        "edge_pair_min_distance": config.edge_pair_min_distance,
+    }
+    detections = client.detect_edges(image=frame.image, **edge_params)
+    return [
+        BoundingBox(
+            x=det.x, y=det.y, w=det.w, h=det.h,
+            confidence=det.confidence, label=det.label,
+        )
+        for det in detections
+    ]
+
+
+# Cached module object for gpu/models/cv/edges.py; stays None until a load succeeds.
+_cv_edges_mod = None
+
+
+def _load_cv_edges():
+    """Load ``gpu/models/cv/edges.py`` by file path and cache the module.
+
+    The file is loaded directly instead of through its package because
+    gpu/models/__init__.py has GPU-container-only imports. The path is
+    relative, so it is resolved against the current working directory.
+
+    Returns:
+        The loaded module. Later calls return the same cached object.
+
+    Raises:
+        ImportError: if no import spec or loader can be built for the file.
+        Exception: anything raised while executing the module (for example
+            ``FileNotFoundError`` when the file is missing) propagates
+            unchanged. Nothing is cached in that case, so the next call
+            retries the load.
+    """
+    global _cv_edges_mod
+    if _cv_edges_mod is None:
+        import importlib.util
+        from pathlib import Path
+
+        edges_path = Path("gpu/models/cv/edges.py")
+        spec = importlib.util.spec_from_file_location("cv_edges", edges_path)
+        if spec is None or spec.loader is None:
+            raise ImportError(f"Cannot load edge detection module from {edges_path}")
+        module = importlib.util.module_from_spec(spec)
+        # Execute into a local first. Publishing the module before
+        # exec_module() succeeds would cache a half-initialised module, and
+        # every later call would return it instead of retrying.
+        spec.loader.exec_module(module)
+        _cv_edges_mod = module
+    return _cv_edges_mod
+
+
+def _detect_local(frame: Frame, config: RegionAnalysisConfig) -> list[BoundingBox]:
+    """Run edge detection for one frame in-process (requires opencv-python).
+
+    Calls ``detect_edges`` from the module returned by ``_load_cv_edges`` with
+    the frame image and the ``edge_*`` settings from ``config``, then turns
+    each returned mapping (keys x, y, w, h, confidence, label) into a
+    ``BoundingBox``.
+    """
+    edges_module = _load_cv_edges()
+    raw_regions = edges_module.detect_edges(
+        frame.image,
+        canny_low=config.edge_canny_low,
+        canny_high=config.edge_canny_high,
+        hough_threshold=config.edge_hough_threshold,
+        hough_min_length=config.edge_hough_min_length,
+        hough_max_gap=config.edge_hough_max_gap,
+        pair_max_distance=config.edge_pair_max_distance,
+        pair_min_distance=config.edge_pair_min_distance,
+    )
+    return [
+        BoundingBox(
+            x=region["x"], y=region["y"], w=region["w"], h=region["h"],
+            confidence=region["confidence"], label=region["label"],
+        )
+        for region in raw_regions
+    ]
+
+
+def detect_edge_regions(
+    frames: list[Frame],
+    config: RegionAnalysisConfig,
+    inference_url: str | None = None,
+    job_id: str | None = None,
+) -> dict[int, list[BoundingBox]]:
+    """
+    Detect edge-bounded regions in every frame.
+
+    Frames are processed remotely when ``inference_url`` is truthy and
+    in-process otherwise. One DEBUG log line is emitted per frame. A
+    frame_update event (JPEG plus boxes) is emitted only for frames that
+    produced at least one box, and only when ``job_id`` is truthy. The
+    overall region count is reported through ``emit.log`` and ``emit.stats``.
+
+    Returns a dict mapping frame sequence → list of bounding boxes, or an
+    empty dict when ``config.enabled`` is false.
+    """
+    if not config.enabled:
+        emit.log(job_id, "EdgeDetection", "INFO", "Edge detection disabled, skipping")
+        return {}
+
+    use_remote = bool(inference_url)
+    mode = "remote" if use_remote else "local"
+    emit.log(job_id, "EdgeDetection", "INFO",
+             f"Detecting edges in {len(frames)} frames (mode={mode})")
+
+    regions_by_sequence: dict[int, list[BoundingBox]] = {}
+    region_count = 0
+
+    for frame in frames:
+        started = time.monotonic()
+        if use_remote:
+            # Read on every iteration so the current run log level is forwarded.
+            from detect.emit import _run_log_level
+            boxes = _detect_remote(
+                frame, config, inference_url,
+                job_id=job_id or "", log_level=_run_log_level,
+            )
+        else:
+            boxes = _detect_local(frame, config)
+        elapsed_ms = (time.monotonic() - started) * 1000
+
+        regions_by_sequence[frame.sequence] = boxes
+        region_count += len(boxes)
+
+        summary = f"Frame {frame.sequence}: {len(boxes)} regions in {elapsed_ms:.0f}ms"
+        if boxes:
+            summary += f" [{', '.join(b.label for b in boxes)}]"
+        emit.log(job_id, "EdgeDetection", "DEBUG", summary)
+
+        # Frame updates are only published for frames with detections
+        # and when there is a job to attach them to.
+        if not (boxes and job_id):
+            continue
+
+        payload = []
+        for b in boxes:
+            payload.append({
+                "x": b.x, "y": b.y, "w": b.w, "h": b.h,
+                "confidence": b.confidence, "label": b.label,
+                "stage": "detect_edges",
+            })
+        emit.frame_update(
+            job_id,
+            frame_ref=frame.sequence,
+            timestamp=frame.timestamp,
+            jpeg_b64=_frame_to_b64(frame),
+            boxes=payload,
+        )
+
+    emit.log(job_id, "EdgeDetection", "INFO",
+             f"Found {region_count} edge regions across {len(frames)} frames")
+    emit.stats(job_id, cv_regions_detected=region_count)
+
+    return regions_by_sequence
--- a/detect/stages/registry/init.py
+++ b/detect/stages/registry/init.py
@@ -3,6 +3,7 @@ Stage registry — registers all built-in stages.

 Split by category:
  preprocessing.py  — extract_frames, filter_scenes
+  cv_analysis.py    — detect_edges (+ future: detect_contours, detect_color, merge_regions)
  detection.py      — detect_objects, run_ocr
  resolution.py     — match_brands
  escalation.py     — escalate_vlm, escalate_cloud
@@ -11,6 +12,7 @@ Split by category:
 """

 from . import preprocessing
+from . import cv_analysis
 from . import detection
 from . import resolution
 from . import escalation
@@ -19,6 +21,7 @@ from . import output

 def register_all():
    preprocessing.register()
+    cv_analysis.register()
    detection.register()
    resolution.register()
    escalation.register()
--- a/detect/stages/registry/cv_analysis.py
+++ b/detect/stages/registry/cv_analysis.py
@@ -0,0 +1,45 @@
+"""Registration for CV analysis stages: edge detection."""
+
+from detect.stages.base import StageDefinition, StageIO, StageConfigField, register_stage
+from ._serializers import serialize_dataclass_list, deserialize_bounding_box
+
+
+def _ser_regions(state: dict, job_id: str) -> dict:
+    """Serialize ``edge_regions_by_frame`` from the pipeline state.
+
+    Each frame-sequence key is converted to a string and each list of boxes
+    is passed through ``serialize_dataclass_list``. A missing key yields an
+    empty mapping. ``job_id`` is accepted but not used.
+    """
+    serialized = {}
+    for sequence, boxes in state.get("edge_regions_by_frame", {}).items():
+        key = str(sequence)
+        serialized[key] = serialize_dataclass_list(boxes)
+    return {"edge_regions_by_frame": serialized}
+
+
+def _deser_regions(data: dict, job_id: str) -> dict:
+    """Rebuild ``edge_regions_by_frame`` from its serialized form.
+
+    String keys are converted back to ``int`` frame sequences and every box
+    dict is passed through ``deserialize_bounding_box``. A missing key yields
+    an empty mapping. ``job_id`` is accepted but not used.
+    """
+    restored = {}
+    for seq_key, raw_boxes in data.get("edge_regions_by_frame", {}).items():
+        boxes = list(map(deserialize_bounding_box, raw_boxes))
+        restored[int(seq_key)] = boxes
+    return {"edge_regions_by_frame": restored}
+
+
+def register():
+    """Register the ``detect_edges`` stage.
+
+    The stage reads ``filtered_frames`` and writes ``edge_regions_by_frame``.
+    It declares an ``enabled`` switch plus the Canny, Hough and line-pair
+    tuning fields, and uses ``_ser_regions`` / ``_deser_regions`` as its
+    serialize and deserialize functions.
+    """
+    stage_io = StageIO(
+        reads=["filtered_frames"],
+        writes=["edge_regions_by_frame"],
+    )
+    fields = [
+        StageConfigField("enabled", "bool", True, "Enable region analysis"),
+        StageConfigField("edge_canny_low", "int", 50, "Canny low threshold", min=0, max=255),
+        StageConfigField("edge_canny_high", "int", 150, "Canny high threshold", min=0, max=255),
+        StageConfigField("edge_hough_threshold", "int", 80, "Hough accumulator threshold", min=1, max=500),
+        StageConfigField("edge_hough_min_length", "int", 100, "Min line length (px)", min=10, max=2000),
+        StageConfigField("edge_hough_max_gap", "int", 10, "Max line gap (px)", min=1, max=100),
+        StageConfigField("edge_pair_max_distance", "int", 200, "Max distance between line pair (px)", min=10, max=500),
+        StageConfigField("edge_pair_min_distance", "int", 15, "Min distance between line pair (px)", min=5, max=200),
+    ]
+    register_stage(
+        StageDefinition(
+            name="detect_edges",
+            label="Edge Detection",
+            description="Canny + HoughLinesP — find horizontal line pairs (hoarding boundaries)",
+            category="cv_analysis",
+            io=stage_io,
+            config_fields=fields,
+            serialize_fn=_ser_regions,
+            deserialize_fn=_deser_regions,
+        )
+    )