add heavy logging
@@ -8,6 +8,7 @@ Emits log + stats_update SSE events as it works.
 from __future__ import annotations
 
 import tempfile
+import time
 from pathlib import Path
 
 import ffmpeg
@@ -53,6 +54,8 @@ def extract_frames(
     emit.log(job_id, "FrameExtractor", "INFO",
              f"Starting extraction: {Path(video_path).name} "
              f"({duration:.1f}s, {probe.width}x{probe.height}, fps={config.fps})")
+    emit.log(job_id, "FrameExtractor", "DEBUG",
+             f"Probe: codec={probe.video_codec}, bitrate={probe.video_bitrate}, max_frames={config.max_frames}")
 
     with tempfile.TemporaryDirectory() as tmpdir:
         pattern = str(Path(tmpdir) / "frame_%06d.jpg")
@@ -65,14 +68,24 @@ def extract_frames(
             .overwrite_output()
         )
 
+        t0 = time.monotonic()
         try:
             stream.run(capture_stdout=True, capture_stderr=True, quiet=True)
         except ffmpeg.Error as e:
             stderr = e.stderr.decode() if e.stderr else "unknown error"
             emit.log(job_id, "FrameExtractor", "ERROR", f"FFmpeg failed: {stderr[:200]}")
             raise RuntimeError(f"FFmpeg failed: {stderr}") from e
+        ffmpeg_ms = (time.monotonic() - t0) * 1000
+        emit.log(job_id, "FrameExtractor", "DEBUG", f"FFmpeg decode: {ffmpeg_ms:.0f}ms")
 
+        t0 = time.monotonic()
         frames = _load_frames(Path(tmpdir), config.fps)
+        load_ms = (time.monotonic() - t0) * 1000
+        if frames:
+            h, w = frames[0].image.shape[:2]
+            mem_mb = sum(f.image.nbytes for f in frames) / (1024 * 1024)
+            emit.log(job_id, "FrameExtractor", "DEBUG",
+                     f"Loaded {len(frames)} frames ({w}x{h}) in {load_ms:.0f}ms, {mem_mb:.1f}MB in memory")
 
     emit.log(job_id, "FrameExtractor", "INFO", f"Extracted {len(frames)} frames")
     emit.stats(job_id, frames_extracted=len(frames))
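A note for readers: `emit.log(job_id, stage, level, message)` and `emit.stats(job_id, **counters)` are called throughout this commit, but `detect/emit.py` itself is not shown. Below is a minimal sketch of what it could look like, assuming an in-process queue drained by the SSE endpoint; only the two signatures and the `_run_log_level` global (imported later in this diff) come from the commit, everything else is an illustrative assumption.

```python
# Hypothetical detect/emit.py matching the call sites in this diff.
# Transport (a queue drained by the SSE endpoint) and level filtering
# are assumptions; the signatures are taken from the commit.
from __future__ import annotations

import json
import queue
import time

_LEVELS = {"DEBUG": 10, "INFO": 20, "WARNING": 30, "ERROR": 40}

# Module-level log level for the current run (referenced elsewhere in
# this diff as `from detect.emit import _run_log_level`).
_run_log_level = "INFO"

_events = queue.Queue()  # JSON strings consumed by the SSE stream


def log(job_id: str, stage: str, level: str, message: str) -> None:
    """Emit a `log` SSE event, filtered by the run's log level."""
    if _LEVELS.get(level, 0) < _LEVELS.get(_run_log_level, 20):
        return
    _events.put(json.dumps({
        "event": "log", "job_id": job_id, "stage": stage,
        "level": level, "message": message, "ts": time.time(),
    }))


def stats(job_id: str, **counters: int) -> None:
    """Emit a `stats_update` SSE event with counter values."""
    _events.put(json.dumps({
        "event": "stats_update", "job_id": job_id, "stats": counters,
    }))
```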
@@ -13,6 +13,7 @@ Model instances are cached at module level so they survive across pipeline runs.
 from __future__ import annotations
 
 import logging
+import time
 from typing import TYPE_CHECKING
 
 import numpy as np
@@ -91,7 +92,8 @@ def run_ocr(
     # Build these once per pipeline run, not per crop
     if inference_url:
         from detect.inference import InferenceClient
-        client = InferenceClient(base_url=inference_url)
+        from detect.emit import _run_log_level
+        client = InferenceClient(base_url=inference_url, job_id=job_id or "", log_level=_run_log_level)
     else:
         model = _get_local_model(config.languages[0])
 
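One subtlety worth flagging in review: `from detect.emit import _run_log_level` copies the current binding of the module global rather than holding a live reference. Because the import executes inside `run_ocr`, each call picks up the level in force at call time, which is presumably the intent (a fixed level per run); a tiny stand-in demonstrates the semantics:

```python
# Why the call-time import works: the read happens on every call, so
# each run sees the then-current level. A SimpleNamespace stands in
# for the detect.emit module here.
import types

emit_mod = types.SimpleNamespace(_run_log_level="INFO")

def run_ocr_style_read() -> str:
    level = emit_mod._run_log_level  # snapshot taken at call time
    return level

assert run_ocr_style_read() == "INFO"
emit_mod._run_log_level = "DEBUG"       # level changed between runs
assert run_ocr_style_read() == "DEBUG"  # next call sees the new value
```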
@@ -108,12 +110,19 @@ def run_ocr(
         if crop.size == 0:
             continue
 
+        t0 = time.monotonic()
         if inference_url:
             raw_results = client.ocr(image=crop, languages=config.languages)
             texts = [{"text": r.text, "confidence": r.confidence} for r in raw_results]
         else:
             raw = model.ocr(crop)
             texts = _parse_ocr_raw(raw, config.min_confidence)
+        ocr_ms = (time.monotonic() - t0) * 1000
+
+        h, w = crop.shape[:2]
+        text_preview = ", ".join(t["text"][:30] for t in texts) if texts else "(none)"
+        emit.log(job_id, "OCRStage", "DEBUG",
+                 f"Frame {seq} box {box.x},{box.y} ({w}x{h}): {ocr_ms:.0f}ms → {text_preview}")
 
         for t in texts:
             candidates.append(TextCandidate(
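`_parse_ocr_raw` is not part of this diff; from the remote branch it must return the same `{"text", "confidence"}` dicts. A sketch under the assumption that the local model returns PaddleOCR-style nested output; the real format depends on whichever backend `_get_local_model` wraps.

```python
# Hypothetical _parse_ocr_raw, assuming PaddleOCR-shaped raw output:
# [[ [box_points, (text, score)], ... ]], one page per input image.
def _parse_ocr_raw(raw, min_confidence: float) -> list[dict]:
    texts: list[dict] = []
    if not raw:
        return texts
    for page in raw:
        if not page:
            continue
        for _box_points, (text, score) in page:
            # Keep only confident, non-empty hits, mirroring the
            # min_confidence filter applied at the call site.
            if score >= min_confidence and text.strip():
                texts.append({"text": text, "confidence": float(score)})
    return texts
```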
@@ -9,6 +9,8 @@ CV stages without losing unique visual content.
 
 from __future__ import annotations
 
+import time
+
 import imagehash
 from PIL import Image
 
@@ -63,8 +65,16 @@ def scene_filter(
     emit.log(job_id, "SceneFilter", "INFO",
              f"Filtering {len(frames)} frames (hamming_threshold={config.hamming_threshold})")
 
+    t0 = time.monotonic()
     hashes = _compute_hashes(frames)
+    hash_ms = (time.monotonic() - t0) * 1000
+    emit.log(job_id, "SceneFilter", "DEBUG",
+             f"Computed {len(hashes)} perceptual hashes in {hash_ms:.0f}ms ({hash_ms/max(len(hashes),1):.1f}ms/frame)")
+
+    t0 = time.monotonic()
     kept = _dedup(frames, hashes, config.hamming_threshold)
+    dedup_ms = (time.monotonic() - t0) * 1000
+    emit.log(job_id, "SceneFilter", "DEBUG", f"Dedup pass: {dedup_ms:.0f}ms")
 
     dropped = len(frames) - len(kept)
     pct = (dropped / len(frames) * 100) if frames else 0
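The two helpers timed above are not in the diff. A minimal sketch of what `_compute_hashes` and `_dedup` plausibly do with `imagehash`: hash every frame, then keep a frame only when it differs from the last kept frame by more than the Hamming threshold. The greedy last-kept comparison is an assumption; an all-pairs variant would also fit the log lines.

```python
# Hypothetical helpers behind the timings logged above. Assumes each
# frame exposes .image as an RGB numpy array, as elsewhere in the diff.
import imagehash
from PIL import Image


def _compute_hashes(frames):
    """One 64-bit perceptual hash (pHash) per frame."""
    return [imagehash.phash(Image.fromarray(f.image)) for f in frames]


def _dedup(frames, hashes, hamming_threshold: int):
    """Greedy scene filter: keep a frame only if it differs enough
    from the most recently kept frame."""
    kept = []
    last_hash = None
    for frame, h in zip(frames, hashes):
        # imagehash overloads `-` to return the Hamming distance.
        if last_hash is None or (h - last_hash) > hamming_threshold:
            kept.append(frame)
            last_hash = h
    return kept
```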
@@ -13,6 +13,8 @@ from __future__ import annotations
 import base64
 import io
 import logging
+import os
+import time
 
 import numpy as np
 from PIL import Image
@@ -108,6 +110,11 @@ def escalate_cloud(
     if not candidates:
         return []
 
+    if os.environ.get("SKIP_CLOUD", "").strip() == "1":
+        emit.log(job_id, "CloudLLM", "INFO",
+                 f"SKIP_CLOUD=1, skipping {len(candidates)} crops")
+        return []
+
     if not has_api_key():
         emit.log(job_id, "CloudLLM", "WARNING",
                  f"No API key set for cloud provider, skipping {len(candidates)} crops")
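A review note on the kill switch (and the matching `SKIP_VLM` guard later in this diff): only the literal string "1", modulo surrounding whitespace, disables the stage, so `SKIP_CLOUD=true` or `SKIP_CLOUD=yes` silently does nothing. The semantics as written:

```python
# Behavior of the SKIP_CLOUD / SKIP_VLM guards in this commit:
# only "1" (after strip) activates the skip.
import os

for value, expected in [("1", True), (" 1 ", True),
                        ("true", False), ("0", False), ("", False)]:
    os.environ["SKIP_CLOUD"] = value
    assert (os.environ.get("SKIP_CLOUD", "").strip() == "1") is expected
```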
@@ -120,7 +127,7 @@ def escalate_cloud(
     matched: list[BrandDetection] = []
     total_cost = 0.0
 
-    for candidate in candidates:
+    for i, candidate in enumerate(candidates):
         crop = _crop_image(candidate)
         if crop.size == 0:
             continue
@@ -133,11 +140,15 @@ def escalate_cloud(
         prompt = vlm_prompt_fn(crop_context)
         image_b64 = _encode_crop(crop)
 
+        t0 = time.monotonic()
         try:
             result = _call_cloud_api(image_b64, prompt)
         except Exception as e:
-            logger.warning("Cloud LLM failed for '%s': %s", candidate.text, e)
+            call_ms = (time.monotonic() - t0) * 1000
+            emit.log(job_id, "CloudLLM", "DEBUG",
+                     f"[{i+1}/{len(candidates)}] FAILED '{candidate.text[:30]}': {e} ({call_ms:.0f}ms)")
             continue
+        call_ms = (time.monotonic() - t0) * 1000
 
         stats.cloud_llm_calls += 1
         model_info = provider.models.get(provider.model)
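The same pattern appears in every stage of this commit: stamp `t0` before the call, then compute the elapsed time separately on the failure and success paths, since a single line after the `try` would never run when the handler `continue`s. If it grows further, a small helper could fold the duplicated `call_ms` lines into one place; a sketch, not part of the commit:

```python
# Hypothetical helper that would deduplicate the paired
# `call_ms = (time.monotonic() - t0) * 1000` lines in this commit.
import time
from contextlib import contextmanager


@contextmanager
def stopwatch():
    """Yields a zero-arg callable returning elapsed milliseconds,
    readable from both the success path and an except block."""
    t0 = time.monotonic()
    yield lambda: (time.monotonic() - t0) * 1000


# Usage mirroring the escalate_cloud loop body:
with stopwatch() as elapsed_ms:
    try:
        result = {"brand": "acme"}  # stand-in for _call_cloud_api(...)
    except Exception as e:
        print(f"FAILED: {e} ({elapsed_ms():.0f}ms)")
    else:
        print(f"ok ({elapsed_ms():.0f}ms)")
```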
@@ -148,6 +159,11 @@ def escalate_cloud(
         brand = result["brand"]
         confidence = result["confidence"]
 
+        emit.log(job_id, "CloudLLM", "DEBUG",
+                 f"[{i+1}/{len(candidates)}] '{candidate.text[:30]}' → "
+                 f"{'✓ ' + brand if brand else '✗'} "
+                 f"(conf={confidence:.2f}, {result['tokens']}tok, ${call_cost:.4f}, {call_ms:.0f}ms)")
+
         if brand and confidence >= min_confidence:
             detection = BrandDetection(
                 brand=brand,
@@ -9,6 +9,8 @@ objects for crops the VLM can identify.
 from __future__ import annotations
 
 import logging
+import os
+import time
 
 import numpy as np
 
@@ -61,6 +63,11 @@ def escalate_vlm(
     if not candidates:
         return [], []
 
+    if os.environ.get("SKIP_VLM", "").strip() == "1":
+        emit.log(job_id, "VLMLocal", "INFO",
+                 f"SKIP_VLM=1, skipping {len(candidates)} crops")
+        return [], candidates
+
     emit.log(job_id, "VLMLocal", "INFO",
              f"Processing {len(candidates)} unresolved crops with moondream2")
 
@@ -69,9 +76,10 @@ def escalate_vlm(
 
     if inference_url:
         from detect.inference import InferenceClient
-        client = InferenceClient(base_url=inference_url)
+        from detect.emit import _run_log_level
+        client = InferenceClient(base_url=inference_url, job_id=job_id or "", log_level=_run_log_level)
 
-    for candidate in candidates:
+    for i, candidate in enumerate(candidates):
         crop = _crop_image(candidate)
         if crop.size == 0:
             still_unresolved.append(candidate)
@@ -84,6 +92,7 @@ def escalate_vlm(
         )
         prompt = vlm_prompt_fn(crop_context)
 
+        t0 = time.monotonic()
         try:
             if inference_url:
                 result = client.vlm(image=crop, prompt=prompt)
@@ -93,9 +102,16 @@ def escalate_vlm(
             else:
                 brand, confidence, reasoning = _vlm_local(crop, prompt)
         except Exception as e:
-            logger.warning("VLM failed for candidate '%s': %s", candidate.text, e)
+            vlm_ms = (time.monotonic() - t0) * 1000
+            emit.log(job_id, "VLMLocal", "DEBUG",
+                     f"[{i+1}/{len(candidates)}] FAILED '{candidate.text[:30]}': {e} ({vlm_ms:.0f}ms)")
             still_unresolved.append(candidate)
             continue
+        vlm_ms = (time.monotonic() - t0) * 1000
+        emit.log(job_id, "VLMLocal", "DEBUG",
+                 f"[{i+1}/{len(candidates)}] '{candidate.text[:30]}' → "
+                 f"{'✓ ' + brand if brand else '✗ unresolved'} "
+                 f"(conf={confidence:.2f}, {vlm_ms:.0f}ms)")
 
         if brand and confidence >= min_confidence:
             detection = BrandDetection(
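`_vlm_local` is only called here, never shown. moondream2's `encode_image`/`answer_question` interface is real, but the JSON-answer contract and the `(brand, confidence, reasoning)` parsing below are assumptions inferred from how the result is unpacked above.

```python
# Hypothetical _vlm_local returning the (brand, confidence, reasoning)
# tuple unpacked in the diff. Assumes the prompt asks moondream2 for a
# JSON object; an unparseable answer is treated as unresolved.
import json

from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer

_model = AutoModelForCausalLM.from_pretrained(
    "vikhyatk/moondream2", trust_remote_code=True)
_tokenizer = AutoTokenizer.from_pretrained("vikhyatk/moondream2")


def _vlm_local(crop, prompt: str):
    enc = _model.encode_image(Image.fromarray(crop))
    answer = _model.answer_question(enc, prompt, _tokenizer)
    try:
        data = json.loads(answer)
    except json.JSONDecodeError:
        return None, 0.0, answer  # unparseable → unresolved
    return (data.get("brand"),
            float(data.get("confidence", 0.0)),
            data.get("reasoning", ""))
```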
@@ -14,6 +14,7 @@ from __future__ import annotations
 import base64
 import io
 import logging
+import time
 
 from PIL import Image
 
@@ -32,10 +33,11 @@ def _frame_to_b64(frame: Frame) -> str:
     return base64.b64encode(buf.getvalue()).decode()
 
 
-def _detect_remote(frame: Frame, config: DetectionConfig, inference_url: str) -> list[BoundingBox]:
+def _detect_remote(frame: Frame, config: DetectionConfig, inference_url: str,
+                   job_id: str = "", log_level: str = "INFO") -> list[BoundingBox]:
     """Call the inference server over HTTP."""
     from detect.inference import InferenceClient
-    client = InferenceClient(base_url=inference_url)
+    client = InferenceClient(base_url=inference_url, job_id=job_id, log_level=log_level)
     results = client.detect(
         image=frame.image,
         model=config.model_name,
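`InferenceClient` itself is untouched by this commit, so the purpose of the new `job_id`/`log_level` kwargs has to be inferred: presumably they let the inference server emit job-scoped logs at the run's verbosity. A sketch of the client surface implied by the call sites (`detect(image=, model=)`, `ocr(image=, languages=)`, `vlm(image=, prompt=)`); endpoints, headers, and payload encoding are assumptions.

```python
# Sketch of the InferenceClient surface implied by this diff's call
# sites. Only the constructor kwargs and method names appear in the
# commit; the HTTP contract below is illustrative.
import base64

import numpy as np
import requests


class InferenceClient:
    def __init__(self, base_url: str, job_id: str = "",
                 log_level: str = "INFO"):
        self.base_url = base_url.rstrip("/")
        # Presumed purpose of the new kwargs: tag server-side logs
        # with the job and honor the run's verbosity.
        self.headers = {"X-Job-Id": job_id, "X-Log-Level": log_level}

    def _post(self, path: str, payload: dict) -> dict:
        resp = requests.post(f"{self.base_url}{path}", json=payload,
                             headers=self.headers, timeout=60)
        resp.raise_for_status()
        return resp.json()

    def detect(self, image: np.ndarray, model: str) -> dict:
        return self._post("/detect", {
            "image": base64.b64encode(image.tobytes()).decode(),
            "shape": list(image.shape), "dtype": str(image.dtype),
            "model": model,
        })
```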
@@ -99,15 +101,24 @@ def detect_objects(
     all_boxes: dict[int, list[BoundingBox]] = {}
     total_regions = 0
 
-    for frame in frames:
+    for i, frame in enumerate(frames):
+        t0 = time.monotonic()
         if inference_url:
-            boxes = _detect_remote(frame, config, inference_url)
+            from detect.emit import _run_log_level
+            boxes = _detect_remote(frame, config, inference_url,
+                                   job_id=job_id or "", log_level=_run_log_level)
         else:
             boxes = _detect_local(frame, config)
+        det_ms = (time.monotonic() - t0) * 1000
 
         all_boxes[frame.sequence] = boxes
         total_regions += len(boxes)
 
+        emit.log(job_id, "YOLODetector", "DEBUG",
+                 f"Frame {frame.sequence}: {len(boxes)} regions in {det_ms:.0f}ms"
+                 f" [{', '.join(b.label for b in boxes)}]" if boxes else
+                 f"Frame {frame.sequence}: 0 regions in {det_ms:.0f}ms")
+
         if boxes and job_id:
             box_dicts = [{"x": b.x, "y": b.y, "w": b.w, "h": b.h,
                           "confidence": b.confidence, "label": b.label}
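One style nit on the debug line above: it relies on implicit string concatenation binding tighter than the conditional expression, so it does evaluate as `(two-part string) if boxes else (zero string)`, but the grouping is easy to misread inside a call. An equivalent, explicit spelling:

```python
# Equivalent to the emit.log message argument above, with the grouping
# Python already applies spelled out explicitly.
labels = ", ".join(b.label for b in boxes)
message = (
    f"Frame {frame.sequence}: {len(boxes)} regions in {det_ms:.0f}ms [{labels}]"
    if boxes
    else f"Frame {frame.sequence}: 0 regions in {det_ms:.0f}ms"
)
emit.log(job_id, "YOLODetector", "DEBUG", message)
```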