add heavy logging
@@ -8,6 +8,7 @@ Emits log + stats_update SSE events as it works.
 from __future__ import annotations
 
 import tempfile
+import time
 from pathlib import Path
 
 import ffmpeg
@@ -53,6 +54,8 @@ def extract_frames(
     emit.log(job_id, "FrameExtractor", "INFO",
              f"Starting extraction: {Path(video_path).name} "
              f"({duration:.1f}s, {probe.width}x{probe.height}, fps={config.fps})")
+    emit.log(job_id, "FrameExtractor", "DEBUG",
+             f"Probe: codec={probe.video_codec}, bitrate={probe.video_bitrate}, max_frames={config.max_frames}")
 
     with tempfile.TemporaryDirectory() as tmpdir:
         pattern = str(Path(tmpdir) / "frame_%06d.jpg")
@@ -65,14 +68,24 @@ def extract_frames(
             .overwrite_output()
         )
 
+        t0 = time.monotonic()
         try:
             stream.run(capture_stdout=True, capture_stderr=True, quiet=True)
         except ffmpeg.Error as e:
             stderr = e.stderr.decode() if e.stderr else "unknown error"
             emit.log(job_id, "FrameExtractor", "ERROR", f"FFmpeg failed: {stderr[:200]}")
             raise RuntimeError(f"FFmpeg failed: {stderr}") from e
+        ffmpeg_ms = (time.monotonic() - t0) * 1000
+        emit.log(job_id, "FrameExtractor", "DEBUG", f"FFmpeg decode: {ffmpeg_ms:.0f}ms")
 
+        t0 = time.monotonic()
         frames = _load_frames(Path(tmpdir), config.fps)
+        load_ms = (time.monotonic() - t0) * 1000
+        if frames:
+            h, w = frames[0].image.shape[:2]
+            mem_mb = sum(f.image.nbytes for f in frames) / (1024 * 1024)
+            emit.log(job_id, "FrameExtractor", "DEBUG",
+                     f"Loaded {len(frames)} frames ({w}x{h}) in {load_ms:.0f}ms, {mem_mb:.1f}MB in memory")
 
     emit.log(job_id, "FrameExtractor", "INFO", f"Extracted {len(frames)} frames")
     emit.stats(job_id, frames_extracted=len(frames))
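A note for readers: `emit.log(job_id, stage, level, message)` and `emit.stats(job_id, **counters)` are called throughout this commit, but `detect/emit.py` itself is not shown. Below is a minimal sketch of what it could look like, assuming an in-process queue drained by the SSE endpoint; only the two signatures and the `_run_log_level` global (imported later in this diff) come from the commit, everything else is an illustrative assumption.

```python
# Hypothetical detect/emit.py matching the call sites in this diff.
# Transport (a queue drained by the SSE endpoint) and level filtering
# are assumptions; the signatures are taken from the commit.
from __future__ import annotations

import json
import queue
import time

_LEVELS = {"DEBUG": 10, "INFO": 20, "WARNING": 30, "ERROR": 40}

# Module-level log level for the current run (referenced elsewhere in
# this diff as `from detect.emit import _run_log_level`).
_run_log_level = "INFO"

_events = queue.Queue()  # JSON strings consumed by the SSE stream


def log(job_id: str, stage: str, level: str, message: str) -> None:
    """Emit a `log` SSE event, filtered by the run's log level."""
    if _LEVELS.get(level, 0) < _LEVELS.get(_run_log_level, 20):
        return
    _events.put(json.dumps({
        "event": "log", "job_id": job_id, "stage": stage,
        "level": level, "message": message, "ts": time.time(),
    }))


def stats(job_id: str, **counters: int) -> None:
    """Emit a `stats_update` SSE event with counter values."""
    _events.put(json.dumps({
        "event": "stats_update", "job_id": job_id, "stats": counters,
    }))
```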
@@ -13,6 +13,7 @@ Model instances are cached at module level so they survive across pipeline runs.
 from __future__ import annotations
 
 import logging
+import time
 from typing import TYPE_CHECKING
 
 import numpy as np
@@ -91,7 +92,8 @@ def run_ocr(
     # Build these once per pipeline run, not per crop
     if inference_url:
         from detect.inference import InferenceClient
-        client = InferenceClient(base_url=inference_url)
+        from detect.emit import _run_log_level
+        client = InferenceClient(base_url=inference_url, job_id=job_id or "", log_level=_run_log_level)
     else:
         model = _get_local_model(config.languages[0])
 
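One subtlety worth flagging in review: `from detect.emit import _run_log_level` copies the current binding of the module global rather than holding a live reference. Because the import executes inside `run_ocr`, each call picks up the level in force at call time, which is presumably the intent (a fixed level per run); a tiny stand-in demonstrates the semantics:

```python
# Why the call-time import works: the read happens on every call, so
# each run sees the then-current level. A SimpleNamespace stands in
# for the detect.emit module here.
import types

emit_mod = types.SimpleNamespace(_run_log_level="INFO")

def run_ocr_style_read() -> str:
    level = emit_mod._run_log_level  # snapshot taken at call time
    return level

assert run_ocr_style_read() == "INFO"
emit_mod._run_log_level = "DEBUG"       # level changed between runs
assert run_ocr_style_read() == "DEBUG"  # next call sees the new value
```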
@@ -108,12 +110,19 @@ def run_ocr(
         if crop.size == 0:
             continue
 
+        t0 = time.monotonic()
         if inference_url:
             raw_results = client.ocr(image=crop, languages=config.languages)
             texts = [{"text": r.text, "confidence": r.confidence} for r in raw_results]
         else:
             raw = model.ocr(crop)
             texts = _parse_ocr_raw(raw, config.min_confidence)
+        ocr_ms = (time.monotonic() - t0) * 1000
+
+        h, w = crop.shape[:2]
+        text_preview = ", ".join(t["text"][:30] for t in texts) if texts else "(none)"
+        emit.log(job_id, "OCRStage", "DEBUG",
+                 f"Frame {seq} box {box.x},{box.y} ({w}x{h}): {ocr_ms:.0f}ms → {text_preview}")
 
         for t in texts:
             candidates.append(TextCandidate(
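`_parse_ocr_raw` is not part of this diff; from the remote branch it must return the same `{"text", "confidence"}` dicts. A sketch under the assumption that the local model returns PaddleOCR-style nested output; the real format depends on whichever backend `_get_local_model` wraps.

```python
# Hypothetical _parse_ocr_raw, assuming PaddleOCR-shaped raw output:
# [[ [box_points, (text, score)], ... ]], one page per input image.
def _parse_ocr_raw(raw, min_confidence: float) -> list[dict]:
    texts: list[dict] = []
    if not raw:
        return texts
    for page in raw:
        if not page:
            continue
        for _box_points, (text, score) in page:
            # Keep only confident, non-empty hits, mirroring the
            # min_confidence filter applied at the call site.
            if score >= min_confidence and text.strip():
                texts.append({"text": text, "confidence": float(score)})
    return texts
```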
@@ -9,6 +9,8 @@ CV stages without losing unique visual content.
 
 from __future__ import annotations
 
+import time
+
 import imagehash
 from PIL import Image
 
@@ -63,8 +65,16 @@ def scene_filter(
     emit.log(job_id, "SceneFilter", "INFO",
              f"Filtering {len(frames)} frames (hamming_threshold={config.hamming_threshold})")
 
+    t0 = time.monotonic()
     hashes = _compute_hashes(frames)
+    hash_ms = (time.monotonic() - t0) * 1000
+    emit.log(job_id, "SceneFilter", "DEBUG",
+             f"Computed {len(hashes)} perceptual hashes in {hash_ms:.0f}ms ({hash_ms/max(len(hashes),1):.1f}ms/frame)")
+
+    t0 = time.monotonic()
     kept = _dedup(frames, hashes, config.hamming_threshold)
+    dedup_ms = (time.monotonic() - t0) * 1000
+    emit.log(job_id, "SceneFilter", "DEBUG", f"Dedup pass: {dedup_ms:.0f}ms")
 
     dropped = len(frames) - len(kept)
     pct = (dropped / len(frames) * 100) if frames else 0
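The two helpers timed above are not in the diff. A minimal sketch of what `_compute_hashes` and `_dedup` plausibly do with `imagehash`: hash every frame, then keep a frame only when it differs from the last kept frame by more than the Hamming threshold. The greedy last-kept comparison is an assumption; an all-pairs variant would also fit the log lines.

```python
# Hypothetical helpers behind the timings logged above. Assumes each
# frame exposes .image as an RGB numpy array, as elsewhere in the diff.
import imagehash
from PIL import Image


def _compute_hashes(frames):
    """One 64-bit perceptual hash (pHash) per frame."""
    return [imagehash.phash(Image.fromarray(f.image)) for f in frames]


def _dedup(frames, hashes, hamming_threshold: int):
    """Greedy scene filter: keep a frame only if it differs enough
    from the most recently kept frame."""
    kept = []
    last_hash = None
    for frame, h in zip(frames, hashes):
        # imagehash overloads `-` to return the Hamming distance.
        if last_hash is None or (h - last_hash) > hamming_threshold:
            kept.append(frame)
            last_hash = h
    return kept
```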
@@ -13,6 +13,8 @@ from __future__ import annotations
 import base64
 import io
 import logging
+import os
+import time
 
 import numpy as np
 from PIL import Image
@@ -108,6 +110,11 @@ def escalate_cloud(
     if not candidates:
         return []
 
+    if os.environ.get("SKIP_CLOUD", "").strip() == "1":
+        emit.log(job_id, "CloudLLM", "INFO",
+                 f"SKIP_CLOUD=1, skipping {len(candidates)} crops")
+        return []
+
     if not has_api_key():
         emit.log(job_id, "CloudLLM", "WARNING",
                  f"No API key set for cloud provider, skipping {len(candidates)} crops")
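A review note on the kill switch (and the matching `SKIP_VLM` guard later in this diff): only the literal string "1", modulo surrounding whitespace, disables the stage, so `SKIP_CLOUD=true` or `SKIP_CLOUD=yes` silently does nothing. The semantics as written:

```python
# Behavior of the SKIP_CLOUD / SKIP_VLM guards in this commit:
# only "1" (after strip) activates the skip.
import os

for value, expected in [("1", True), (" 1 ", True),
                        ("true", False), ("0", False), ("", False)]:
    os.environ["SKIP_CLOUD"] = value
    assert (os.environ.get("SKIP_CLOUD", "").strip() == "1") is expected
```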
@@ -120,7 +127,7 @@ def escalate_cloud(
     matched: list[BrandDetection] = []
     total_cost = 0.0
 
-    for candidate in candidates:
+    for i, candidate in enumerate(candidates):
         crop = _crop_image(candidate)
         if crop.size == 0:
             continue
@@ -133,11 +140,15 @@ def escalate_cloud(
         prompt = vlm_prompt_fn(crop_context)
         image_b64 = _encode_crop(crop)
 
+        t0 = time.monotonic()
         try:
             result = _call_cloud_api(image_b64, prompt)
         except Exception as e:
-            logger.warning("Cloud LLM failed for '%s': %s", candidate.text, e)
+            call_ms = (time.monotonic() - t0) * 1000
+            emit.log(job_id, "CloudLLM", "DEBUG",
+                     f"[{i+1}/{len(candidates)}] FAILED '{candidate.text[:30]}': {e} ({call_ms:.0f}ms)")
             continue
+        call_ms = (time.monotonic() - t0) * 1000
 
         stats.cloud_llm_calls += 1
         model_info = provider.models.get(provider.model)
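The same pattern appears in every stage of this commit: stamp `t0` before the call, then compute the elapsed time separately on the failure and success paths, since a single line after the `try` would never run when the handler `continue`s. If it grows further, a small helper could fold the duplicated `call_ms` lines into one place; a sketch, not part of the commit:

```python
# Hypothetical helper that would deduplicate the paired
# `call_ms = (time.monotonic() - t0) * 1000` lines in this commit.
import time
from contextlib import contextmanager


@contextmanager
def stopwatch():
    """Yields a zero-arg callable returning elapsed milliseconds,
    readable from both the success path and an except block."""
    t0 = time.monotonic()
    yield lambda: (time.monotonic() - t0) * 1000


# Usage mirroring the escalate_cloud loop body:
with stopwatch() as elapsed_ms:
    try:
        result = {"brand": "acme"}  # stand-in for _call_cloud_api(...)
    except Exception as e:
        print(f"FAILED: {e} ({elapsed_ms():.0f}ms)")
    else:
        print(f"ok ({elapsed_ms():.0f}ms)")
```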
@@ -148,6 +159,11 @@ def escalate_cloud(
         brand = result["brand"]
         confidence = result["confidence"]
 
+        emit.log(job_id, "CloudLLM", "DEBUG",
+                 f"[{i+1}/{len(candidates)}] '{candidate.text[:30]}' → "
+                 f"{'✓ ' + brand if brand else '✗'} "
+                 f"(conf={confidence:.2f}, {result['tokens']}tok, ${call_cost:.4f}, {call_ms:.0f}ms)")
+
         if brand and confidence >= min_confidence:
             detection = BrandDetection(
                 brand=brand,
@@ -9,6 +9,8 @@ objects for crops the VLM can identify.
 from __future__ import annotations
 
 import logging
+import os
+import time
 
 import numpy as np
 
@@ -61,6 +63,11 @@ def escalate_vlm(
     if not candidates:
         return [], []
 
+    if os.environ.get("SKIP_VLM", "").strip() == "1":
+        emit.log(job_id, "VLMLocal", "INFO",
+                 f"SKIP_VLM=1, skipping {len(candidates)} crops")
+        return [], candidates
+
     emit.log(job_id, "VLMLocal", "INFO",
              f"Processing {len(candidates)} unresolved crops with moondream2")
 
@@ -69,9 +76,10 @@ def escalate_vlm(
 
     if inference_url:
         from detect.inference import InferenceClient
-        client = InferenceClient(base_url=inference_url)
+        from detect.emit import _run_log_level
+        client = InferenceClient(base_url=inference_url, job_id=job_id or "", log_level=_run_log_level)
 
-    for candidate in candidates:
+    for i, candidate in enumerate(candidates):
         crop = _crop_image(candidate)
         if crop.size == 0:
             still_unresolved.append(candidate)
@@ -84,6 +92,7 @@ def escalate_vlm(
         )
         prompt = vlm_prompt_fn(crop_context)
 
+        t0 = time.monotonic()
         try:
             if inference_url:
                 result = client.vlm(image=crop, prompt=prompt)
@@ -93,9 +102,16 @@ def escalate_vlm(
             else:
                 brand, confidence, reasoning = _vlm_local(crop, prompt)
         except Exception as e:
-            logger.warning("VLM failed for candidate '%s': %s", candidate.text, e)
+            vlm_ms = (time.monotonic() - t0) * 1000
+            emit.log(job_id, "VLMLocal", "DEBUG",
+                     f"[{i+1}/{len(candidates)}] FAILED '{candidate.text[:30]}': {e} ({vlm_ms:.0f}ms)")
             still_unresolved.append(candidate)
             continue
+        vlm_ms = (time.monotonic() - t0) * 1000
+        emit.log(job_id, "VLMLocal", "DEBUG",
+                 f"[{i+1}/{len(candidates)}] '{candidate.text[:30]}' → "
+                 f"{'✓ ' + brand if brand else '✗ unresolved'} "
+                 f"(conf={confidence:.2f}, {vlm_ms:.0f}ms)")
 
         if brand and confidence >= min_confidence:
             detection = BrandDetection(
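`_vlm_local` is only called here, never shown. moondream2's `encode_image`/`answer_question` interface is real, but the JSON-answer contract and the `(brand, confidence, reasoning)` parsing below are assumptions inferred from how the result is unpacked above.

```python
# Hypothetical _vlm_local returning the (brand, confidence, reasoning)
# tuple unpacked in the diff. Assumes the prompt asks moondream2 for a
# JSON object; an unparseable answer is treated as unresolved.
import json

from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer

_model = AutoModelForCausalLM.from_pretrained(
    "vikhyatk/moondream2", trust_remote_code=True)
_tokenizer = AutoTokenizer.from_pretrained("vikhyatk/moondream2")


def _vlm_local(crop, prompt: str):
    enc = _model.encode_image(Image.fromarray(crop))
    answer = _model.answer_question(enc, prompt, _tokenizer)
    try:
        data = json.loads(answer)
    except json.JSONDecodeError:
        return None, 0.0, answer  # unparseable → unresolved
    return (data.get("brand"),
            float(data.get("confidence", 0.0)),
            data.get("reasoning", ""))
```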
@@ -14,6 +14,7 @@ from __future__ import annotations
 import base64
 import io
 import logging
+import time
 
 from PIL import Image
 
@@ -32,10 +33,11 @@ def _frame_to_b64(frame: Frame) -> str:
     return base64.b64encode(buf.getvalue()).decode()
 
 
-def _detect_remote(frame: Frame, config: DetectionConfig, inference_url: str) -> list[BoundingBox]:
+def _detect_remote(frame: Frame, config: DetectionConfig, inference_url: str,
+                   job_id: str = "", log_level: str = "INFO") -> list[BoundingBox]:
     """Call the inference server over HTTP."""
     from detect.inference import InferenceClient
-    client = InferenceClient(base_url=inference_url)
+    client = InferenceClient(base_url=inference_url, job_id=job_id, log_level=log_level)
     results = client.detect(
         image=frame.image,
         model=config.model_name,
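`InferenceClient` itself is untouched by this commit, so the purpose of the new `job_id`/`log_level` kwargs has to be inferred: presumably they let the inference server emit job-scoped logs at the run's verbosity. A sketch of the client surface implied by the call sites (`detect(image=, model=)`, `ocr(image=, languages=)`, `vlm(image=, prompt=)`); endpoints, headers, and payload encoding are assumptions.

```python
# Sketch of the InferenceClient surface implied by this diff's call
# sites. Only the constructor kwargs and method names appear in the
# commit; the HTTP contract below is illustrative.
import base64

import numpy as np
import requests


class InferenceClient:
    def __init__(self, base_url: str, job_id: str = "",
                 log_level: str = "INFO"):
        self.base_url = base_url.rstrip("/")
        # Presumed purpose of the new kwargs: tag server-side logs
        # with the job and honor the run's verbosity.
        self.headers = {"X-Job-Id": job_id, "X-Log-Level": log_level}

    def _post(self, path: str, payload: dict) -> dict:
        resp = requests.post(f"{self.base_url}{path}", json=payload,
                             headers=self.headers, timeout=60)
        resp.raise_for_status()
        return resp.json()

    def detect(self, image: np.ndarray, model: str) -> dict:
        return self._post("/detect", {
            "image": base64.b64encode(image.tobytes()).decode(),
            "shape": list(image.shape), "dtype": str(image.dtype),
            "model": model,
        })
```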
@@ -99,15 +101,24 @@ def detect_objects(
     all_boxes: dict[int, list[BoundingBox]] = {}
     total_regions = 0
 
-    for frame in frames:
+    for i, frame in enumerate(frames):
+        t0 = time.monotonic()
         if inference_url:
-            boxes = _detect_remote(frame, config, inference_url)
+            from detect.emit import _run_log_level
+            boxes = _detect_remote(frame, config, inference_url,
+                                   job_id=job_id or "", log_level=_run_log_level)
         else:
             boxes = _detect_local(frame, config)
+        det_ms = (time.monotonic() - t0) * 1000
 
         all_boxes[frame.sequence] = boxes
         total_regions += len(boxes)
 
+        emit.log(job_id, "YOLODetector", "DEBUG",
+                 f"Frame {frame.sequence}: {len(boxes)} regions in {det_ms:.0f}ms"
+                 f" [{', '.join(b.label for b in boxes)}]" if boxes else
+                 f"Frame {frame.sequence}: 0 regions in {det_ms:.0f}ms")
+
         if boxes and job_id:
             box_dicts = [{"x": b.x, "y": b.y, "w": b.w, "h": b.h,
                           "confidence": b.confidence, "label": b.label}
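One style nit on the debug line above: it relies on implicit string concatenation binding tighter than the conditional expression, so it does evaluate as `(two-part string) if boxes else (zero string)`, but the grouping is easy to misread inside a call. An equivalent, explicit spelling:

```python
# Equivalent to the emit.log message argument above, with the grouping
# Python already applies spelled out explicitly.
labels = ", ".join(b.label for b in boxes)
message = (
    f"Frame {frame.sequence}: {len(boxes)} regions in {det_ms:.0f}ms [{labels}]"
    if boxes
    else f"Frame {frame.sequence}: 0 regions in {det_ms:.0f}ms"
)
emit.log(job_id, "YOLODetector", "DEBUG", message)
```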