add heavy logging

2026-03-26 10:59:56 -03:00
parent a85722f96a
commit beb0416280
27 changed files with 502 additions and 64 deletions
--- a/gpu/emit.py
+++ b/gpu/emit.py
@@ -0,0 +1,52 @@
+"""
+Lightweight event emitter for the GPU inference server.
+
+Pushes debug logs to the same Redis stream as the pipeline orchestrator,
+so GPU-side details (model load, VRAM, inference timing) appear in the
+same log panel.
+
+Only active when the request includes X-Job-Id header.
+No dependency on the detect package.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from datetime import datetime, timezone
+
+import redis
+
+REDIS_URL = os.environ.get("REDIS_URL", "redis://localhost:6379/0")
+EVENTS_PREFIX = "detect_events"
+
+_LEVEL_ORDER = {"DEBUG": 0, "INFO": 1, "WARN": 2, "ERROR": 3}
+
+_redis_client = None
+
+
+def _get_redis():
+    global _redis_client
+    if _redis_client is None:
+        _redis_client = redis.from_url(REDIS_URL, decode_responses=True)
+    return _redis_client
+
+
+def log(job_id: str, stage: str, level: str, msg: str, log_level: str = "INFO"):
+    """Push a log event to Redis if the level meets the threshold."""
+    if not job_id:
+        return
+    if _LEVEL_ORDER.get(level.upper(), 1) < _LEVEL_ORDER.get(log_level.upper(), 1):
+        return
+
+    r = _get_redis()
+    key = f"{EVENTS_PREFIX}:{job_id}"
+    event = json.dumps({
+        "event": "log",
+        "level": level,
+        "stage": stage,
+        "msg": msg,
+        "ts": datetime.now(timezone.utc).isoformat(),
+    })
+    r.rpush(key, event)
+    r.expire(key, 3600)
--- a/gpu/requirements.txt
+++ b/gpu/requirements.txt
@@ -2,6 +2,7 @@ fastapi>=0.109.0
 uvicorn[standard]>=0.27.0
 rapidfuzz>=3.0.0
 Pillow>=10.0.0
+redis>=5.0.0

 # --- GPU-specific installs (mcrn: RTX 3080, CUDA toolkit 12.8) ---
 #
--- a/gpu/server.py
+++ b/gpu/server.py
@@ -14,13 +14,16 @@ import base64
 import io
 import logging
 import os
+import time
 from contextlib import asynccontextmanager

 import numpy as np
-from fastapi import FastAPI, HTTPException
+from fastapi import FastAPI, HTTPException, Request
 from PIL import Image
 from pydantic import BaseModel

+from emit import log as emit_log
+
 from config import get_config, get_device, update_config
 from models import registry
 from models.yolo import detect as yolo_detect
@@ -36,6 +39,19 @@ def _decode_image(b64: str) -> np.ndarray:
    return np.array(img)


+def _job_ctx(request: Request) -> tuple[str, str]:
+    """Extract job_id and log_level from request headers."""
+    job_id = request.headers.get("x-job-id", "")
+    log_level = request.headers.get("x-log-level", "INFO")
+    return job_id, log_level
+
+
+def _gpu_log(job_id: str, log_level: str, stage: str, level: str, msg: str):
+    """Emit a log event if job context is present."""
+    if job_id:
+        emit_log(job_id, stage, level, msg, log_level=log_level)
+
+
 # --- Request/Response models ---

 class DetectRequest(BaseModel):
@@ -160,19 +176,31 @@ def unload_model(body: dict):


@app.post("/detect", response_model=DetectResponse)
-def detect(req: DetectRequest):
+def detect(req: DetectRequest, request: Request):
+    job_id, log_level = _job_ctx(request)
+
    try:
+        t0 = time.monotonic()
        image = _decode_image(req.image)
+        decode_ms = (time.monotonic() - t0) * 1000
+        h, w = image.shape[:2]
+        _gpu_log(job_id, log_level, "GPU:YOLO", "DEBUG",
+                 f"Decoded {w}x{h} image in {decode_ms:.0f}ms")
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Bad image: {e}")

    try:
+        t0 = time.monotonic()
        results = yolo_detect(
            image,
            model_name=req.model,
            confidence=req.confidence,
            target_classes=req.target_classes,
        )
+        infer_ms = (time.monotonic() - t0) * 1000
+        _gpu_log(job_id, log_level, "GPU:YOLO", "DEBUG",
+                 f"Inference: {len(results)} detections in {infer_ms:.0f}ms "
+                 f"(model={req.model}, conf={req.confidence})")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Detection failed: {e}")

@@ -180,14 +208,22 @@ def detect(req: DetectRequest):


@app.post("/ocr", response_model=OCRResponse)
-def ocr(req: OCRRequest):
+def ocr(req: OCRRequest, request: Request):
+    job_id, log_level = _job_ctx(request)
+
    try:
        image = _decode_image(req.image)
+        h, w = image.shape[:2]
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Bad image: {e}")

    try:
+        t0 = time.monotonic()
        results = ocr_run(image, languages=req.languages)
+        infer_ms = (time.monotonic() - t0) * 1000
+        texts = [r["text"][:20] for r in results]
+        _gpu_log(job_id, log_level, "GPU:OCR", "DEBUG",
+                 f"OCR {w}x{h}: {infer_ms:.0f}ms → {len(results)} results {texts}")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"OCR failed: {e}")

@@ -223,14 +259,22 @@ def preprocess_image(req: PreprocessRequest):


@app.post("/vlm", response_model=VLMResponse)
-def vlm(req: VLMRequest):
+def vlm(req: VLMRequest, request: Request):
+    job_id, log_level = _job_ctx(request)
+
    try:
        image = _decode_image(req.image)
+        h, w = image.shape[:2]
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Bad image: {e}")

    try:
+        t0 = time.monotonic()
        result = vlm_query(image, req.prompt)
+        infer_ms = (time.monotonic() - t0) * 1000
+        _gpu_log(job_id, log_level, "GPU:VLM", "DEBUG",
+                 f"VLM {w}x{h}: {infer_ms:.0f}ms → "
+                 f"brand='{result.get('brand', '')}' conf={result.get('confidence', 0):.2f}")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"VLM failed: {e}")