add heavy logging

This commit is contained in:
2026-03-26 10:59:56 -03:00
parent a85722f96a
commit beb0416280
27 changed files with 502 additions and 64 deletions

52
gpu/emit.py Normal file
View File

@@ -0,0 +1,52 @@
"""
Lightweight event emitter for the GPU inference server.
Pushes debug logs to the same per-job Redis event list as the pipeline
orchestrator, so GPU-side details (model load, VRAM, inference timing)
appear in the same log panel.
Only active when the request includes an X-Job-Id header.
No dependency on the detect package.
"""
from __future__ import annotations
import json
import os
from datetime import datetime, timezone
import redis
# Connection URL for the Redis server; overridable via the REDIS_URL env var.
REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0")

# Key prefix for per-job event lists; the full key is "<prefix>:<job_id>".
EVENTS_PREFIX = "detect_events"

# Severity rank per level name (DEBUG=0 ... ERROR=3), used for threshold
# filtering in log().
_LEVEL_ORDER = {
    name: rank for rank, name in enumerate(("DEBUG", "INFO", "WARN", "ERROR"))
}

# Shared Redis client, created on first use by _get_redis().
_redis_client = None
def _get_redis():
    """Return the module-wide Redis client, creating it on first call.

    The client is built from ``REDIS_URL`` with ``decode_responses=True`` and
    cached in the module-level ``_redis_client`` so later calls reuse it.
    """
    global _redis_client
    if _redis_client is not None:
        return _redis_client
    _redis_client = redis.from_url(REDIS_URL, decode_responses=True)
    return _redis_client
def log(job_id: str, stage: str, level: str, msg: str, log_level: str = "INFO"):
    """Push a log event to Redis if the level meets the threshold.

    Emission is best-effort: a Redis failure is reported through the standard
    ``logging`` module and swallowed, so a logging outage never turns into a
    failed inference request.

    Args:
        job_id: Job identifier; an empty value disables emission entirely.
        stage: Label stored in the event's ``stage`` field.
        level: Severity of this message (DEBUG, INFO, WARN, ERROR).
        log_level: Minimum severity to emit; messages below it are dropped.
        msg: Text stored in the event's ``msg`` field.
    """
    if not job_id:
        return

    # Unknown level names rank as INFO (1) on both sides of the comparison.
    severity = _LEVEL_ORDER.get(level.upper(), 1)
    threshold = _LEVEL_ORDER.get(log_level.upper(), 1)
    if severity < threshold:
        return

    key = f"{EVENTS_PREFIX}:{job_id}"
    event = json.dumps({
        "event": "log",
        "level": level,
        "stage": stage,
        "msg": msg,
        "ts": datetime.now(timezone.utc).isoformat(),
    })

    try:
        # One pipeline = one round trip, and the list never exists without
        # its TTL (the default pipeline wraps the commands in MULTI/EXEC).
        pipe = _get_redis().pipeline()
        pipe.rpush(key, event)
        pipe.expire(key, 3600)  # keep a job's events for one hour
        pipe.execute()
    except redis.RedisError:
        # Debug logging must not break the request that triggered it.
        import logging

        logging.getLogger(__name__).warning(
            "Failed to push log event for job %s", job_id, exc_info=True
        )

View File

@@ -2,6 +2,7 @@ fastapi>=0.109.0
uvicorn[standard]>=0.27.0
rapidfuzz>=3.0.0
Pillow>=10.0.0
redis>=5.0.0
# --- GPU-specific installs (mcrn: RTX 3080, CUDA toolkit 12.8) ---
#

View File

@@ -14,13 +14,16 @@ import base64
import io
import logging
import os
import time
from contextlib import asynccontextmanager
import numpy as np
from fastapi import FastAPI, HTTPException
from fastapi import FastAPI, HTTPException, Request
from PIL import Image
from pydantic import BaseModel
from emit import log as emit_log
from config import get_config, get_device, update_config
from models import registry
from models.yolo import detect as yolo_detect
@@ -36,6 +39,19 @@ def _decode_image(b64: str) -> np.ndarray:
return np.array(img)
def _job_ctx(request: Request) -> tuple[str, str]:
    """Return the ``(job_id, log_level)`` pair carried by the request headers.

    ``x-job-id`` defaults to an empty string and ``x-log-level`` defaults to
    ``"INFO"`` when the header is absent.
    """
    headers = request.headers
    return headers.get("x-job-id", ""), headers.get("x-log-level", "INFO")
def _gpu_log(job_id: str, log_level: str, stage: str, level: str, msg: str):
    """Forward a log event to the emitter when a job id is present.

    Does nothing when ``job_id`` is empty; otherwise calls ``emit_log`` with
    ``log_level`` as the emission threshold.
    """
    if not job_id:
        return
    emit_log(job_id, stage, level, msg, log_level=log_level)
# --- Request/Response models ---
class DetectRequest(BaseModel):
@@ -160,19 +176,31 @@ def unload_model(body: dict):
@app.post("/detect", response_model=DetectResponse)
def detect(req: DetectRequest):
def detect(req: DetectRequest, request: Request):
job_id, log_level = _job_ctx(request)
try:
t0 = time.monotonic()
image = _decode_image(req.image)
decode_ms = (time.monotonic() - t0) * 1000
h, w = image.shape[:2]
_gpu_log(job_id, log_level, "GPU:YOLO", "DEBUG",
f"Decoded {w}x{h} image in {decode_ms:.0f}ms")
except Exception as e:
raise HTTPException(status_code=400, detail=f"Bad image: {e}")
try:
t0 = time.monotonic()
results = yolo_detect(
image,
model_name=req.model,
confidence=req.confidence,
target_classes=req.target_classes,
)
infer_ms = (time.monotonic() - t0) * 1000
_gpu_log(job_id, log_level, "GPU:YOLO", "DEBUG",
f"Inference: {len(results)} detections in {infer_ms:.0f}ms "
f"(model={req.model}, conf={req.confidence})")
except Exception as e:
raise HTTPException(status_code=500, detail=f"Detection failed: {e}")
@@ -180,14 +208,22 @@ def detect(req: DetectRequest):
@app.post("/ocr", response_model=OCRResponse)
def ocr(req: OCRRequest):
def ocr(req: OCRRequest, request: Request):
job_id, log_level = _job_ctx(request)
try:
image = _decode_image(req.image)
h, w = image.shape[:2]
except Exception as e:
raise HTTPException(status_code=400, detail=f"Bad image: {e}")
try:
t0 = time.monotonic()
results = ocr_run(image, languages=req.languages)
infer_ms = (time.monotonic() - t0) * 1000
texts = [r["text"][:20] for r in results]
_gpu_log(job_id, log_level, "GPU:OCR", "DEBUG",
f"OCR {w}x{h}: {infer_ms:.0f}ms → {len(results)} results {texts}")
except Exception as e:
raise HTTPException(status_code=500, detail=f"OCR failed: {e}")
@@ -223,14 +259,22 @@ def preprocess_image(req: PreprocessRequest):
@app.post("/vlm", response_model=VLMResponse)
def vlm(req: VLMRequest):
def vlm(req: VLMRequest, request: Request):
job_id, log_level = _job_ctx(request)
try:
image = _decode_image(req.image)
h, w = image.shape[:2]
except Exception as e:
raise HTTPException(status_code=400, detail=f"Bad image: {e}")
try:
t0 = time.monotonic()
result = vlm_query(image, req.prompt)
infer_ms = (time.monotonic() - t0) * 1000
_gpu_log(job_id, log_level, "GPU:VLM", "DEBUG",
f"VLM {w}x{h}: {infer_ms:.0f}ms → "
f"brand='{result.get('brand', '')}' conf={result.get('confidence', 0):.2f}")
except Exception as e:
raise HTTPException(status_code=500, detail=f"VLM failed: {e}")