add heavy logging

This commit is contained in:
2026-03-26 10:59:56 -03:00
parent a85722f96a
commit beb0416280
27 changed files with 502 additions and 64 deletions

View File

@@ -14,13 +14,16 @@ import base64
import io
import logging
import os
import time
from contextlib import asynccontextmanager
import numpy as np
from fastapi import FastAPI, HTTPException
from fastapi import FastAPI, HTTPException, Request
from PIL import Image
from pydantic import BaseModel
from emit import log as emit_log
from config import get_config, get_device, update_config
from models import registry
from models.yolo import detect as yolo_detect
@@ -36,6 +39,19 @@ def _decode_image(b64: str) -> np.ndarray:
return np.array(img)
def _job_ctx(request: Request) -> tuple[str, str]:
    """Pull the logging context for a job out of the incoming request.

    Reads the ``x-job-id`` and ``x-log-level`` headers; a missing job id
    yields an empty string and the log level defaults to ``"INFO"``.

    Returns:
        A ``(job_id, log_level)`` tuple.
    """
    headers = request.headers
    return headers.get("x-job-id", ""), headers.get("x-log-level", "INFO")
def _gpu_log(job_id: str, log_level: str, stage: str, level: str, msg: str):
    """Forward a log event via ``emit_log`` when a job context is present.

    Calls with an empty ``job_id`` are silently dropped — requests made
    outside a job context produce no emitted log events.
    """
    if not job_id:
        return
    emit_log(job_id, stage, level, msg, log_level=log_level)
# --- Request/Response models ---
class DetectRequest(BaseModel):
@@ -160,19 +176,31 @@ def unload_model(body: dict):
@app.post("/detect", response_model=DetectResponse)
def detect(req: DetectRequest):
def detect(req: DetectRequest, request: Request):
job_id, log_level = _job_ctx(request)
try:
t0 = time.monotonic()
image = _decode_image(req.image)
decode_ms = (time.monotonic() - t0) * 1000
h, w = image.shape[:2]
_gpu_log(job_id, log_level, "GPU:YOLO", "DEBUG",
f"Decoded {w}x{h} image in {decode_ms:.0f}ms")
except Exception as e:
raise HTTPException(status_code=400, detail=f"Bad image: {e}")
try:
t0 = time.monotonic()
results = yolo_detect(
image,
model_name=req.model,
confidence=req.confidence,
target_classes=req.target_classes,
)
infer_ms = (time.monotonic() - t0) * 1000
_gpu_log(job_id, log_level, "GPU:YOLO", "DEBUG",
f"Inference: {len(results)} detections in {infer_ms:.0f}ms "
f"(model={req.model}, conf={req.confidence})")
except Exception as e:
raise HTTPException(status_code=500, detail=f"Detection failed: {e}")
@@ -180,14 +208,22 @@ def detect(req: DetectRequest):
@app.post("/ocr", response_model=OCRResponse)
def ocr(req: OCRRequest):
def ocr(req: OCRRequest, request: Request):
job_id, log_level = _job_ctx(request)
try:
image = _decode_image(req.image)
h, w = image.shape[:2]
except Exception as e:
raise HTTPException(status_code=400, detail=f"Bad image: {e}")
try:
t0 = time.monotonic()
results = ocr_run(image, languages=req.languages)
infer_ms = (time.monotonic() - t0) * 1000
texts = [r["text"][:20] for r in results]
_gpu_log(job_id, log_level, "GPU:OCR", "DEBUG",
f"OCR {w}x{h}: {infer_ms:.0f}ms → {len(results)} results {texts}")
except Exception as e:
raise HTTPException(status_code=500, detail=f"OCR failed: {e}")
@@ -223,14 +259,22 @@ def preprocess_image(req: PreprocessRequest):
@app.post("/vlm", response_model=VLMResponse)
def vlm(req: VLMRequest):
def vlm(req: VLMRequest, request: Request):
job_id, log_level = _job_ctx(request)
try:
image = _decode_image(req.image)
h, w = image.shape[:2]
except Exception as e:
raise HTTPException(status_code=400, detail=f"Bad image: {e}")
try:
t0 = time.monotonic()
result = vlm_query(image, req.prompt)
infer_ms = (time.monotonic() - t0) * 1000
_gpu_log(job_id, log_level, "GPU:VLM", "DEBUG",
f"VLM {w}x{h}: {infer_ms:.0f}ms → "
f"brand='{result.get('brand', '')}' conf={result.get('confidence', 0):.2f}")
except Exception as e:
raise HTTPException(status_code=500, detail=f"VLM failed: {e}")