add heavy logging
gpu/emit.py (new file, 52 lines)
@@ -0,0 +1,52 @@
+"""
+Lightweight event emitter for the GPU inference server.
+
+Pushes debug logs to the same Redis list as the pipeline orchestrator,
+so GPU-side details (model load, VRAM, inference timing) appear in the
+same log panel.
+
+Only active when the request includes an X-Job-Id header.
+No dependency on the detect package.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from datetime import datetime, timezone
+
+import redis
+
+REDIS_URL = os.environ.get("REDIS_URL", "redis://localhost:6379/0")
+EVENTS_PREFIX = "detect_events"
+
+_LEVEL_ORDER = {"DEBUG": 0, "INFO": 1, "WARN": 2, "ERROR": 3}
+
+_redis_client = None
+
+
+def _get_redis():
+    global _redis_client
+    if _redis_client is None:
+        _redis_client = redis.from_url(REDIS_URL, decode_responses=True)
+    return _redis_client
+
+
+def log(job_id: str, stage: str, level: str, msg: str, log_level: str = "INFO"):
+    """Push a log event to Redis if the level meets the threshold."""
+    if not job_id:
+        return
+    if _LEVEL_ORDER.get(level.upper(), 1) < _LEVEL_ORDER.get(log_level.upper(), 1):
+        return
+
+    r = _get_redis()
+    key = f"{EVENTS_PREFIX}:{job_id}"
+    event = json.dumps({
+        "event": "log",
+        "level": level,
+        "stage": stage,
+        "msg": msg,
+        "ts": datetime.now(timezone.utc).isoformat(),
+    })
+    r.rpush(key, event)
+    r.expire(key, 3600)
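For context, the new emitter is write-only: something on the orchestrator side has to drain detect_events:<job_id> to render the log panel. A minimal reader sketch follows; it is illustrative only (the consumer is not part of this commit, and the "job-123" id is hypothetical):

    import json

    import redis

    # Drain every event for one job, in insertion order, from the same
    # Redis list that emit.log() RPUSHes to.
    r = redis.from_url("redis://localhost:6379/0", decode_responses=True)

    def read_events(job_id: str) -> list[dict]:
        key = f"detect_events:{job_id}"
        return [json.loads(raw) for raw in r.lrange(key, 0, -1)]

    for ev in read_events("job-123"):  # hypothetical job id
        print(f"[{ev['level']}] {ev['stage']}: {ev['msg']}")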
@@ -2,6 +2,7 @@ fastapi>=0.109.0
 uvicorn[standard]>=0.27.0
 rapidfuzz>=3.0.0
 Pillow>=10.0.0
+redis>=5.0.0
 
 # --- GPU-specific installs (mcrn: RTX 3080, CUDA toolkit 12.8) ---
 #
@@ -14,13 +14,16 @@ import base64
 import io
 import logging
 import os
+import time
 from contextlib import asynccontextmanager
 
 import numpy as np
-from fastapi import FastAPI, HTTPException
+from fastapi import FastAPI, HTTPException, Request
 from PIL import Image
 from pydantic import BaseModel
 
+from emit import log as emit_log
+
 from config import get_config, get_device, update_config
 from models import registry
 from models.yolo import detect as yolo_detect
@@ -36,6 +39,19 @@ def _decode_image(b64: str) -> np.ndarray:
     return np.array(img)
 
 
+def _job_ctx(request: Request) -> tuple[str, str]:
+    """Extract job_id and log_level from request headers."""
+    job_id = request.headers.get("x-job-id", "")
+    log_level = request.headers.get("x-log-level", "INFO")
+    return job_id, log_level
+
+
+def _gpu_log(job_id: str, log_level: str, stage: str, level: str, msg: str):
+    """Emit a log event if job context is present."""
+    if job_id:
+        emit_log(job_id, stage, level, msg, log_level=log_level)
+
+
 # --- Request/Response models ---
 
 class DetectRequest(BaseModel):
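For reference, a sketch of the caller's side of this contract: the orchestrator would attach the two headers that _job_ctx reads. This is an assumption about the (unseen) client, not code from this commit; the host, port, and model name are placeholders:

    import requests

    def call_detect(image_b64: str, job_id: str) -> dict:
        # X-Job-Id switches GPU-side log emission on; X-Log-Level is the
        # threshold emit.log() applies before pushing to Redis.
        resp = requests.post(
            "http://gpu-server:8000/detect",  # placeholder host/port
            json={"image": image_b64, "model": "yolov8n", "confidence": 0.4},
            headers={"X-Job-Id": job_id, "X-Log-Level": "DEBUG"},
        )
        resp.raise_for_status()
        return resp.json()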
@@ -160,19 +176,31 @@ def unload_model(body: dict):
 
 
 @app.post("/detect", response_model=DetectResponse)
-def detect(req: DetectRequest):
+def detect(req: DetectRequest, request: Request):
+    job_id, log_level = _job_ctx(request)
+
     try:
+        t0 = time.monotonic()
         image = _decode_image(req.image)
+        decode_ms = (time.monotonic() - t0) * 1000
+        h, w = image.shape[:2]
+        _gpu_log(job_id, log_level, "GPU:YOLO", "DEBUG",
+                 f"Decoded {w}x{h} image in {decode_ms:.0f}ms")
     except Exception as e:
         raise HTTPException(status_code=400, detail=f"Bad image: {e}")
 
     try:
+        t0 = time.monotonic()
         results = yolo_detect(
             image,
             model_name=req.model,
             confidence=req.confidence,
             target_classes=req.target_classes,
         )
+        infer_ms = (time.monotonic() - t0) * 1000
+        _gpu_log(job_id, log_level, "GPU:YOLO", "DEBUG",
+                 f"Inference: {len(results)} detections in {infer_ms:.0f}ms "
+                 f"(model={req.model}, conf={req.confidence})")
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Detection failed: {e}")
 
@@ -180,14 +208,22 @@ def detect(req: DetectRequest):
 
 
 @app.post("/ocr", response_model=OCRResponse)
-def ocr(req: OCRRequest):
+def ocr(req: OCRRequest, request: Request):
+    job_id, log_level = _job_ctx(request)
+
     try:
         image = _decode_image(req.image)
+        h, w = image.shape[:2]
     except Exception as e:
         raise HTTPException(status_code=400, detail=f"Bad image: {e}")
 
     try:
+        t0 = time.monotonic()
         results = ocr_run(image, languages=req.languages)
+        infer_ms = (time.monotonic() - t0) * 1000
+        texts = [r["text"][:20] for r in results]
+        _gpu_log(job_id, log_level, "GPU:OCR", "DEBUG",
+                 f"OCR {w}x{h}: {infer_ms:.0f}ms → {len(results)} results {texts}")
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"OCR failed: {e}")
 
@@ -223,14 +259,22 @@ def preprocess_image(req: PreprocessRequest):
 
 
 @app.post("/vlm", response_model=VLMResponse)
-def vlm(req: VLMRequest):
+def vlm(req: VLMRequest, request: Request):
+    job_id, log_level = _job_ctx(request)
+
     try:
         image = _decode_image(req.image)
+        h, w = image.shape[:2]
     except Exception as e:
         raise HTTPException(status_code=400, detail=f"Bad image: {e}")
 
     try:
+        t0 = time.monotonic()
         result = vlm_query(image, req.prompt)
+        infer_ms = (time.monotonic() - t0) * 1000
+        _gpu_log(job_id, log_level, "GPU:VLM", "DEBUG",
+                 f"VLM {w}x{h}: {infer_ms:.0f}ms → "
+                 f"brand='{result.get('brand', '')}' conf={result.get('confidence', 0):.2f}")
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"VLM failed: {e}")
 
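One behavioral note on the threshold: events below the per-request level are dropped before any Redis round-trip, so DEBUG messages from these handlers only land in the list when the caller sends X-Log-Level: DEBUG. A quick illustration against emit.log()'s _LEVEL_ORDER (hypothetical job id, reachable Redis assumed):

    from emit import log

    log("job-123", "GPU:YOLO", "DEBUG", "dropped")                   # 0 < 1: no-op
    log("job-123", "GPU:YOLO", "INFO", "kept")                       # 1 >= 1: pushed
    log("job-123", "GPU:YOLO", "DEBUG", "kept", log_level="DEBUG")   # 0 >= 0: pushed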