This commit is contained in:
2026-03-23 16:55:13 -03:00
parent 4fdbdfc6d3
commit 3df9ed5ada
17 changed files with 848 additions and 4 deletions

14
gpu/.env.template Normal file
View File

@@ -0,0 +1,14 @@
# Inference server configuration
HOST=0.0.0.0
PORT=8000
# VRAM management
VRAM_BUDGET_MB=10240
STRATEGY=sequential # sequential | concurrent | auto
# Model defaults
YOLO_MODEL=yolov8n.pt
YOLO_CONFIDENCE=0.3
# Device
DEVICE=auto # auto | cpu | cuda | cuda:0

18
gpu/Dockerfile Normal file
View File

@@ -0,0 +1,18 @@
# Inference server image. CPU by default; add `--gpus all` at `docker run`
# time for GPU (see run.sh docker-gpu).
FROM python:3.11-slim

# uv installs dependencies much faster than plain pip.
RUN pip install --no-cache-dir uv

# OpenCV (pulled in by ultralytics) needs libGL and glib at runtime.
# --no-install-recommends keeps the image small; the apt lists are removed
# in the same layer so they never land in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy requirements first so the dependency layer is cached across code edits.
COPY requirements.txt .
RUN uv pip install --system --no-cache -r requirements.txt

COPY . .

EXPOSE 8000
CMD ["python", "server.py"]

4
gpu/requirements.txt Normal file
View File

@@ -0,0 +1,4 @@
fastapi>=0.109.0
uvicorn[standard]>=0.27.0
ultralytics>=8.0.0
Pillow>=10.0.0

54
gpu/run.sh Executable file
View File

@@ -0,0 +1,54 @@
#!/bin/bash
# Run the inference server
#
# Usage:
#   ./run.sh            # Local (pip install -r requirements.txt first)
#   ./run.sh docker     # Docker (CPU)
#   ./run.sh docker-gpu # Docker with GPU
#   ./run.sh stop       # Stop Docker container
set -e
cd "$(dirname "${BASH_SOURCE[0]}")"

# Load env (create from template if missing)
if [ ! -f .env ]; then
    if [ -f .env.template ]; then
        cp .env.template .env
        echo "Created .env from template — edit as needed"
    fi
fi
if [ -f .env ]; then
    set -a
    source .env
    set +a
fi

# Build the image and run it. Extra arguments (e.g. --gpus all) are passed
# through to `docker run`. Flags are collected in an array instead of a
# word-split string so paths/values with spaces stay intact (SC2086).
run_docker() {
    docker build -t mpr-inference .
    local args=(--rm -p "${PORT:-8000}:8000" --name mpr-inference)
    [ -f .env ] && args+=(--env-file .env)
    docker run "${args[@]}" "$@" mpr-inference
}

case "${1:-local}" in
    local)
        python server.py
        ;;
    docker)
        run_docker
        ;;
    docker-gpu)
        run_docker --gpus all
        ;;
    stop)
        docker stop mpr-inference 2>/dev/null || true
        ;;
    *)
        echo "Usage: ./run.sh [local|docker|docker-gpu|stop]"
        exit 1
        ;;
esac

206
gpu/server.py Normal file
View File

@@ -0,0 +1,206 @@
"""
Inference server — thin HTTP wrapper around ML models.
Runs on the GPU machine. The detection pipeline calls this over HTTP,
or imports the same logic locally if GPU is on the same machine.
Config is loaded from env on startup, then editable at runtime via
GET/PUT /config. The UI config panel is just a visual editor for these
same values.
Usage:
cd gpu && uvicorn server:app --host 0.0.0.0 --port 8000
# or
cd gpu && python server.py
"""
from __future__ import annotations
import base64
import io
import logging
import os
from contextlib import asynccontextmanager
import numpy as np
from fastapi import FastAPI, HTTPException
from PIL import Image
from pydantic import BaseModel
logger = logging.getLogger(__name__)
# --- Runtime config (loaded from env, mutable via API) ---
# Seeded from the environment once at import time; PUT /config mutates it at
# runtime. Values are plain built-ins so the dict serializes to JSON as-is.
_config = {
    "device": os.environ.get("DEVICE", "auto"),  # auto | cpu | cuda | cuda:0
    "yolo_model": os.environ.get("YOLO_MODEL", "yolov8n.pt"),
    "yolo_confidence": float(os.environ.get("YOLO_CONFIDENCE", "0.3")),
    "vram_budget_mb": int(os.environ.get("VRAM_BUDGET_MB", "10240")),
    "strategy": os.environ.get("STRATEGY", "sequential"),  # sequential | concurrent | auto
}
# --- Model registry ---
# Lazily populated cache of loaded models, keyed by model name/path
# (see _get_yolo). Cleared on shutdown and by /models/unload.
_models: dict[str, object] = {}
# --- Helpers ---
def _get_device() -> str:
    """Resolve the configured device, expanding "auto" to CUDA when available.

    Returns the configured value verbatim unless it is "auto", in which case
    torch decides; a missing torch install resolves to "cpu".
    """
    configured = _config["device"]
    if configured == "auto":
        try:
            import torch
        except ImportError:
            return "cpu"
        return "cuda" if torch.cuda.is_available() else "cpu"
    return configured
def _get_yolo(model_name: str | None = None):
    """Return a cached YOLO model, loading it onto the device on first use.

    A falsy *model_name* (None or "") falls back to the configured default.
    """
    name = model_name or _config["yolo_model"]
    cached = _models.get(name)
    if cached is None:
        from ultralytics import YOLO

        device = _get_device()
        logger.info("Loading %s on %s", name, device)
        cached = YOLO(name)
        cached.to(device)
        _models[name] = cached
    return cached
def _decode_image(b64: str) -> np.ndarray:
    """Decode a base64-encoded image into an RGB numpy array (H, W, 3)."""
    pil_img = Image.open(io.BytesIO(base64.b64decode(b64)))
    return np.array(pil_img.convert("RGB"))
# --- Request/Response models ---
class DetectRequest(BaseModel):
    """Request body for POST /detect."""
    image: str  # base64 JPEG
    model: str | None = None  # defaults to config yolo_model
    confidence: float | None = None  # defaults to config yolo_confidence
    target_classes: list[str] | None = None  # keep only these labels; None keeps all
class BBox(BaseModel):
    """One detection: axis-aligned box in pixel coordinates (top-left origin)."""
    x: int  # left edge
    y: int  # top edge
    w: int  # width
    h: int  # height
    confidence: float  # detector confidence score
    label: str  # class name from the model
class DetectResponse(BaseModel):
    """Response body for POST /detect."""
    detections: list[BBox]  # empty when nothing is detected (or all filtered out)
class ConfigUpdate(BaseModel):
    """Partial config update — only provided fields are changed."""
    # Each field mirrors a key of the module-level _config dict. None means
    # "leave unchanged" (PUT /config drops None fields via exclude_none).
    device: str | None = None  # auto | cpu | cuda | cuda:0
    yolo_model: str | None = None
    yolo_confidence: float | None = None
    vram_budget_mb: int | None = None
    strategy: str | None = None  # sequential | concurrent | auto
# --- App ---
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Startup/shutdown hook: log the resolved device, drop models on exit."""
    logger.info("Inference server starting (device=%s)", _get_device())
    yield
    logger.info("Inference server shutting down")
    # Drop references so loaded models can be garbage-collected.
    _models.clear()
app = FastAPI(title="MPR Inference Server", lifespan=lifespan)
@app.get("/health")
def health():
return {
"status": "ok",
"device": _get_device(),
"loaded_models": list(_models.keys()),
"vram_budget_mb": _config["vram_budget_mb"],
"strategy": _config["strategy"],
}
@app.get("/config")
def get_config():
"""Current runtime config. Same values the .env sets at startup."""
return {**_config, "device_resolved": _get_device()}
@app.put("/config")
def update_config(update: ConfigUpdate):
"""Update runtime config. Only provided fields are changed."""
changes = update.model_dump(exclude_none=True)
if not changes:
return _config
# If model changed, unload the old one so it gets reloaded on next request
if "yolo_model" in changes and changes["yolo_model"] != _config["yolo_model"]:
old = _config["yolo_model"]
if old in _models:
del _models[old]
logger.info("Unloaded %s (model changed)", old)
_config.update(changes)
logger.info("Config updated: %s", changes)
return {**_config, "device_resolved": _get_device()}
@app.post("/models/unload")
def unload_model(body: dict):
"""Unload a model from memory to free VRAM."""
name = body.get("model", "")
if name in _models:
del _models[name]
logger.info("Unloaded %s", name)
return {"status": "unloaded", "model": name}
return {"status": "not_loaded", "model": name}
@app.post("/detect", response_model=DetectResponse)
def detect(req: DetectRequest):
model_name = req.model or _config["yolo_model"]
confidence = req.confidence if req.confidence is not None else _config["yolo_confidence"]
try:
model = _get_yolo(model_name)
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to load model: {e}")
image = _decode_image(req.image)
results = model(image, conf=confidence, verbose=False)
detections = []
for r in results:
for box in r.boxes:
x1, y1, x2, y2 = box.xyxy[0].tolist()
label = r.names[int(box.cls[0])]
if req.target_classes and label not in req.target_classes:
continue
det = BBox(
x=int(x1), y=int(y1),
w=int(x2 - x1), h=int(y2 - y1),
confidence=float(box.conf[0]),
label=label,
)
detections.append(det)
return DetectResponse(detections=detections)
if __name__ == "__main__":
import uvicorn
logging.basicConfig(level=logging.INFO, format="%(levelname)-7s %(name)s%(message)s")
host = os.environ.get("HOST", "0.0.0.0")
port = int(os.environ.get("PORT", "8000"))
uvicorn.run(app, host=host, port=port)