phase 4
core/gpu/.env.template (Normal file, 18 lines)
@@ -0,0 +1,18 @@
# Inference server configuration
HOST=0.0.0.0
PORT=8000

# VRAM management
VRAM_BUDGET_MB=10240
STRATEGY=sequential # sequential | concurrent | auto

# Model defaults
YOLO_MODEL=yolov8n.pt
YOLO_CONFIDENCE=0.3

# OCR
OCR_LANGUAGES=en,es
OCR_MIN_CONFIDENCE=0.5

# Device
DEVICE=auto # auto | cpu | cuda | cuda:0
core/gpu/Dockerfile (Normal file, 18 lines)
@@ -0,0 +1,18 @@
FROM python:3.11-slim

RUN pip install --no-cache-dir uv

RUN apt-get update && apt-get install -y \
    libgl1 libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

COPY requirements.txt .
RUN uv pip install --system --no-cache -r requirements.txt

COPY . .

EXPOSE 8000

CMD ["python", "server.py"]
core/gpu/__init__.py (Normal file, 0 lines)
core/gpu/config.py (Normal file, 39 lines)
@@ -0,0 +1,39 @@
"""
Runtime config — loaded from env, mutable via API.

The UI config panel is just a visual editor for these same values.
"""

from __future__ import annotations

import os

_config = {
    "device": os.environ.get("DEVICE", "auto"),
    "yolo_model": os.environ.get("YOLO_MODEL", "yolov8n.pt"),
    "yolo_confidence": float(os.environ.get("YOLO_CONFIDENCE", "0.3")),
    "vram_budget_mb": int(os.environ.get("VRAM_BUDGET_MB", "10240")),
    "strategy": os.environ.get("STRATEGY", "sequential"),
    "ocr_languages": os.environ.get("OCR_LANGUAGES", "en").split(","),
    "ocr_min_confidence": float(os.environ.get("OCR_MIN_CONFIDENCE", "0.5")),
}


def get_config() -> dict:
    return _config


def update_config(changes: dict) -> dict:
    _config.update(changes)
    return _config


def get_device() -> str:
    device = _config["device"]
    if device != "auto":
        return device
    try:
        import torch
        return "cuda" if torch.cuda.is_available() else "cpu"
    except ImportError:
        return "cpu"
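To make the resolution order concrete, here is a minimal usage sketch (illustrative, not part of the commit); it assumes only that the module is importable as a bare `config` from the gpu directory, which is how server.py runs:

# Usage sketch for config.py (illustrative, not part of the commit).
# Run from core/gpu/ so the bare `config` import resolves.
import config

print(config.get_config()["strategy"])   # "sequential" unless STRATEGY is set in the env
print(config.get_device())               # "cuda" if torch sees a GPU, else "cpu"

# The PUT /config route funnels into the same mutable dict:
config.update_config({"yolo_confidence": 0.5})
assert config.get_config()["yolo_confidence"] == 0.5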
core/gpu/emit.py (Normal file, 52 lines)
@@ -0,0 +1,52 @@
"""
Lightweight event emitter for the GPU inference server.

Pushes debug logs to the same Redis stream as the pipeline orchestrator,
so GPU-side details (model load, VRAM, inference timing) appear in the
same log panel.

Only active when the request includes an X-Job-Id header.
No dependency on the detect package.
"""

from __future__ import annotations

import json
import os
from datetime import datetime, timezone

import redis

REDIS_URL = os.environ.get("REDIS_URL", "redis://localhost:6379/0")
EVENTS_PREFIX = "detect_events"

_LEVEL_ORDER = {"DEBUG": 0, "INFO": 1, "WARN": 2, "ERROR": 3}

_redis_client = None


def _get_redis():
    global _redis_client
    if _redis_client is None:
        _redis_client = redis.from_url(REDIS_URL, decode_responses=True)
    return _redis_client


def log(job_id: str, stage: str, level: str, msg: str, log_level: str = "INFO"):
    """Push a log event to Redis if the level meets the threshold."""
    if not job_id:
        return
    if _LEVEL_ORDER.get(level.upper(), 1) < _LEVEL_ORDER.get(log_level.upper(), 1):
        return

    r = _get_redis()
    key = f"{EVENTS_PREFIX}:{job_id}"
    event = json.dumps({
        "event": "log",
        "level": level,
        "stage": stage,
        "msg": msg,
        "ts": datetime.now(timezone.utc).isoformat(),
    })
    r.rpush(key, event)
    r.expire(key, 3600)
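Since events are RPUSHed to a plain list keyed by job id (with a one-hour TTL), a reader on the orchestrator side can drain them with LRANGE. A minimal consumer sketch, assuming the same REDIS_URL default and the key layout above; the job id is a made-up example:

# Illustrative consumer for the detect_events:{job_id} list (not part of the commit).
import json
import redis

r = redis.from_url("redis://localhost:6379/0", decode_responses=True)

def read_job_logs(job_id: str) -> list[dict]:
    # Events were appended with RPUSH, so LRANGE returns them oldest-first.
    return [json.loads(e) for e in r.lrange(f"detect_events:{job_id}", 0, -1)]

for ev in read_job_logs("job-123"):
    print(ev["ts"], ev["level"], ev["stage"], ev["msg"])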
core/gpu/models/__init__.py (Normal file, 6 lines)
@@ -0,0 +1,6 @@
# GPU models — standalone container imports.
# When running as a container (cd gpu && python server.py), bare imports work.
# When imported from the main app (core.gpu.models.preprocess), only
# individual modules should be imported directly, not this __init__.
#
# server.py imports detect/ocr/vlm directly, not through this file.
core/gpu/models/cv/__init__.py (Normal file, 1 line)
@@ -0,0 +1 @@
"""CV operations — pure OpenCV, no ML models."""
core/gpu/models/cv/edges.py (Normal file, 258 lines)
@@ -0,0 +1,258 @@
"""
Edge detection — Canny + HoughLinesP → parallel line pairs → bounding boxes.

Finds horizontal line pairs with consistent spacing, which correspond to
the top and bottom edges of advertising hoardings.
"""

from __future__ import annotations

import base64
import io

import cv2
import numpy as np


def detect_edges(
    image: np.ndarray,
    canny_low: int = 50,
    canny_high: int = 150,
    hough_threshold: int = 80,
    hough_min_length: int = 100,
    hough_max_gap: int = 10,
    pair_max_distance: int = 200,
    pair_min_distance: int = 15,
) -> list[dict]:
    """
    Find horizontal line pairs that likely bound advertising hoardings.

    Returns list of dicts with keys: x, y, w, h, confidence, label.
    Each box represents the region between a detected pair of parallel
    horizontal lines.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    edges = cv2.Canny(gray, canny_low, canny_high)

    raw_lines = cv2.HoughLinesP(
        edges,
        rho=1,
        theta=np.pi / 180,
        threshold=hough_threshold,
        minLineLength=hough_min_length,
        maxLineGap=hough_max_gap,
    )

    if raw_lines is None:
        return []

    # Filter to near-horizontal lines (within 10 degrees)
    horizontals = _filter_horizontal(raw_lines, max_angle_deg=10)

    if len(horizontals) < 2:
        return []

    # Find pairs of parallel horizontals with consistent spacing
    pairs = _find_line_pairs(
        horizontals,
        min_distance=pair_min_distance,
        max_distance=pair_max_distance,
    )

    # Convert pairs to bounding boxes
    h, w = image.shape[:2]
    results = []
    for top_line, bottom_line in pairs:
        box = _pair_to_bbox(top_line, bottom_line, frame_width=w, frame_height=h)
        if box is not None:
            results.append(box)

    return results


def _filter_horizontal(lines: np.ndarray, max_angle_deg: float = 10) -> list[tuple]:
    """Keep only lines within max_angle_deg of horizontal."""
    max_slope = np.tan(np.radians(max_angle_deg))
    result = []
    for line in lines:
        x1, y1, x2, y2 = line[0]
        dx = x2 - x1
        if dx == 0:
            continue
        slope = abs((y2 - y1) / dx)
        if slope <= max_slope:
            y_mid = (y1 + y2) / 2
            x_min = min(x1, x2)
            x_max = max(x1, x2)
            length = np.sqrt(dx**2 + (y2 - y1) ** 2)
            result.append((x_min, x_max, y_mid, length))
    return result


def _find_line_pairs(
    horizontals: list[tuple],
    min_distance: int,
    max_distance: int,
) -> list[tuple]:
    """
    Find pairs of horizontal lines that could be top/bottom of a hoarding.

    Lines must overlap horizontally and be spaced within [min_distance, max_distance].
    """
    # Sort by y position
    sorted_lines = sorted(horizontals, key=lambda l: l[2])

    pairs = []
    used = set()

    for i, top in enumerate(sorted_lines):
        if i in used:
            continue
        for j, bottom in enumerate(sorted_lines[i + 1 :], start=i + 1):
            if j in used:
                continue

            y_gap = bottom[2] - top[2]
            if y_gap < min_distance:
                continue
            if y_gap > max_distance:
                break  # sorted by y, no point checking further

            # Check horizontal overlap
            overlap_start = max(top[0], bottom[0])
            overlap_end = min(top[1], bottom[1])
            overlap = overlap_end - overlap_start

            # Require at least 50% overlap relative to shorter line
            shorter_length = min(top[1] - top[0], bottom[1] - bottom[0])
            if shorter_length > 0 and overlap / shorter_length >= 0.5:
                pairs.append((top, bottom))
                used.add(i)
                used.add(j)
                break

    return pairs


def _pair_to_bbox(
    top: tuple,
    bottom: tuple,
    frame_width: int,
    frame_height: int,
) -> dict | None:
    """Convert a line pair to a bounding box dict."""
    x = int(max(0, min(top[0], bottom[0])))
    y = int(max(0, top[2]))
    x2 = int(min(frame_width, max(top[1], bottom[1])))
    y2 = int(min(frame_height, bottom[2]))
    w = x2 - x
    h = y2 - y

    if w < 20 or h < 5:
        return None

    # Confidence based on line lengths relative to box width
    avg_line_length = (top[3] + bottom[3]) / 2
    coverage = min(1.0, avg_line_length / max(w, 1))

    return {
        "x": x,
        "y": y,
        "w": w,
        "h": h,
        "confidence": round(coverage, 3),
        "label": "edge_region",
    }


def _np_to_b64_jpeg(image: np.ndarray, quality: int = 70) -> str:
    """Encode a numpy image (BGR or grayscale) as base64 JPEG."""
    ok, buf = cv2.imencode(".jpg", image, [cv2.IMWRITE_JPEG_QUALITY, quality])
    if not ok:
        return ""
    return base64.b64encode(buf.tobytes()).decode()


def detect_edges_debug(
    image: np.ndarray,
    canny_low: int = 50,
    canny_high: int = 150,
    hough_threshold: int = 80,
    hough_min_length: int = 100,
    hough_max_gap: int = 10,
    pair_max_distance: int = 200,
    pair_min_distance: int = 15,
) -> dict:
    """
    Same as detect_edges but returns intermediate visualizations.

    Returns dict with:
        regions: list[dict] — same boxes as detect_edges
        edge_overlay_b64: str — Canny edge image as base64 JPEG
        lines_overlay_b64: str — frame with Hough lines drawn
        horizontal_count: int — number of horizontal lines found
        pair_count: int — number of line pairs found
    """
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    edges = cv2.Canny(gray, canny_low, canny_high)

    # Edge overlay — Canny output as-is (white edges on black)
    edge_overlay_b64 = _np_to_b64_jpeg(edges)

    raw_lines = cv2.HoughLinesP(
        edges,
        rho=1,
        theta=np.pi / 180,
        threshold=hough_threshold,
        minLineLength=hough_min_length,
        maxLineGap=hough_max_gap,
    )

    # Lines overlay — draw all Hough lines on a copy of the frame
    lines_vis = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    if raw_lines is not None:
        for line in raw_lines:
            x1, y1, x2, y2 = line[0]
            cv2.line(lines_vis, (x1, y1), (x2, y2), (0, 0, 255), 1)

    horizontals = []
    if raw_lines is not None:
        horizontals = _filter_horizontal(raw_lines, max_angle_deg=10)

    # Draw horizontal lines in cyan, thicker
    for h_line in horizontals:
        x_min, x_max, y_mid, _ = h_line
        cv2.line(lines_vis, (int(x_min), int(y_mid)), (int(x_max), int(y_mid)), (255, 255, 0), 2)

    pairs = []
    if len(horizontals) >= 2:
        pairs = _find_line_pairs(
            horizontals,
            min_distance=pair_min_distance,
            max_distance=pair_max_distance,
        )

    # Draw paired lines in green
    for top_line, bottom_line in pairs:
        cv2.line(lines_vis, (int(top_line[0]), int(top_line[2])),
                 (int(top_line[1]), int(top_line[2])), (0, 255, 0), 2)
        cv2.line(lines_vis, (int(bottom_line[0]), int(bottom_line[2])),
                 (int(bottom_line[1]), int(bottom_line[2])), (0, 255, 0), 2)

    lines_overlay_b64 = _np_to_b64_jpeg(lines_vis)

    # Build region boxes (same logic as detect_edges)
    h, w = image.shape[:2]
    regions = []
    for top_line, bottom_line in pairs:
        box = _pair_to_bbox(top_line, bottom_line, frame_width=w, frame_height=h)
        if box is not None:
            regions.append(box)

    return {
        "regions": regions,
        "edge_overlay_b64": edge_overlay_b64,
        "lines_overlay_b64": lines_overlay_b64,
        "horizontal_count": len(horizontals),
        "pair_count": len(pairs),
    }
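A quick smoke test for the pairing logic, using a synthetic frame rather than real footage (illustrative, not part of the commit; assumes it runs from core/gpu/ so the package import resolves): two long horizontal strokes about 100 px apart should survive the angle filter and pair up into an edge_region box.

# Synthetic smoke test for detect_edges (illustrative, not part of the commit).
import cv2
import numpy as np
from models.cv.edges import detect_edges

frame = np.zeros((360, 640, 3), dtype=np.uint8)
cv2.line(frame, (50, 100), (500, 100), (255, 255, 255), 2)   # top hoarding edge
cv2.line(frame, (50, 200), (500, 200), (255, 255, 255), 2)   # bottom hoarding edge

regions = detect_edges(frame)
# Each 2 px stroke yields two Canny edges, but their own gap (~2 px) is below
# pair_min_distance=15 and is skipped; only the ~100 px cross-stroke pairing
# survives, so expect one or two overlapping boxes spanning roughly
# x=50..500, y=100..200.
print(regions)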
core/gpu/models/cv/segmentation.py (Normal file, 86 lines)
@@ -0,0 +1,86 @@
"""
Field segmentation — HSV green mask → pitch boundary contour.

Pure OpenCV. Called by the inference server endpoint.
"""

from __future__ import annotations

import base64

import cv2
import numpy as np


def segment_field(
    image: np.ndarray,
    hue_low: int = 30,
    hue_high: int = 85,
    sat_low: int = 30,
    sat_high: int = 255,
    val_low: int = 30,
    val_high: int = 255,
    morph_kernel: int = 15,
    min_area_ratio: float = 0.05,
) -> dict:
    """
    Detect the pitch area using HSV green thresholding.

    Returns dict with:
        boundary: list of [x, y] points
        coverage: float (fraction of frame)
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)

    lower = np.array([hue_low, sat_low, val_low])
    upper = np.array([hue_high, sat_high, val_high])
    mask = cv2.inRange(hsv, lower, upper)

    k = morph_kernel
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (k, k))
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)

    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    h, w = image.shape[:2]
    min_area = min_area_ratio * h * w
    boundary = []
    coverage = 0.0

    if contours:
        large = [c for c in contours if cv2.contourArea(c) >= min_area]
        if large:
            pitch_contour = max(large, key=cv2.contourArea)
            boundary = pitch_contour.reshape(-1, 2).tolist()
            coverage = cv2.contourArea(pitch_contour) / (h * w)

            refined = np.zeros_like(mask)
            cv2.drawContours(refined, [pitch_contour], -1, 255, cv2.FILLED)
            mask = refined

    return {
        "boundary": boundary,
        "coverage": coverage,
        "mask": mask,
    }


def segment_field_debug(
    image: np.ndarray,
    **kwargs,
) -> dict:
    """Same as segment_field but includes a mask overlay for the editor."""
    result = segment_field(image, **kwargs)
    mask = result["mask"]

    # RGBA overlay: solid green where mask, fully transparent elsewhere
    h, w = image.shape[:2]
    overlay = np.zeros((h, w, 4), dtype=np.uint8)
    overlay[mask > 0] = [0, 255, 0, 255]
    _, buf = cv2.imencode(".png", overlay)
    result["mask_overlay_b64"] = base64.b64encode(buf.tobytes()).decode()

    # Don't send the raw mask over HTTP
    del result["mask"]
    return result
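A corresponding sketch for the segmenter (illustrative, not part of the commit): a frame whose bottom half is pure RGB green lands at H=60 in OpenCV's HSV scale, inside the default [30, 85] hue window, so coverage should come out near 0.5.

# Synthetic check for segment_field (illustrative, not part of the commit).
import numpy as np
from models.cv.segmentation import segment_field

frame = np.zeros((360, 640, 3), dtype=np.uint8)
frame[180:, :] = (0, 255, 0)   # RGB green, H=60 in HSV, inside [30, 85]

result = segment_field(frame)
print(round(result["coverage"], 2))    # ~0.5, minus morphology edge effects
print(len(result["boundary"]) > 0)     # True: one large contour found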
core/gpu/models/models.py (Normal file, 136 lines)
@@ -0,0 +1,136 @@
"""
Pydantic Models - GENERATED FILE

Do not edit directly. Regenerate using modelgen.
"""

from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional
from uuid import UUID

from pydantic import BaseModel, Field


class DetectRequest(BaseModel):
    """Request body for object detection."""
    image: str
    model: Optional[str] = None
    confidence: Optional[float] = None
    target_classes: Optional[List[str]] = None


class BBox(BaseModel):
    """A detected bounding box."""
    x: int
    y: int
    w: int
    h: int
    confidence: float
    label: str


class DetectResponse(BaseModel):
    """Response from object detection."""
    detections: List[BBox] = Field(default_factory=list)


class OCRRequest(BaseModel):
    """Request body for OCR."""
    image: str
    languages: Optional[List[str]] = None


class OCRTextResult(BaseModel):
    """A single OCR text extraction result."""
    text: str
    confidence: float
    bbox: List[int] = Field(default_factory=list)


class OCRResponse(BaseModel):
    """Response from OCR."""
    results: List[OCRTextResult] = Field(default_factory=list)


class PreprocessRequest(BaseModel):
    """Request body for image preprocessing."""
    image: str
    binarize: bool = False
    deskew: bool = False
    contrast: bool = True


class PreprocessResponse(BaseModel):
    """Response from preprocessing."""
    image: str


class VLMRequest(BaseModel):
    """Request body for visual language model query."""
    image: str
    prompt: str
    model: Optional[str] = None


class VLMResponse(BaseModel):
    """Response from VLM."""
    brand: str
    confidence: float
    reasoning: str


class AnalyzeRegionsRequest(BaseModel):
    """Request body for CV region analysis."""
    image: str
    edge_canny_low: int = 50
    edge_canny_high: int = 150
    edge_hough_threshold: int = 80
    edge_hough_min_length: int = 100
    edge_hough_max_gap: int = 10
    edge_pair_max_distance: int = 200
    edge_pair_min_distance: int = 15


class RegionBox(BaseModel):
    """A candidate region from CV analysis."""
    x: int
    y: int
    w: int
    h: int
    confidence: float
    label: str


class AnalyzeRegionsResponse(BaseModel):
    """Response from CV region analysis."""
    regions: List[RegionBox] = Field(default_factory=list)


class AnalyzeRegionsDebugResponse(BaseModel):
    """Response from CV region analysis with debug overlays."""
    regions: List[RegionBox] = Field(default_factory=list)
    edge_overlay_b64: str = ""
    lines_overlay_b64: str = ""
    horizontal_count: int = 0
    pair_count: int = 0


class SegmentFieldRequest(BaseModel):
    """Request body for field segmentation."""
    image: str
    hue_low: int = 30
    hue_high: int = 85
    sat_low: int = 30
    sat_high: int = 255
    val_low: int = 30
    val_high: int = 255
    morph_kernel: int = 15
    min_area_ratio: float = 0.05


class SegmentFieldResponse(BaseModel):
    """Response from field segmentation."""
    boundary: List[List[int]] = Field(default_factory=list)
    coverage: float = 0.0
    mask_b64: str = ""


class SegmentFieldDebugResponse(BaseModel):
    """Response from field segmentation with debug overlay."""
    boundary: List[List[int]] = Field(default_factory=list)
    coverage: float = 0.0
    mask_overlay_b64: str = ""


class ConfigUpdate(BaseModel):
    """Request body for updating server configuration."""
    device: Optional[str] = None
    yolo_model: Optional[str] = None
    yolo_confidence: Optional[float] = None
    vram_budget_mb: Optional[int] = None
    strategy: Optional[str] = None
    ocr_languages: Optional[List[str]] = None
    ocr_min_confidence: Optional[float] = None
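These models are the wire contract for every endpoint; a round-trip sketch shows how the server consumes them (illustrative, not part of the commit; the field values are made up):

# Wire-format round trip with the generated models (illustrative, not part of the commit).
from models.models import DetectRequest, DetectResponse, BBox

req = DetectRequest.model_validate({"image": "<base64>", "confidence": 0.4})
assert req.model is None   # unset optionals stay None, so the server falls back to config

resp = DetectResponse(detections=[BBox(x=10, y=20, w=100, h=50, confidence=0.91, label="person")])
print(resp.model_dump_json())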
core/gpu/models/ocr.py (Normal file, 105 lines)
@@ -0,0 +1,105 @@
"""PaddleOCR 3.x text extraction wrapper."""

from __future__ import annotations

import logging

from models import registry
from config import get_config

logger = logging.getLogger(__name__)


def _load(languages: list[str]):
    from paddleocr import PaddleOCR
    key = f"ocr_{'_'.join(languages)}"
    model = PaddleOCR(lang=languages[0])  # PaddleOCR takes a single lang; the key still encodes the full list
    registry.put(key, model)
    return model


def _get(languages: list[str] | None = None):
    langs = languages or get_config()["ocr_languages"]
    key = f"ocr_{'_'.join(langs)}"
    model = registry.get(key)
    if model is None:
        model = _load(langs)
    return model


def _parse_raw(raw) -> list[tuple[list, str, float]]:
    """
    Parse PaddleOCR output into (points, text, confidence) tuples.

    PaddleOCR 3.x changed the result format. Two known layouts:

    Layout A — dict-based (new pipeline API):
        raw = [{'rec_texts': [...], 'rec_scores': [...], 'dt_polys': [...]}]

    Layout B — nested list (2.x compat / some 3.x builds):
        raw = [[ [points, [text, score]], ... ]]
        raw = [[ [points, [text, score], [cls, cls_score]], ... ]]  # with angle cls
    """
    results = []

    for page in raw:
        if not page:
            continue

        # Layout A: dict with parallel lists
        if isinstance(page, dict):
            texts = page.get("rec_texts", [])
            scores = page.get("rec_scores", [])
            polys = page.get("dt_polys", [])
            for points, text, confidence in zip(polys, texts, scores):
                results.append((points, text, float(confidence)))
            continue

        # Layout B: list of per-line entries
        for line in page:
            if not line:
                continue

            # line[0] is always the polygon points
            points = line[0]

            # line[1] is [text, score] — ignore any extra elements (angle cls etc.)
            rec = line[1]
            if isinstance(rec, (list, tuple)) and len(rec) >= 2:
                text, confidence = rec[0], rec[1]
            else:
                logger.warning("Unexpected OCR line format: %s", line)
                continue

            results.append((points, str(text), float(confidence)))

    return results


def ocr(image, languages: list[str] | None = None, min_confidence: float | None = None) -> list[dict]:
    """Run OCR on an image, return list of text result dicts."""
    cfg = get_config()
    min_conf = min_confidence if min_confidence is not None else cfg["ocr_min_confidence"]
    model = _get(languages)

    raw = model.ocr(image)
    logger.debug("OCR raw: %s", raw)

    parsed = _parse_raw(raw)

    results = []
    for points, text, confidence in parsed:
        if confidence < min_conf:
            continue

        xs = [p[0] for p in points]
        ys = [p[1] for p in points]

        results.append({
            "text": text,
            "confidence": confidence,
            "bbox": [int(min(xs)), int(min(ys)),
                     int(max(xs) - min(xs)), int(max(ys) - min(ys))],
        })

    return results
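The two layouts `_parse_raw` handles can be pinned down with tiny fixtures (illustrative, not part of the commit; the polygon and text values are made up):

# Fixture-based check of _parse_raw's two layouts (illustrative, not part of the commit).
from models.ocr import _parse_raw

poly = [[0, 0], [50, 0], [50, 20], [0, 20]]

# Layout A: dict with parallel lists (PaddleOCR 3.x pipeline API)
raw_a = [{"rec_texts": ["EMIRATES"], "rec_scores": [0.97], "dt_polys": [poly]}]

# Layout B: nested per-line lists (2.x-compatible output)
raw_b = [[[poly, ["EMIRATES", 0.97]]]]

assert _parse_raw(raw_a) == [(poly, "EMIRATES", 0.97)]
assert _parse_raw(raw_b) == [(poly, "EMIRATES", 0.97)]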
core/gpu/models/preprocess.py (Normal file, 117 lines)
@@ -0,0 +1,117 @@
"""
Image preprocessing pipeline for crops before OCR.

Each step is independently toggleable via config.
Operates on numpy arrays (BGR or RGB), returns processed array.
"""

from __future__ import annotations

import logging

import numpy as np

logger = logging.getLogger(__name__)


def binarize(image: np.ndarray, threshold: int = 128) -> np.ndarray:
    """Convert to grayscale and apply Otsu binarization."""
    import cv2

    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    else:
        gray = image

    _, binary = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)  # Otsu picks its own threshold; the arg is ignored

    # Convert back to 3-channel for downstream compatibility
    result = cv2.cvtColor(binary, cv2.COLOR_GRAY2RGB)
    return result


def deskew(image: np.ndarray) -> np.ndarray:
    """Correct slight rotation using minimum area rectangle."""
    import cv2

    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    else:
        gray = image

    coords = np.column_stack(np.where(gray < 128))
    if len(coords) < 10:
        return image

    rect = cv2.minAreaRect(coords)
    angle = rect[-1]

    # Normalize angle
    if angle < -45:
        angle = -(90 + angle)
    else:
        angle = -angle

    if abs(angle) < 0.5:
        return image

    h, w = image.shape[:2]
    center = (w // 2, h // 2)
    rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
    result = cv2.warpAffine(
        image, rotation_matrix, (w, h),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_REPLICATE,
    )
    return result


def enhance_contrast(image: np.ndarray) -> np.ndarray:
    """Apply CLAHE (adaptive histogram equalization) for contrast normalization."""
    import cv2

    if len(image.shape) == 3:
        lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)
        l_channel = lab[:, :, 0]
    else:
        l_channel = image

    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(l_channel)

    if len(image.shape) == 3:
        lab[:, :, 0] = enhanced
        result = cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)
    else:
        result = enhanced

    return result


def preprocess(
    image: np.ndarray,
    do_binarize: bool = False,
    do_deskew: bool = False,
    do_contrast: bool = True,
) -> np.ndarray:
    """
    Run the preprocessing pipeline on a crop image.

    Each step is independently toggleable. Order: contrast → deskew → binarize.
    Contrast first (works best on color), binarize last (destroys color info).
    """
    result = image

    if do_contrast:
        result = enhance_contrast(result)
        logger.debug("Preprocessing: contrast enhanced")

    if do_deskew:
        result = deskew(result)
        logger.debug("Preprocessing: deskewed")

    if do_binarize:
        result = binarize(result)
        logger.debug("Preprocessing: binarized")

    return result
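A sketch of the full pipeline on a dummy crop (illustrative, not part of the commit), exercising the contrast → deskew → binarize ordering described in the docstring:

# Pipeline order demo on a dummy crop (illustrative, not part of the commit).
import numpy as np
from models.preprocess import preprocess

crop = np.random.randint(0, 255, (64, 200, 3), dtype=np.uint8)

out = preprocess(crop, do_binarize=True, do_deskew=True, do_contrast=True)
assert out.shape == crop.shape           # binarize converts back to 3 channels
assert set(np.unique(out)) <= {0, 255}   # fully binarized output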
core/gpu/models/registry.py (Normal file, 37 lines)
@@ -0,0 +1,37 @@
"""
Model registry — manages loaded models and VRAM lifecycle.
"""

from __future__ import annotations

import logging

logger = logging.getLogger(__name__)

_models: dict[str, object] = {}


def get(name: str) -> object | None:
    return _models.get(name)


def put(name: str, model: object) -> None:
    _models[name] = model
    logger.info("Loaded %s", name)


def unload(name: str) -> bool:
    if name in _models:
        del _models[name]
        logger.info("Unloaded %s", name)
        return True
    return False


def loaded() -> list[str]:
    return list(_models.keys())


def clear() -> None:
    _models.clear()
    logger.info("All models unloaded")
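Each wrapper (yolo, ocr, vlm) follows the same get-or-load pattern against this registry; a condensed sketch of that pattern (illustrative, not part of the commit):

# The get-or-load pattern used by the model wrappers (illustrative, not part of the commit).
from models import registry

def get_or_load(name: str, loader):
    model = registry.get(name)
    if model is None:
        model = loader()          # expensive: downloads weights / allocates VRAM
        registry.put(name, model)
    return model

# PUT /config unloads stale YOLO weights via registry.unload(old_model),
# and server shutdown calls registry.clear() to drop everything at once.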
core/gpu/models/vlm.py (Normal file, 100 lines)
@@ -0,0 +1,100 @@
"""moondream2 visual language model wrapper."""

from __future__ import annotations

import logging

from models import registry
from config import get_config

logger = logging.getLogger(__name__)

_MODEL_KEY = "vlm_moondream2"


def _load():
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    device = get_config().get("device", "auto")
    if device == "auto":
        device = "cuda" if torch.cuda.is_available() else "cpu"

    logger.info("Loading moondream2 (device=%s)...", device)

    model_id = "vikhyatk/moondream2"
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    dtype = torch.float16 if "cuda" in device else torch.float32
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        trust_remote_code=True,
        torch_dtype=dtype,  # torch_dtype works across the whole transformers 4.x pin range
        device_map=device,
    )

    wrapper = {"model": model, "tokenizer": tokenizer}
    registry.put(_MODEL_KEY, wrapper)
    logger.info("moondream2 loaded")
    return wrapper


def _get():
    wrapper = registry.get(_MODEL_KEY)
    if wrapper is None:
        wrapper = _load()
    return wrapper


def query(image, prompt: str) -> dict:
    """
    Query moondream2 with an image crop and prompt.

    Returns {"brand": str, "confidence": float, "reasoning": str}
    """
    from PIL import Image as PILImage

    wrapper = _get()
    model = wrapper["model"]
    tokenizer = wrapper["tokenizer"]

    # Convert numpy array to PIL if needed
    if not isinstance(image, PILImage.Image):
        image = PILImage.fromarray(image)

    enc_image = model.encode_image(image)
    answer = model.answer_question(enc_image, prompt, tokenizer)

    # Parse response — moondream2 returns free text, extract brand + confidence
    result = _parse_vlm_response(answer)
    return result


def _parse_vlm_response(answer: str) -> dict:
    """
    Parse moondream2 free-text response into structured output.

    Expected format from prompt: "brand, confidence (0-1), reasoning"
    Falls back gracefully if format doesn't match.
    """
    answer = answer.strip()
    parts = [p.strip() for p in answer.split(",", 2)]

    brand = parts[0] if parts else ""
    confidence = 0.5
    reasoning = answer

    if len(parts) >= 2:
        try:
            confidence = float(parts[1])
            confidence = max(0.0, min(1.0, confidence))
        except ValueError:
            pass

    if len(parts) >= 3:
        reasoning = parts[2]

    return {
        "brand": brand,
        "confidence": confidence,
        "reasoning": reasoning,
    }
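The parser leans on prompting the model for "brand, confidence (0-1), reasoning": a well-formed answer splits cleanly, and anything else degrades to brand-only with confidence 0.5. A sketch with sample answers (illustrative, not part of the commit; the answer strings are made up):

# _parse_vlm_response behavior on sample answers (illustrative, not part of the commit).
from models.vlm import _parse_vlm_response

good = _parse_vlm_response("Nike, 0.8, swoosh visible on the left hoarding")
assert good == {"brand": "Nike", "confidence": 0.8,
                "reasoning": "swoosh visible on the left hoarding"}

# Free text that ignores the format: the whole answer becomes brand and reasoning.
loose = _parse_vlm_response("It appears to be a sportswear advert")
assert loose["confidence"] == 0.5 and loose["reasoning"] == loose["brand"]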
core/gpu/models/yolo.py (Normal file, 54 lines)
@@ -0,0 +1,54 @@
"""YOLO object detection model wrapper."""

from __future__ import annotations

import logging

from models import registry
from config import get_config, get_device

logger = logging.getLogger(__name__)


def _load(model_name: str):
    from ultralytics import YOLO
    device = get_device()
    model = YOLO(model_name)
    model.to(device)
    registry.put(model_name, model)
    return model


def _get(model_name: str | None = None):
    name = model_name or get_config()["yolo_model"]
    model = registry.get(name)
    if model is None:
        model = _load(name)
    return model


def detect(image, model_name: str | None = None, confidence: float | None = None, target_classes: list[str] | None = None) -> list[dict]:
    """Run YOLO detection, return list of bbox dicts."""
    cfg = get_config()
    conf = confidence if confidence is not None else cfg["yolo_confidence"]
    model = _get(model_name)

    results = model(image, conf=conf, verbose=False)

    detections = []
    for r in results:
        for box in r.boxes:
            x1, y1, x2, y2 = box.xyxy[0].tolist()
            label = r.names[int(box.cls[0])]

            if target_classes and label not in target_classes:
                continue

            detections.append({
                "x": int(x1), "y": int(y1),
                "w": int(x2 - x1), "h": int(y2 - y1),
                "confidence": float(box.conf[0]),
                "label": label,
            })

    return detections
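Wiring it together from the gpu directory (illustrative, not part of the commit; the first call downloads yolov8n.pt and caches the model in the registry, and a blank frame simply yields no boxes):

# Direct detect() call, bypassing the HTTP layer (illustrative, not part of the commit).
import numpy as np
from models.yolo import detect

frame = np.zeros((640, 640, 3), dtype=np.uint8)   # stand-in for a decoded video frame

boxes = detect(frame, confidence=0.25, target_classes=["person"])
for b in boxes:
    print(b["label"], b["confidence"], (b["x"], b["y"], b["w"], b["h"]))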
core/gpu/requirements.txt (Normal file, 31 lines)
@@ -0,0 +1,31 @@
fastapi>=0.109.0
uvicorn[standard]>=0.27.0
rapidfuzz>=3.0.0
Pillow>=10.0.0
redis>=5.0.0

# --- GPU-specific installs (mcrn: RTX 3080, CUDA toolkit 12.8) ---
#
# torch: must be installed from the PyTorch index, NOT from PyPI.
# cu126 is the closest build to CUDA 12.8 (no cu128 wheel yet; cu126 is forward-compatible).
# Install with:
#   uv pip install --reinstall torch torchvision --index-url https://download.pytorch.org/whl/cu126
#
# ultralytics pulls torch as a dependency — reinstall torch after ultralytics to ensure
# the correct CUDA build. Mixing the PyPI torch with CUDA 12.8 causes NCCL symbol errors.
ultralytics>=8.0.0

# paddlepaddle-gpu: NOT available on PyPI. Install from PaddlePaddle's package index.
# cu126 build works on CUDA 12.8.
# Install with:
#   uv pip install paddlepaddle-gpu==3.0.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
paddleocr>=3.0.0

# VLM (moondream2) — uses torch (already installed above)
# Pinned <5: transformers 5.x broke moondream2's custom model code
# (all_tied_weights_keys API change). Also needs accelerate for device_map.
transformers>=4.40.0,<5
accelerate>=0.27.0

# Preprocessing (phase 12)
opencv-python-headless>=4.8.0
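After following the index-specific installs above, a quick sanity check confirms the CUDA builds actually landed (illustrative, not part of the commit; paddle.utils.run_check prints its own diagnostics):

# Post-install sanity check for the CUDA builds (illustrative, not part of the commit).
import torch
print(torch.__version__, torch.version.cuda)   # expect a +cu126 build
print(torch.cuda.is_available())               # True on the RTX 3080 host

import paddle
paddle.utils.run_check()                       # verifies paddlepaddle-gpu can see the device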
core/gpu/run.sh (Executable file, 54 lines)
@@ -0,0 +1,54 @@
#!/bin/bash
# Run the inference server
#
# Usage:
#   ./run.sh              # Local (pip install -r requirements.txt first)
#   ./run.sh docker       # Docker (CPU)
#   ./run.sh docker-gpu   # Docker with GPU
#   ./run.sh stop         # Stop Docker container

set -e
cd "$(dirname "${BASH_SOURCE[0]}")"

# Load env (create from template if missing)
if [ ! -f .env ]; then
    if [ -f .env.template ]; then
        cp .env.template .env
        echo "Created .env from template — edit as needed"
    fi
fi

if [ -f .env ]; then
    set -a
    source .env
    set +a
fi

case "${1:-local}" in
    local)
        python server.py
        ;;
    docker)
        docker build -t mpr-inference .
        ENV_FLAG=""; [ -f .env ] && ENV_FLAG="--env-file .env"
        docker run --rm -p "${PORT:-8000}:8000" \
            $ENV_FLAG \
            --name mpr-inference \
            mpr-inference
        ;;
    docker-gpu)
        docker build -t mpr-inference .
        ENV_FLAG=""; [ -f .env ] && ENV_FLAG="--env-file .env"
        docker run --rm --gpus all -p "${PORT:-8000}:8000" \
            $ENV_FLAG \
            --name mpr-inference \
            mpr-inference
        ;;
    stop)
        docker stop mpr-inference 2>/dev/null || true
        ;;
    *)
        echo "Usage: ./run.sh [local|docker|docker-gpu|stop]"
        exit 1
        ;;
esac
core/gpu/server.py (Normal file, 399 lines)
@@ -0,0 +1,399 @@
"""
Inference server — thin HTTP routes over model wrappers.

Config lives in config.py, model logic in models/.
This file is just the FastAPI glue.

Usage:
    cd gpu && python server.py
"""

from __future__ import annotations

import base64
import io
import logging
import os
import time
from contextlib import asynccontextmanager

import numpy as np
from fastapi import FastAPI, HTTPException, Request
from PIL import Image
from pydantic import BaseModel

from emit import log as emit_log

from config import get_config, get_device, update_config
from models import registry
from models.yolo import detect as yolo_detect
from models.ocr import ocr as ocr_run
from models.vlm import query as vlm_query

logger = logging.getLogger(__name__)


def _decode_image(b64: str) -> np.ndarray:
    data = base64.b64decode(b64)
    img = Image.open(io.BytesIO(data)).convert("RGB")
    return np.array(img)


def _job_ctx(request: Request) -> tuple[str, str]:
    """Extract job_id and log_level from request headers."""
    job_id = request.headers.get("x-job-id", "")
    log_level = request.headers.get("x-log-level", "INFO")
    return job_id, log_level


def _gpu_log(job_id: str, log_level: str, stage: str, level: str, msg: str):
    """Emit a log event if job context is present."""
    if job_id:
        emit_log(job_id, stage, level, msg, log_level=log_level)


# --- Request/Response models (generated from core/schema/models/inference.py) ---

from models.models import (
    AnalyzeRegionsDebugResponse,
    AnalyzeRegionsRequest,
    AnalyzeRegionsResponse,
    BBox,
    ConfigUpdate,
    DetectRequest,
    DetectResponse,
    OCRRequest,
    OCRResponse,
    OCRTextResult,
    PreprocessRequest,
    PreprocessResponse,
    RegionBox,
    SegmentFieldRequest,
    SegmentFieldResponse,
    SegmentFieldDebugResponse,
    VLMRequest,
    VLMResponse,
)


# --- App ---

@asynccontextmanager
async def lifespan(app: FastAPI):
    logger.info("Inference server starting (device=%s)", get_device())
    yield
    logger.info("Shutting down")
    registry.clear()


app = FastAPI(title="MPR Inference Server", lifespan=lifespan)


@app.get("/health")
def health():
    cfg = get_config()
    return {
        "status": "ok",
        "device": get_device(),
        "loaded_models": registry.loaded(),
        "vram_budget_mb": cfg["vram_budget_mb"],
        "strategy": cfg["strategy"],
    }


@app.get("/config")
def read_config():
    return {**get_config(), "device_resolved": get_device()}


@app.put("/config")
def write_config(update: ConfigUpdate):
    changes = update.model_dump(exclude_none=True)
    if not changes:
        return get_config()

    # Unload model if it changed
    old_model = get_config().get("yolo_model")
    if "yolo_model" in changes and changes["yolo_model"] != old_model:
        registry.unload(old_model)

    update_config(changes)
    logger.info("Config updated: %s", changes)
    return {**get_config(), "device_resolved": get_device()}


@app.post("/models/unload")
def unload_model(body: dict):
    name = body.get("model", "")
    unloaded = registry.unload(name)
    return {"status": "unloaded" if unloaded else "not_loaded", "model": name}


@app.post("/detect", response_model=DetectResponse)
def detect(req: DetectRequest, request: Request):
    job_id, log_level = _job_ctx(request)

    try:
        t0 = time.monotonic()
        image = _decode_image(req.image)
        decode_ms = (time.monotonic() - t0) * 1000
        h, w = image.shape[:2]
        _gpu_log(job_id, log_level, "GPU:YOLO", "DEBUG",
                 f"Decoded {w}x{h} image in {decode_ms:.0f}ms")
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Bad image: {e}")

    try:
        t0 = time.monotonic()
        results = yolo_detect(
            image,
            model_name=req.model,
            confidence=req.confidence,
            target_classes=req.target_classes,
        )
        infer_ms = (time.monotonic() - t0) * 1000
        _gpu_log(job_id, log_level, "GPU:YOLO", "DEBUG",
                 f"Inference: {len(results)} detections in {infer_ms:.0f}ms "
                 f"(model={req.model}, conf={req.confidence})")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Detection failed: {e}")

    return DetectResponse(detections=[BBox(**r) for r in results])


@app.post("/ocr", response_model=OCRResponse)
def ocr(req: OCRRequest, request: Request):
    job_id, log_level = _job_ctx(request)

    try:
        image = _decode_image(req.image)
        h, w = image.shape[:2]
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Bad image: {e}")

    try:
        t0 = time.monotonic()
        results = ocr_run(image, languages=req.languages)
        infer_ms = (time.monotonic() - t0) * 1000
        texts = [r["text"][:20] for r in results]
        _gpu_log(job_id, log_level, "GPU:OCR", "DEBUG",
                 f"OCR {w}x{h}: {infer_ms:.0f}ms → {len(results)} results {texts}")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"OCR failed: {e}")

    return OCRResponse(results=[OCRTextResult(**r) for r in results])


@app.post("/preprocess", response_model=PreprocessResponse)
def preprocess_image(req: PreprocessRequest):
    try:
        image = _decode_image(req.image)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Bad image: {e}")

    try:
        from models.preprocess import preprocess
        processed = preprocess(
            image,
            do_binarize=req.binarize,
            do_deskew=req.deskew,
            do_contrast=req.contrast,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Preprocessing failed: {e}")

    img = Image.fromarray(processed)
    buf = io.BytesIO()
    img.save(buf, format="JPEG", quality=90)
    result_b64 = base64.b64encode(buf.getvalue()).decode()

    return PreprocessResponse(image=result_b64)


@app.post("/vlm", response_model=VLMResponse)
def vlm(req: VLMRequest, request: Request):
    job_id, log_level = _job_ctx(request)

    try:
        image = _decode_image(req.image)
        h, w = image.shape[:2]
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Bad image: {e}")

    try:
        t0 = time.monotonic()
        result = vlm_query(image, req.prompt)
        infer_ms = (time.monotonic() - t0) * 1000
        _gpu_log(job_id, log_level, "GPU:VLM", "DEBUG",
                 f"VLM {w}x{h}: {infer_ms:.0f}ms → "
                 f"brand='{result.get('brand', '')}' conf={result.get('confidence', 0):.2f}")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"VLM failed: {e}")

    return VLMResponse(**result)


@app.post("/detect_edges", response_model=AnalyzeRegionsResponse)
def detect_edges_endpoint(req: AnalyzeRegionsRequest, request: Request):
    job_id, log_level = _job_ctx(request)

    try:
        image = _decode_image(req.image)
        h, w = image.shape[:2]
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Bad image: {e}")

    try:
        t0 = time.monotonic()
        from models.cv.edges import detect_edges

        edge_regions = detect_edges(
            image,
            canny_low=req.edge_canny_low,
            canny_high=req.edge_canny_high,
            hough_threshold=req.edge_hough_threshold,
            hough_min_length=req.edge_hough_min_length,
            hough_max_gap=req.edge_hough_max_gap,
            pair_max_distance=req.edge_pair_max_distance,
            pair_min_distance=req.edge_pair_min_distance,
        )
        infer_ms = (time.monotonic() - t0) * 1000

        _gpu_log(job_id, log_level, "GPU:CV", "DEBUG",
                 f"Edge analysis {w}x{h}: {infer_ms:.0f}ms → {len(edge_regions)} regions")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Region analysis failed: {e}")

    boxes = [RegionBox(**r) for r in edge_regions]
    return AnalyzeRegionsResponse(regions=boxes)


@app.post("/detect_edges/debug", response_model=AnalyzeRegionsDebugResponse)
def detect_edges_debug_endpoint(req: AnalyzeRegionsRequest, request: Request):
    job_id, log_level = _job_ctx(request)

    try:
        image = _decode_image(req.image)
        h, w = image.shape[:2]
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Bad image: {e}")

    try:
        t0 = time.monotonic()
        from models.cv.edges import detect_edges_debug

        result = detect_edges_debug(
            image,
            canny_low=req.edge_canny_low,
            canny_high=req.edge_canny_high,
            hough_threshold=req.edge_hough_threshold,
            hough_min_length=req.edge_hough_min_length,
            hough_max_gap=req.edge_hough_max_gap,
            pair_max_distance=req.edge_pair_max_distance,
            pair_min_distance=req.edge_pair_min_distance,
        )
        infer_ms = (time.monotonic() - t0) * 1000

        _gpu_log(job_id, log_level, "GPU:CV", "DEBUG",
                 f"Edge debug {w}x{h}: {infer_ms:.0f}ms → {len(result['regions'])} regions, "
                 f"{result['horizontal_count']} horizontals, {result['pair_count']} pairs")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Region debug analysis failed: {e}")

    boxes = [RegionBox(**r) for r in result["regions"]]
    response = AnalyzeRegionsDebugResponse(
        regions=boxes,
        edge_overlay_b64=result["edge_overlay_b64"],
        lines_overlay_b64=result["lines_overlay_b64"],
        horizontal_count=result["horizontal_count"],
        pair_count=result["pair_count"],
    )
    return response


@app.post("/segment_field", response_model=SegmentFieldResponse)
def segment_field_endpoint(req: SegmentFieldRequest, request: Request):
    job_id, log_level = _job_ctx(request)

    try:
        image = _decode_image(req.image)
        h, w = image.shape[:2]
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Bad image: {e}")

    try:
        t0 = time.monotonic()
        from models.cv.segmentation import segment_field

        result = segment_field(
            image,
            hue_low=req.hue_low,
            hue_high=req.hue_high,
            sat_low=req.sat_low,
            sat_high=req.sat_high,
            val_low=req.val_low,
            val_high=req.val_high,
            morph_kernel=req.morph_kernel,
            min_area_ratio=req.min_area_ratio,
        )
        infer_ms = (time.monotonic() - t0) * 1000

        # Encode mask as base64 PNG for downstream use
        import cv2
        _, buf = cv2.imencode(".png", result["mask"])
        mask_b64 = base64.b64encode(buf.tobytes()).decode()

        _gpu_log(job_id, log_level, "GPU:CV", "DEBUG",
                 f"Field segmentation {w}x{h}: {infer_ms:.0f}ms, coverage={result['coverage']:.1%}")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Field segmentation failed: {e}")

    return SegmentFieldResponse(
        boundary=result["boundary"],
        coverage=result["coverage"],
        mask_b64=mask_b64,
    )


@app.post("/segment_field/debug", response_model=SegmentFieldDebugResponse)
def segment_field_debug_endpoint(req: SegmentFieldRequest, request: Request):
    job_id, log_level = _job_ctx(request)

    try:
        image = _decode_image(req.image)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Bad image: {e}")

    try:
        from models.cv.segmentation import segment_field_debug

        result = segment_field_debug(
            image,
            hue_low=req.hue_low,
            hue_high=req.hue_high,
            sat_low=req.sat_low,
            sat_high=req.sat_high,
            val_low=req.val_low,
            val_high=req.val_high,
            morph_kernel=req.morph_kernel,
            min_area_ratio=req.min_area_ratio,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Field segmentation debug failed: {e}")

    return SegmentFieldDebugResponse(
        boundary=result["boundary"],
        coverage=result["coverage"],
        mask_overlay_b64=result["mask_overlay_b64"],
    )


if __name__ == "__main__":
    import uvicorn

    logging.basicConfig(level=logging.INFO, format="%(levelname)-7s %(name)s — %(message)s")
    host = os.environ.get("HOST", "0.0.0.0")
    port = int(os.environ.get("PORT", "8000"))
    uvicorn.run(app, host=host, port=port)
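A minimal client against the running server (illustrative, not part of the commit; it uses requests, which is not in requirements.txt since it is only needed on the caller's side, and the filename and job id are made up):

# Example client for POST /detect (illustrative, not part of the commit).
import base64
import requests

with open("frame.jpg", "rb") as f:
    b64 = base64.b64encode(f.read()).decode()

resp = requests.post(
    "http://localhost:8000/detect",
    json={"image": b64, "confidence": 0.4, "target_classes": ["person"]},
    headers={"X-Job-Id": "job-123", "X-Log-Level": "DEBUG"},  # optional: mirrors logs to Redis
)
resp.raise_for_status()
print(resp.json()["detections"])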