198 lines
3.8 KiB
Python
198 lines
3.8 KiB
Python
"""
Inference Server API Schema Definitions

Source of truth for GPU inference server request/response types.
Generates: Pydantic (gpu/models/inference_contract.py)

These are the wire-format types for the HTTP API between the
pipeline (detect/) and the inference server (gpu/).
"""
|
|
|
|
from dataclasses import dataclass, field
|
|
from typing import List, Optional
|
|
|
|
|
|
# --- Object Detection (YOLO) ---
|
|
|
|
|
|
@dataclass
class DetectRequest:
    """Request body for object detection."""

    # Input image, carried as a base64-encoded JPEG string.
    image: str
    # Optional overrides; each is None when the caller omits it.
    # NOTE(review): presumably None means "use the server-side setting" — confirm.
    model: Optional[str] = None
    confidence: Optional[float] = None
    target_classes: Optional[List[str]] = None
|
|
|
|
|
|
@dataclass
class BBox:
    """A detected bounding box."""

    # Box geometry as four integers.
    # NOTE(review): presumably top-left corner (x, y) plus width/height in pixels — confirm.
    x: int
    y: int
    w: int
    h: int
    # Score and class name reported for this detection.
    confidence: float
    label: str
|
|
|
|
|
|
@dataclass
class DetectResponse:
    """Response from object detection."""

    # Detected boxes; default_factory gives each instance its own empty list.
    detections: List[BBox] = field(default_factory=list)
|
|
|
|
|
|
# --- OCR ---
|
|
|
|
|
|
@dataclass
class OCRRequest:
    """Request body for OCR."""

    # Input image, carried as a base64-encoded JPEG string.
    image: str
    # Optional list of language identifiers; None when the caller omits it.
    # NOTE(review): presumably None falls back to the server's configured languages — confirm.
    languages: Optional[List[str]] = None
|
|
|
|
|
|
@dataclass
class OCRTextResult:
    """A single OCR text extraction result."""

    # Extracted text and the score attached to it.
    text: str
    confidence: float
    # Location of the text as [x, y, w, h]; default_factory gives each
    # instance its own empty list instead of a shared mutable default.
    bbox: List[int] = field(default_factory=list)
|
|
|
|
|
|
@dataclass
class OCRResponse:
    """Response from OCR."""

    # Extracted text results; default_factory gives each instance its own empty list.
    results: List[OCRTextResult] = field(default_factory=list)
|
|
|
|
|
|
# --- Preprocessing ---
|
|
|
|
|
|
@dataclass
class PreprocessRequest:
    """Request body for image preprocessing."""

    # Input image, carried as a base64-encoded JPEG string.
    image: str
    # Per-step toggles. Only contrast is on by default.
    binarize: bool = False
    deskew: bool = False
    contrast: bool = True
|
|
|
|
|
|
@dataclass
class PreprocessResponse:
    """Response from preprocessing."""

    # Processed image, carried as a base64-encoded JPEG string.
    image: str
|
|
|
|
|
|
# --- VLM ---
|
|
|
|
|
|
@dataclass
class VLMRequest:
    """Request body for visual language model query."""

    # Input image, carried as a base64-encoded JPEG string.
    image: str
    # Text prompt sent alongside the image.
    prompt: str
    # Optional model name; None when the caller omits it.
    # NOTE(review): presumably None selects the server's default model — confirm.
    model: Optional[str] = None
|
|
|
|
|
|
@dataclass
class VLMResponse:
    """Response from VLM."""

    # All three fields are required (no defaults).
    brand: str
    confidence: float
    reasoning: str
|
|
|
|
|
|
# --- CV Region Analysis ---
|
|
|
|
|
|
@dataclass
class AnalyzeRegionsRequest:
    """Request body for CV region analysis."""

    # Input image, carried as a base64-encoded JPEG string.
    image: str

    # Edge detection (Canny + HoughLinesP)
    # NOTE(review): the canny_* / hough_* values presumably map onto the
    # corresponding OpenCV parameters, and the pair_* values onto a line-pairing
    # step — confirm against the server implementation.
    edge_canny_low: int = 50
    edge_canny_high: int = 150
    edge_hough_threshold: int = 80
    edge_hough_min_length: int = 100
    edge_hough_max_gap: int = 10
    edge_pair_max_distance: int = 200
    edge_pair_min_distance: int = 15
|
|
|
|
|
|
@dataclass
class RegionBox:
    """A candidate region from CV analysis."""

    # Region geometry as four integers.
    # NOTE(review): presumably top-left corner (x, y) plus width/height in pixels — confirm.
    x: int
    y: int
    w: int
    h: int
    # Score and label attached to this candidate region.
    confidence: float
    label: str
|
|
|
|
|
|
@dataclass
class AnalyzeRegionsResponse:
    """Response from CV region analysis."""

    # Candidate regions; default_factory gives each instance its own empty list.
    regions: List[RegionBox] = field(default_factory=list)
|
|
|
|
|
|
@dataclass
class AnalyzeRegionsDebugResponse:
    """Response from CV region analysis with debug overlays."""

    # Candidate regions; default_factory gives each instance its own empty list.
    regions: List[RegionBox] = field(default_factory=list)
    # Canny edge image as base64 JPEG (empty string by default).
    edge_overlay_b64: str = ""
    # Frame with Hough lines drawn (empty string by default).
    lines_overlay_b64: str = ""
    # Debug counters, both defaulting to 0.
    # NOTE(review): presumably the number of horizontal lines found and the
    # number of line pairs formed from them — confirm.
    horizontal_count: int = 0
    pair_count: int = 0
|
|
|
|
|
|
# --- Server Config ---
|
|
|
|
|
|
@dataclass
class ConfigUpdate:
    """Request body for updating server configuration."""

    # Every field is optional and defaults to None.
    # NOTE(review): presumably only non-None fields are applied as changes — confirm.
    device: Optional[str] = None
    yolo_model: Optional[str] = None
    yolo_confidence: Optional[float] = None
    vram_budget_mb: Optional[int] = None
    strategy: Optional[str] = None
    ocr_languages: Optional[List[str]] = None
    ocr_min_confidence: Optional[float] = None
|
|
|
|
|
|
# --- Export list for modelgen ---
|
|
|
|
# Schema classes exported for modelgen, listed in the order they are defined above.
INFERENCE_VIEWS = [
    DetectRequest,
    BBox,
    DetectResponse,
    OCRRequest,
    OCRTextResult,
    OCRResponse,
    PreprocessRequest,
    PreprocessResponse,
    VLMRequest,
    VLMResponse,
    AnalyzeRegionsRequest,
    RegionBox,
    AnalyzeRegionsResponse,
    AnalyzeRegionsDebugResponse,
    ConfigUpdate,
]
|