# Files
# mediaproc/core/schema/models/inference.py
# 2026-03-30 07:22:14 -03:00
#
# 234 lines
# 4.7 KiB
# Python

"""
Inference Server API Schema Definitions
Source of truth for GPU inference server request/response types.
Generates: Pydantic (gpu/models/inference_contract.py)
These are the wire-format types for the HTTP API between the
pipeline (detect/) and the inference server (gpu/).
"""
from dataclasses import dataclass, field
from typing import List, Optional
# --- Object Detection (YOLO) ---
@dataclass
class DetectRequest:
    """Request body for object detection.

    Only ``image`` is required; every other field defaults to ``None``.
    NOTE(review): presumably ``None`` lets the server apply its own
    configured value -- confirm against the inference server.
    """

    image: str  # base64 JPEG
    model: Optional[str] = None
    confidence: Optional[float] = None
    target_classes: Optional[List[str]] = None
@dataclass
class BBox:
    """A detected bounding box.

    All fields are required. ``x``/``y``/``w``/``h`` describe the box,
    ``confidence`` is the detection score and ``label`` the class name.
    NOTE(review): units and origin of the coordinates are not stated
    here -- presumably pixels with (x, y) as the top-left corner; confirm.
    """

    x: int
    y: int
    w: int
    h: int
    confidence: float
    label: str
@dataclass
class DetectResponse:
    """Response from object detection.

    ``detections`` holds one ``BBox`` per detected object and defaults
    to a fresh empty list for every instance.
    """

    detections: List[BBox] = field(default_factory=list)
# --- OCR ---
@dataclass
class OCRRequest:
    """Request body for OCR.

    ``languages`` is optional and defaults to ``None``.
    NOTE(review): presumably ``None`` means the server's configured OCR
    languages are used -- confirm against the inference server.
    """

    image: str  # base64 JPEG
    languages: Optional[List[str]] = None
@dataclass
class OCRTextResult:
    """A single OCR text extraction result.

    ``text`` and ``confidence`` are required; ``bbox`` defaults to a
    fresh empty list for every instance.
    """

    text: str
    confidence: float
    bbox: List[int] = field(default_factory=list)  # [x, y, w, h]
@dataclass
class OCRResponse:
    """Response from OCR.

    ``results`` holds one ``OCRTextResult`` per extracted text item and
    defaults to a fresh empty list for every instance.
    """

    results: List[OCRTextResult] = field(default_factory=list)
# --- Preprocessing ---
@dataclass
class PreprocessRequest:
    """Request body for image preprocessing.

    The three boolean flags select preprocessing steps. By default only
    ``contrast`` is enabled; ``binarize`` and ``deskew`` are off.
    """

    image: str  # base64 JPEG
    binarize: bool = False
    deskew: bool = False
    contrast: bool = True
@dataclass
class PreprocessResponse:
    """Response from preprocessing.

    Carries the single required field ``image``.
    """

    image: str  # base64 JPEG of processed image
# --- VLM ---
@dataclass
class VLMRequest:
    """Request body for visual language model query.

    ``image`` and ``prompt`` are required; ``model`` defaults to ``None``.
    NOTE(review): presumably ``None`` selects the server's default
    model -- confirm against the inference server.
    """

    image: str  # base64 JPEG
    prompt: str
    model: Optional[str] = None
@dataclass
class VLMResponse:
    """Response from VLM.

    All three fields are required: the ``brand`` answer, its
    ``confidence`` score and the accompanying ``reasoning`` text.
    """

    brand: str
    confidence: float
    reasoning: str
# --- CV Region Analysis ---
@dataclass
class AnalyzeRegionsRequest:
    """Request body for CV region analysis.

    Only ``image`` is required; every ``edge_*`` tuning field has a
    default.
    NOTE(review): the units of the threshold/length/distance fields are
    not stated here -- presumably pixels; confirm against the server.
    """

    image: str  # base64 JPEG
    # Edge detection (Canny + HoughLinesP)
    edge_canny_low: int = 50
    edge_canny_high: int = 150
    edge_hough_threshold: int = 80
    edge_hough_min_length: int = 100
    edge_hough_max_gap: int = 10
    edge_pair_max_distance: int = 200
    edge_pair_min_distance: int = 15
@dataclass
class RegionBox:
    """A candidate region from CV analysis.

    All fields are required. ``x``/``y``/``w``/``h`` describe the
    region, ``confidence`` is its score and ``label`` its name.
    NOTE(review): units and origin of the coordinates are not stated
    here -- presumably pixels with (x, y) as the top-left corner; confirm.
    """

    x: int
    y: int
    w: int
    h: int
    confidence: float
    label: str
@dataclass
class AnalyzeRegionsResponse:
    """Response from CV region analysis.

    ``regions`` holds the candidate ``RegionBox`` items and defaults to
    a fresh empty list for every instance.
    """

    regions: List[RegionBox] = field(default_factory=list)
@dataclass
class AnalyzeRegionsDebugResponse:
    """Response from CV region analysis with debug overlays.

    Carries the same ``regions`` list as the plain response plus two
    overlay images and two counters. Every field has a default: an
    empty list, empty strings and zeros.
    """

    regions: List[RegionBox] = field(default_factory=list)
    edge_overlay_b64: str = ""  # Canny edge image as base64 JPEG
    lines_overlay_b64: str = ""  # frame with Hough lines drawn
    horizontal_count: int = 0
    pair_count: int = 0
# --- Field Segmentation ---
@dataclass
class SegmentFieldRequest:
    """Request body for field segmentation.

    Only ``image`` is required; every tuning field has a default.
    NOTE(review): the hue/sat/val pairs look like lower/upper HSV
    bounds and ``morph_kernel`` like a morphology kernel size --
    confirm meaning and value ranges against the server.
    """

    image: str  # base64 JPEG
    hue_low: int = 30
    hue_high: int = 85
    sat_low: int = 30
    sat_high: int = 255
    val_low: int = 30
    val_high: int = 255
    morph_kernel: int = 15
    min_area_ratio: float = 0.05
@dataclass
class SegmentFieldResponse:
    """Response from field segmentation.

    Every field has a default: ``boundary`` is a fresh empty list per
    instance, ``coverage`` is ``0.0`` and ``mask_b64`` is empty.
    NOTE(review): ``boundary`` is a list of integer lists, presumably
    [x, y] points of the field outline -- confirm against the server.
    """

    boundary: List[List[int]] = field(default_factory=list)
    coverage: float = 0.0
    mask_b64: str = ""  # binary mask as base64 PNG (for downstream stages)
@dataclass
class SegmentFieldDebugResponse:
    """Response from field segmentation with debug overlay.

    Every field has a default: ``boundary`` is a fresh empty list per
    instance, ``coverage`` is ``0.0`` and ``mask_overlay_b64`` is empty.
    NOTE(review): the encoding of ``mask_overlay_b64`` is not stated
    here -- presumably a base64 image; confirm against the server.
    """

    boundary: List[List[int]] = field(default_factory=list)
    coverage: float = 0.0
    mask_overlay_b64: str = ""
# --- Server Config ---
@dataclass
class ConfigUpdate:
    """Request body for updating server configuration.

    Every field is optional and defaults to ``None``.
    NOTE(review): presumably a ``None`` field leaves that setting
    unchanged on the server -- confirm against the inference server.
    """

    device: Optional[str] = None
    yolo_model: Optional[str] = None
    yolo_confidence: Optional[float] = None
    vram_budget_mb: Optional[int] = None
    strategy: Optional[str] = None
    ocr_languages: Optional[List[str]] = None
    ocr_min_confidence: Optional[float] = None
# --- Export list for modelgen ---
# Schema classes exported to modelgen, grouped by API area.
# The element order is part of the list's value and is kept unchanged.
INFERENCE_VIEWS = [
    # Object detection
    DetectRequest, BBox, DetectResponse,
    # OCR
    OCRRequest, OCRTextResult, OCRResponse,
    # Preprocessing
    PreprocessRequest, PreprocessResponse,
    # VLM
    VLMRequest, VLMResponse,
    # CV region analysis
    AnalyzeRegionsRequest, RegionBox,
    AnalyzeRegionsResponse, AnalyzeRegionsDebugResponse,
    # Field segmentation
    SegmentFieldRequest, SegmentFieldResponse, SegmentFieldDebugResponse,
    # Server config
    ConfigUpdate,
]