""" Inference Server API Schema Definitions Source of truth for GPU inference server request/response types. Generates: Pydantic (gpu/models/inference_contract.py) These are the wire-format types for the HTTP API between the pipeline (detect/) and the inference server (gpu/). """ from dataclasses import dataclass, field from typing import List, Optional # --- Object Detection (YOLO) --- @dataclass class DetectRequest: """Request body for object detection.""" image: str # base64 JPEG model: Optional[str] = None confidence: Optional[float] = None target_classes: Optional[List[str]] = None @dataclass class BBox: """A detected bounding box.""" x: int y: int w: int h: int confidence: float label: str @dataclass class DetectResponse: """Response from object detection.""" detections: List[BBox] = field(default_factory=list) # --- OCR --- @dataclass class OCRRequest: """Request body for OCR.""" image: str # base64 JPEG languages: Optional[List[str]] = None @dataclass class OCRTextResult: """A single OCR text extraction result.""" text: str confidence: float bbox: List[int] = field(default_factory=list) # [x, y, w, h] @dataclass class OCRResponse: """Response from OCR.""" results: List[OCRTextResult] = field(default_factory=list) # --- Preprocessing --- @dataclass class PreprocessRequest: """Request body for image preprocessing.""" image: str # base64 JPEG binarize: bool = False deskew: bool = False contrast: bool = True @dataclass class PreprocessResponse: """Response from preprocessing.""" image: str # base64 JPEG of processed image # --- VLM --- @dataclass class VLMRequest: """Request body for visual language model query.""" image: str # base64 JPEG prompt: str model: Optional[str] = None @dataclass class VLMResponse: """Response from VLM.""" brand: str confidence: float reasoning: str # --- CV Region Analysis --- @dataclass class AnalyzeRegionsRequest: """Request body for CV region analysis.""" image: str # base64 JPEG # Edge detection (Canny + HoughLinesP) edge_canny_low: int = 50 edge_canny_high: int = 150 edge_hough_threshold: int = 80 edge_hough_min_length: int = 100 edge_hough_max_gap: int = 10 edge_pair_max_distance: int = 200 edge_pair_min_distance: int = 15 @dataclass class RegionBox: """A candidate region from CV analysis.""" x: int y: int w: int h: int confidence: float label: str @dataclass class AnalyzeRegionsResponse: """Response from CV region analysis.""" regions: List[RegionBox] = field(default_factory=list) @dataclass class AnalyzeRegionsDebugResponse: """Response from CV region analysis with debug overlays.""" regions: List[RegionBox] = field(default_factory=list) edge_overlay_b64: str = "" # Canny edge image as base64 JPEG lines_overlay_b64: str = "" # frame with Hough lines drawn horizontal_count: int = 0 pair_count: int = 0 # --- Server Config --- @dataclass class ConfigUpdate: """Request body for updating server configuration.""" device: Optional[str] = None yolo_model: Optional[str] = None yolo_confidence: Optional[float] = None vram_budget_mb: Optional[int] = None strategy: Optional[str] = None ocr_languages: Optional[List[str]] = None ocr_min_confidence: Optional[float] = None # --- Export list for modelgen --- INFERENCE_VIEWS = [ DetectRequest, BBox, DetectResponse, OCRRequest, OCRTextResult, OCRResponse, PreprocessRequest, PreprocessResponse, VLMRequest, VLMResponse, AnalyzeRegionsRequest, RegionBox, AnalyzeRegionsResponse, AnalyzeRegionsDebugResponse, ConfigUpdate, ]