""" Inference response types. These are the shapes returned by the inference server. Kept separate from detect.models to avoid coupling the inference protocol to pipeline internals. """ from __future__ import annotations from dataclasses import dataclass, field @dataclass class DetectResult: """Single object detection from YOLO or similar.""" x: int y: int w: int h: int confidence: float label: str @dataclass class OCRResult: """Text extracted from a region.""" text: str confidence: float bbox: tuple[int, int, int, int] # x, y, w, h @dataclass class VLMResult: """Visual language model response for a crop.""" brand: str confidence: float reasoning: str @dataclass class RegionResult: """A candidate region from CV analysis.""" x: int y: int w: int h: int confidence: float label: str @dataclass class RegionDebugResult: """CV region analysis with debug overlays.""" regions: list[RegionResult] = field(default_factory=list) edge_overlay_b64: str = "" lines_overlay_b64: str = "" horizontal_count: int = 0 pair_count: int = 0 @dataclass class ModelInfo: """Info about a loaded model.""" name: str vram_mb: float quantization: str # fp32, fp16, int8, int4 @dataclass class ServerStatus: """Inference server health response.""" loaded_models: list[ModelInfo] = field(default_factory=list) vram_used_mb: float = 0.0 vram_budget_mb: float = 0.0 strategy: str = "sequential" # sequential, concurrent, auto