"""
|
|
Inference response types.
|
|
|
|
These are the shapes returned by the inference server.
|
|
Kept separate from detect.models to avoid coupling the
|
|
inference protocol to pipeline internals.
|
|
"""
|
|
|
|
from __future__ import annotations

from dataclasses import dataclass, field


@dataclass
class DetectResult:
    """Single object detection from YOLO or similar."""

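    # Assumed convention (not stated in the original): x/y are the top-left
    # corner of the box in pixels, w/h its width and height, matching the
    # bbox comment on OCRResult below.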
    x: int
    y: int
    w: int
    h: int
    confidence: float
    label: str


@dataclass
class OCRResult:
    """Text extracted from a region."""

    text: str
    confidence: float
    bbox: tuple[int, int, int, int]  # x, y, w, h


@dataclass
class VLMResult:
    """Visual language model response for a crop."""

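    # Naming assumption: `brand` is the model's predicted brand label for
    # the crop, and `reasoning` its free-text justification.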
    brand: str
    confidence: float
    reasoning: str


@dataclass
class RegionResult:
    """A candidate region from CV analysis."""

    x: int
    y: int
    w: int
    h: int
    confidence: float
    label: str


@dataclass
class RegionDebugResult:
    """CV region analysis with debug overlays."""

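    # Assumed from the field names: the *_b64 fields hold base64-encoded
    # debug overlay images; the counts tally detected horizontal lines and
    # matched line pairs.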
    regions: list[RegionResult] = field(default_factory=list)
    edge_overlay_b64: str = ""
    lines_overlay_b64: str = ""
    horizontal_count: int = 0
    pair_count: int = 0


@dataclass
class ModelInfo:
    """Info about a loaded model."""

    name: str
    vram_mb: float
    quantization: str  # fp32, fp16, int8, int4


@dataclass
class ServerStatus:
    """Inference server health response."""

    loaded_models: list[ModelInfo] = field(default_factory=list)
    vram_used_mb: float = 0.0
    vram_budget_mb: float = 0.0
    strategy: str = "sequential"  # sequential, concurrent, auto
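

if __name__ == "__main__":
    # Minimal usage sketch, not part of the protocol: the model name and
    # VRAM figures below are hypothetical, purely to show how a health
    # response is assembled from these types.
    status = ServerStatus(
        loaded_models=[ModelInfo(name="yolo", vram_mb=512.0, quantization="fp16")],
        vram_used_mb=512.0,
        vram_budget_mb=8192.0,
        strategy="sequential",
    )
    print(status)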