phase cv 0
This commit is contained in:
197
core/schema/models/inference.py
Normal file
197
core/schema/models/inference.py
Normal file
@@ -0,0 +1,197 @@
|
||||
"""
|
||||
Inference Server API Schema Definitions

Source of truth for GPU inference server request/response types.
Generates: Pydantic (gpu/models/inference_contract.py)

These are the wire-format types for the HTTP API between the
pipeline (detect/) and the inference server (gpu/).
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
# --- Object Detection (YOLO) ---
|
||||
|
||||
|
||||
@dataclass
class DetectRequest:
    """Request body for object detection.

    Only ``image`` is required; every other field defaults to ``None``.
    """

    image: str  # base64 JPEG
    model: Optional[str] = None
    confidence: Optional[float] = None
    target_classes: Optional[List[str]] = None
|
||||
|
||||
|
||||
@dataclass
class BBox:
    """A detected bounding box.

    All six fields are required; none has a default.
    """

    x: int
    y: int
    w: int
    h: int
    confidence: float
    label: str
|
||||
|
||||
|
||||
@dataclass
class DetectResponse:
    """Response from object detection."""

    # default_factory gives each instance its own empty list.
    detections: List[BBox] = field(default_factory=list)
|
||||
|
||||
|
||||
# --- OCR ---
|
||||
|
||||
|
||||
@dataclass
class OCRRequest:
    """Request body for OCR.

    ``languages`` is optional and defaults to ``None``.
    """

    image: str  # base64 JPEG
    languages: Optional[List[str]] = None
|
||||
|
||||
|
||||
@dataclass
class OCRTextResult:
    """A single OCR text extraction result."""

    text: str
    confidence: float
    # default_factory gives each instance its own empty list.
    bbox: List[int] = field(default_factory=list)  # [x, y, w, h]
|
||||
|
||||
|
||||
@dataclass
class OCRResponse:
    """Response from OCR."""

    # default_factory gives each instance its own empty list.
    results: List[OCRTextResult] = field(default_factory=list)
|
||||
|
||||
|
||||
# --- Preprocessing ---
|
||||
|
||||
|
||||
@dataclass
class PreprocessRequest:
    """Request body for image preprocessing.

    The three boolean flags select preprocessing steps; ``contrast`` is the
    only one enabled by default.
    """

    image: str  # base64 JPEG
    binarize: bool = False
    deskew: bool = False
    contrast: bool = True
|
||||
|
||||
|
||||
@dataclass
class PreprocessResponse:
    """Response from preprocessing."""

    image: str  # base64 JPEG of processed image
|
||||
|
||||
|
||||
# --- VLM ---
|
||||
|
||||
|
||||
@dataclass
class VLMRequest:
    """Request body for visual language model query.

    ``image`` and ``prompt`` are required; ``model`` defaults to ``None``.
    """

    image: str  # base64 JPEG
    prompt: str
    model: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
class VLMResponse:
    """Response from VLM.

    All three fields are required; none has a default.
    """

    brand: str
    confidence: float
    reasoning: str
|
||||
|
||||
|
||||
# --- CV Region Analysis ---
|
||||
|
||||
|
||||
@dataclass
class AnalyzeRegionsRequest:
    """Request body for CV region analysis.

    Only ``image`` is required. The ``edge_*`` fields are integer tunables
    for the edge-detection stage and each has a default.
    """

    image: str  # base64 JPEG
    # Edge detection (Canny + HoughLinesP)
    edge_canny_low: int = 50
    edge_canny_high: int = 150
    edge_hough_threshold: int = 80
    edge_hough_min_length: int = 100
    edge_hough_max_gap: int = 10
    edge_pair_max_distance: int = 200
    edge_pair_min_distance: int = 15
|
||||
|
||||
|
||||
@dataclass
class RegionBox:
    """A candidate region from CV analysis.

    All six fields are required; none has a default.
    """

    x: int
    y: int
    w: int
    h: int
    confidence: float
    label: str
|
||||
|
||||
|
||||
@dataclass
class AnalyzeRegionsResponse:
    """Response from CV region analysis."""

    # default_factory gives each instance its own empty list.
    regions: List[RegionBox] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class AnalyzeRegionsDebugResponse:
    """Response from CV region analysis with debug overlays.

    Every field has a default: an empty list, empty strings, or zero.
    """

    # default_factory gives each instance its own empty list.
    regions: List[RegionBox] = field(default_factory=list)
    edge_overlay_b64: str = ""  # Canny edge image as base64 JPEG
    lines_overlay_b64: str = ""  # frame with Hough lines drawn
    horizontal_count: int = 0
    pair_count: int = 0
|
||||
|
||||
|
||||
# --- Server Config ---
|
||||
|
||||
|
||||
@dataclass
class ConfigUpdate:
    """Request body for updating server configuration.

    Every field is optional and defaults to ``None``.
    """

    device: Optional[str] = None
    yolo_model: Optional[str] = None
    yolo_confidence: Optional[float] = None
    vram_budget_mb: Optional[int] = None
    strategy: Optional[str] = None
    ocr_languages: Optional[List[str]] = None
    ocr_min_confidence: Optional[float] = None
|
||||
|
||||
|
||||
# --- Export list for modelgen ---
|
||||
|
||||
# The schema classes defined in this module, listed in definition order.
INFERENCE_VIEWS = [
    DetectRequest,
    BBox,
    DetectResponse,
    OCRRequest,
    OCRTextResult,
    OCRResponse,
    PreprocessRequest,
    PreprocessResponse,
    VLMRequest,
    VLMResponse,
    AnalyzeRegionsRequest,
    RegionBox,
    AnalyzeRegionsResponse,
    AnalyzeRegionsDebugResponse,
    ConfigUpdate,
]
|
||||
Reference in New Issue
Block a user