phase cv 0

2026-03-26 22:22:35 -03:00
parent beb0416280
commit 65814b5b9e
46 changed files with 2962 additions and 268 deletions
--- a/core/schema/models/inference.py
+++ b/core/schema/models/inference.py
@@ -0,0 +1,197 @@
+"""
+Inference Server API Schema Definitions
+
+Source of truth for GPU inference server request/response types.
+Generates: Pydantic models (gpu/models/inference_contract.py)
+
+These are the wire-format types for the HTTP API between the
+pipeline (detect/) and the inference server (gpu/).
+"""
+
+from dataclasses import dataclass, field
+from typing import List, Optional
+
+
+# --- Object Detection (YOLO) ---
+
+
+@dataclass
+class DetectRequest:
+    """Request body for object detection; only `image` is required."""
+
+    image: str  # base64 JPEG
+    model: Optional[str] = None  # presumably None defers to a server default (confirm)
+    confidence: Optional[float] = None  # presumably a minimum-score threshold (confirm)
+    target_classes: Optional[List[str]] = None  # presumably labels to keep (confirm)
+
+
+@dataclass
+class BBox:
+    """A detected bounding box: integer x/y/w/h plus a confidence and a label."""
+
+    x: int
+    y: int
+    w: int  # presumably width (confirm)
+    h: int  # presumably height (confirm)
+    confidence: float
+    label: str
+
+
+@dataclass
+class DetectResponse:
+    """Response from object detection: a list of BBox, empty by default."""
+
+    detections: List[BBox] = field(default_factory=list)  # fresh list per instance
+
+
+# --- OCR ---
+
+
+@dataclass
+class OCRRequest:
+    """Request body for OCR; only `image` is required."""
+
+    image: str  # base64 JPEG
+    languages: Optional[List[str]] = None  # presumably None defers to server config (confirm)
+
+
+@dataclass
+class OCRTextResult:
+    """A single OCR text extraction result: text, confidence, and bounding box."""
+
+    text: str
+    confidence: float
+    bbox: List[int] = field(default_factory=list)  # [x, y, w, h]; empty list when omitted
+
+
+@dataclass
+class OCRResponse:
+    """Response from OCR: a list of OCRTextResult, empty by default."""
+
+    results: List[OCRTextResult] = field(default_factory=list)  # fresh list per instance
+
+
+# --- Preprocessing ---
+
+
+@dataclass
+class PreprocessRequest:
+    """Request body for image preprocessing; `contrast` is the only flag on by default."""
+
+    image: str  # base64 JPEG
+    binarize: bool = False  # off unless requested
+    deskew: bool = False  # off unless requested
+    contrast: bool = True  # on unless explicitly disabled
+
+
+@dataclass
+class PreprocessResponse:
+    """Response from preprocessing: carries only the processed image."""
+
+    image: str  # base64 JPEG of processed image
+
+
+# --- VLM ---
+
+
+@dataclass
+class VLMRequest:
+    """Request body for visual language model query; `image` and `prompt` are required."""
+
+    image: str  # base64 JPEG
+    prompt: str
+    model: Optional[str] = None  # presumably None defers to a server default (confirm)
+
+
+@dataclass
+class VLMResponse:
+    """Response from VLM; all three fields are required (no defaults)."""
+
+    brand: str
+    confidence: float
+    reasoning: str
+
+
+# --- CV Region Analysis ---
+
+
+@dataclass
+class AnalyzeRegionsRequest:
+    """Request body for CV region analysis; every tuning field has a default."""
+
+    image: str  # base64 JPEG
+    # Edge detection (Canny + HoughLinesP); parameter meanings below are inferred from names
+    edge_canny_low: int = 50  # presumably Canny lower threshold (confirm)
+    edge_canny_high: int = 150  # presumably Canny upper threshold (confirm)
+    edge_hough_threshold: int = 80  # presumably HoughLinesP vote threshold (confirm)
+    edge_hough_min_length: int = 100  # presumably HoughLinesP minLineLength (confirm)
+    edge_hough_max_gap: int = 10  # presumably HoughLinesP maxLineGap (confirm)
+    edge_pair_max_distance: int = 200  # presumably max spacing between paired lines (confirm)
+    edge_pair_min_distance: int = 15  # presumably min spacing between paired lines (confirm)
+
+
+@dataclass
+class RegionBox:
+    """A candidate region from CV analysis; declares the same six fields as BBox."""
+
+    x: int
+    y: int
+    w: int  # presumably width (confirm)
+    h: int  # presumably height (confirm)
+    confidence: float
+    label: str
+
+
+@dataclass
+class AnalyzeRegionsResponse:
+    """Response from CV region analysis: a list of RegionBox, empty by default."""
+
+    regions: List[RegionBox] = field(default_factory=list)  # fresh list per instance
+
+
+@dataclass
+class AnalyzeRegionsDebugResponse:
+    """Response from CV region analysis with debug overlays; all fields have defaults."""
+
+    regions: List[RegionBox] = field(default_factory=list)  # fresh list per instance
+    edge_overlay_b64: str = ""       # Canny edge image as base64 JPEG
+    lines_overlay_b64: str = ""      # frame with Hough lines drawn
+    horizontal_count: int = 0  # presumably number of horizontal lines found (confirm)
+    pair_count: int = 0  # presumably number of line pairs found (confirm)
+
+
+# --- Server Config ---
+
+
+@dataclass
+class ConfigUpdate:
+    """Request body for updating server configuration; every field defaults to None."""
+
+    device: Optional[str] = None  # presumably None means "leave unchanged" (confirm)
+    yolo_model: Optional[str] = None
+    yolo_confidence: Optional[float] = None
+    vram_budget_mb: Optional[int] = None
+    strategy: Optional[str] = None  # allowed values are not visible in this file
+    ocr_languages: Optional[List[str]] = None
+    ocr_min_confidence: Optional[float] = None
+
+
+# --- Export list for modelgen ---
+
+INFERENCE_VIEWS = [  # all 15 dataclasses defined above, in definition order
+    DetectRequest,
+    BBox,
+    DetectResponse,
+    OCRRequest,
+    OCRTextResult,
+    OCRResponse,
+    PreprocessRequest,
+    PreprocessResponse,
+    VLMRequest,
+    VLMResponse,
+    AnalyzeRegionsRequest,
+    RegionBox,
+    AnalyzeRegionsResponse,
+    AnalyzeRegionsDebugResponse,
+    ConfigUpdate,
+]