2026-03-30 07:22:14 -03:00
parent d0707333fd
commit 4220b0418e
182 changed files with 3668 additions and 5231 deletions


@@ -0,0 +1,4 @@
from .client import InferenceClient
from .types import DetectResult, OCRResult, VLMResult
__all__ = ["InferenceClient", "DetectResult", "OCRResult", "VLMResult"]
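For reference, a consumer-side import sketch against this public surface; the package's installable name is not shown in the diff, so inference_client below is a placeholder.

# Placeholder top-level name; the diff shows only relative imports.
from inference_client import InferenceClient, DetectResult, OCRResult, VLMResult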


@@ -0,0 +1,262 @@
"""
HTTP client for the inference server.
The pipeline stages call this instead of importing ML libraries directly.
The inference server runs on the GPU machine (or a spot instance).
"""
from __future__ import annotations
import base64
import io
import logging
import os
import numpy as np
import requests
from PIL import Image
from .types import DetectResult, ModelInfo, OCRResult, RegionDebugResult, RegionResult, ServerStatus, VLMResult
logger = logging.getLogger(__name__)
DEFAULT_URL = os.environ.get("INFERENCE_URL", "http://localhost:8000")
def _encode_image(image: np.ndarray) -> str:
    """Encode a numpy array as a base64 JPEG string."""
    img = Image.fromarray(image)
    if img.mode != "RGB":
        # JPEG cannot carry alpha or palette modes; normalize first.
        img = img.convert("RGB")
    buf = io.BytesIO()
    img.save(buf, format="JPEG", quality=85)  # lossy but compact on the wire
    return base64.b64encode(buf.getvalue()).decode()
class InferenceClient:
"""HTTP client for the GPU inference server."""
def __init__(self, base_url: str | None = None, timeout: float = 60.0,
job_id: str = "", log_level: str = "INFO"):
self.base_url = (base_url or DEFAULT_URL).rstrip("/")
self.timeout = timeout
self.job_id = job_id
self.log_level = log_level
self.session = requests.Session()
if job_id:
self.session.headers["X-Job-Id"] = job_id
self.session.headers["X-Log-Level"] = log_level
def health(self) -> ServerStatus:
"""Check server health and loaded models."""
resp = self.session.get(f"{self.base_url}/health", timeout=self.timeout)
resp.raise_for_status()
        data = resp.json()
        return ServerStatus(
            # Entries are assumed to mirror ModelInfo's fields.
            loaded_models=[
                ModelInfo(name=m.get("name", ""), vram_mb=m.get("vram_mb", 0.0),
                          quantization=m.get("quantization", "fp16"))
                for m in data.get("loaded_models", [])
            ],
            vram_used_mb=data.get("vram_used_mb", 0.0),
            vram_budget_mb=data.get("vram_budget_mb", 0.0),
            strategy=data.get("strategy", "sequential"),
        )
def detect(
self,
image: np.ndarray,
model: str = "yolov8n",
confidence: float = 0.3,
target_classes: list[str] | None = None,
) -> list[DetectResult]:
"""Run object detection on an image."""
payload = {
"image": _encode_image(image),
"model": model,
"confidence": confidence,
}
if target_classes:
payload["target_classes"] = target_classes
resp = self.session.post(
f"{self.base_url}/detect",
json=payload,
timeout=self.timeout,
)
resp.raise_for_status()
results = []
for d in resp.json().get("detections", []):
result = DetectResult(
x=d["x"], y=d["y"], w=d["w"], h=d["h"],
confidence=d["confidence"], label=d["label"],
)
results.append(result)
return results
def ocr(
self,
image: np.ndarray,
languages: list[str] | None = None,
) -> list[OCRResult]:
"""Run OCR on an image region."""
payload = {
"image": _encode_image(image),
}
if languages:
payload["languages"] = languages
resp = self.session.post(
f"{self.base_url}/ocr",
json=payload,
timeout=self.timeout,
)
resp.raise_for_status()
results = []
for d in resp.json().get("results", []):
result = OCRResult(
text=d["text"],
confidence=d["confidence"],
bbox=tuple(d["bbox"]),
)
results.append(result)
return results
def vlm(
self,
image: np.ndarray,
prompt: str,
model: str = "moondream2",
) -> VLMResult:
"""Query a visual language model with an image crop + prompt."""
payload = {
"image": _encode_image(image),
"prompt": prompt,
"model": model,
}
resp = self.session.post(
f"{self.base_url}/vlm",
json=payload,
timeout=self.timeout,
)
resp.raise_for_status()
data = resp.json()
return VLMResult(
brand=data.get("brand", ""),
confidence=data.get("confidence", 0.0),
reasoning=data.get("reasoning", ""),
)
def detect_edges(
self,
image: np.ndarray,
edge_canny_low: int = 50,
edge_canny_high: int = 150,
edge_hough_threshold: int = 80,
edge_hough_min_length: int = 100,
edge_hough_max_gap: int = 10,
edge_pair_max_distance: int = 200,
edge_pair_min_distance: int = 15,
) -> list[RegionResult]:
"""Run edge detection on an image."""
payload = {
"image": _encode_image(image),
"edge_canny_low": edge_canny_low,
"edge_canny_high": edge_canny_high,
"edge_hough_threshold": edge_hough_threshold,
"edge_hough_min_length": edge_hough_min_length,
"edge_hough_max_gap": edge_hough_max_gap,
"edge_pair_max_distance": edge_pair_max_distance,
"edge_pair_min_distance": edge_pair_min_distance,
}
resp = self.session.post(
f"{self.base_url}/detect_edges",
json=payload,
timeout=self.timeout,
)
resp.raise_for_status()
results = []
for r in resp.json().get("regions", []):
result = RegionResult(
x=r["x"], y=r["y"], w=r["w"], h=r["h"],
confidence=r["confidence"], label=r["label"],
)
results.append(result)
return results
def detect_edges_debug(
self,
image: np.ndarray,
edge_canny_low: int = 50,
edge_canny_high: int = 150,
edge_hough_threshold: int = 80,
edge_hough_min_length: int = 100,
edge_hough_max_gap: int = 10,
edge_pair_max_distance: int = 200,
edge_pair_min_distance: int = 15,
) -> RegionDebugResult:
"""Run edge detection with debug overlays."""
payload = {
"image": _encode_image(image),
"edge_canny_low": edge_canny_low,
"edge_canny_high": edge_canny_high,
"edge_hough_threshold": edge_hough_threshold,
"edge_hough_min_length": edge_hough_min_length,
"edge_hough_max_gap": edge_hough_max_gap,
"edge_pair_max_distance": edge_pair_max_distance,
"edge_pair_min_distance": edge_pair_min_distance,
}
resp = self.session.post(
f"{self.base_url}/detect_edges/debug",
json=payload,
timeout=self.timeout,
)
resp.raise_for_status()
data = resp.json()
regions = []
for r in data.get("regions", []):
region = RegionResult(
x=r["x"], y=r["y"], w=r["w"], h=r["h"],
confidence=r["confidence"], label=r["label"],
)
regions.append(region)
return RegionDebugResult(
regions=regions,
edge_overlay_b64=data.get("edge_overlay_b64", ""),
lines_overlay_b64=data.get("lines_overlay_b64", ""),
horizontal_count=data.get("horizontal_count", 0),
pair_count=data.get("pair_count", 0),
)
def post(self, path: str, payload: dict) -> dict | None:
"""Generic POST to the inference server. Returns JSON response or None on error."""
try:
resp = self.session.post(
f"{self.base_url}{path}",
json=payload,
timeout=self.timeout,
)
resp.raise_for_status()
return resp.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers JSON decode failures on older requests versions.
            logger.warning("Inference POST %s failed: %s", path, e)
            return None
def load_model(self, model: str, quantization: str = "fp16") -> None:
"""Request the server to load a model into VRAM."""
self.session.post(
f"{self.base_url}/models/load",
json={"model": model, "quantization": quantization},
timeout=self.timeout,
).raise_for_status()
def unload_model(self, model: str) -> None:
"""Request the server to unload a model from VRAM."""
self.session.post(
f"{self.base_url}/models/unload",
json={"model": model},
timeout=self.timeout,
).raise_for_status()
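For orientation, a minimal usage sketch of the client from a pipeline stage. This is not part of the commit: the import path inference_client, the file frame.jpg, the job id, and the prompt are placeholders.

import numpy as np
from PIL import Image
from inference_client import InferenceClient  # placeholder import path

client = InferenceClient(job_id="job-123", timeout=120.0)
status = client.health()
print(f"VRAM {status.vram_used_mb:.0f}/{status.vram_budget_mb:.0f} MB, strategy={status.strategy}")

frame = np.asarray(Image.open("frame.jpg").convert("RGB"))

# Detect objects, then OCR each detected crop.
detections = client.detect(frame, confidence=0.4)
for det in detections:
    crop = frame[det.y:det.y + det.h, det.x:det.x + det.w]
    for line in client.ocr(crop, languages=["en"]):
        print(det.label, repr(line.text), line.confidence)

# Explicit load/unload keeps VRAM free between heavy passes.
if detections:
    d = detections[0]
    client.load_model("moondream2", quantization="int8")
    answer = client.vlm(frame[d.y:d.y + d.h, d.x:d.x + d.w],
                        prompt="What brand is on the packaging?")
    client.unload_model("moondream2")
    print(answer.brand, answer.confidence, answer.reasoning)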


@@ -0,0 +1,76 @@
"""
Inference response types.
These are the shapes returned by the inference server.
Kept separate from core.detect.models to avoid coupling the
inference protocol to pipeline internals.
"""
from __future__ import annotations
from dataclasses import dataclass, field
@dataclass
class DetectResult:
"""Single object detection from YOLO or similar."""
x: int
y: int
w: int
h: int
confidence: float
label: str
@dataclass
class OCRResult:
"""Text extracted from a region."""
text: str
confidence: float
bbox: tuple[int, int, int, int] # x, y, w, h
@dataclass
class VLMResult:
"""Visual language model response for a crop."""
brand: str
confidence: float
reasoning: str
@dataclass
class RegionResult:
"""A candidate region from CV analysis."""
x: int
y: int
w: int
h: int
confidence: float
label: str
@dataclass
class RegionDebugResult:
"""CV region analysis with debug overlays."""
regions: list[RegionResult] = field(default_factory=list)
edge_overlay_b64: str = ""
lines_overlay_b64: str = ""
horizontal_count: int = 0
pair_count: int = 0
@dataclass
class ModelInfo:
"""Info about a loaded model."""
name: str
vram_mb: float
quantization: str # fp32, fp16, int8, int4
@dataclass
class ServerStatus:
"""Inference server health response."""
loaded_models: list[ModelInfo] = field(default_factory=list)
vram_used_mb: float = 0.0
vram_budget_mb: float = 0.0
strategy: str = "sequential" # sequential, concurrent, auto
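As a sketch of how these types compose, a hypothetical helper that checks the remaining VRAM budget before a caller asks the server to load another model. can_load and the sizes are illustrative, not part of this commit; it assumes ServerStatus and ModelInfo from the module above.

def can_load(status: ServerStatus, estimated_vram_mb: float) -> bool:
    """True if the estimated footprint fits in the remaining budget."""
    return estimated_vram_mb <= status.vram_budget_mb - status.vram_used_mb

status = ServerStatus(
    loaded_models=[ModelInfo(name="yolov8n", vram_mb=640.0, quantization="fp16")],
    vram_used_mb=640.0,
    vram_budget_mb=8192.0,
)
assert can_load(status, 4096.0)      # 7552 MB free, fits
assert not can_load(status, 9000.0)  # would exceed the budget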