phase cv 0

This commit is contained in:
2026-03-26 22:22:35 -03:00
parent beb0416280
commit 65814b5b9e
46 changed files with 2962 additions and 268 deletions

View File

@@ -40,6 +40,11 @@
"target": "typescript",
"output": "ui/detection-app/src/types/store-state.ts",
"include": ["ui_state_views"]
},
{
"target": "pydantic",
"output": "gpu/models/inference_contract.py",
"include": ["inference_views"]
}
]
}

View File

@@ -33,6 +33,7 @@ from .detect_jobs import (
from .media import AssetStatus, MediaAsset
from .presets import BUILTIN_PRESETS, TranscodePreset
from .detect import DETECT_VIEWS # noqa: F401 — discovered by modelgen generic loader
from .inference import INFERENCE_VIEWS # noqa: F401 — GPU inference server API types
from .ui_state import UI_STATE_VIEWS # noqa: F401 — UI store state types
from .views import ChunkEvent, ChunkOutputFile, PipelineStats, WorkerEvent
from .sources import ChunkInfo, SourceJob, SourceType

View File

@@ -53,6 +53,7 @@ class BoundingBoxEvent:
label: str
resolved_brand: Optional[str] = None
source: Optional[str] = None
stage: Optional[str] = None
@dataclass
@@ -85,6 +86,7 @@ class StatsUpdate:
frames_extracted: int = 0
frames_after_scene_filter: int = 0
cv_regions_detected: int = 0
regions_detected: int = 0
regions_resolved_by_ocr: int = 0
regions_escalated_to_local_vlm: int = 0
@@ -166,6 +168,8 @@ class CheckpointInfo:
"""Available checkpoint for a stage."""
stage: str
is_scenario: bool = False
scenario_label: str = ""
@dataclass

View File

@@ -93,13 +93,12 @@ class StageCheckpoint:
frames_meta: List[Dict[str, Any]] = field(default_factory=list) # sequence, chunk_id, timestamp, hash
filtered_frame_sequences: List[int] = field(default_factory=list)
# Detection state (full structured data, not just summaries)
boxes_by_frame: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict)
text_candidates: List[Dict[str, Any]] = field(default_factory=list)
unresolved_candidates: List[Dict[str, Any]] = field(default_factory=list)
detections: List[Dict[str, Any]] = field(default_factory=list)
# Stage output — stored as blob in MinIO: checkpoints/{job_id}/stages/{stage}.bson
# Each stage's serialize_fn/deserialize_fn owns the format.
# Postgres only stores the S3 key, not the data itself.
stage_output_key: str = "" # s3 key to the serialized stage output
# Pipeline state
# Pipeline state (small, stays in Postgres)
stats: Dict[str, Any] = field(default_factory=dict)
config_snapshot: Dict[str, Any] = field(default_factory=dict)
config_overrides: Dict[str, Any] = field(default_factory=dict)
@@ -108,6 +107,13 @@ class StageCheckpoint:
video_path: str = ""
profile_name: str = ""
# Scenario — a checkpoint bookmarked for the editor workflow.
# Created by seeders (manual scripts that populate state from real footage)
# or captured from a running pipeline. Loaded via URL:
# /detection/?job=<job_id>&stage=<stage>&editor=true
is_scenario: bool = False
scenario_label: str = "" # human-readable name, e.g. "chelsea_edges_lowcanny"
# Timestamps
created_at: Optional[datetime] = None

View File

@@ -70,6 +70,7 @@ class BrandStats:
class PipelineStats:
frames_extracted: int = 0
frames_after_scene_filter: int = 0
cv_regions_detected: int = 0
regions_detected: int = 0
regions_resolved_by_ocr: int = 0
regions_escalated_to_local_vlm: int = 0

View File

@@ -0,0 +1,197 @@
"""
Inference Server API Schema Definitions
Source of truth for GPU inference server request/response types.
Generates: Pydantic (gpu/models/inference_contract.py)
These are the wire-format types for the HTTP API between the
pipeline (detect/) and the inference server (gpu/).
"""
from dataclasses import dataclass, field
from typing import List, Optional
# --- Object Detection (YOLO) ---
@dataclass
class DetectRequest:
    """Request body for object detection.

    Wire-format type for the HTTP API between the pipeline and the
    GPU inference server.
    """
    image: str  # base64 JPEG
    model: Optional[str] = None  # model identifier; None presumably selects the server default — confirm
    confidence: Optional[float] = None  # detection threshold override; None presumably uses the server default — confirm
    target_classes: Optional[List[str]] = None  # restrict detections to these class names; None = no restriction (presumably)
@dataclass
class BBox:
    """A detected bounding box.

    x/y/w/h convention: top-left corner plus width/height —
    presumably in pixel coordinates of the submitted image; confirm
    against the detector implementation.
    """
    x: int
    y: int
    w: int
    h: int
    confidence: float  # detector confidence for this box
    label: str  # predicted class name
@dataclass
class DetectResponse:
    """Response from object detection.

    An empty `detections` list is a valid result (nothing found),
    not an error condition.
    """
    detections: List[BBox] = field(default_factory=list)
# --- OCR ---
@dataclass
class OCRRequest:
    """Request body for OCR."""
    image: str  # base64 JPEG
    languages: Optional[List[str]] = None  # language hints; None presumably falls back to server-configured languages — confirm
@dataclass
class OCRTextResult:
    """A single OCR text extraction result."""
    text: str  # extracted text content
    confidence: float  # OCR engine confidence for this extraction
    bbox: List[int] = field(default_factory=list)  # [x, y, w, h]; empty when no box is available
@dataclass
class OCRResponse:
    """Response from OCR.

    An empty `results` list means no text was extracted.
    """
    results: List[OCRTextResult] = field(default_factory=list)
# --- Preprocessing ---
@dataclass
class PreprocessRequest:
    """Request body for image preprocessing.

    Each flag toggles one preprocessing step; note that only
    `contrast` is enabled by default.
    """
    image: str  # base64 JPEG
    binarize: bool = False  # apply binarization (off by default)
    deskew: bool = False  # apply deskewing (off by default)
    contrast: bool = True  # apply contrast adjustment (on by default)
@dataclass
class PreprocessResponse:
    """Response from preprocessing."""
    image: str  # base64 JPEG of processed image
# --- VLM ---
@dataclass
class VLMRequest:
    """Request body for visual language model query."""
    image: str  # base64 JPEG
    prompt: str  # text prompt sent to the VLM alongside the image
    model: Optional[str] = None  # VLM model identifier; None presumably selects the server default — confirm
@dataclass
class VLMResponse:
    """Response from VLM.

    All fields are required — the server is expected to always return a
    brand verdict with its confidence and reasoning text.
    """
    brand: str  # brand name identified by the model
    confidence: float  # model's confidence in the brand verdict
    reasoning: str  # free-text explanation from the model
# --- CV Region Analysis ---
@dataclass
class AnalyzeRegionsRequest:
    """Request body for CV region analysis.

    Tunable parameters for the edge-based region proposal pass.
    Distances/lengths are presumably in pixels of the submitted image —
    confirm against the analysis implementation.
    """
    image: str  # base64 JPEG
    # Edge detection (Canny + HoughLinesP)
    edge_canny_low: int = 50  # Canny lower hysteresis threshold
    edge_canny_high: int = 150  # Canny upper hysteresis threshold
    edge_hough_threshold: int = 80  # HoughLinesP accumulator threshold (votes)
    edge_hough_min_length: int = 100  # minimum accepted line length
    edge_hough_max_gap: int = 10  # max gap allowed when joining line segments
    edge_pair_max_distance: int = 200  # max separation for two lines to be paired
    edge_pair_min_distance: int = 15  # min separation for two lines to be paired
@dataclass
class RegionBox:
    """A candidate region from CV analysis.

    Same x/y/w/h layout as BBox, but kept as a distinct wire type so the
    CV-proposal contract can evolve independently of YOLO detections.
    """
    x: int
    y: int
    w: int
    h: int
    confidence: float  # heuristic confidence assigned by the CV analysis
    label: str  # region label assigned by the CV analysis
@dataclass
class AnalyzeRegionsResponse:
    """Response from CV region analysis.

    An empty `regions` list means no candidate regions were proposed.
    """
    regions: List[RegionBox] = field(default_factory=list)
@dataclass
class AnalyzeRegionsDebugResponse:
    """Response from CV region analysis with debug overlays.

    Superset of AnalyzeRegionsResponse: adds intermediate visualizations
    and counters for tuning the edge/Hough parameters. Overlay fields are
    empty strings when no overlay was produced.
    """
    regions: List[RegionBox] = field(default_factory=list)
    edge_overlay_b64: str = ""  # Canny edge image as base64 JPEG
    lines_overlay_b64: str = ""  # frame with Hough lines drawn
    horizontal_count: int = 0  # number of horizontal lines found — presumably pre-pairing; confirm
    pair_count: int = 0  # number of line pairs formed
# --- Server Config ---
@dataclass
class ConfigUpdate:
    """Request body for updating server configuration.

    Every field is optional — presumably a partial (PATCH-style) update
    where None means "leave unchanged"; confirm against the server handler.
    """
    device: Optional[str] = None  # compute device selection
    yolo_model: Optional[str] = None  # object-detection model identifier
    yolo_confidence: Optional[float] = None  # default detection threshold
    vram_budget_mb: Optional[int] = None  # GPU memory budget, in megabytes
    strategy: Optional[str] = None  # model-management strategy name — semantics defined server-side
    ocr_languages: Optional[List[str]] = None  # default OCR language set
    ocr_min_confidence: Optional[float] = None  # minimum confidence for OCR results
# --- Export list for modelgen ---
# Every class listed here is picked up by the modelgen loader (this module
# is imported for its side effect elsewhere in the package) and emitted to
# the generated Pydantic contract. Add new wire types here or they will be
# silently missing from the generated code.
INFERENCE_VIEWS: List[type] = [
    DetectRequest,
    BBox,
    DetectResponse,
    OCRRequest,
    OCRTextResult,
    OCRResponse,
    PreprocessRequest,
    PreprocessResponse,
    VLMRequest,
    VLMResponse,
    AnalyzeRegionsRequest,
    RegionBox,
    AnalyzeRegionsResponse,
    AnalyzeRegionsDebugResponse,
    ConfigUpdate,
]