phase cv 0
This commit is contained in:
@@ -40,6 +40,11 @@
|
||||
"target": "typescript",
|
||||
"output": "ui/detection-app/src/types/store-state.ts",
|
||||
"include": ["ui_state_views"]
|
||||
},
|
||||
{
|
||||
"target": "pydantic",
|
||||
"output": "gpu/models/inference_contract.py",
|
||||
"include": ["inference_views"]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -33,6 +33,7 @@ from .detect_jobs import (
|
||||
from .media import AssetStatus, MediaAsset
|
||||
from .presets import BUILTIN_PRESETS, TranscodePreset
|
||||
from .detect import DETECT_VIEWS # noqa: F401 — discovered by modelgen generic loader
|
||||
from .inference import INFERENCE_VIEWS # noqa: F401 — GPU inference server API types
|
||||
from .ui_state import UI_STATE_VIEWS # noqa: F401 — UI store state types
|
||||
from .views import ChunkEvent, ChunkOutputFile, PipelineStats, WorkerEvent
|
||||
from .sources import ChunkInfo, SourceJob, SourceType
|
||||
|
||||
@@ -53,6 +53,7 @@ class BoundingBoxEvent:
|
||||
label: str
|
||||
resolved_brand: Optional[str] = None
|
||||
source: Optional[str] = None
|
||||
stage: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -85,6 +86,7 @@ class StatsUpdate:
|
||||
|
||||
frames_extracted: int = 0
|
||||
frames_after_scene_filter: int = 0
|
||||
cv_regions_detected: int = 0
|
||||
regions_detected: int = 0
|
||||
regions_resolved_by_ocr: int = 0
|
||||
regions_escalated_to_local_vlm: int = 0
|
||||
@@ -166,6 +168,8 @@ class CheckpointInfo:
|
||||
"""Available checkpoint for a stage."""
|
||||
|
||||
stage: str
|
||||
is_scenario: bool = False
|
||||
scenario_label: str = ""
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -93,13 +93,12 @@ class StageCheckpoint:
|
||||
frames_meta: List[Dict[str, Any]] = field(default_factory=list) # sequence, chunk_id, timestamp, hash
|
||||
filtered_frame_sequences: List[int] = field(default_factory=list)
|
||||
|
||||
# Detection state (full structured data, not just summaries)
|
||||
boxes_by_frame: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict)
|
||||
text_candidates: List[Dict[str, Any]] = field(default_factory=list)
|
||||
unresolved_candidates: List[Dict[str, Any]] = field(default_factory=list)
|
||||
detections: List[Dict[str, Any]] = field(default_factory=list)
|
||||
# Stage output — stored as blob in MinIO: checkpoints/{job_id}/stages/{stage}.bson
|
||||
# Each stage's serialize_fn/deserialize_fn owns the format.
|
||||
# Postgres only stores the S3 key, not the data itself.
|
||||
stage_output_key: str = "" # s3 key to the serialized stage output
|
||||
|
||||
# Pipeline state
|
||||
# Pipeline state (small, stays in Postgres)
|
||||
stats: Dict[str, Any] = field(default_factory=dict)
|
||||
config_snapshot: Dict[str, Any] = field(default_factory=dict)
|
||||
config_overrides: Dict[str, Any] = field(default_factory=dict)
|
||||
@@ -108,6 +107,13 @@ class StageCheckpoint:
|
||||
video_path: str = ""
|
||||
profile_name: str = ""
|
||||
|
||||
# Scenario — a checkpoint bookmarked for the editor workflow.
|
||||
# Created by seeders (manual scripts that populate state from real footage)
|
||||
# or captured from a running pipeline. Loaded via URL:
|
||||
# /detection/?job=<job_id>&stage=<stage>&editor=true
|
||||
is_scenario: bool = False
|
||||
scenario_label: str = "" # human-readable name, e.g. "chelsea_edges_lowcanny"
|
||||
|
||||
# Timestamps
|
||||
created_at: Optional[datetime] = None
|
||||
|
||||
|
||||
@@ -70,6 +70,7 @@ class BrandStats:
|
||||
class PipelineStats:
|
||||
frames_extracted: int = 0
|
||||
frames_after_scene_filter: int = 0
|
||||
cv_regions_detected: int = 0
|
||||
regions_detected: int = 0
|
||||
regions_resolved_by_ocr: int = 0
|
||||
regions_escalated_to_local_vlm: int = 0
|
||||
|
||||
197
core/schema/models/inference.py
Normal file
197
core/schema/models/inference.py
Normal file
@@ -0,0 +1,197 @@
|
||||
"""
|
||||
Inference Server API Schema Definitions
|
||||
|
||||
Source of truth for GPU inference server request/response types.
|
||||
Generates: Pydantic (gpu/models/inference_contract.py)
|
||||
|
||||
These are the wire-format types for the HTTP API between the
|
||||
pipeline (detect/) and the inference server (gpu/).
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
# --- Object Detection (YOLO) ---
|
||||
|
||||
|
||||
@dataclass
class DetectRequest:
    """Request body for object detection.

    Only ``image`` is required; the remaining fields default to ``None``.
    NOTE(review): ``None`` presumably means "use the server-side setting" --
    confirm against the inference server before relying on it.
    """

    image: str  # base64 JPEG
    model: Optional[str] = None
    confidence: Optional[float] = None
    target_classes: Optional[List[str]] = None
|
||||
|
||||
|
||||
@dataclass
class BBox:
    """A detected bounding box.

    All six fields are required and are accepted positionally in the
    order declared below.
    """

    # Box geometry.
    # NOTE(review): presumably x/y is the top-left corner and w/h the size
    # in pixels -- confirm against the detector output.
    x: int
    y: int
    w: int
    h: int
    confidence: float
    label: str
|
||||
|
||||
|
||||
@dataclass
class DetectResponse:
    """Response from object detection."""

    # default_factory gives every instance its own empty list.
    detections: List[BBox] = field(default_factory=list)
|
||||
|
||||
|
||||
# --- OCR ---
|
||||
|
||||
|
||||
@dataclass
class OCRRequest:
    """Request body for OCR.

    ``languages`` is optional and defaults to ``None``.
    NOTE(review): ``None`` presumably selects the server's configured
    languages -- confirm against the inference server.
    """

    image: str  # base64 JPEG
    languages: Optional[List[str]] = None
|
||||
|
||||
|
||||
@dataclass
class OCRTextResult:
    """A single OCR text extraction result.

    ``text`` and ``confidence`` are required; ``bbox`` defaults to a fresh
    empty list per instance.
    """

    text: str
    confidence: float
    bbox: List[int] = field(default_factory=list)  # [x, y, w, h]
|
||||
|
||||
|
||||
@dataclass
class OCRResponse:
    """Response from OCR."""

    # default_factory gives every instance its own empty list.
    results: List[OCRTextResult] = field(default_factory=list)
|
||||
|
||||
|
||||
# --- Preprocessing ---
|
||||
|
||||
|
||||
@dataclass
class PreprocessRequest:
    """Request body for image preprocessing.

    The three boolean flags select preprocessing steps. By default only
    ``contrast`` is switched on.
    """

    image: str  # base64 JPEG
    binarize: bool = False
    deskew: bool = False
    contrast: bool = True
|
||||
|
||||
|
||||
@dataclass
class PreprocessResponse:
    """Response from preprocessing."""

    image: str  # base64 JPEG of processed image
|
||||
|
||||
|
||||
# --- VLM ---
|
||||
|
||||
|
||||
@dataclass
class VLMRequest:
    """Request body for visual language model query.

    ``image`` and ``prompt`` are required; ``model`` defaults to ``None``.
    NOTE(review): ``None`` presumably selects the server's default model --
    confirm against the inference server.
    """

    image: str  # base64 JPEG
    prompt: str
    model: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
class VLMResponse:
    """Response from VLM.

    All three fields are required; none has a default.
    """

    brand: str
    confidence: float
    reasoning: str
|
||||
|
||||
|
||||
# --- CV Region Analysis ---
|
||||
|
||||
|
||||
@dataclass
class AnalyzeRegionsRequest:
    """Request body for CV region analysis.

    Only ``image`` is required; every tuning parameter has a default.
    """

    image: str  # base64 JPEG

    # Edge detection (Canny + HoughLinesP)
    # NOTE(review): the edge_pair_* distances are presumably in pixels --
    # confirm against the server-side implementation.
    edge_canny_low: int = 50
    edge_canny_high: int = 150
    edge_hough_threshold: int = 80
    edge_hough_min_length: int = 100
    edge_hough_max_gap: int = 10
    edge_pair_max_distance: int = 200
    edge_pair_min_distance: int = 15
|
||||
|
||||
|
||||
@dataclass
class RegionBox:
    """A candidate region from CV analysis.

    All six fields are required and are accepted positionally in the
    order declared below.
    """

    # Region geometry.
    # NOTE(review): presumably x/y is the top-left corner and w/h the size
    # in pixels -- confirm against the CV analysis code.
    x: int
    y: int
    w: int
    h: int
    confidence: float
    label: str
|
||||
|
||||
|
||||
@dataclass
class AnalyzeRegionsResponse:
    """Response from CV region analysis."""

    # default_factory gives every instance its own empty list.
    regions: List[RegionBox] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class AnalyzeRegionsDebugResponse:
    """Response from CV region analysis with debug overlays.

    Every field has a default, so an instance can be built with no
    arguments.
    """

    # default_factory gives every instance its own empty list.
    regions: List[RegionBox] = field(default_factory=list)
    edge_overlay_b64: str = ""  # Canny edge image as base64 JPEG
    lines_overlay_b64: str = ""  # frame with Hough lines drawn
    # NOTE(review): presumably the number of horizontal lines and of line
    # pairs found during analysis -- confirm against the server.
    horizontal_count: int = 0
    pair_count: int = 0
|
||||
|
||||
|
||||
# --- Server Config ---
|
||||
|
||||
|
||||
@dataclass
class ConfigUpdate:
    """Request body for updating server configuration.

    Every field is optional and defaults to ``None``.
    NOTE(review): ``None`` presumably means "leave this setting unchanged"
    -- confirm against the server's config handler.
    """

    device: Optional[str] = None
    yolo_model: Optional[str] = None
    yolo_confidence: Optional[float] = None
    vram_budget_mb: Optional[int] = None
    strategy: Optional[str] = None
    ocr_languages: Optional[List[str]] = None
    ocr_min_confidence: Optional[float] = None
|
||||
|
||||
|
||||
# --- Export list for modelgen ---
|
||||
|
||||
# Dataclasses exported to modelgen, grouped by API area in declaration order.
INFERENCE_VIEWS = [
    # Object detection
    DetectRequest,
    BBox,
    DetectResponse,
    # OCR
    OCRRequest,
    OCRTextResult,
    OCRResponse,
    # Preprocessing
    PreprocessRequest,
    PreprocessResponse,
    # VLM
    VLMRequest,
    VLMResponse,
    # CV region analysis
    AnalyzeRegionsRequest,
    RegionBox,
    AnalyzeRegionsResponse,
    AnalyzeRegionsDebugResponse,
    # Server config
    ConfigUpdate,
]
|
||||
Reference in New Issue
Block a user