phase 2
This commit is contained in:
71
detect/profiles/base.py
Normal file
71
detect/profiles/base.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""
ContentTypeProfile protocol and config dataclasses.

The pipeline graph is fixed — what varies per content type is configuration
and hooks. Each profile provides stage configs, a brand dictionary,
VLM prompt templates, and an aggregation strategy.
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Protocol
|
||||
|
||||
from detect.models import BrandDetection, DetectionReport
|
||||
|
||||
|
||||
@dataclass
class FrameExtractionConfig:
    """Settings for the frame-extraction stage of the pipeline.

    Both fields have defaults, so ``FrameExtractionConfig()`` is a valid
    configuration.
    """

    # Sampling rate; presumably frames extracted per second of video —
    # TODO confirm against the extraction stage.
    fps: float = 2.0
    # Presumably an upper bound on the number of frames extracted per
    # source — TODO confirm against the extraction stage.
    max_frames: int = 500
|
||||
|
||||
|
||||
@dataclass
class SceneFilterConfig:
    """Settings for the scene-filter stage of the pipeline.

    Both fields have defaults, so ``SceneFilterConfig()`` is a valid
    configuration.
    """

    # Presumably the Hamming-distance cutoff used when comparing frame
    # hashes — TODO confirm against the scene-filter stage.
    hamming_threshold: int = 8
    # Switch for the stage; how a disabled filter is handled is decided by
    # the consumer of this config, not here.
    enabled: bool = True
|
||||
|
||||
|
||||
@dataclass
class DetectionConfig:
    """Settings for the object-detection stage of the pipeline.

    All fields have defaults, so ``DetectionConfig()`` is a valid
    configuration.
    """

    # Identifier of the detection model; the default looks like a YOLOv8
    # weights file name — how it is resolved is up to the detection stage.
    model_name: str = "yolov8n.pt"
    # Presumably the minimum detection confidence to keep a result —
    # TODO confirm against the detection stage.
    confidence_threshold: float = 0.3
    # Class labels of interest. Built through a factory so every instance
    # owns its own list and mutations do not leak between instances.
    target_classes: list[str] = field(default_factory=lambda: ["logo", "text"])
|
||||
|
||||
|
||||
@dataclass
class OCRConfig:
    """Settings for the OCR stage of the pipeline.

    Both fields have defaults, so ``OCRConfig()`` is a valid configuration.
    """

    # Language codes handed to the OCR stage. Built through a factory so
    # every instance owns its own list.
    languages: list[str] = field(default_factory=lambda: ["en"])
    # Presumably the minimum OCR confidence for a text result to be kept —
    # TODO confirm against the OCR stage.
    min_confidence: float = 0.5
|
||||
|
||||
|
||||
@dataclass
class ResolverConfig:
    """Settings for the brand-resolver stage of the pipeline."""

    # Presumably a fuzzy-match score cutoff (the default suggests a 0-100
    # scale) — TODO confirm against the resolver implementation.
    fuzzy_threshold: int = 75
|
||||
|
||||
|
||||
@dataclass
class BrandDictionary:
    """Maps canonical brand name → list of known aliases/spellings."""

    # Keyed by canonical brand name; each value lists that brand's aliases.
    # Defaults to a fresh empty dict per instance.
    brands: dict[str, list[str]] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
class CropContext:
    """Context describing one image crop, passed to ``vlm_prompt``.

    ``image`` is the only required field; the two text fields default to
    empty strings.
    """

    # Raw bytes of the crop; the encoding/format is not specified here.
    image: bytes
    # Presumably text found near the crop (e.g. from OCR) — TODO confirm
    # against the code that builds this context.
    surrounding_text: str = ""
    # Presumably a free-form description of where the crop sits in the
    # frame — TODO confirm against the code that builds this context.
    position_hint: str = ""
|
||||
|
||||
|
||||
class ContentTypeProfile(Protocol):
    """Structural interface every content-type profile must satisfy.

    A profile supplies the per-stage configuration objects, a brand
    dictionary, the VLM prompt text, and the aggregation step. Any class
    with a matching ``name`` attribute and these methods conforms; it does
    not need to inherit from this protocol.
    """

    # Identifier of the profile.
    name: str

    def frame_extraction_config(self) -> FrameExtractionConfig:
        """Return the frame-extraction settings for this profile."""
        ...

    def scene_filter_config(self) -> SceneFilterConfig:
        """Return the scene-filter settings for this profile."""
        ...

    def detection_config(self) -> DetectionConfig:
        """Return the detection settings for this profile."""
        ...

    def ocr_config(self) -> OCRConfig:
        """Return the OCR settings for this profile."""
        ...

    def brand_dictionary(self) -> BrandDictionary:
        """Return the brand dictionary for this profile."""
        ...

    def resolver_config(self) -> ResolverConfig:
        """Return the resolver settings for this profile."""
        ...

    def vlm_prompt(self, crop_context: CropContext) -> str:
        """Return the VLM prompt text for the given crop context."""
        ...

    def aggregate(self, detections: list[BrandDetection]) -> DetectionReport:
        """Combine the given detections into a ``DetectionReport``."""
        ...

    def auxiliary_detections(self, source: str) -> list[BrandDetection]:
        """Return additional detections for ``source``.

        NOTE(review): what ``source`` identifies (path, URL, id) is not
        visible in this module — confirm against implementers.
        """
        ...
|
||||
Reference in New Issue
Block a user