This commit is contained in:
2026-03-23 11:13:30 -03:00
parent 8186bb5fe6
commit 71fd0510de
34 changed files with 1373 additions and 104 deletions

71
detect/profiles/base.py Normal file
View File

@@ -0,0 +1,71 @@
"""
ContentTypeProfile protocol and config dataclasses.
The pipeline graph is fixed — what varies per content type is configuration
and hooks. Each profile provides stage configs, a brand dictionary,
VLM prompt templates, and an aggregation strategy.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Protocol
from detect.models import BrandDetection, DetectionReport
@dataclass
class FrameExtractionConfig:
    """Settings for sampling frames out of a source video."""

    # Frames sampled per second of video.
    fps: float = field(default=2.0)
    # Hard cap on the total number of frames extracted.
    max_frames: int = field(default=500)
@dataclass
class SceneFilterConfig:
    """Settings for dropping near-duplicate frames between scene changes."""

    # Maximum Hamming distance between frame hashes for two frames to
    # count as the same scene (presumably over a perceptual hash).
    hamming_threshold: int = field(default=8)
    # Set False to pass every frame through unfiltered.
    enabled: bool = field(default=True)
@dataclass
class DetectionConfig:
    """Settings for the object-detection stage."""

    # Identifier / weights file for the detector model.
    model_name: str = field(default="yolov8n.pt")
    # Detections scoring below this confidence are discarded.
    confidence_threshold: float = field(default=0.3)
    # Class labels of interest; each instance gets its own fresh list.
    target_classes: list[str] = field(default_factory=lambda: list(("logo", "text")))
@dataclass
class OCRConfig:
    """Settings for the OCR stage."""

    # Language codes to run OCR with; fresh list per instance.
    languages: list[str] = field(default_factory=lambda: list(("en",)))
    # OCR results below this confidence are discarded.
    min_confidence: float = field(default=0.5)
@dataclass
class ResolverConfig:
    """Settings for fuzzy brand-name resolution."""

    # Minimum fuzzy-match score to accept a candidate brand
    # (presumably on a 0-100 scale — confirm with the resolver).
    fuzzy_threshold: int = field(default=75)
@dataclass
class BrandDictionary:
    """Lookup table of canonical brand names to their known aliases/spellings."""

    # canonical brand name -> list of aliases; fresh dict per instance.
    brands: dict[str, list[str]] = field(default_factory=dict)
@dataclass
class CropContext:
    """Context for one detected crop, handed to a profile's VLM prompt builder."""

    # Encoded image bytes of the crop.
    image: bytes
    # Text found near the crop in the source frame (may be empty).
    surrounding_text: str = field(default="")
    # Free-form positional hint for where the crop sits in the frame.
    position_hint: str = field(default="")
class ContentTypeProfile(Protocol):
    """Structural interface a content-type profile must satisfy.

    The pipeline graph is fixed; what varies per content type is the
    configuration and hooks a profile supplies: per-stage configs, a
    brand dictionary, VLM prompting, and result aggregation.
    """

    # Identifier for this profile / content type.
    name: str

    # --- per-stage configuration hooks ---
    def frame_extraction_config(self) -> FrameExtractionConfig: ...
    def scene_filter_config(self) -> SceneFilterConfig: ...
    def detection_config(self) -> DetectionConfig: ...
    def ocr_config(self) -> OCRConfig: ...
    def brand_dictionary(self) -> BrandDictionary: ...
    def resolver_config(self) -> ResolverConfig: ...

    # Build the VLM prompt text for one crop plus its surrounding context.
    def vlm_prompt(self, crop_context: CropContext) -> str: ...

    # Fold individual brand detections into a final report
    # (the profile's aggregation strategy).
    def aggregate(self, detections: list[BrandDetection]) -> DetectionReport: ...

    # Detections obtained outside the visual pipeline; semantics of
    # `source` are not shown here — TODO confirm with implementations.
    def auxiliary_detections(self, source: str) -> list[BrandDetection]: ...