This commit is contained in:
2026-03-30 07:22:14 -03:00
parent d0707333fd
commit 4220b0418e
182 changed files with 3668 additions and 5231 deletions

View File

@@ -13,7 +13,7 @@ Basic CRUD (create, get, update, delete) goes directly through the session:
from .connection import get_session, create_tables
from .tables import MediaAsset, Job, Timeline, Checkpoint, Brand
from .models import MediaAsset, Job, Timeline, Checkpoint, Brand
from .assets import list_assets, get_asset_filenames
from .job import list_jobs

View File

@@ -7,7 +7,7 @@ from uuid import UUID
from sqlmodel import Session, select
from .tables import MediaAsset
from .models import MediaAsset
def list_assets(session: Session, status: Optional[str] = None, search: Optional[str] = None) -> list[MediaAsset]:

View File

@@ -7,7 +7,7 @@ from uuid import UUID
from sqlmodel import Session, select
from .tables import Brand
from .models import Brand
def get_or_create_brand(session: Session, canonical_name: str,

View File

@@ -6,7 +6,7 @@ from uuid import UUID
from sqlmodel import Session, select
from .tables import Checkpoint
from .models import Checkpoint
def get_latest_checkpoint(session: Session, timeline_id: UUID, parent_id: UUID | None = None) -> Checkpoint | None:

View File

@@ -30,5 +30,5 @@ def get_session() -> Session:
def create_tables():
"""Create all SQLModel tables.

The table-definition modules are imported only for their import-time side
effect: each ``SQLModel`` subclass declared with ``table=True`` registers
itself on ``SQLModel.metadata``. ``create_all`` is then run against the
engine returned by ``get_engine()``.
"""
from sqlmodel import SQLModel
from . import tables # noqa — registers all table classes
from . import models # noqa — registers all table classes
SQLModel.metadata.create_all(get_engine())

View File

@@ -0,0 +1,142 @@
{
"name": "soccer_broadcast",
"pipeline": {
"name": "soccer_broadcast",
"profile_name": "soccer_broadcast",
"stages": [
{
"name": "extract_frames",
"branch": "trunk"
},
{
"name": "filter_scenes",
"branch": "trunk"
},
{
"name": "field_segmentation",
"branch": "trunk"
},
{
"name": "detect_edges",
"branch": "hoarding"
},
{
"name": "detect_objects",
"branch": "objects"
},
{
"name": "preprocess"
},
{
"name": "run_ocr"
},
{
"name": "match_brands"
},
{
"name": "escalate_vlm"
},
{
"name": "escalate_cloud"
},
{
"name": "compile_report"
}
],
"edges": [
{
"source": "extract_frames",
"target": "filter_scenes"
},
{
"source": "filter_scenes",
"target": "field_segmentation"
},
{
"source": "field_segmentation",
"target": "detect_edges"
},
{
"source": "field_segmentation",
"target": "detect_objects"
},
{
"source": "detect_edges",
"target": "preprocess"
},
{
"source": "detect_objects",
"target": "preprocess"
},
{
"source": "preprocess",
"target": "run_ocr"
},
{
"source": "run_ocr",
"target": "match_brands"
},
{
"source": "match_brands",
"target": "escalate_vlm"
},
{
"source": "escalate_vlm",
"target": "escalate_cloud"
},
{
"source": "escalate_cloud",
"target": "compile_report"
}
]
},
"configs": {
"extract_frames": {
"fps": 2.0,
"max_frames": 500
},
"filter_scenes": {
"hamming_threshold": 8,
"enabled": true
},
"field_segmentation": {
"enabled": true,
"hue_low": 30,
"hue_high": 85,
"sat_low": 30,
"sat_high": 255,
"val_low": 30,
"val_high": 255,
"morph_kernel": 15,
"min_area_ratio": 0.05
},
"detect_edges": {
"enabled": true,
"edge_canny_low": 50,
"edge_canny_high": 150,
"edge_hough_threshold": 80,
"edge_hough_min_length": 100,
"edge_hough_max_gap": 10,
"edge_pair_max_distance": 200,
"edge_pair_min_distance": 15
},
"detect_objects": {
"model_name": "yolov8n.pt",
"confidence_threshold": 0.3,
"target_classes": []
},
"run_ocr": {
"languages": [
"en",
"es"
],
"min_confidence": 0.5
},
"match_brands": {
"fuzzy_threshold": 75
},
"escalate_vlm": {
"vlm_prompt_template": "Identify the brand or sponsor visible in this cropped region from a soccer broadcast.{hint}{text} Respond with: brand, confidence (0-1), reasoning."
}
}
}

View File

@@ -7,7 +7,7 @@ from uuid import UUID
from sqlmodel import Session, select
from .tables import Job
from .models import Job
def list_jobs(session: Session, parent_id: Optional[UUID] = None, status: Optional[str] = None) -> list[Job]:

View File

@@ -44,7 +44,7 @@ class SourceType(str, Enum):
class MediaAsset(SQLModel, table=True):
"""A video/audio file registered in the system."""
__tablename__ = "media_assets"
__tablename__ = "media_asset"
id: UUID = Field(default_factory=uuid4, primary_key=True)
filename: str
@@ -67,7 +67,7 @@ class MediaAsset(SQLModel, table=True):
class TranscodePreset(SQLModel, table=True):
"""A reusable transcoding configuration (like Handbrake presets)."""
__tablename__ = "transcode_presets"
__tablename__ = "transcode_preset"
id: UUID = Field(default_factory=uuid4, primary_key=True)
name: str
@@ -90,12 +90,13 @@ class TranscodePreset(SQLModel, table=True):
class Job(SQLModel, table=True):
"""A pipeline job."""
__tablename__ = "jobs"
__tablename__ = "job"
id: UUID = Field(default_factory=uuid4, primary_key=True)
source_asset_id: UUID = Field(index=True)
video_path: str
profile_name: str = "soccer_broadcast"
timeline_id: Optional[UUID] = None
parent_id: Optional[UUID] = None
run_type: RunType = "initial"
config_overrides: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
@@ -107,7 +108,6 @@ class Job(SQLModel, table=True):
brands_found: int = 0
cloud_llm_calls: int = 0
estimated_cost_usd: float = 0.0
celery_task_id: Optional[str] = None
priority: int = 0
created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
started_at: Optional[datetime] = None
@@ -115,7 +115,7 @@ class Job(SQLModel, table=True):
class Timeline(SQLModel, table=True):
"""The frame sequence from a source video."""
__tablename__ = "timelines"
__tablename__ = "timeline"
id: UUID = Field(default_factory=uuid4, primary_key=True)
source_asset_id: Optional[UUID] = Field(default=None, index=True)
@@ -129,10 +129,11 @@ class Timeline(SQLModel, table=True):
class Checkpoint(SQLModel, table=True):
"""A snapshot of pipeline state on a timeline."""
__tablename__ = "checkpoints"
__tablename__ = "checkpoint"
id: UUID = Field(default_factory=uuid4, primary_key=True)
timeline_id: UUID
job_id: Optional[UUID] = Field(default=None, index=True)
parent_id: Optional[UUID] = None
stage_outputs: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
config_overrides: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
@@ -143,7 +144,7 @@ class Checkpoint(SQLModel, table=True):
class Brand(SQLModel, table=True):
"""A brand discovered or registered in the system."""
__tablename__ = "brands"
__tablename__ = "brand"
id: UUID = Field(default_factory=uuid4, primary_key=True)
canonical_name: str = Field(index=True)
@@ -154,3 +155,12 @@ class Brand(SQLModel, table=True):
total_airings: int = 0
created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
updated_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
class Profile(SQLModel, table=True):
    """A content type profile.

    One row per profile (for example ``soccer_broadcast``). Rows are looked up
    by ``name``, so that column is indexed and constrained to be unique.
    """

    __tablename__ = "profile"

    id: UUID = Field(default_factory=uuid4, primary_key=True)
    # Lookup key used when seeding; unique so a name can never map to two rows.
    name: str = Field(index=True, unique=True)
    # Pipeline description stored as JSON. In the seed fixtures this holds the
    # stage list and the edges between stages.
    pipeline: Dict[str, Any] = Field(
        default_factory=dict,
        sa_column=Column(JSON, nullable=False, server_default='{}'),
    )
    # Per-stage settings stored as JSON. In the seed fixtures this is keyed by
    # stage name.
    configs: Dict[str, Any] = Field(
        default_factory=dict,
        sa_column=Column(JSON, nullable=False, server_default='{}'),
    )

43
core/db/seed.py Normal file
View File

@@ -0,0 +1,43 @@
"""
Seed data — insert initial profile rows if they don't exist.
Called on startup after create_tables().
"""
import json
import logging
from pathlib import Path
logger = logging.getLogger(__name__)
SEED_DIR = Path(__file__).parent / "fixtures"
def seed_profiles():
    """Insert seed profiles from JSON fixtures if not already present.

    Every ``*.json`` file in ``SEED_DIR`` must contain an object with a
    ``name`` key; ``pipeline`` and ``configs`` are optional and default to
    empty objects. A fixture whose name already has a ``Profile`` row is
    skipped, so running this again does not create duplicates.

    Raises:
        KeyError: a fixture has no ``name`` key.
        json.JSONDecodeError: a fixture is not valid JSON.
    """
    # Imported lazily, like the package-local modules below.
    from sqlmodel import select

    from .connection import get_session
    from .models import Profile

    # Sorted so seeding order and log output do not depend on the filesystem.
    fixtures = sorted(SEED_DIR.glob("*.json"))
    if not fixtures:
        return

    with get_session() as session:
        for fixture_path in fixtures:
            # Fixtures are read as UTF-8 regardless of the platform locale.
            data = json.loads(fixture_path.read_text(encoding="utf-8"))
            name = data["name"]

            # Same select()/exec() style as the other query helpers in the
            # package, instead of the legacy session.query() API.
            existing = session.exec(
                select(Profile).where(Profile.name == name)
            ).first()
            if existing:
                logger.debug("Profile %s already exists, skipping seed", name)
                continue

            session.add(
                Profile(
                    name=name,
                    pipeline=data.get("pipeline", {}),
                    configs=data.get("configs", {}),
                )
            )
            logger.info("Seeded profile: %s", name)

        # One commit for the whole batch of newly added profiles.
        session.commit()

View File

@@ -1,96 +0,0 @@
"""
SQLModel table definitions.
Generated by modelgen from core/schema/models/. Do not edit directly.
"""
from __future__ import annotations
from datetime import datetime
from typing import Any, Dict, List, Optional
from uuid import UUID, uuid4
from sqlalchemy import JSON
from sqlmodel import Column, Field, SQLModel
class MediaAsset(SQLModel, table=True):
"""A video/audio file registered in the system (table ``media_asset``)."""
__tablename__ = "media_asset"
# Primary key, generated in Python with uuid4 when the object is created.
id: UUID = Field(default_factory=uuid4, primary_key=True)
# Required columns: no default is declared for either.
filename: str
path: str
status: str = "pending"
size_bytes: int = 0
duration_seconds: float = 0.0
# Optional media properties; each defaults to None.
width: Optional[int] = None
height: Optional[int] = None
fps: Optional[float] = None
codec: Optional[str] = None
# Filled in Python at object creation with a naive UTC timestamp.
# NOTE(review): datetime.utcnow is deprecated since Python 3.12.
created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
class Job(SQLModel, table=True):
"""A pipeline job (table ``job``)."""
__tablename__ = "job"
# Primary key, generated in Python with uuid4 when the object is created.
id: UUID = Field(default_factory=uuid4, primary_key=True)
# Indexed plain UUID; no foreign-key constraint is declared here.
# Presumably refers to a MediaAsset id — confirm against callers.
source_asset_id: UUID = Field(index=True)
video_path: str
profile_name: str = "soccer_broadcast"
# Indexed, optional; presumably the job this run derives from — confirm.
parent_id: Optional[UUID] = Field(default=None, index=True)
run_type: str = "initial"
# Stored as a non-nullable JSON column with '{}' as the server-side default.
config_overrides: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
# Execution state.
status: str = "pending"
current_stage: Optional[str] = None
progress: float = 0.0
error_message: Optional[str] = None
# Result counters, all starting at zero.
total_detections: int = 0
brands_found: int = 0
cloud_llm_calls: int = 0
estimated_cost_usd: float = 0.0
priority: int = 0
# created_at is filled in Python at object creation (naive UTC); the other
# two timestamps default to None.
# NOTE(review): datetime.utcnow is deprecated since Python 3.12.
created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
started_at: Optional[datetime] = None
completed_at: Optional[datetime] = None
class Timeline(SQLModel, table=True):
"""The frame sequence from a source video (table ``timeline``)."""
__tablename__ = "timeline"
# Primary key, generated in Python with uuid4 when the object is created.
id: UUID = Field(default_factory=uuid4, primary_key=True)
# Indexed, optional plain UUID; no foreign-key constraint is declared here.
source_asset_id: Optional[UUID] = Field(default=None, index=True)
source_video: str = ""
profile_name: str = ""
fps: float = 2.0
frames_prefix: str = ""
# Both stored as non-nullable JSON columns with an empty object / empty
# array as the server-side default.
frames_manifest: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
frames_meta: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
# Filled in Python at object creation with a naive UTC timestamp.
# NOTE(review): datetime.utcnow is deprecated since Python 3.12.
created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
class Checkpoint(SQLModel, table=True):
"""A snapshot of pipeline state on a timeline (table ``checkpoint``)."""
__tablename__ = "checkpoint"
# Primary key, generated in Python with uuid4 when the object is created.
id: UUID = Field(default_factory=uuid4, primary_key=True)
# Indexed plain UUIDs; no foreign-key constraints are declared here.
# parent_id is optional — presumably the preceding checkpoint; confirm.
timeline_id: UUID = Field(index=True)
parent_id: Optional[UUID] = Field(default=None, index=True)
# All three are non-nullable JSON columns with '{}' as the server default.
stage_outputs: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
config_overrides: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
stats: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
is_scenario: bool = False
scenario_label: str = ""
# Filled in Python at object creation with a naive UTC timestamp.
# NOTE(review): datetime.utcnow is deprecated since Python 3.12.
created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
class Brand(SQLModel, table=True):
"""A brand discovered or registered in the system (table ``brand``)."""
__tablename__ = "brand"
# Primary key, generated in Python with uuid4 when the object is created.
id: UUID = Field(default_factory=uuid4, primary_key=True)
# Indexed; note that no uniqueness constraint is declared.
canonical_name: str = Field(index=True)
# Non-nullable JSON column with '[]' as the server-side default.
aliases: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
source: str = "ocr"
confirmed: bool = False
# Non-nullable JSON column with '[]' as the server-side default.
airings: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
total_airings: int = 0
# Both timestamps are filled in Python at object creation (naive UTC).
# NOTE(review): no onupdate hook is declared, so updated_at presumably has to
# be refreshed by callers — confirm. datetime.utcnow is also deprecated since
# Python 3.12.
created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
updated_at: Optional[datetime] = Field(default_factory=datetime.utcnow)