phase 4
This commit is contained in:
@@ -13,7 +13,7 @@ Basic CRUD (create, get, update, delete) goes directly through the session:
|
||||
|
||||
from .connection import get_session, create_tables
|
||||
|
||||
from .tables import MediaAsset, Job, Timeline, Checkpoint, Brand
|
||||
from .models import MediaAsset, Job, Timeline, Checkpoint, Brand
|
||||
|
||||
from .assets import list_assets, get_asset_filenames
|
||||
from .job import list_jobs
|
||||
|
||||
@@ -7,7 +7,7 @@ from uuid import UUID
|
||||
|
||||
from sqlmodel import Session, select
|
||||
|
||||
from .tables import MediaAsset
|
||||
from .models import MediaAsset
|
||||
|
||||
|
||||
def list_assets(session: Session, status: Optional[str] = None, search: Optional[str] = None) -> list[MediaAsset]:
|
||||
|
||||
@@ -7,7 +7,7 @@ from uuid import UUID
|
||||
|
||||
from sqlmodel import Session, select
|
||||
|
||||
from .tables import Brand
|
||||
from .models import Brand
|
||||
|
||||
|
||||
def get_or_create_brand(session: Session, canonical_name: str,
|
||||
|
||||
@@ -6,7 +6,7 @@ from uuid import UUID
|
||||
|
||||
from sqlmodel import Session, select
|
||||
|
||||
from .tables import Checkpoint
|
||||
from .models import Checkpoint
|
||||
|
||||
|
||||
def get_latest_checkpoint(session: Session, timeline_id: UUID, parent_id: UUID | None = None) -> Checkpoint | None:
|
||||
|
||||
@@ -30,5 +30,5 @@ def get_session() -> Session:
|
||||
def create_tables():
|
||||
"""Create all SQLModel tables."""
|
||||
from sqlmodel import SQLModel
|
||||
from . import tables # noqa — registers all table classes
|
||||
from . import models # noqa — registers all table classes
|
||||
SQLModel.metadata.create_all(get_engine())
|
||||
|
||||
142
core/db/fixtures/soccer_broadcast.json
Normal file
142
core/db/fixtures/soccer_broadcast.json
Normal file
@@ -0,0 +1,142 @@
|
||||
{
|
||||
"name": "soccer_broadcast",
|
||||
"pipeline": {
|
||||
"name": "soccer_broadcast",
|
||||
"profile_name": "soccer_broadcast",
|
||||
"stages": [
|
||||
{
|
||||
"name": "extract_frames",
|
||||
"branch": "trunk"
|
||||
},
|
||||
{
|
||||
"name": "filter_scenes",
|
||||
"branch": "trunk"
|
||||
},
|
||||
{
|
||||
"name": "field_segmentation",
|
||||
"branch": "trunk"
|
||||
},
|
||||
{
|
||||
"name": "detect_edges",
|
||||
"branch": "hoarding"
|
||||
},
|
||||
{
|
||||
"name": "detect_objects",
|
||||
"branch": "objects"
|
||||
},
|
||||
{
|
||||
"name": "preprocess"
|
||||
},
|
||||
{
|
||||
"name": "run_ocr"
|
||||
},
|
||||
{
|
||||
"name": "match_brands"
|
||||
},
|
||||
{
|
||||
"name": "escalate_vlm"
|
||||
},
|
||||
{
|
||||
"name": "escalate_cloud"
|
||||
},
|
||||
{
|
||||
"name": "compile_report"
|
||||
}
|
||||
],
|
||||
"edges": [
|
||||
{
|
||||
"source": "extract_frames",
|
||||
"target": "filter_scenes"
|
||||
},
|
||||
{
|
||||
"source": "filter_scenes",
|
||||
"target": "field_segmentation"
|
||||
},
|
||||
{
|
||||
"source": "field_segmentation",
|
||||
"target": "detect_edges"
|
||||
},
|
||||
{
|
||||
"source": "field_segmentation",
|
||||
"target": "detect_objects"
|
||||
},
|
||||
{
|
||||
"source": "detect_edges",
|
||||
"target": "preprocess"
|
||||
},
|
||||
{
|
||||
"source": "detect_objects",
|
||||
"target": "preprocess"
|
||||
},
|
||||
{
|
||||
"source": "preprocess",
|
||||
"target": "run_ocr"
|
||||
},
|
||||
{
|
||||
"source": "run_ocr",
|
||||
"target": "match_brands"
|
||||
},
|
||||
{
|
||||
"source": "match_brands",
|
||||
"target": "escalate_vlm"
|
||||
},
|
||||
{
|
||||
"source": "escalate_vlm",
|
||||
"target": "escalate_cloud"
|
||||
},
|
||||
{
|
||||
"source": "escalate_cloud",
|
||||
"target": "compile_report"
|
||||
}
|
||||
]
|
||||
},
|
||||
"configs": {
|
||||
"extract_frames": {
|
||||
"fps": 2.0,
|
||||
"max_frames": 500
|
||||
},
|
||||
"filter_scenes": {
|
||||
"hamming_threshold": 8,
|
||||
"enabled": true
|
||||
},
|
||||
"field_segmentation": {
|
||||
"enabled": true,
|
||||
"hue_low": 30,
|
||||
"hue_high": 85,
|
||||
"sat_low": 30,
|
||||
"sat_high": 255,
|
||||
"val_low": 30,
|
||||
"val_high": 255,
|
||||
"morph_kernel": 15,
|
||||
"min_area_ratio": 0.05
|
||||
},
|
||||
"detect_edges": {
|
||||
"enabled": true,
|
||||
"edge_canny_low": 50,
|
||||
"edge_canny_high": 150,
|
||||
"edge_hough_threshold": 80,
|
||||
"edge_hough_min_length": 100,
|
||||
"edge_hough_max_gap": 10,
|
||||
"edge_pair_max_distance": 200,
|
||||
"edge_pair_min_distance": 15
|
||||
},
|
||||
"detect_objects": {
|
||||
"model_name": "yolov8n.pt",
|
||||
"confidence_threshold": 0.3,
|
||||
"target_classes": []
|
||||
},
|
||||
"run_ocr": {
|
||||
"languages": [
|
||||
"en",
|
||||
"es"
|
||||
],
|
||||
"min_confidence": 0.5
|
||||
},
|
||||
"match_brands": {
|
||||
"fuzzy_threshold": 75
|
||||
},
|
||||
"escalate_vlm": {
|
||||
"vlm_prompt_template": "Identify the brand or sponsor visible in this cropped region from a soccer broadcast.{hint}{text} Respond with: brand, confidence (0-1), reasoning."
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -7,7 +7,7 @@ from uuid import UUID
|
||||
|
||||
from sqlmodel import Session, select
|
||||
|
||||
from .tables import Job
|
||||
from .models import Job
|
||||
|
||||
|
||||
def list_jobs(session: Session, parent_id: Optional[UUID] = None, status: Optional[str] = None) -> list[Job]:
|
||||
|
||||
@@ -44,7 +44,7 @@ class SourceType(str, Enum):
|
||||
|
||||
class MediaAsset(SQLModel, table=True):
|
||||
"""A video/audio file registered in the system."""
|
||||
__tablename__ = "media_assets"
|
||||
__tablename__ = "media_asset"
|
||||
|
||||
id: UUID = Field(default_factory=uuid4, primary_key=True)
|
||||
filename: str
|
||||
@@ -67,7 +67,7 @@ class MediaAsset(SQLModel, table=True):
|
||||
|
||||
class TranscodePreset(SQLModel, table=True):
|
||||
"""A reusable transcoding configuration (like Handbrake presets)."""
|
||||
__tablename__ = "transcode_presets"
|
||||
__tablename__ = "transcode_preset"
|
||||
|
||||
id: UUID = Field(default_factory=uuid4, primary_key=True)
|
||||
name: str
|
||||
@@ -90,12 +90,13 @@ class TranscodePreset(SQLModel, table=True):
|
||||
|
||||
class Job(SQLModel, table=True):
|
||||
"""A pipeline job."""
|
||||
__tablename__ = "jobs"
|
||||
__tablename__ = "job"
|
||||
|
||||
id: UUID = Field(default_factory=uuid4, primary_key=True)
|
||||
source_asset_id: UUID = Field(index=True)
|
||||
video_path: str
|
||||
profile_name: str = "soccer_broadcast"
|
||||
timeline_id: Optional[UUID] = None
|
||||
parent_id: Optional[UUID] = None
|
||||
run_type: RunType = "initial"
|
||||
config_overrides: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
|
||||
@@ -107,7 +108,6 @@ class Job(SQLModel, table=True):
|
||||
brands_found: int = 0
|
||||
cloud_llm_calls: int = 0
|
||||
estimated_cost_usd: float = 0.0
|
||||
celery_task_id: Optional[str] = None
|
||||
priority: int = 0
|
||||
created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
|
||||
started_at: Optional[datetime] = None
|
||||
@@ -115,7 +115,7 @@ class Job(SQLModel, table=True):
|
||||
|
||||
class Timeline(SQLModel, table=True):
|
||||
"""The frame sequence from a source video."""
|
||||
__tablename__ = "timelines"
|
||||
__tablename__ = "timeline"
|
||||
|
||||
id: UUID = Field(default_factory=uuid4, primary_key=True)
|
||||
source_asset_id: Optional[UUID] = Field(default=None, index=True)
|
||||
@@ -129,10 +129,11 @@ class Timeline(SQLModel, table=True):
|
||||
|
||||
class Checkpoint(SQLModel, table=True):
|
||||
"""A snapshot of pipeline state on a timeline."""
|
||||
__tablename__ = "checkpoints"
|
||||
__tablename__ = "checkpoint"
|
||||
|
||||
id: UUID = Field(default_factory=uuid4, primary_key=True)
|
||||
timeline_id: UUID
|
||||
job_id: Optional[UUID] = Field(default=None, index=True)
|
||||
parent_id: Optional[UUID] = None
|
||||
stage_outputs: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
|
||||
config_overrides: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
|
||||
@@ -143,7 +144,7 @@ class Checkpoint(SQLModel, table=True):
|
||||
|
||||
class Brand(SQLModel, table=True):
|
||||
"""A brand discovered or registered in the system."""
|
||||
__tablename__ = "brands"
|
||||
__tablename__ = "brand"
|
||||
|
||||
id: UUID = Field(default_factory=uuid4, primary_key=True)
|
||||
canonical_name: str = Field(index=True)
|
||||
@@ -154,3 +155,12 @@ class Brand(SQLModel, table=True):
|
||||
total_airings: int = 0
|
||||
created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
|
||||
updated_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
|
||||
|
||||
class Profile(SQLModel, table=True):
|
||||
"""A content type profile."""
|
||||
__tablename__ = "profile"
|
||||
|
||||
id: UUID = Field(default_factory=uuid4, primary_key=True)
|
||||
name: str
|
||||
pipeline: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
|
||||
configs: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
|
||||
|
||||
43
core/db/seed.py
Normal file
43
core/db/seed.py
Normal file
@@ -0,0 +1,43 @@
|
||||
"""
|
||||
Seed data — insert initial profile rows if they don't exist.
|
||||
|
||||
Called on startup after create_tables().
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SEED_DIR = Path(__file__).parent / "fixtures"
|
||||
|
||||
|
||||
def seed_profiles():
    """Insert seed profiles from JSON fixtures if not already present.

    Each ``*.json`` file directly inside ``SEED_DIR`` is loaded as one profile.
    The fixture's ``"name"`` key is required; ``"pipeline"`` and ``"configs"``
    are optional and default to empty dicts. A fixture whose name matches an
    existing ``Profile`` row is skipped, so repeated calls do not insert
    duplicates. All new rows are committed together once every fixture has
    been processed.

    Raises:
        KeyError: if a fixture has no ``"name"`` key.
        json.JSONDecodeError: if a fixture is not valid JSON.
    """
    # Imported lazily, as in the rest of this package's startup helpers.
    from .connection import get_session
    from .models import Profile

    # Path.glob yields entries in arbitrary order; sort so the seeding order
    # (and the resulting log output) is the same on every run.
    fixtures = sorted(SEED_DIR.glob("*.json"))
    if not fixtures:
        return

    with get_session() as session:
        for fixture_path in fixtures:
            # JSON files are UTF-8; decode explicitly instead of relying on
            # the platform's locale encoding.
            data = json.loads(fixture_path.read_text(encoding="utf-8"))
            name = data["name"]

            existing = session.query(Profile).filter(Profile.name == name).first()
            if existing:
                logger.debug("Profile %s already exists, skipping seed", name)
                continue

            profile = Profile(
                name=name,
                pipeline=data.get("pipeline", {}),
                configs=data.get("configs", {}),
            )
            session.add(profile)
            logger.info("Seeded profile: %s", name)

        # Single commit for everything added above.
        session.commit()
|
||||
@@ -1,96 +0,0 @@
|
||||
"""
|
||||
SQLModel table definitions.
|
||||
|
||||
Generated by modelgen from core/schema/models/. Do not edit directly.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
from sqlalchemy import JSON
|
||||
from sqlmodel import Column, Field, SQLModel
|
||||
|
||||
|
||||
class MediaAsset(SQLModel, table=True):
|
||||
__tablename__ = "media_asset"
|
||||
|
||||
id: UUID = Field(default_factory=uuid4, primary_key=True)
|
||||
filename: str
|
||||
path: str
|
||||
status: str = "pending"
|
||||
size_bytes: int = 0
|
||||
duration_seconds: float = 0.0
|
||||
width: Optional[int] = None
|
||||
height: Optional[int] = None
|
||||
fps: Optional[float] = None
|
||||
codec: Optional[str] = None
|
||||
created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
|
||||
|
||||
|
||||
class Job(SQLModel, table=True):
|
||||
__tablename__ = "job"
|
||||
|
||||
id: UUID = Field(default_factory=uuid4, primary_key=True)
|
||||
source_asset_id: UUID = Field(index=True)
|
||||
video_path: str
|
||||
profile_name: str = "soccer_broadcast"
|
||||
parent_id: Optional[UUID] = Field(default=None, index=True)
|
||||
run_type: str = "initial"
|
||||
config_overrides: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
|
||||
status: str = "pending"
|
||||
current_stage: Optional[str] = None
|
||||
progress: float = 0.0
|
||||
error_message: Optional[str] = None
|
||||
total_detections: int = 0
|
||||
brands_found: int = 0
|
||||
cloud_llm_calls: int = 0
|
||||
estimated_cost_usd: float = 0.0
|
||||
priority: int = 0
|
||||
created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
|
||||
started_at: Optional[datetime] = None
|
||||
completed_at: Optional[datetime] = None
|
||||
|
||||
|
||||
class Timeline(SQLModel, table=True):
|
||||
__tablename__ = "timeline"
|
||||
|
||||
id: UUID = Field(default_factory=uuid4, primary_key=True)
|
||||
source_asset_id: Optional[UUID] = Field(default=None, index=True)
|
||||
source_video: str = ""
|
||||
profile_name: str = ""
|
||||
fps: float = 2.0
|
||||
frames_prefix: str = ""
|
||||
frames_manifest: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
|
||||
frames_meta: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
|
||||
created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
|
||||
|
||||
|
||||
class Checkpoint(SQLModel, table=True):
|
||||
__tablename__ = "checkpoint"
|
||||
|
||||
id: UUID = Field(default_factory=uuid4, primary_key=True)
|
||||
timeline_id: UUID = Field(index=True)
|
||||
parent_id: Optional[UUID] = Field(default=None, index=True)
|
||||
stage_outputs: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
|
||||
config_overrides: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
|
||||
stats: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
|
||||
is_scenario: bool = False
|
||||
scenario_label: str = ""
|
||||
created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
|
||||
|
||||
|
||||
class Brand(SQLModel, table=True):
|
||||
__tablename__ = "brand"
|
||||
|
||||
id: UUID = Field(default_factory=uuid4, primary_key=True)
|
||||
canonical_name: str = Field(index=True)
|
||||
aliases: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
|
||||
source: str = "ocr"
|
||||
confirmed: bool = False
|
||||
airings: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
|
||||
total_airings: int = 0
|
||||
created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
|
||||
updated_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
|
||||
Reference in New Issue
Block a user