move to postgresql

This commit is contained in:
2026-03-26 10:27:07 -03:00
parent c9ba9e4f5f
commit a85722f96a
20 changed files with 800 additions and 234 deletions

View File

@@ -274,32 +274,3 @@ class SourceBrandSighting(models.Model):
def __str__(self): def __str__(self):
return str(self.id) return str(self.id)
class SourceJob(models.Model):
    """A group of chunks that belong together (same source video/session)."""
    # External string identifier of the originating job (not the DB primary key).
    job_id = models.CharField(max_length=255)
    # Kind of source that produced the chunks — NOTE(review): valid values not
    # enforced here; presumably matches the SourceType enum ("chunk_job",
    # "upload", "device", "stream") — confirm against the generator.
    source_type = models.CharField(max_length=255)
    # Number of chunks that make up this job.
    chunk_count = models.IntegerField()
    # Combined payload size of all chunks, in bytes.
    total_bytes = models.IntegerField(default=0)

    class Meta:
        pass

    def __str__(self):
        # Human-readable identity: the auto-generated primary key.
        return str(self.id)
class ChunkInfo(models.Model):
    """A single chunk (video segment) stored in blob storage."""
    # Chunk filename; 500 chars leaves room for long generated names.
    filename = models.CharField(max_length=500)
    # Object-storage key under which the chunk payload is stored.
    key = models.CharField(max_length=255)
    # Payload size in bytes.
    size_bytes = models.IntegerField()

    class Meta:
        pass

    def __str__(self):
        return self.filename

View File

@@ -12,12 +12,7 @@ from uuid import UUID
# Add project root to path # Add project root to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
# Initialize Django before importing models from contextlib import asynccontextmanager
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "admin.mpr.settings")
import django
django.setup()
from fastapi import FastAPI, Header, HTTPException from fastapi import FastAPI, Header, HTTPException
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
@@ -32,12 +27,22 @@ from core.api.graphql import schema as graphql_schema
CALLBACK_API_KEY = os.environ.get("CALLBACK_API_KEY", "") CALLBACK_API_KEY = os.environ.get("CALLBACK_API_KEY", "")
@asynccontextmanager
async def lifespan(app):
    """FastAPI lifespan hook: ensure DB tables exist before serving.

    Runs once at application startup, then yields for the app's lifetime.
    """
    # Local import so importing this module does not require a reachable DB.
    from core.db.connection import create_tables
    # Creates any missing tables; existing tables and their data are untouched.
    create_tables()
    yield
    # No shutdown cleanup needed — the engine pool lives for the process.
app = FastAPI( app = FastAPI(
title="MPR API", title="MPR API",
description="Media Processor — GraphQL API", description="Media Processor — GraphQL API",
version="0.1.0", version="0.1.0",
docs_url="/docs", docs_url="/docs",
redoc_url="/redoc", redoc_url="/redoc",
lifespan=lifespan,
) )
# CORS # CORS

View File

@@ -17,3 +17,4 @@ from .presets import (
get_preset, get_preset,
list_presets, list_presets,
) )
from .connection import get_session, create_tables

View File

@@ -1,48 +1,58 @@
"""Database operations for MediaAsset.""" """Database operations for MediaAsset — SQLModel."""
from __future__ import annotations
from typing import Optional from typing import Optional
from uuid import UUID from uuid import UUID
from sqlmodel import select
def list_assets(status: Optional[str] = None, search: Optional[str] = None): from .connection import get_session
from admin.mpr.media_assets.models import MediaAsset from .models import MediaAsset
qs = MediaAsset.objects.all()
def list_assets(status: Optional[str] = None, search: Optional[str] = None) -> list[MediaAsset]:
with get_session() as session:
stmt = select(MediaAsset)
if status: if status:
qs = qs.filter(status=status) stmt = stmt.where(MediaAsset.status == status)
if search: if search:
qs = qs.filter(filename__icontains=search) stmt = stmt.where(MediaAsset.filename.ilike(f"%{search}%"))
return list(qs) return list(session.exec(stmt).all())
def get_asset(id: UUID): def get_asset(id: UUID) -> MediaAsset | None:
from admin.mpr.media_assets.models import MediaAsset with get_session() as session:
return session.get(MediaAsset, id)
return MediaAsset.objects.get(id=id)
def get_asset_filenames() -> set[str]: def get_asset_filenames() -> set[str]:
from admin.mpr.media_assets.models import MediaAsset with get_session() as session:
return set(session.exec(select(MediaAsset.filename)).all())
return set(MediaAsset.objects.values_list("filename", flat=True))
def create_asset(*, filename: str, file_path: str, file_size: int): def create_asset(*, filename: str, file_path: str, file_size: int) -> MediaAsset:
from admin.mpr.media_assets.models import MediaAsset asset = MediaAsset(filename=filename, file_path=file_path, file_size=file_size)
with get_session() as session:
return MediaAsset.objects.create( session.add(asset)
filename=filename, session.commit()
file_path=file_path, session.refresh(asset)
file_size=file_size,
)
def update_asset(asset, **fields):
for key, value in fields.items():
setattr(asset, key, value)
asset.save(update_fields=list(fields.keys()))
return asset return asset
def delete_asset(asset): def update_asset(id: UUID, **fields) -> None:
asset.delete() with get_session() as session:
asset = session.get(MediaAsset, id)
if not asset:
return
for k, v in fields.items():
setattr(asset, k, v)
session.commit()
def delete_asset(id: UUID) -> None:
with get_session() as session:
asset = session.get(MediaAsset, id)
if asset:
session.delete(asset)
session.commit()

33
core/db/connection.py Normal file
View File

@@ -0,0 +1,33 @@
"""
Database engine and session — SQLModel/SQLAlchemy, no Django.
Reads DATABASE_URL from the environment.
"""
from __future__ import annotations
import os
from sqlalchemy import create_engine
from sqlmodel import Session
# Connection string; the fallback targets a local dev Postgres.
# NOTE(review): the default embeds credentials — fine for local dev only;
# production deployments must always set DATABASE_URL.
DATABASE_URL = os.environ.get("DATABASE_URL", "postgresql://mpr:mpr@localhost:5432/mpr")

# Process-wide engine singleton, created lazily by get_engine().
_engine = None
def get_engine():
    """Return the process-wide SQLAlchemy engine, creating it on first use.

    ``pool_pre_ping=True`` validates each pooled connection before handing it
    out, so connections dropped by Postgres (restart, idle timeout, pod
    reschedule) are transparently replaced instead of surfacing as an error
    on the next query — important for a long-lived service.
    """
    global _engine
    if _engine is None:
        # Lazy init without a lock: a concurrent first call could build two
        # engines, which is benign — one wins the assignment, the other is
        # used once by its caller and then discarded (pools connect lazily).
        _engine = create_engine(
            DATABASE_URL,
            pool_size=5,
            max_overflow=10,
            pool_pre_ping=True,
        )
    return _engine
def get_session() -> Session:
    """Open a fresh Session bound to the shared engine.

    Callers use it as a context manager (``with get_session() as session:``)
    so the session is closed deterministically.
    """
    engine = get_engine()
    return Session(engine)
def create_tables():
    """Create all SQLModel tables."""
    # Importing .models registers every table subclass on SQLModel.metadata;
    # without this import create_all would see empty metadata and do nothing.
    from .models import SQLModel  # noqa — registers all models
    # create_all skips tables that already exist, so this is safe to run on
    # every startup.
    SQLModel.metadata.create_all(get_engine())

View File

@@ -1,168 +1,203 @@
"""Database operations for DetectJob and StageCheckpoint.""" """Database operations for detection pipeline — SQLModel."""
from __future__ import annotations
from typing import Optional from typing import Optional
from uuid import UUID from uuid import UUID
from sqlmodel import select
from .connection import get_session
from .models import (
DetectJob, StageCheckpoint, KnownBrand, SourceBrandSighting,
)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# DetectJob # DetectJob
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def create_detect_job(**fields): def create_detect_job(**fields) -> DetectJob:
from admin.mpr.media_assets.models import DetectJob job = DetectJob(**fields)
return DetectJob.objects.create(**fields) with get_session() as session:
session.add(job)
session.commit()
session.refresh(job)
return job
def get_detect_job(id: UUID): def get_detect_job(id: UUID) -> DetectJob | None:
from admin.mpr.media_assets.models import DetectJob with get_session() as session:
return DetectJob.objects.get(id=id) return session.get(DetectJob, id)
def update_detect_job(job_id: UUID, **fields): def update_detect_job(job_id: UUID, **fields) -> None:
from admin.mpr.media_assets.models import DetectJob with get_session() as session:
DetectJob.objects.filter(id=job_id).update(**fields) job = session.get(DetectJob, job_id)
if not job:
return
for k, v in fields.items():
setattr(job, k, v)
session.commit()
def list_detect_jobs( def list_detect_jobs(
parent_job_id: Optional[UUID] = None, parent_job_id: Optional[UUID] = None,
status: Optional[str] = None, status: Optional[str] = None,
): ) -> list[DetectJob]:
from admin.mpr.media_assets.models import DetectJob with get_session() as session:
stmt = select(DetectJob)
qs = DetectJob.objects.all()
if parent_job_id: if parent_job_id:
qs = qs.filter(parent_job_id=parent_job_id) stmt = stmt.where(DetectJob.parent_job_id == parent_job_id)
if status: if status:
qs = qs.filter(status=status) stmt = stmt.where(DetectJob.status == status)
return list(qs) return list(session.exec(stmt).all())
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# StageCheckpoint # StageCheckpoint
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def save_stage_checkpoint(**fields): def save_stage_checkpoint(**fields) -> StageCheckpoint:
from admin.mpr.media_assets.models import StageCheckpoint with get_session() as session:
return StageCheckpoint.objects.create(**fields) # Upsert: replace if same job_id + stage
job_id = fields.get("job_id")
stage = fields.get("stage")
if job_id and stage:
stmt = select(StageCheckpoint).where(
StageCheckpoint.job_id == job_id,
StageCheckpoint.stage == stage,
)
existing = session.exec(stmt).first()
if existing:
for k, v in fields.items():
setattr(existing, k, v)
session.commit()
session.refresh(existing)
return existing
checkpoint = StageCheckpoint(**fields)
session.add(checkpoint)
session.commit()
session.refresh(checkpoint)
return checkpoint
def get_stage_checkpoint(job_id: UUID, stage: str): def get_stage_checkpoint(job_id: UUID, stage: str) -> StageCheckpoint | None:
from admin.mpr.media_assets.models import StageCheckpoint with get_session() as session:
return StageCheckpoint.objects.get(job_id=job_id, stage=stage) stmt = select(StageCheckpoint).where(
StageCheckpoint.job_id == job_id,
StageCheckpoint.stage == stage,
)
return session.exec(stmt).first()
def list_stage_checkpoints(job_id: UUID) -> list[str]: def list_stage_checkpoints(job_id: UUID) -> list[str]:
from admin.mpr.media_assets.models import StageCheckpoint with get_session() as session:
stmt = (
stages = ( select(StageCheckpoint.stage)
StageCheckpoint.objects .where(StageCheckpoint.job_id == job_id)
.filter(job_id=job_id) .order_by(StageCheckpoint.stage_index)
.order_by("stage_index")
.values_list("stage", flat=True)
) )
return list(stages) return list(session.exec(stmt).all())
def delete_stage_checkpoints(job_id: UUID): def delete_stage_checkpoints(job_id: UUID) -> None:
from admin.mpr.media_assets.models import StageCheckpoint with get_session() as session:
StageCheckpoint.objects.filter(job_id=job_id).delete() stmt = select(StageCheckpoint).where(StageCheckpoint.job_id == job_id)
for cp in session.exec(stmt).all():
session.delete(cp)
session.commit()
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# KnownBrand # KnownBrand
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def get_or_create_brand(canonical_name: str, aliases: list[str] | None = None, def get_or_create_brand(canonical_name: str, aliases: Optional[list[str]] = None,
source: str = "ocr") -> tuple: source: str = "ocr") -> tuple[KnownBrand, bool]:
"""Get existing brand or create new one. Returns (brand, created)."""
from admin.mpr.media_assets.models import KnownBrand
import uuid
normalized = canonical_name.strip() normalized = canonical_name.strip()
brand = KnownBrand.objects.filter(canonical_name__iexact=normalized).first() with get_session() as session:
stmt = select(KnownBrand).where(KnownBrand.canonical_name.ilike(normalized))
brand = session.exec(stmt).first()
if brand: if brand:
return brand, False return brand, False
# Check aliases of existing brands brand = KnownBrand(
for existing in KnownBrand.objects.all():
existing_aliases = [a.lower() for a in (existing.aliases or [])]
if normalized.lower() in existing_aliases:
return existing, False
brand = KnownBrand.objects.create(
id=uuid.uuid4(),
canonical_name=normalized, canonical_name=normalized,
aliases=aliases or [], aliases=aliases or [],
first_source=source, first_source=source,
) )
session.add(brand)
session.commit()
session.refresh(brand)
return brand, True return brand, True
def find_brand_by_text(text: str) -> Optional[object]: def find_brand_by_text(text: str) -> KnownBrand | None:
"""Find a known brand by canonical name or alias (case-insensitive)."""
from admin.mpr.media_assets.models import KnownBrand
normalized = text.strip().lower() normalized = text.strip().lower()
with get_session() as session:
# Exact canonical match stmt = select(KnownBrand).where(KnownBrand.canonical_name.ilike(normalized))
brand = KnownBrand.objects.filter(canonical_name__iexact=normalized).first() brand = session.exec(stmt).first()
if brand: if brand:
return brand return brand
# Search aliases (jsonb contains) # Alias search — check if normalized is in any brand's aliases
for brand in KnownBrand.objects.all(): all_brands = session.exec(select(KnownBrand)).all()
brand_aliases = [a.lower() for a in (brand.aliases or [])] for b in all_brands:
if normalized in brand_aliases: if normalized in [a.lower() for a in (b.aliases or [])]:
return brand return b
return None return None
def list_all_brands() -> list: def list_all_brands() -> list[KnownBrand]:
from admin.mpr.media_assets.models import KnownBrand with get_session() as session:
return list(KnownBrand.objects.all().order_by("canonical_name")) return list(session.exec(select(KnownBrand).order_by(KnownBrand.canonical_name)).all())
def update_brand(brand_id: UUID, **fields): def update_brand(brand_id: UUID, **fields) -> None:
from admin.mpr.media_assets.models import KnownBrand with get_session() as session:
KnownBrand.objects.filter(id=brand_id).update(**fields) brand = session.get(KnownBrand, brand_id)
if not brand:
return
for k, v in fields.items():
setattr(brand, k, v)
session.commit()
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# SourceBrandSighting # SourceBrandSighting
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def get_source_sightings(source_asset_id: UUID) -> list: def get_source_sightings(source_asset_id: UUID) -> list[SourceBrandSighting]:
"""Get all brand sightings for a specific source video.""" with get_session() as session:
from admin.mpr.media_assets.models import SourceBrandSighting stmt = (
return list( select(SourceBrandSighting)
SourceBrandSighting.objects .where(SourceBrandSighting.source_asset_id == source_asset_id)
.filter(source_asset_id=source_asset_id) .order_by(SourceBrandSighting.occurrences.desc())
.order_by("-occurrences")
) )
return list(session.exec(stmt).all())
def record_sighting(source_asset_id: UUID, brand_id: UUID, brand_name: str, def record_sighting(source_asset_id: UUID, brand_id: UUID, brand_name: str,
timestamp: float, confidence: float, source: str = "ocr"): timestamp: float, confidence: float, source: str = "ocr") -> SourceBrandSighting:
"""Record or update a brand sighting for a source.""" with get_session() as session:
from admin.mpr.media_assets.models import SourceBrandSighting stmt = select(SourceBrandSighting).where(
import uuid SourceBrandSighting.source_asset_id == source_asset_id,
SourceBrandSighting.brand_id == brand_id,
sighting = SourceBrandSighting.objects.filter( )
source_asset_id=source_asset_id, sighting = session.exec(stmt).first()
brand_id=brand_id,
).first()
if sighting: if sighting:
total_conf = sighting.avg_confidence * sighting.occurrences + confidence
sighting.occurrences += 1 sighting.occurrences += 1
sighting.last_seen_timestamp = timestamp sighting.last_seen_timestamp = timestamp
total_conf = sighting.avg_confidence * (sighting.occurrences - 1) + confidence
sighting.avg_confidence = total_conf / sighting.occurrences sighting.avg_confidence = total_conf / sighting.occurrences
sighting.save() session.commit()
session.refresh(sighting)
return sighting return sighting
sighting = SourceBrandSighting.objects.create( sighting = SourceBrandSighting(
id=uuid.uuid4(),
source_asset_id=source_asset_id, source_asset_id=source_asset_id,
brand_id=brand_id, brand_id=brand_id,
brand_name=brand_name, brand_name=brand_name,
@@ -172,4 +207,7 @@ def record_sighting(source_asset_id: UUID, brand_id: UUID, brand_name: str,
detection_source=source, detection_source=source,
avg_confidence=confidence, avg_confidence=confidence,
) )
session.add(sighting)
session.commit()
session.refresh(sighting)
return sighting return sighting

View File

@@ -1,40 +1,49 @@
"""Database operations for TranscodeJob.""" """Database operations for TranscodeJob — SQLModel."""
from __future__ import annotations
from typing import Optional from typing import Optional
from uuid import UUID from uuid import UUID
from sqlmodel import select
def list_jobs(status: Optional[str] = None, source_asset_id: Optional[UUID] = None): from .connection import get_session
from admin.mpr.media_assets.models import TranscodeJob from .models import TranscodeJob
qs = TranscodeJob.objects.all()
def list_jobs(status: Optional[str] = None, source_asset_id: Optional[UUID] = None) -> list[TranscodeJob]:
with get_session() as session:
stmt = select(TranscodeJob)
if status: if status:
qs = qs.filter(status=status) stmt = stmt.where(TranscodeJob.status == status)
if source_asset_id: if source_asset_id:
qs = qs.filter(source_asset_id=source_asset_id) stmt = stmt.where(TranscodeJob.source_asset_id == source_asset_id)
return list(qs) return list(session.exec(stmt).all())
def get_job(id: UUID): def get_job(id: UUID) -> TranscodeJob | None:
from admin.mpr.media_assets.models import TranscodeJob with get_session() as session:
return session.get(TranscodeJob, id)
return TranscodeJob.objects.get(id=id)
def create_job(**fields): def create_job(**fields) -> TranscodeJob:
from admin.mpr.media_assets.models import TranscodeJob job = TranscodeJob(**fields)
with get_session() as session:
return TranscodeJob.objects.create(**fields) session.add(job)
session.commit()
session.refresh(job)
def update_job(job, **fields):
for key, value in fields.items():
setattr(job, key, value)
job.save(update_fields=list(fields.keys()))
return job return job
def update_job_fields(job_id, **fields): def update_job(id: UUID, **fields) -> None:
from admin.mpr.media_assets.models import TranscodeJob with get_session() as session:
job = session.get(TranscodeJob, id)
if not job:
return
for k, v in fields.items():
setattr(job, k, v)
session.commit()
TranscodeJob.objects.filter(id=job_id).update(**fields)
def update_job_fields(job_id: UUID, **fields) -> None:
update_job(job_id, **fields)

233
core/db/models.py Normal file
View File

@@ -0,0 +1,233 @@
"""
SQLModel Table Models - GENERATED FILE
Do not edit directly. Regenerate using modelgen.
"""
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional
from uuid import UUID, uuid4
from sqlmodel import SQLModel, Field, Column
from sqlalchemy import JSON
class AssetStatus(str, Enum):
    """Processing state of a MediaAsset (see MediaAsset.status)."""
    PENDING = "pending"
    READY = "ready"
    ERROR = "error"
class JobStatus(str, Enum):
    """Lifecycle states of a TranscodeJob (see TranscodeJob.status)."""
    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
class ChunkJobStatus(str, Enum):
    """Lifecycle states of a ChunkJob, including its split/gather phases."""
    PENDING = "pending"
    CHUNKING = "chunking"
    PROCESSING = "processing"
    COLLECTING = "collecting"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
class DetectJobStatus(str, Enum):
    """Lifecycle states of a DetectJob (see DetectJob.status)."""
    PENDING = "pending"
    RUNNING = "running"
    PAUSED = "paused"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
class RunType(str, Enum):
    """How a DetectJob run was started (see DetectJob.run_type)."""
    INITIAL = "initial"
    REPLAY = "replay"
    RETRY = "retry"
class BrandSource(str, Enum):
    """Where a brand detection originated (OCR, local VLM, cloud LLM, manual)."""
    OCR = "ocr"
    VLM = "local_vlm"
    CLOUD = "cloud_llm"
    MANUAL = "manual"
class SourceType(str, Enum):
    """Kind of media source.

    NOTE(review): not referenced by any table column in this module —
    presumably the domain of SourceJob.source_type elsewhere; confirm
    before pruning from the generator.
    """
    CHUNK_JOB = "chunk_job"
    UPLOAD = "upload"
    DEVICE = "device"
    STREAM = "stream"
class MediaAsset(SQLModel, table=True):
    """A video/audio file registered in the system.

    JSON-typed columns carry a server_default so rows inserted outside the
    ORM still get a valid empty value.
    """
    __tablename__ = "media_assets"
    id: UUID = Field(default_factory=uuid4, primary_key=True)
    filename: str
    file_path: str
    # NOTE(review): default is the plain string "pending"; it equals
    # AssetStatus.PENDING (str-Enum) so runtime behavior matches, but type
    # checkers will flag it — a generator-level fix would emit the member.
    status: AssetStatus = "pending"
    error_message: Optional[str] = None
    file_size: Optional[int] = None
    # Media metadata — presumably filled in after probing; Optional until known.
    duration: Optional[float] = None
    video_codec: Optional[str] = None
    audio_codec: Optional[str] = None
    width: Optional[int] = None
    height: Optional[int] = None
    framerate: Optional[float] = None
    bitrate: Optional[int] = None
    properties: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
    comments: str = ""
    tags: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
    # NOTE(review): datetime.utcnow yields naive timestamps and is deprecated
    # on Python 3.12+; consider datetime.now(timezone.utc) in the generator.
    created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
    updated_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
class TranscodePreset(SQLModel, table=True):
    """A reusable transcoding configuration (like Handbrake presets)."""
    __tablename__ = "transcode_presets"
    id: UUID = Field(default_factory=uuid4, primary_key=True)
    name: str
    description: str = ""
    is_builtin: bool = False
    # Output container and codec settings (ffmpeg-style names).
    container: str = "mp4"
    video_codec: str = "libx264"
    # Bitrate vs. CRF quality target — presumably mutually exclusive; not
    # enforced at the model level.
    video_bitrate: Optional[str] = None
    video_crf: Optional[int] = None
    video_preset: Optional[str] = None
    resolution: Optional[str] = None
    framerate: Optional[float] = None
    audio_codec: str = "aac"
    audio_bitrate: Optional[str] = None
    audio_channels: Optional[int] = None
    audio_samplerate: Optional[int] = None
    # Additional arguments — presumably passed through to the encoder command.
    extra_args: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
    # Naive UTC (datetime.utcnow — deprecated on py3.12+; generator-level fix).
    created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
    updated_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
class TranscodeJob(SQLModel, table=True):
    """A transcoding or trimming job in the queue."""
    __tablename__ = "transcode_jobs"
    id: UUID = Field(default_factory=uuid4, primary_key=True)
    # Asset being transcoded; indexed to support per-asset job listings.
    source_asset_id: UUID = Field(index=True)
    preset_id: Optional[UUID] = None
    # Copy of the preset at enqueue time so later preset edits don't alter a
    # queued job — TODO confirm the enqueue path populates this.
    preset_snapshot: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
    trim_start: Optional[float] = None
    trim_end: Optional[float] = None
    output_filename: str = ""
    output_path: Optional[str] = None
    output_asset_id: Optional[UUID] = None
    # NOTE(review): plain string default for an enum-typed field (value
    # matches JobStatus.PENDING).
    status: JobStatus = "pending"
    progress: float = 0.0
    # Live progress telemetry while the job runs.
    current_frame: Optional[int] = None
    current_time: Optional[float] = None
    speed: Optional[str] = None
    error_message: Optional[str] = None
    # Execution handles: Celery task id and/or an execution ARN (presumably AWS).
    celery_task_id: Optional[str] = None
    execution_arn: Optional[str] = None
    priority: int = 0
    created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)  # naive UTC
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
class ChunkJob(SQLModel, table=True):
    """A chunk pipeline job — splits a media file into chunks and processes them"""
    __tablename__ = "chunk_jobs"
    id: UUID = Field(default_factory=uuid4, primary_key=True)
    source_asset_id: UUID = Field(index=True)
    # Pipeline tuning knobs.
    chunk_duration: float = 10.0  # presumably seconds per chunk — TODO confirm
    num_workers: int = 4
    max_retries: int = 3
    processor_type: str = "ffmpeg"
    # NOTE(review): str default for enum-typed field (== ChunkJobStatus.PENDING).
    status: ChunkJobStatus = "pending"
    progress: float = 0.0
    # Counters updated as chunks move through the pipeline.
    total_chunks: int = 0
    processed_chunks: int = 0
    failed_chunks: int = 0
    retry_count: int = 0
    error_message: Optional[str] = None
    throughput_mbps: Optional[float] = None
    elapsed_seconds: Optional[float] = None
    celery_task_id: Optional[str] = None
    priority: int = 0
    created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)  # naive UTC
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
class DetectJob(SQLModel, table=True):
    """A detection pipeline job."""
    __tablename__ = "detect_jobs"
    id: UUID = Field(default_factory=uuid4, primary_key=True)
    source_asset_id: UUID = Field(index=True)
    video_path: str
    profile_name: str = "soccer_broadcast"
    # Links a replay/retry run back to the original; indexed for child lookups.
    parent_job_id: Optional[UUID] = Field(default=None, index=True)
    # NOTE(review): str default for enum-typed field (== RunType.INITIAL).
    run_type: RunType = "initial"
    # Stage to resume from on a replay run — TODO confirm exact semantics.
    replay_from_stage: Optional[str] = None
    config_overrides: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
    status: DetectJobStatus = "pending"
    current_stage: Optional[str] = None
    progress: float = 0.0
    error_message: Optional[str] = None
    # Aggregate metrics for the run.
    total_detections: int = 0
    brands_found: int = 0
    cloud_llm_calls: int = 0
    estimated_cost_usd: float = 0.0
    celery_task_id: Optional[str] = None
    priority: int = 0
    created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)  # naive UTC
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
class StageCheckpoint(SQLModel, table=True):
    """A checkpoint saved after a pipeline stage completes."""
    __tablename__ = "stage_checkpoints"
    id: UUID = Field(default_factory=uuid4, primary_key=True)
    # Owning DetectJob; (job_id, stage) is treated as unique by the upsert in
    # core.db.detect.save_stage_checkpoint — NOTE(review): no DB-level unique
    # constraint actually enforces that pair.
    job_id: UUID = Field(index=True)
    stage: str
    stage_index: int
    # Blob-storage prefix where frame payloads live; structured data is inline.
    frames_prefix: str = ""
    frames_manifest: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
    frames_meta: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
    filtered_frame_sequences: List[int] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
    boxes_by_frame: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
    text_candidates: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
    unresolved_candidates: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
    detections: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
    stats: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
    # Config captured at stage time — presumably to make replays reproducible.
    config_snapshot: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
    config_overrides: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
    video_path: str = ""
    profile_name: str = ""
    created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)  # naive UTC
class KnownBrand(SQLModel, table=True):
    """A brand discovered or registered in the system."""
    __tablename__ = "known_brands"
    id: UUID = Field(default_factory=uuid4, primary_key=True)
    # Display name; matched case-insensitively (ilike) by the lookup helpers
    # in core.db.detect.
    canonical_name: str = Field(index=True)
    # Alternate spellings; a JSON array scanned in Python by find_brand_by_text.
    aliases: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
    # NOTE(review): str default for enum-typed field (== BrandSource.OCR).
    first_source: BrandSource = "ocr"
    total_occurrences: int = 0
    confirmed: bool = False
    created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)  # naive UTC
    updated_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
class SourceBrandSighting(SQLModel, table=True):
    """A brand seen in a specific source (video/asset)."""
    __tablename__ = "source_brand_sightings"
    id: UUID = Field(default_factory=uuid4, primary_key=True)
    source_asset_id: UUID = Field(index=True)
    # NOTE(review): logical reference to known_brands.id, but no ForeignKey
    # constraint is declared; brand_name is denormalized alongside it.
    brand_id: UUID
    brand_name: str
    # Timestamps of first/last detection — presumably seconds into the video.
    first_seen_timestamp: float = 0.0
    last_seen_timestamp: float = 0.0
    occurrences: int = 0
    detection_source: BrandSource = "ocr"
    # Running mean confidence, maintained incrementally by record_sighting.
    avg_confidence: float = 0.0
    created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)  # naive UTC

View File

@@ -1,15 +1,20 @@
"""Database operations for TranscodePreset.""" """Database operations for TranscodePreset — SQLModel."""
from __future__ import annotations
from uuid import UUID from uuid import UUID
from sqlmodel import select
def list_presets(): from .connection import get_session
from admin.mpr.media_assets.models import TranscodePreset from .models import TranscodePreset
return list(TranscodePreset.objects.all())
def get_preset(id: UUID): def list_presets() -> list[TranscodePreset]:
from admin.mpr.media_assets.models import TranscodePreset with get_session() as session:
return list(session.exec(select(TranscodePreset)).all())
return TranscodePreset.objects.get(id=id)
def get_preset(id: UUID) -> TranscodePreset | None:
with get_session() as session:
return session.get(TranscodePreset, id)

View File

@@ -6,6 +6,11 @@
"output": "admin/mpr/media_assets/models.py", "output": "admin/mpr/media_assets/models.py",
"include": ["dataclasses", "enums"] "include": ["dataclasses", "enums"]
}, },
{
"target": "sqlmodel",
"output": "core/db/models.py",
"include": ["dataclasses", "enums"]
},
{ {
"target": "graphene", "target": "graphene",
"output": "core/api/schema/graphql.py", "output": "core/api/schema/graphql.py",

View File

@@ -37,10 +37,9 @@ from .ui_state import UI_STATE_VIEWS # noqa: F401 — UI store state types
from .views import ChunkEvent, ChunkOutputFile, PipelineStats, WorkerEvent from .views import ChunkEvent, ChunkOutputFile, PipelineStats, WorkerEvent
from .sources import ChunkInfo, SourceJob, SourceType from .sources import ChunkInfo, SourceJob, SourceType
# Core domain models - generates Django, Pydantic, TypeScript # Core domain models - generates Django, SQLModel, TypeScript
DATACLASSES = [MediaAsset, TranscodePreset, TranscodeJob, ChunkJob, DATACLASSES = [MediaAsset, TranscodePreset, TranscodeJob, ChunkJob,
DetectJob, StageCheckpoint, KnownBrand, SourceBrandSighting, DetectJob, StageCheckpoint, KnownBrand, SourceBrandSighting]
SourceJob, ChunkInfo]
# API request/response models - generates TypeScript only (no Django) # API request/response models - generates TypeScript only (no Django)
# WorkerStatus from grpc.py is reused here # WorkerStatus from grpc.py is reused here
@@ -51,6 +50,8 @@ API_MODELS = [
ScanResult, ScanResult,
DeleteResult, DeleteResult,
WorkerStatus, WorkerStatus,
SourceJob,
ChunkInfo,
] ]
# Status enums - included in generated code # Status enums - included in generated code

View File

@@ -37,7 +37,8 @@ docker_build(
k8s_resource('redis') k8s_resource('redis')
k8s_resource('minio', port_forwards=['9000:9000', '9001:9001']) k8s_resource('minio', port_forwards=['9000:9000', '9001:9001'])
k8s_resource('fastapi', resource_deps=['redis', 'minio']) k8s_resource('postgres')
k8s_resource('fastapi', resource_deps=['redis', 'minio', 'postgres'])
k8s_resource('detection-ui') k8s_resource('detection-ui')
k8s_resource('gateway', resource_deps=['fastapi', 'detection-ui'], k8s_resource('gateway', resource_deps=['fastapi', 'detection-ui'],
port_forwards=['8080:8080']) port_forwards=['8080:8080'])
@@ -45,6 +46,6 @@ k8s_resource('gateway', resource_deps=['fastapi', 'detection-ui'],
# Group uncategorized resources (configmaps, namespace) under infra # Group uncategorized resources (configmaps, namespace) under infra
k8s_resource( k8s_resource(
objects=['mpr:namespace', 'mpr-config:configmap', 'minio-config:configmap', objects=['mpr:namespace', 'mpr-config:configmap', 'minio-config:configmap',
'envoy-gateway-config:configmap'], 'postgres-config:configmap', 'envoy-gateway-config:configmap'],
new_name='infra', new_name='infra',
) )

View File

@@ -24,6 +24,8 @@ spec:
name: mpr-config name: mpr-config
- configMapRef: - configMapRef:
name: minio-config name: minio-config
- configMapRef:
name: postgres-config
readinessProbe: readinessProbe:
httpGet: httpGet:
path: /health path: /health

View File

@@ -8,6 +8,7 @@ resources:
- configmap.yaml - configmap.yaml
- redis.yaml - redis.yaml
- minio.yaml - minio.yaml
- postgres.yaml
- fastapi.yaml - fastapi.yaml
- detection-ui.yaml - detection-ui.yaml
- gateway.yaml - gateway.yaml

View File

@@ -0,0 +1,63 @@
# Dev/local Postgres for the MPR stack: ConfigMap (credentials + DSN),
# single-replica Deployment, and a ClusterIP Service on 5432.
apiVersion: v1
kind: ConfigMap
metadata:
  name: postgres-config
  namespace: mpr
data:
  POSTGRES_DB: mpr
  POSTGRES_USER: mpr
  # NOTE(review): plaintext credentials in a ConfigMap — acceptable for local
  # dev only; move POSTGRES_PASSWORD (and the DSN) to a Secret for anything else.
  POSTGRES_PASSWORD: mpr
  # Host "postgres" must match the Service name below.
  DATABASE_URL: postgresql://mpr:mpr@postgres:5432/mpr
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: postgres
  namespace: mpr
spec:
  # Single replica: Postgres with local storage cannot be scaled horizontally.
  replicas: 1
  selector:
    matchLabels:
      app: postgres
  template:
    metadata:
      labels:
        app: postgres
    spec:
      containers:
        - name: postgres
          image: postgres:16-alpine
          ports:
            - containerPort: 5432
          envFrom:
            - configMapRef:
                name: postgres-config
          readinessProbe:
            exec:
              command: ["pg_isready", "-U", "mpr"]
            initialDelaySeconds: 5
            periodSeconds: 10
          resources:
            requests:
              memory: 128Mi
              cpu: 100m
            limits:
              memory: 512Mi
          volumeMounts:
            - name: data
              mountPath: /var/lib/postgresql/data
      volumes:
        # NOTE(review): emptyDir is ephemeral — all database data is lost
        # whenever the pod restarts or is rescheduled. Use a
        # PersistentVolumeClaim if any durability is expected.
        - name: data
          emptyDir: {}
---
apiVersion: v1
kind: Service
metadata:
  name: postgres
  namespace: mpr
spec:
  selector:
    app: postgres
  ports:
    - port: 5432
      targetPort: 5432

View File

@@ -13,7 +13,7 @@ from detect.models import Frame
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
BUCKET = os.environ.get("S3_BUCKET_OUT", "out") BUCKET = os.environ.get("S3_BUCKET", "mpr")
CHECKPOINT_PREFIX = "checkpoints" CHECKPOINT_PREFIX = "checkpoints"

View File

@@ -24,13 +24,14 @@ logger = logging.getLogger(__name__)
def _has_db() -> bool: def _has_db() -> bool:
"""Check if the DB layer is available (Django + models generated by modelgen).""" """Check if Postgres is reachable."""
try: try:
from core.db.detect import get_stage_checkpoint as _ from core.db.connection import get_session
# Quick check that the model exists (modelgen may not have run yet) from sqlmodel import text
from admin.mpr.media_assets.models import StageCheckpoint as _ with get_session() as session:
session.exec(text("SELECT 1"))
return True return True
except (ImportError, Exception): except Exception:
return False return False
@@ -69,17 +70,13 @@ def save_checkpoint(
def _save_to_db(job_id: str, stage: str, stage_index: int, data: dict) -> str: def _save_to_db(job_id: str, stage: str, stage_index: int, data: dict) -> str:
"""Save checkpoint structured data to Postgres via core/db.""" """Save checkpoint structured data to Postgres."""
import uuid
from core.db.detect import save_stage_checkpoint from core.db.detect import save_stage_checkpoint
job_uuid = uuid.UUID(job_id) if isinstance(job_id, str) else job_id
checkpoint_id = uuid.uuid4()
frames_prefix = f"{CHECKPOINT_PREFIX}/{job_id}/frames/" frames_prefix = f"{CHECKPOINT_PREFIX}/{job_id}/frames/"
checkpoint = save_stage_checkpoint( checkpoint = save_stage_checkpoint(
id=checkpoint_id, job_id=job_id,
job_id=job_uuid,
stage=stage, stage=stage,
stage_index=stage_index, stage_index=stage_index,
frames_prefix=frames_prefix, frames_prefix=frames_prefix,

View File

@@ -17,6 +17,7 @@ from .django import DjangoGenerator
from .prisma import PrismaGenerator from .prisma import PrismaGenerator
from .protobuf import ProtobufGenerator from .protobuf import ProtobufGenerator
from .pydantic import PydanticGenerator from .pydantic import PydanticGenerator
from .sqlmodel import SQLModelGenerator
from .strawberry import StrawberryGenerator from .strawberry import StrawberryGenerator
from .typescript import TypeScriptGenerator from .typescript import TypeScriptGenerator
@@ -24,6 +25,7 @@ from .typescript import TypeScriptGenerator
GENERATORS: Dict[str, Type[BaseGenerator]] = { GENERATORS: Dict[str, Type[BaseGenerator]] = {
"pydantic": PydanticGenerator, "pydantic": PydanticGenerator,
"django": DjangoGenerator, "django": DjangoGenerator,
"sqlmodel": SQLModelGenerator,
"typescript": TypeScriptGenerator, "typescript": TypeScriptGenerator,
"ts": TypeScriptGenerator, # Alias "ts": TypeScriptGenerator, # Alias
"protobuf": ProtobufGenerator, "protobuf": ProtobufGenerator,

View File

@@ -0,0 +1,186 @@
"""
SQLModel Generator
Generates SQLModel table classes from model definitions.
Extends the Pydantic generator — SQLModel classes *are* Pydantic models
with table=True and SQLAlchemy column config for JSON fields.
"""
import dataclasses as dc
import re
from enum import Enum
from typing import Any, List, get_type_hints
from ..helpers import get_origin_name, get_type_name, unwrap_optional
from .pydantic import PydanticGenerator
# ---------------------------------------------------------------------------
# Field resolvers — each returns a Field() string or None to fall through
# ---------------------------------------------------------------------------
def _resolve_special(name, _base, _origin, _optional, _default):
"""id, created_at, updated_at get fixed Field() definitions."""
specials = {
"id": "Field(default_factory=uuid4, primary_key=True)",
"created_at": "Field(default_factory=datetime.utcnow)",
"updated_at": "Field(default_factory=datetime.utcnow)",
}
return specials.get(name)
def _resolve_json(name, _base, origin, _optional, _default):
"""Dict and List fields → sa_column=Column(JSON)."""
mapping = {
"dict": ("dict", "{}"),
"list": ("list", "[]"),
}
entry = mapping.get(origin)
if not entry:
return None
factory, server_default = entry
return (
f"Field(default_factory={factory}, "
f"sa_column=Column(JSON, nullable=False, server_default='{server_default}'))"
)
def _resolve_indexed(name, _base, _origin, optional, _default):
"""Known indexed fields."""
indexed = {"source_asset_id", "parent_job_id", "job_id", "canonical_name"}
if name not in indexed:
return None
if optional:
return "Field(default=None, index=True)"
return "Field(index=True)"
def _resolve_optional(_name, _base, _origin, optional, _default):
"""Optional fields default to None."""
if optional:
return "None"
return None
def _resolve_default(_name, _base, _origin, _optional, default):
"""Fields with explicit defaults. Enum before str (str enums are both)."""
if default is dc.MISSING or default is None:
return None
if isinstance(default, Enum):
return f'"{default.value}"'
if isinstance(default, bool):
return str(default)
if isinstance(default, (int, float)):
return str(default)
if isinstance(default, str):
return f'"{default}"'
return None
# Resolver chain — evaluated in order; the first non-None result wins.
_FIELD_RESOLVERS = [
    _resolve_special,
    _resolve_json,
    _resolve_indexed,
    _resolve_optional,
    _resolve_default,
]


def _resolve_field(name, type_hint, default):
    """Resolve one field to its ' = <Field/default>' suffix ('' if none applies)."""
    base, is_optional = unwrap_optional(type_hint)
    origin = get_origin_name(base)
    args = (name, base, origin, is_optional, default)
    hit = next(
        (r for r in (resolver(*args) for resolver in _FIELD_RESOLVERS) if r is not None),
        None,
    )
    return "" if hit is None else f" = {hit}"
def _to_snake_plural(name):
"""CamelCase → snake_case_plural for table names."""
s = re.sub(r"(?<=[a-z])(?=[A-Z])", "_", name).lower()
if s.endswith("y") and not s.endswith("ey"):
return s[:-1] + "ies"
if s.endswith("s"):
return s + "es"
return s + "s"
# Boilerplate emitted at the top of every generated file: the generated-file
# docstring plus the imports that the Field(...) strings produced by the
# resolvers rely on (uuid4, datetime, Field, Column, JSON, ...).
_HEADER = [
    '"""',
    "SQLModel Table Models - GENERATED FILE",
    "",
    "Do not edit directly. Regenerate using modelgen.",
    '"""',
    "",
    "from datetime import datetime",
    "from enum import Enum",
    "from typing import Any, Dict, List, Optional",
    "from uuid import UUID, uuid4",
    "",
    "from sqlmodel import SQLModel, Field, Column",
    "from sqlalchemy import JSON",
    "",
]
class SQLModelGenerator(PydanticGenerator):
    """Generates SQLModel table classes."""

    def _generate_header(self) -> List[str]:
        # Return a copy so callers can append without mutating the constant.
        return _HEADER[:]

    def _generate_model_from_dataclass(self, cls: type) -> List[str]:
        """Emit a table class from a Python dataclass definition."""
        field_map = {f.name: f for f in dc.fields(cls)}
        return _build_table(
            cls.__name__,
            cls.__doc__ or cls.__name__,
            get_type_hints(cls),
            field_map,
            self._resolve_type,
        )

    def _generate_model_from_definition(self, model_def) -> List[str]:
        """Emit a table class from a parsed model definition."""
        # _build_table only reads ``.default`` off each field object, so a
        # minimal holder is enough to adapt definition fields to that shape.
        class _DefaultHolder:
            def __init__(self, default):
                self.default = default

        hints = {}
        field_map = {}
        for f in model_def.fields:
            hints[f.name] = f.type_hint
            field_map[f.name] = _DefaultHolder(f.default)
        return _build_table(
            model_def.name,
            model_def.docstring or model_def.name,
            hints,
            field_map,
            self._resolve_type,
        )
def _build_table(name, docstring, hints, fields, resolve_type_fn):
    """Build the source lines for one SQLModel table class.

    Args:
        name: CamelCase model name; also drives the generated table name.
        docstring: Docstring whose first non-blank line becomes the class
            docstring summary.
        hints: Mapping of field name -> type hint, in declaration order.
        fields: Mapping of field name -> object with a ``.default`` attribute
            (dataclass Field or equivalent); names missing here are treated
            as having no default.
        resolve_type_fn: Callback rendering a type hint as Python source.

    Returns:
        List of source lines for the class (no trailing newlines).
    """
    table_name = _to_snake_plural(name)
    # Hoisting the first-line extraction avoids the old chr(10) workaround
    # for the backslash restriction inside f-string expressions.
    summary = docstring.strip().split("\n")[0]
    lines = [
        f"class {name}(SQLModel, table=True):",
        f'    """{summary}"""',
        f'    __tablename__ = "{table_name}"',
        "",
    ]
    for field_name, type_hint in hints.items():
        # Underscore-prefixed attributes are implementation details, not columns.
        if field_name.startswith("_"):
            continue
        field = fields.get(field_name)
        default_val = field.default if field else dc.MISSING
        py_type = resolve_type_fn(type_hint, False)
        field_extra = _resolve_field(field_name, type_hint, default_val)
        lines.append(f"    {field_name}: {py_type}{field_extra}")
    return lines

View File

@@ -1,7 +1,6 @@
# Django # Django (admin viewer only — no app code depends on this)
Django>=4.2,<5.0 Django>=4.2,<5.0
django-environ>=0.11.2 django-environ>=0.11.2
psycopg2-binary>=2.9.9
# FastAPI # FastAPI
fastapi>=0.109.0 fastapi>=0.109.0
@@ -32,6 +31,10 @@ langfuse>=2.0.0
# Cloud LLM providers (only needed for cloud escalation stage) # Cloud LLM providers (only needed for cloud escalation stage)
anthropic>=0.40.0 anthropic>=0.40.0
# Database (SQLModel/SQLAlchemy + psycopg2)
sqlmodel>=0.0.14
psycopg2-binary>=2.9.9
# Detection pipeline orchestration # Detection pipeline orchestration
numpy>=1.24.0 numpy>=1.24.0
Pillow>=10.0.0 Pillow>=10.0.0