This commit is contained in:
2026-03-27 22:54:58 -03:00
parent 3d8e7291f3
commit 94c7b21ae5
8 changed files with 233 additions and 224 deletions

View File

@@ -13,22 +13,6 @@ class AssetStatus(models.TextChoices):
ERROR = "error", "Error"
class JobStatus(models.TextChoices):
PENDING = "pending", "Pending"
PROCESSING = "processing", "Processing"
COMPLETED = "completed", "Completed"
FAILED = "failed", "Failed"
CANCELLED = "cancelled", "Cancelled"
class ChunkJobStatus(models.TextChoices):
PENDING = "pending", "Pending"
CHUNKING = "chunking", "Chunking"
PROCESSING = "processing", "Processing"
COLLECTING = "collecting", "Collecting"
COMPLETED = "completed", "Completed"
FAILED = "failed", "Failed"
CANCELLED = "cancelled", "Cancelled"
class DetectJobStatus(models.TextChoices):
PENDING = "pending", "Pending"
RUNNING = "running", "Running"
PAUSED = "paused", "Paused"
@@ -111,81 +95,17 @@ class TranscodePreset(models.Model):
return self.name
class TranscodeJob(models.Model):
"""A transcoding or trimming job in the queue."""
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
source_asset_id = models.UUIDField()
preset_id = models.UUIDField(null=True, blank=True)
preset_snapshot = models.JSONField(default=dict, blank=True)
trim_start = models.FloatField(null=True, blank=True, default=None)
trim_end = models.FloatField(null=True, blank=True, default=None)
output_filename = models.CharField(max_length=500)
output_path = models.CharField(max_length=1000, null=True, blank=True)
output_asset_id = models.UUIDField(null=True, blank=True)
status = models.CharField(max_length=20, choices=JobStatus.choices, default=JobStatus.PENDING)
progress = models.FloatField(default=0.0)
current_frame = models.IntegerField(null=True, blank=True, default=None)
current_time = models.FloatField(null=True, blank=True, default=None)
speed = models.CharField(max_length=255, null=True, blank=True)
error_message = models.TextField(blank=True, default='')
celery_task_id = models.CharField(max_length=255, null=True, blank=True)
execution_arn = models.CharField(max_length=255, null=True, blank=True)
priority = models.IntegerField(default=0)
created_at = models.DateTimeField(auto_now_add=True)
started_at = models.DateTimeField(null=True, blank=True)
completed_at = models.DateTimeField(null=True, blank=True)
class Meta:
ordering = ["-created_at"]
def __str__(self):
return str(self.id)
class ChunkJob(models.Model):
"""A chunk pipeline job — splits a media file into chunks and processes them"""
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
source_asset_id = models.UUIDField()
chunk_duration = models.FloatField(default=10.0)
num_workers = models.IntegerField(default=4)
max_retries = models.IntegerField(default=3)
processor_type = models.CharField(max_length=255)
status = models.CharField(max_length=20, choices=ChunkJobStatus.choices, default=ChunkJobStatus.PENDING)
progress = models.FloatField(default=0.0)
total_chunks = models.IntegerField(default=0)
processed_chunks = models.IntegerField(default=0)
failed_chunks = models.IntegerField(default=0)
retry_count = models.IntegerField(default=0)
error_message = models.TextField(blank=True, default='')
throughput_mbps = models.FloatField(null=True, blank=True, default=None)
elapsed_seconds = models.FloatField(null=True, blank=True, default=None)
celery_task_id = models.CharField(max_length=255, null=True, blank=True)
priority = models.IntegerField(default=0)
created_at = models.DateTimeField(auto_now_add=True)
started_at = models.DateTimeField(null=True, blank=True)
completed_at = models.DateTimeField(null=True, blank=True)
class Meta:
ordering = ["-created_at"]
def __str__(self):
return str(self.id)
class DetectJob(models.Model):
"""A detection pipeline job."""
class Job(models.Model):
"""A pipeline job."""
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
source_asset_id = models.UUIDField()
video_path = models.CharField(max_length=1000)
profile_name = models.CharField(max_length=255)
parent_job_id = models.UUIDField(null=True, blank=True)
parent_id = models.UUIDField(null=True, blank=True)
run_type = models.CharField(max_length=20, choices=RunType.choices, default=RunType.INITIAL)
replay_from_stage = models.CharField(max_length=255, null=True, blank=True)
config_overrides = models.JSONField(default=dict, blank=True)
status = models.CharField(max_length=20, choices=DetectJobStatus.choices, default=DetectJobStatus.PENDING)
status = models.CharField(max_length=20, choices=JobStatus.choices, default=JobStatus.PENDING)
current_stage = models.CharField(max_length=255, null=True, blank=True)
progress = models.FloatField(default=0.0)
error_message = models.TextField(blank=True, default='')
@@ -206,26 +126,17 @@ class DetectJob(models.Model):
return str(self.id)
class StageCheckpoint(models.Model):
"""A checkpoint saved after a pipeline stage completes."""
class Timeline(models.Model):
"""The frame sequence from a source video."""
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
job_id = models.UUIDField()
stage = models.CharField(max_length=255)
stage_index = models.IntegerField()
source_asset_id = models.UUIDField(null=True, blank=True)
source_video = models.CharField(max_length=255)
profile_name = models.CharField(max_length=255)
fps = models.FloatField(default=2.0)
frames_prefix = models.CharField(max_length=255)
frames_manifest = models.JSONField(default=dict, blank=True)
frames_meta = models.JSONField(default=list, blank=True)
filtered_frame_sequences = models.JSONField(default=list, blank=True)
boxes_by_frame = models.JSONField(default=dict, blank=True)
text_candidates = models.JSONField(default=list, blank=True)
unresolved_candidates = models.JSONField(default=list, blank=True)
detections = models.JSONField(default=list, blank=True)
stats = models.JSONField(default=dict, blank=True)
config_snapshot = models.JSONField(default=dict, blank=True)
config_overrides = models.JSONField(default=dict, blank=True)
video_path = models.CharField(max_length=1000)
profile_name = models.CharField(max_length=255)
created_at = models.DateTimeField(auto_now_add=True)
class Meta:
@@ -235,15 +146,36 @@ class StageCheckpoint(models.Model):
return str(self.id)
class KnownBrand(models.Model):
class Checkpoint(models.Model):
"""A snapshot of pipeline state on a timeline."""
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
timeline_id = models.UUIDField()
parent_id = models.UUIDField(null=True, blank=True)
stage_outputs = models.JSONField(default=dict, blank=True)
config_overrides = models.JSONField(default=dict, blank=True)
stats = models.JSONField(default=dict, blank=True)
is_scenario = models.BooleanField(default=False)
scenario_label = models.CharField(max_length=255)
created_at = models.DateTimeField(auto_now_add=True)
class Meta:
ordering = ["-created_at"]
def __str__(self):
return str(self.id)
class Brand(models.Model):
"""A brand discovered or registered in the system."""
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
canonical_name = models.CharField(max_length=255)
aliases = models.JSONField(default=list, blank=True)
first_source = models.CharField(max_length=20, choices=BrandSource.choices, default=BrandSource.OCR)
total_occurrences = models.IntegerField(default=0)
source = models.CharField(max_length=20, choices=BrandSource.choices, default=BrandSource.OCR)
confirmed = models.BooleanField(default=False)
airings = models.JSONField(default=list, blank=True)
total_airings = models.IntegerField(default=0)
created_at = models.DateTimeField(auto_now_add=True)
updated_at = models.DateTimeField(auto_now=True)
@@ -253,24 +185,3 @@ class KnownBrand(models.Model):
def __str__(self):
return str(self.id)
class SourceBrandSighting(models.Model):
"""A brand seen in a specific source (video/asset)."""
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
source_asset_id = models.UUIDField()
brand_id = models.UUIDField()
brand_name = models.CharField(max_length=255)
first_seen_timestamp = models.FloatField(default=0.0)
last_seen_timestamp = models.FloatField(default=0.0)
occurrences = models.IntegerField(default=0)
detection_source = models.CharField(max_length=20, choices=BrandSource.choices, default=BrandSource.OCR)
avg_confidence = models.FloatField(default=0.0)
created_at = models.DateTimeField(auto_now_add=True)
class Meta:
ordering = ["-created_at"]
def __str__(self):
return str(self.id)

156
core/db/models.py Normal file
View File

@@ -0,0 +1,156 @@
"""
SQLModel Table Models - GENERATED FILE
Do not edit directly. Regenerate using modelgen.
"""
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional
from uuid import UUID, uuid4
from sqlmodel import SQLModel, Field, Column
from sqlalchemy import JSON
class AssetStatus(str, Enum):
    """Status vocabulary for MediaAsset.status."""

    PENDING = "pending"
    READY = "ready"
    ERROR = "error"
class JobStatus(str, Enum):
    """Status vocabulary for Job.status (merged job-status set: includes
    the paused/running states of the former DetectJobStatus)."""

    PENDING = "pending"
    RUNNING = "running"
    PAUSED = "paused"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
class RunType(str, Enum):
    """How a Job run was started (see Job.run_type)."""

    INITIAL = "initial"
    REPLAY = "replay"
    RETRY = "retry"
class BrandSource(str, Enum):
    """Where a brand detection came from (see Brand.source).

    Note the member names abbreviate the stored values:
    VLM stores "local_vlm", CLOUD stores "cloud_llm".
    """

    OCR = "ocr"
    VLM = "local_vlm"
    CLOUD = "cloud_llm"
    MANUAL = "manual"
class SourceType(str, Enum):
    """Origin of a media source.

    NOTE(review): not referenced by any model in this module; presumably
    consumed by API schemas elsewhere — confirm before removing.
    """

    CHUNK_JOB = "chunk_job"
    UPLOAD = "upload"
    DEVICE = "device"
    STREAM = "stream"
class MediaAsset(SQLModel, table=True):
    """A video/audio file registered in the system."""
    __tablename__ = "media_assets"

    id: UUID = Field(default_factory=uuid4, primary_key=True)
    filename: str
    file_path: str
    # Fixed: use the enum member, not the bare string "pending". Pydantic does
    # not validate defaults, so a raw str default would bypass the AssetStatus
    # enum at runtime and fails static type checking.
    # NOTE(review): this file is generated ("Do not edit directly") — apply the
    # same fix in the modelgen template.
    status: AssetStatus = AssetStatus.PENDING
    error_message: Optional[str] = None  # presumably set when status is ERROR — confirm
    file_size: Optional[int] = None
    duration: Optional[float] = None
    video_codec: Optional[str] = None
    audio_codec: Optional[str] = None
    width: Optional[int] = None
    height: Optional[int] = None
    framerate: Optional[float] = None
    bitrate: Optional[int] = None
    # Free-form metadata blob stored as a JSON column (non-null, defaults to {}).
    properties: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
    comments: str = ""
    tags: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12; consider
    # datetime.now(timezone.utc) in the generator — confirm columns are tz-aware first.
    # updated_at has no onupdate hook, so unlike the Django auto_now counterpart
    # it is only set at creation — confirm intended.
    created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
    updated_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
class TranscodePreset(SQLModel, table=True):
    """A reusable transcoding configuration (like Handbrake presets)."""
    __tablename__ = "transcode_presets"

    id: UUID = Field(default_factory=uuid4, primary_key=True)
    name: str
    description: str = ""
    is_builtin: bool = False
    # Defaults ("mp4", "libx264", "aac") suggest ffmpeg encoder/container
    # names — TODO confirm against the transcode worker.
    container: str = "mp4"
    video_codec: str = "libx264"
    video_bitrate: Optional[str] = None  # str, not int — presumably ffmpeg-style ("5M") — confirm
    video_crf: Optional[int] = None
    video_preset: Optional[str] = None
    resolution: Optional[str] = None
    framerate: Optional[float] = None
    audio_codec: str = "aac"
    audio_bitrate: Optional[str] = None
    audio_channels: Optional[int] = None
    audio_samplerate: Optional[int] = None
    # Extra command-line arguments, stored as a JSON array (non-null, defaults to []).
    extra_args: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12; also
    # updated_at has no onupdate hook, so unlike a Django auto_now field it is
    # only set at creation — confirm intended.
    created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
    updated_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
class Job(SQLModel, table=True):
    """A pipeline job."""
    __tablename__ = "jobs"

    id: UUID = Field(default_factory=uuid4, primary_key=True)
    source_asset_id: UUID = Field(index=True)  # MediaAsset.id, no FK constraint declared
    video_path: str
    profile_name: str = "soccer_broadcast"
    parent_id: Optional[UUID] = None  # presumably the originating job for replay/retry runs — confirm
    # Fixed: enum members instead of bare strings "initial"/"pending". Pydantic
    # does not validate defaults, so raw str defaults would bypass the enums at
    # runtime and fail static type checking.
    # NOTE(review): this file is generated ("Do not edit directly") — apply the
    # same fix in the modelgen template.
    run_type: RunType = RunType.INITIAL
    config_overrides: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
    status: JobStatus = JobStatus.PENDING
    current_stage: Optional[str] = None
    progress: float = 0.0
    error_message: Optional[str] = None
    # Aggregate pipeline counters/cost rollups.
    total_detections: int = 0
    brands_found: int = 0
    cloud_llm_calls: int = 0
    estimated_cost_usd: float = 0.0
    celery_task_id: Optional[str] = None
    priority: int = 0
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12; consider
    # datetime.now(timezone.utc) in the generator — confirm columns are tz-aware first.
    created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
class Timeline(SQLModel, table=True):
    """The frame sequence from a source video."""
    __tablename__ = "timelines"

    id: UUID = Field(default_factory=uuid4, primary_key=True)
    source_asset_id: Optional[UUID] = Field(default=None, index=True)  # MediaAsset.id, no FK constraint declared
    source_video: str = ""
    profile_name: str = ""
    fps: float = 2.0  # presumably the frame-extraction rate, not the source video's fps — confirm
    frames_prefix: str = ""  # presumably a storage path/key prefix for extracted frames — confirm
    frames_manifest: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
    # NOTE(review): typed List[str]; if entries are per-frame metadata objects
    # rather than strings, the element type should be Dict[str, Any] — confirm.
    frames_meta: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12.
    created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
class Checkpoint(SQLModel, table=True):
    """A snapshot of pipeline state on a timeline."""
    __tablename__ = "checkpoints"

    id: UUID = Field(default_factory=uuid4, primary_key=True)
    timeline_id: UUID  # owning Timeline.id, no FK constraint declared
    parent_id: Optional[UUID] = None  # presumably the previous checkpoint in the chain — confirm
    # Per-stage pipeline outputs keyed by stage — presumably; confirm against the pipeline writer.
    stage_outputs: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
    config_overrides: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
    stats: Dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON, nullable=False, server_default='{}'))
    is_scenario: bool = False
    scenario_label: str = ""  # presumably only meaningful when is_scenario is True — confirm
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12.
    created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
class Brand(SQLModel, table=True):
    """A brand discovered or registered in the system."""
    __tablename__ = "brands"

    id: UUID = Field(default_factory=uuid4, primary_key=True)
    canonical_name: str = Field(index=True)
    aliases: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
    # Fixed: enum member instead of bare string "ocr". Pydantic does not
    # validate defaults, so a raw str default would bypass the BrandSource
    # enum at runtime and fail static type checking.
    # NOTE(review): this file is generated ("Do not edit directly") — apply the
    # same fix in the modelgen template.
    source: BrandSource = BrandSource.OCR
    confirmed: bool = False
    airings: List[str] = Field(default_factory=list, sa_column=Column(JSON, nullable=False, server_default='[]'))
    total_airings: int = 0
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12. Also,
    # updated_at has no onupdate hook — the Django counterpart used
    # auto_now=True, so this field will NOT refresh on modification; confirm intended.
    created_at: Optional[datetime] = Field(default_factory=datetime.utcnow)
    updated_at: Optional[datetime] = Field(default_factory=datetime.utcnow)

View File

@@ -94,6 +94,9 @@ class StatsUpdate:
cloud_llm_calls: int = 0
processing_time_seconds: float = 0.0
estimated_cloud_cost_usd: float = 0.0
run_id: Optional[str] = None
parent_job_id: Optional[str] = None
run_type: str = "initial"
@dataclass

View File

@@ -63,6 +63,9 @@ class StatsUpdate(BaseModel):
cloud_llm_calls: int = 0
processing_time_seconds: float = 0.0
estimated_cloud_cost_usd: float = 0.0
run_id: Optional[str] = None
parent_job_id: Optional[str] = None
run_type: str = "initial"
class FrameUpdate(BaseModel):
"""Current frame being processed. SSE event: frame_update"""
@@ -113,6 +116,8 @@ class RunContext(BaseModel):
class CheckpointInfo(BaseModel):
"""Available checkpoint for a stage."""
stage: str
is_scenario: bool = False
scenario_label: str = ""
class ReplayRequest(BaseModel):
"""Request to replay pipeline from a specific stage."""

View File

@@ -5,9 +5,7 @@
*/
export type AssetStatus = "pending" | "ready" | "error";
export type JobStatus = "pending" | "processing" | "completed" | "failed" | "cancelled";
export type ChunkJobStatus = "pending" | "chunking" | "processing" | "collecting" | "completed" | "failed" | "cancelled";
export type DetectJobStatus = "pending" | "running" | "paused" | "completed" | "failed" | "cancelled";
export type JobStatus = "pending" | "running" | "paused" | "completed" | "failed" | "cancelled";
export type RunType = "initial" | "replay" | "retry";
export type BrandSource = "ocr" | "local_vlm" | "cloud_llm" | "manual";
export type SourceType = "chunk_job" | "upload" | "device" | "stream";
@@ -54,63 +52,15 @@ export interface TranscodePreset {
updated_at: string | null;
}
export interface TranscodeJob {
id: string;
source_asset_id: string;
preset_id: string | null;
preset_snapshot: Record<string, unknown>;
trim_start: number | null;
trim_end: number | null;
output_filename: string;
output_path: string | null;
output_asset_id: string | null;
status: JobStatus;
progress: number;
current_frame: number | null;
current_time: number | null;
speed: string | null;
error_message: string | null;
celery_task_id: string | null;
execution_arn: string | null;
priority: number;
created_at: string | null;
started_at: string | null;
completed_at: string | null;
}
export interface ChunkJob {
id: string;
source_asset_id: string;
chunk_duration: number;
num_workers: number;
max_retries: number;
processor_type: string;
status: ChunkJobStatus;
progress: number;
total_chunks: number;
processed_chunks: number;
failed_chunks: number;
retry_count: number;
error_message: string | null;
throughput_mbps: number | null;
elapsed_seconds: number | null;
celery_task_id: string | null;
priority: number;
created_at: string | null;
started_at: string | null;
completed_at: string | null;
}
export interface DetectJob {
export interface Job {
id: string;
source_asset_id: string;
video_path: string;
profile_name: string;
parent_job_id: string | null;
parent_id: string | null;
run_type: RunType;
replay_from_stage: string | null;
config_overrides: Record<string, unknown>;
status: DetectJobStatus;
status: JobStatus;
current_stage: string | null;
progress: number;
error_message: string | null;
@@ -125,51 +75,42 @@ export interface DetectJob {
completed_at: string | null;
}
export interface StageCheckpoint {
export interface Timeline {
id: string;
job_id: string;
stage: string;
stage_index: number;
source_asset_id: string | null;
source_video: string;
profile_name: string;
fps: number;
frames_prefix: string;
frames_manifest: Record<string, unknown>;
frames_meta: string[];
filtered_frame_sequences: number[];
boxes_by_frame: Record<string, unknown>;
text_candidates: string[];
unresolved_candidates: string[];
detections: string[];
stats: Record<string, unknown>;
config_snapshot: Record<string, unknown>;
config_overrides: Record<string, unknown>;
video_path: string;
profile_name: string;
created_at: string | null;
}
export interface KnownBrand {
export interface Checkpoint {
id: string;
timeline_id: string;
parent_id: string | null;
stage_outputs: Record<string, unknown>;
config_overrides: Record<string, unknown>;
stats: Record<string, unknown>;
is_scenario: boolean;
scenario_label: string;
created_at: string | null;
}
export interface Brand {
id: string;
canonical_name: string;
aliases: string[];
first_source: BrandSource;
total_occurrences: number;
source: BrandSource;
confirmed: boolean;
airings: string[];
total_airings: number;
created_at: string | null;
updated_at: string | null;
}
export interface SourceBrandSighting {
id: string;
source_asset_id: string;
brand_id: string;
brand_name: string;
first_seen_timestamp: number;
last_seen_timestamp: number;
occurrences: number;
detection_source: BrandSource;
avg_confidence: number;
created_at: string | null;
}
export interface CreateJobRequest {
source_asset_id: string;
preset_id: string | null;

View File

@@ -42,9 +42,9 @@ source.on<StatsUpdate>('stats_update', (e) => {
stats.value = e
if (!runContext.value && e.run_id) {
runContext.value = {
run_id: (e as any).run_id,
parent_job_id: (e as any).parent_job_id,
run_type: (e as any).run_type ?? 'initial',
run_id: e.run_id!,
parent_job_id: e.parent_job_id!,
run_type: e.run_type ?? 'initial',
}
}
})
@@ -267,8 +267,10 @@ const editorOverlays = ref<FrameOverlay[]>([])
// Boxes from edge detection (local or server)
const editorBoxes = ref<FrameBBox[]>([])
type RegionBox = { x: number; y: number; w: number; h: number; confidence: number; label: string }
function onReplayResult(result: {
regions_by_frame?: Record<string, unknown[]>
regions_by_frame?: Record<string, RegionBox[]>
debug?: Record<string, { edge_overlay_b64: string; lines_overlay_b64: string; horizontal_count: number; pair_count: number }>
frameWidth?: number
frameHeight?: number
@@ -281,7 +283,7 @@ function onReplayResult(result: {
// Merge incoming per-frame regions into accumulated store
if (result.regions_by_frame) {
for (const [seqStr, regions] of Object.entries(result.regions_by_frame)) {
allFrameRegions.value[Number(seqStr)] = regions as any[]
allFrameRegions.value[Number(seqStr)] = regions
}
}

View File

@@ -56,6 +56,9 @@ export interface StatsUpdate {
cloud_llm_calls: number;
processing_time_seconds: number;
estimated_cloud_cost_usd: number;
run_id: string | null;
parent_job_id: string | null;
run_type: string;
}
export interface FrameUpdate {

View File

@@ -53,19 +53,7 @@ export interface PreprocessingConfigOverrides {
contrast: boolean | null;
}
export interface RegionAnalysisConfigOverrides {
enabled: boolean | null;
edge_canny_low: number | null;
edge_canny_high: number | null;
edge_hough_threshold: number | null;
edge_hough_min_length: number | null;
edge_hough_max_gap: number | null;
edge_pair_max_distance: number | null;
edge_pair_min_distance: number | null;
}
export interface ConfigOverrides {
region_analysis: RegionAnalysisConfigOverrides | null;
detection: DetectionConfigOverrides | null;
ocr: OCRConfigOverrides | null;
resolver: ResolverConfigOverrides | null;