phase 10
This commit is contained in:
@@ -26,13 +26,18 @@ from .grpc import (
|
||||
WorkerStatus,
|
||||
)
|
||||
from .jobs import ChunkJob, ChunkJobStatus, JobStatus, TranscodeJob
|
||||
from .detect_jobs import (
|
||||
DetectJob, DetectJobStatus, RunType, StageCheckpoint,
|
||||
BrandSource, KnownBrand, SourceBrandSighting,
|
||||
)
|
||||
from .media import AssetStatus, MediaAsset
|
||||
from .presets import BUILTIN_PRESETS, TranscodePreset
|
||||
from .detect import DETECT_VIEWS # noqa: F401 — discovered by modelgen generic loader
|
||||
from .views import ChunkEvent, ChunkOutputFile, PipelineStats, WorkerEvent
|
||||
|
||||
# Core domain models - generates Django, Pydantic, TypeScript
|
||||
# Single definition of the export list; a preceding four-model assignment was
# dead code because this list replaced it immediately.
DATACLASSES = [
    MediaAsset,
    TranscodePreset,
    TranscodeJob,
    ChunkJob,
    DetectJob,
    StageCheckpoint,
    KnownBrand,
    SourceBrandSighting,
]
|
||||
|
||||
# API request/response models - generates TypeScript only (no Django)
|
||||
# WorkerStatus from grpc.py is reused here
|
||||
@@ -46,7 +51,7 @@ API_MODELS = [
|
||||
]
|
||||
|
||||
# Status enums - included in generated code
|
||||
# Single definition of the enum export list; a preceding three-enum assignment
# was dead code because this list replaced it immediately.
ENUMS = [
    AssetStatus,
    JobStatus,
    ChunkJobStatus,
    DetectJobStatus,
    RunType,
    BrandSource,
]
|
||||
|
||||
# View/event models - generates TypeScript for UI consumption
|
||||
VIEWS = [
    ChunkEvent,
    WorkerEvent,
    PipelineStats,
    ChunkOutputFile,
]
|
||||
|
||||
@@ -149,6 +149,64 @@ class JobComplete:
|
||||
report: Optional[DetectionReportSummary] = None
|
||||
|
||||
|
||||
@dataclass
class RunContext:
    """Run context injected into all SSE events for grouping."""

    # Required identifiers; both are plain strings here.
    run_id: str
    parent_job_id: str
    # Kind of run: initial | replay | retry (defaults to "initial").
    run_type: str = "initial"
|
||||
|
||||
|
||||
# --- Checkpoint API types ---
|
||||
|
||||
|
||||
@dataclass
class CheckpointInfo:
    """Available checkpoint for a stage."""

    # The stage this checkpoint is available for.
    stage: str
|
||||
|
||||
|
||||
@dataclass
class ReplayRequest:
    """Request to replay pipeline from a specific stage."""

    job_id: str
    # Stage to replay from.
    start_stage: str
    # Optional overrides; None when the caller supplies none.
    config_overrides: Optional[dict] = None
|
||||
|
||||
|
||||
@dataclass
class ReplayResponse:
    """Result of a replay invocation."""

    status: str
    job_id: str
    start_stage: str
    # Result counters; both default to zero.
    detections: int = 0
    brands_found: int = 0
|
||||
|
||||
|
||||
@dataclass
class RetryRequest:
    """Request to queue async retry with different config."""

    job_id: str
    # Optional overrides; None when the caller supplies none.
    config_overrides: Optional[dict] = None
    # Stage the retry starts from; defaults to "escalate_vlm".
    start_stage: str = "escalate_vlm"
    # NOTE(review): presumably a delay before the retry runs — confirm
    # against the code that consumes this request.
    schedule_seconds: Optional[float] = None
|
||||
|
||||
|
||||
@dataclass
class RetryResponse:
    """Result of queueing a retry task."""

    # All three fields are required strings.
    status: str
    task_id: str
    job_id: str
|
||||
|
||||
|
||||
# --- Export lists for modelgen ---
|
||||
|
||||
DETECT_VIEWS = [
|
||||
@@ -163,4 +221,10 @@ DETECT_VIEWS = [
|
||||
LogEvent,
|
||||
DetectionReportSummary,
|
||||
JobComplete,
|
||||
RunContext,
|
||||
CheckpointInfo,
|
||||
ReplayRequest,
|
||||
ReplayResponse,
|
||||
RetryRequest,
|
||||
RetryResponse,
|
||||
]
|
||||
|
||||
162
core/schema/models/detect_jobs.py
Normal file
162
core/schema/models/detect_jobs.py
Normal file
@@ -0,0 +1,162 @@
|
||||
"""
|
||||
Detection Job and Checkpoint Schema Definitions
|
||||
|
||||
Source of truth for detection pipeline job tracking and stage checkpoints.
|
||||
Follows the TranscodeJob/ChunkJob pattern.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional
|
||||
from uuid import UUID
|
||||
|
||||
|
||||
class DetectJobStatus(str, Enum):
    """Status values for a detection job.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values.
    """

    PENDING = "pending"
    RUNNING = "running"
    PAUSED = "paused"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
|
||||
|
||||
|
||||
class RunType(str, Enum):
    """Kind of pipeline run: initial, replay, or retry.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values.
    """

    INITIAL = "initial"
    REPLAY = "replay"
    RETRY = "retry"
|
||||
|
||||
|
||||
@dataclass
class DetectJob:
    """
    A detection pipeline job.

    Each invocation of the pipeline (initial run, replay, retry) creates a DetectJob.
    Jobs for the same source video are linked via parent_job_id.
    """

    id: UUID

    # Input
    source_asset_id: UUID
    video_path: str
    profile_name: str = "soccer_broadcast"

    # Run lineage
    parent_job_id: Optional[UUID] = None  # links all runs for the same source
    run_type: RunType = RunType.INITIAL
    replay_from_stage: Optional[str] = None  # null for initial runs
    # default_factory gives every instance its own dict rather than a shared one.
    config_overrides: Dict[str, Any] = field(default_factory=dict)

    # Status
    status: DetectJobStatus = DetectJobStatus.PENDING
    current_stage: Optional[str] = None
    progress: float = 0.0
    error_message: Optional[str] = None

    # Results summary
    total_detections: int = 0
    brands_found: int = 0
    cloud_llm_calls: int = 0
    estimated_cost_usd: float = 0.0

    # Worker tracking
    celery_task_id: Optional[str] = None
    priority: int = 0

    # Timestamps (all unset by default)
    created_at: Optional[datetime] = None
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
|
||||
|
||||
|
||||
@dataclass
class StageCheckpoint:
    """
    A checkpoint saved after a pipeline stage completes.

    Binary data (frame images, crops) goes to S3/MinIO.
    Everything else (structured state) lives here in Postgres.
    """

    id: UUID
    job_id: UUID
    stage: str
    stage_index: int  # position in NODES list (0-7)

    # S3 reference for binary data only
    frames_prefix: str = ""  # s3 prefix: checkpoints/{job_id}/frames/

    # Frame metadata (non-image fields)
    # NOTE(review): int keys become strings if this dict round-trips through
    # JSON — confirm the loader converts them back (boxes_by_frame below
    # already uses str keys).
    frames_manifest: Dict[int, str] = field(default_factory=dict)  # seq → s3 key
    frames_meta: List[Dict[str, Any]] = field(default_factory=list)  # sequence, chunk_id, timestamp, hash
    filtered_frame_sequences: List[int] = field(default_factory=list)

    # Detection state (full structured data, not just summaries)
    boxes_by_frame: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict)
    text_candidates: List[Dict[str, Any]] = field(default_factory=list)
    unresolved_candidates: List[Dict[str, Any]] = field(default_factory=list)
    detections: List[Dict[str, Any]] = field(default_factory=list)

    # Pipeline state
    stats: Dict[str, Any] = field(default_factory=dict)
    config_snapshot: Dict[str, Any] = field(default_factory=dict)
    config_overrides: Dict[str, Any] = field(default_factory=dict)

    # Input refs (for replay)
    video_path: str = ""
    profile_name: str = ""

    # Timestamps
    created_at: Optional[datetime] = None
|
||||
|
||||
|
||||
class BrandSource(str, Enum):
    """How a brand was first identified."""

    OCR = "ocr"
    VLM = "local_vlm"
    CLOUD = "cloud_llm"
    MANUAL = "manual"  # user-added via UI
|
||||
|
||||
|
||||
@dataclass
class KnownBrand:
    """
    A brand discovered or registered in the system.

    Global — not per-source. Accumulates across all pipeline runs.
    Aliases enable fuzzy matching without re-escalating to VLM.
    """

    id: UUID
    canonical_name: str  # normalized display name
    # default_factory gives every instance its own list rather than a shared one.
    aliases: List[str] = field(default_factory=list)  # known spellings/variants
    first_source: BrandSource = BrandSource.OCR
    total_occurrences: int = 0
    confirmed: bool = False  # manually confirmed by user

    # Timestamps (unset by default)
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
|
||||
|
||||
|
||||
@dataclass
class SourceBrandSighting:
    """
    A brand seen in a specific source (video/asset).

    Per-source session cache — avoids re-escalating the same brand
    on subsequent frames or re-runs of the same source.
    """

    id: UUID
    source_asset_id: UUID  # the video this sighting belongs to
    brand_id: UUID  # FK to KnownBrand
    brand_name: str  # denormalized for fast lookup
    # NOTE(review): the time unit/base of these two floats is not visible
    # here — confirm before relying on it.
    first_seen_timestamp: float = 0.0
    last_seen_timestamp: float = 0.0
    occurrences: int = 0
    detection_source: BrandSource = BrandSource.OCR
    avg_confidence: float = 0.0

    # Timestamp (unset by default)
    created_at: Optional[datetime] = None
|
||||
Reference in New Issue
Block a user