Use SQLAlchemy pattern
This commit is contained in:
@@ -25,28 +25,25 @@ from .grpc import (
|
||||
ProgressUpdate,
|
||||
WorkerStatus,
|
||||
)
|
||||
from .jobs import ChunkJob, ChunkJobStatus, JobStatus, TranscodeJob
|
||||
from .detect_jobs import (
|
||||
DetectJob, DetectJobStatus, RunType,
|
||||
from .job import (
|
||||
Job, JobStatus, RunType,
|
||||
Timeline, Checkpoint,
|
||||
BrandSource, KnownBrand, SourceBrandSighting,
|
||||
BrandSource, Brand,
|
||||
)
|
||||
from .stages import StageConfigField, StageIO, StageDefinition, STAGE_VIEWS
|
||||
from .media import AssetStatus, MediaAsset
|
||||
from .presets import BUILTIN_PRESETS, TranscodePreset
|
||||
from .detect import DETECT_VIEWS # noqa: F401 — discovered by modelgen generic loader
|
||||
from .inference import INFERENCE_VIEWS # noqa: F401 — GPU inference server API types
|
||||
from .ui_state import UI_STATE_VIEWS # noqa: F401 — UI store state types
|
||||
from .stages import StageConfigField, StageIO, StageDefinition, STAGE_VIEWS # noqa: F401
|
||||
from .views import ChunkEvent, ChunkOutputFile, PipelineStats, WorkerEvent
|
||||
from .sources import ChunkInfo, SourceJob, SourceType
|
||||
|
||||
# Core domain models - generates Django, SQLModel, TypeScript
|
||||
DATACLASSES = [MediaAsset, TranscodePreset, TranscodeJob, ChunkJob,
|
||||
DetectJob, Timeline, Checkpoint,
|
||||
KnownBrand, SourceBrandSighting]
|
||||
# Core domain models - generates SQLModel, TypeScript
|
||||
DATACLASSES = [MediaAsset, TranscodePreset,
|
||||
Job, Timeline, Checkpoint, Brand]
|
||||
|
||||
# API request/response models - generates TypeScript only (no Django)
|
||||
# WorkerStatus from grpc.py is reused here
|
||||
# API request/response models
|
||||
API_MODELS = [
|
||||
CreateJobRequest,
|
||||
UpdateAssetRequest,
|
||||
@@ -58,14 +55,13 @@ API_MODELS = [
|
||||
ChunkInfo,
|
||||
]
|
||||
|
||||
# Status enums - included in generated code
|
||||
ENUMS = [AssetStatus, JobStatus, ChunkJobStatus, DetectJobStatus, RunType, BrandSource, SourceType]
|
||||
# Status enums
|
||||
ENUMS = [AssetStatus, JobStatus, RunType, BrandSource, SourceType]
|
||||
|
||||
# View/event models - generates TypeScript for UI consumption
|
||||
# View/event models
|
||||
VIEWS = [ChunkEvent, WorkerEvent, PipelineStats, ChunkOutputFile]
|
||||
|
||||
|
||||
# gRPC messages - generates Proto
|
||||
# gRPC messages
|
||||
GRPC_MESSAGES = [
|
||||
JobRequest,
|
||||
JobResponse,
|
||||
@@ -83,18 +79,27 @@ __all__ = [
|
||||
# Models
|
||||
"MediaAsset",
|
||||
"TranscodePreset",
|
||||
"TranscodeJob",
|
||||
"ChunkJob",
|
||||
# API Models
|
||||
"Job",
|
||||
"Timeline",
|
||||
"Checkpoint",
|
||||
"KnownBrand",
|
||||
"SourceBrandSighting",
|
||||
# Enums
|
||||
"AssetStatus",
|
||||
"JobStatus",
|
||||
"RunType",
|
||||
"BrandSource",
|
||||
"SourceType",
|
||||
# Stages
|
||||
"StageConfigField",
|
||||
"StageIO",
|
||||
"StageDefinition",
|
||||
# API
|
||||
"CreateJobRequest",
|
||||
"UpdateAssetRequest",
|
||||
"DeleteResult",
|
||||
"ScanResult",
|
||||
"SystemStatus",
|
||||
# Enums
|
||||
"AssetStatus",
|
||||
"JobStatus",
|
||||
"ChunkJobStatus",
|
||||
# gRPC
|
||||
"GRPC_SERVICE",
|
||||
"JobRequest",
|
||||
@@ -113,7 +118,6 @@ __all__ = [
|
||||
"PipelineStats",
|
||||
"ChunkOutputFile",
|
||||
# Sources
|
||||
"SourceType",
|
||||
"SourceJob",
|
||||
"ChunkInfo",
|
||||
# For generator
|
||||
|
||||
158
core/schema/models/job.py
Normal file
158
core/schema/models/job.py
Normal file
@@ -0,0 +1,158 @@
|
||||
"""
|
||||
Job, Timeline, and Checkpoint Schema Definitions
|
||||
|
||||
Source of truth for pipeline jobs, timelines, and checkpoints.
|
||||
Generates: SQLModel (core/db/models.py), TypeScript via modelgen.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional
|
||||
from uuid import UUID
|
||||
|
||||
|
||||
class JobStatus(str, Enum):
    """Lifecycle states for a pipeline Job (str-valued so it serializes cleanly to JSON/DB)."""

    PENDING = "pending"
    RUNNING = "running"
    PAUSED = "paused"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
|
||||
|
||||
|
||||
class RunType(str, Enum):
    """How a Job invocation was started — each kind creates its own Job (see Job.run_type)."""

    INITIAL = "initial"
    REPLAY = "replay"
    RETRY = "retry"
|
||||
|
||||
|
||||
@dataclass
class Job:
    """
    A pipeline job.

    Each invocation (initial run, replay, retry) creates a Job.
    Jobs for the same source are linked via parent_id.
    """

    id: UUID

    # Input
    source_asset_id: UUID
    video_path: str
    profile_name: str = "soccer_broadcast"

    # Lineage — how this run relates to earlier runs on the same source
    parent_id: Optional[UUID] = None  # linking Job for the same source; None = first run
    run_type: RunType = RunType.INITIAL
    config_overrides: Dict[str, Any] = field(default_factory=dict)

    # Status
    status: JobStatus = JobStatus.PENDING
    current_stage: Optional[str] = None
    progress: float = 0.0  # presumably a 0.0–1.0 completion fraction — TODO confirm
    error_message: Optional[str] = None  # NOTE(review): likely set alongside FAILED — verify

    # Results summary
    total_detections: int = 0
    brands_found: int = 0
    cloud_llm_calls: int = 0
    estimated_cost_usd: float = 0.0

    # Worker tracking
    celery_task_id: Optional[str] = None  # id of the Celery task executing this job, if any
    priority: int = 0

    # Timestamps — None until the corresponding transition has happened
    created_at: Optional[datetime] = None
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
|
||||
|
||||
|
||||
@dataclass
class Timeline:
    """
    The frame sequence from a source video.

    Independent of stages — exists before any stage runs.
    Frames stored in MinIO as JPEGs, metadata here.
    One timeline per job.
    """

    id: UUID
    source_asset_id: Optional[UUID] = None
    source_video: str = ""
    profile_name: str = ""
    fps: float = 2.0  # presumably the frame-sampling rate used for extraction — confirm

    frames_prefix: str = ""  # s3: timeline/{id}/frames/
    frames_manifest: Dict[int, str] = field(default_factory=dict)  # seq → s3 key
    frames_meta: List[Dict[str, Any]] = field(default_factory=list)  # per-frame metadata; shape defined by the producer — TODO confirm

    created_at: Optional[datetime] = None
|
||||
|
||||
|
||||
@dataclass
class Checkpoint:
    """
    A snapshot of pipeline state on a timeline.

    Stage outputs stored as JSONB — each stage serializes to JSON,
    the checkpoint stores it without knowing the shape.

    parent_id forms a tree: multiple children from the same parent
    = different config tries from the same starting point.
    """

    id: UUID
    timeline_id: UUID  # the Timeline this checkpoint belongs to
    parent_id: Optional[UUID] = None  # null = root checkpoint

    # Stage outputs — JSONB per stage, opaque to the checkpoint layer
    stage_outputs: Dict[str, Any] = field(default_factory=dict)

    # Config that produced this checkpoint
    config_overrides: Dict[str, Any] = field(default_factory=dict)

    # Pipeline state
    stats: Dict[str, Any] = field(default_factory=dict)

    # Scenario bookmark — lets a checkpoint be pinned with a human-readable label
    is_scenario: bool = False
    scenario_label: str = ""

    created_at: Optional[datetime] = None
|
||||
|
||||
|
||||
# --- Brands ---
|
||||
|
||||
class BrandSource(str, Enum):
    """Provenance of a brand identification — which detector or entry path produced it."""

    OCR = "ocr"
    VLM = "local_vlm"
    CLOUD = "cloud_llm"
    MANUAL = "manual"
|
||||
|
||||
|
||||
@dataclass
class Brand:
    """
    A brand discovered or registered in the system.

    Airings track where/when the brand appeared — each airing
    references a timeline and a frame range.
    """

    id: UUID
    canonical_name: str  # normalized display name; alternates live in `aliases`
    aliases: List[str] = field(default_factory=list)
    source: BrandSource = BrandSource.OCR  # how first discovered
    confirmed: bool = False  # NOTE(review): presumably a human-review flag — confirm

    # Airings — JSONB array of appearances
    # [{timeline_id, frame_start, frame_end, confidence, source, timestamp}]
    airings: List[Dict[str, Any]] = field(default_factory=list)
    total_airings: int = 0  # NOTE(review): appears to be a denormalized count of `airings` — verify maintainer keeps it in sync

    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
|
||||
Reference in New Issue
Block a user