"""
Job schema definitions.

Source of truth for the job data models. TranscodeJob and ChunkJob share
common lifecycle fields by convention.
"""

from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional
from uuid import UUID


class JobStatus(str, Enum):
    """Lifecycle status of a transcode/trim job."""

    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"


@dataclass
class TranscodeJob:
    """A transcoding or trimming job in the queue.

    A job does one of two things:
      * transcode using a preset (full re-encode), or
      * trim only (stream copy with -c:v copy -c:a copy).

    A trim-only job carries no preset and uses stream copy.
    """

    id: UUID

    # Input
    source_asset_id: UUID

    # Configuration
    preset_id: Optional[UUID] = None
    # Copy of the preset taken at job-creation time, so later preset
    # edits do not alter already-queued jobs.
    preset_snapshot: Dict[str, Any] = field(default_factory=dict)

    # Trimming (optional); both values are in seconds
    trim_start: Optional[float] = None
    trim_end: Optional[float] = None

    # Output
    output_filename: str = ""
    output_path: Optional[str] = None
    output_asset_id: Optional[UUID] = None

    # Status & progress
    status: JobStatus = JobStatus.PENDING
    progress: float = 0.0  # 0.0 to 100.0
    current_frame: Optional[int] = None
    current_time: Optional[float] = None  # seconds processed so far
    speed: Optional[str] = None  # e.g. "2.5x"
    error_message: Optional[str] = None

    # Worker tracking
    celery_task_id: Optional[str] = None
    execution_arn: Optional[str] = None  # AWS Step Functions execution ARN
    priority: int = 0  # Lower = higher priority

    # Timestamps
    created_at: Optional[datetime] = None
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None

    @property
    def is_trim_only(self) -> bool:
        """True when this job trims via stream copy with no transcode."""
        wants_trim = self.trim_start is not None or self.trim_end is not None
        return self.preset_id is None and wants_trim


class ChunkJobStatus(str, Enum):
    """Lifecycle status of a chunk pipeline job."""

    PENDING = "pending"
    CHUNKING = "chunking"
    PROCESSING = "processing"
    COLLECTING = "collecting"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"


@dataclass
class ChunkJob:
    """A chunk pipeline job.

    Splits a media file into chunks and processes them through a
    concurrent worker pool.
    """

    id: UUID

    # Input
    source_asset_id: UUID

    # Configuration
    chunk_duration: float = 10.0  # seconds per chunk
    num_workers: int = 4
    max_retries: int = 3
    # One of: "ffmpeg", "checksum", "simulated_decode", "composite"
    processor_type: str = "ffmpeg"

    # Status & progress
    status: ChunkJobStatus = ChunkJobStatus.PENDING
    progress: float = 0.0  # 0.0 to 100.0
    total_chunks: int = 0
    processed_chunks: int = 0
    failed_chunks: int = 0
    retry_count: int = 0
    error_message: Optional[str] = None

    # Result stats
    throughput_mbps: Optional[float] = None
    elapsed_seconds: Optional[float] = None

    # Worker tracking
    celery_task_id: Optional[str] = None
    priority: int = 0  # Lower = higher priority

    # Timestamps
    created_at: Optional[datetime] = None
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None