Files
mediaproc/core/schema/models/jobs.py
2026-03-13 14:29:38 -03:00

134 lines
3.4 KiB
Python

"""
Job schema definitions.

Source of truth for the job data models.
TranscodeJob and ChunkJob share their common lifecycle fields by convention
only (the fields are repeated in each class, not inherited from a shared base).
"""
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional
from uuid import UUID
class JobStatus(str, Enum):
    """Status of a transcode/trim job.

    Members also subclass ``str``, so each one compares equal to its
    lowercase string value (e.g. ``JobStatus.PENDING == "pending"``).
    """

    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
@dataclass
class TranscodeJob:
    """
    A transcoding or trimming job in the queue.

    Jobs can either:
    - Transcode using a preset (full re-encode)
    - Trim only (stream copy with -c:v copy -c:a copy)

    A trim-only job has no preset and uses stream copy.
    """

    id: UUID

    # Input
    source_asset_id: UUID

    # Configuration
    preset_id: Optional[UUID] = None
    # Copy of the preset taken at creation time. default_factory gives every
    # instance its own dict instead of one shared mutable default.
    preset_snapshot: Dict[str, Any] = field(default_factory=dict)

    # Trimming (optional); either bound may be left unset.
    trim_start: Optional[float] = None  # seconds
    trim_end: Optional[float] = None  # seconds

    # Output
    output_filename: str = ""
    output_path: Optional[str] = None
    output_asset_id: Optional[UUID] = None

    # Status & Progress
    status: JobStatus = JobStatus.PENDING
    progress: float = 0.0  # 0.0 to 100.0
    current_frame: Optional[int] = None
    current_time: Optional[float] = None  # seconds processed
    speed: Optional[str] = None  # e.g. "2.5x"
    error_message: Optional[str] = None

    # Worker tracking
    celery_task_id: Optional[str] = None
    execution_arn: Optional[str] = None  # AWS Step Functions execution ARN
    priority: int = 0  # Lower = higher priority

    # Timestamps
    created_at: Optional[datetime] = None
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None

    @property
    def is_trim_only(self) -> bool:
        """Check if this is a trim-only job (stream copy, no transcode).

        Returns:
            True when ``preset_id`` is None and at least one of
            ``trim_start`` / ``trim_end`` is set; False otherwise.
        """
        return self.preset_id is None and (
            self.trim_start is not None or self.trim_end is not None
        )
class ChunkJobStatus(str, Enum):
    """Status of a chunk pipeline job.

    Members also subclass ``str``, so each one compares equal to its
    lowercase string value (e.g. ``ChunkJobStatus.CHUNKING == "chunking"``).
    """

    PENDING = "pending"
    CHUNKING = "chunking"
    PROCESSING = "processing"
    COLLECTING = "collecting"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
@dataclass
class ChunkJob:
    """
    A chunk pipeline job — splits a media file into chunks and processes them
    through a concurrent worker pool.
    """

    id: UUID

    # Input
    source_asset_id: UUID

    # Configuration
    chunk_duration: float = 10.0  # seconds
    num_workers: int = 4
    max_retries: int = 3
    # One of: "ffmpeg", "checksum", "simulated_decode", "composite"
    processor_type: str = "ffmpeg"

    # Status & Progress
    status: ChunkJobStatus = ChunkJobStatus.PENDING
    progress: float = 0.0  # 0.0 to 100.0
    total_chunks: int = 0
    processed_chunks: int = 0
    failed_chunks: int = 0
    retry_count: int = 0
    error_message: Optional[str] = None

    # Result stats
    throughput_mbps: Optional[float] = None
    elapsed_seconds: Optional[float] = None

    # Worker tracking
    celery_task_id: Optional[str] = None
    priority: int = 0  # Lower = higher priority

    # Timestamps
    created_at: Optional[datetime] = None
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None