# mediaproc/core/schema/models/jobs.py

"""
Job Schema Definitions

Source of truth for job data models.
TranscodeJob and ChunkJob share common lifecycle fields by convention.
"""

from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional
from uuid import UUID


class JobStatus(str, Enum):
    """
    Lifecycle states for a transcode/trim job.

    The ``str`` mixin makes every member a real string, so a member compares
    equal to its lowercase value (``JobStatus.PENDING == "pending"``).
    """

    # Declaration order is also the iteration order of the enum.
    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"


@dataclass
class TranscodeJob:
    """
    Queue entry describing a transcode or trim operation on one source asset.

    Two modes are documented for a job:
    - Preset transcode: a full re-encode driven by a preset.
    - Trim only: stream copy (-c:v copy -c:a copy) with no preset.

    ``is_trim_only`` reports the second mode: no preset and at least one
    trim bound set.
    """

    id: UUID

    # --- Input ---
    source_asset_id: UUID

    # --- Configuration ---
    preset_id: Optional[UUID] = None
    # Copy of the preset taken at creation time; each instance gets its own dict.
    preset_snapshot: Dict[str, Any] = field(default_factory=dict)

    # --- Trimming (optional), both bounds in seconds ---
    trim_start: Optional[float] = None
    trim_end: Optional[float] = None

    # --- Output ---
    output_filename: str = ""
    output_path: Optional[str] = None
    output_asset_id: Optional[UUID] = None

    # --- Status & progress ---
    status: JobStatus = JobStatus.PENDING
    # Percentage, from 0.0 to 100.0.
    progress: float = 0.0
    current_frame: Optional[int] = None
    # Seconds processed so far.
    current_time: Optional[float] = None
    # Speed as a string, e.g. "2.5x".
    speed: Optional[str] = None
    error_message: Optional[str] = None

    # --- Worker tracking ---
    celery_task_id: Optional[str] = None
    # AWS Step Functions execution ARN.
    execution_arn: Optional[str] = None
    # Lower value = higher priority.
    priority: int = 0

    # --- Timestamps ---
    created_at: Optional[datetime] = None
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None

    @property
    def is_trim_only(self) -> bool:
        """Return True when the job has no preset and at least one trim bound."""
        # A job with a preset is never trim-only.
        if self.preset_id is not None:
            return False
        # Trim-only needs at least one of the two bounds to be set.
        no_bounds = self.trim_start is None and self.trim_end is None
        return not no_bounds


class ChunkJobStatus(str, Enum):
    """
    Lifecycle states for a chunk pipeline job.

    Carries the same five values as ``JobStatus`` plus two pipeline-specific
    ones, ``CHUNKING`` and ``COLLECTING``. The ``str`` mixin makes each member
    compare equal to its lowercase string value.
    """

    # Declaration order is also the iteration order of the enum.
    PENDING = "pending"
    CHUNKING = "chunking"
    PROCESSING = "processing"
    COLLECTING = "collecting"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"


@dataclass
class ChunkJob:
    """
    Chunk pipeline job: a media file is split into chunks, which are then
    processed through a concurrent worker pool.
    """

    id: UUID

    # --- Input ---
    source_asset_id: UUID

    # --- Configuration ---
    # Chunk length in seconds.
    chunk_duration: float = 10.0
    num_workers: int = 4
    max_retries: int = 3
    # Known values: "ffmpeg", "checksum", "simulated_decode", "composite".
    processor_type: str = "ffmpeg"

    # --- Status & progress ---
    status: ChunkJobStatus = ChunkJobStatus.PENDING
    # Percentage, from 0.0 to 100.0.
    progress: float = 0.0
    total_chunks: int = 0
    processed_chunks: int = 0
    failed_chunks: int = 0
    retry_count: int = 0
    error_message: Optional[str] = None

    # --- Result stats ---
    throughput_mbps: Optional[float] = None
    elapsed_seconds: Optional[float] = None

    # --- Worker tracking ---
    celery_task_id: Optional[str] = None
    # Lower value = higher priority.
    priority: int = 0

    # --- Timestamps ---
    created_at: Optional[datetime] = None
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None