"""
Processor ABC and concrete implementations.

Demonstrates: OOP design principles — ABC, inheritance, composition
(Interview Topic 4).
"""

import hashlib
import logging
import time
from abc import ABC, abstractmethod
from pathlib import Path
from typing import List

from .exceptions import ChunkChecksumError
from .models import Chunk, ChunkResult

logger = logging.getLogger(__name__)


class Processor(ABC):
    """
    Abstract base class for chunk processors.

    Each processor defines how a single chunk is processed.
    The Worker calls processor.process(chunk) and handles retries.
    """

    @abstractmethod
    def process(self, chunk: Chunk) -> ChunkResult:
        """Process a single chunk and return the result."""


class FFmpegExtractProcessor(Processor):
    """
    Extracts a time segment from the source file using FFmpeg stream copy.

    Both codecs are set to "copy", so each chunk is written as its own mp4
    file without re-encoding.

    Args:
        output_dir: Directory to write chunk mp4 files. It is created
            (including parents) at construction time if it does not exist.
    """

    # Number of bytes read per iteration while hashing the output file.
    _READ_BLOCK_SIZE = 8192

    def __init__(self, output_dir: str):
        self.output_dir = output_dir
        Path(output_dir).mkdir(parents=True, exist_ok=True)

    def process(self, chunk: Chunk) -> ChunkResult:
        """
        Cut ``chunk``'s time range out of its source file into a new mp4.

        The output is named ``chunk_<sequence, zero-padded to 4>.mp4`` inside
        ``output_dir``. After transcoding, the output file is read back and
        hashed; the digest is logged at debug level.

        Args:
            chunk: Chunk providing ``source_path``, ``sequence``,
                ``start_time`` and ``end_time``.

        Returns:
            A successful ChunkResult carrying the output path and the elapsed
            wall-clock time (transcode plus hashing).

        Raises:
            OSError: If the output file cannot be opened for hashing (for
                example because the transcode produced no file).
            Exception: Anything raised by ``transcode`` propagates unchanged.
        """
        # Imported at call time rather than at module import time.
        # NOTE(review): presumably so this module stays importable without
        # the FFmpeg wrapper installed — confirm before hoisting.
        from core.ffmpeg.transcode import TranscodeConfig, transcode

        start = time.monotonic()

        output_file = str(
            Path(self.output_dir) / f"chunk_{chunk.sequence:04d}.mp4"
        )
        config = TranscodeConfig(
            input_path=chunk.source_path,
            output_path=output_file,
            video_codec="copy",
            audio_codec="copy",
            trim_start=chunk.start_time,
            trim_end=chunk.end_time,
        )
        transcode(config)

        # Reading the file back doubles as a check that the output exists.
        # The digest used to be computed and silently discarded; it is now
        # logged so it can be correlated with downstream verification.
        checksum = self._md5_hexdigest(output_file)
        logger.debug(
            "chunk %s written to %s (md5=%s)",
            chunk.sequence,
            output_file,
            checksum,
        )

        elapsed = time.monotonic() - start
        return ChunkResult(
            sequence=chunk.sequence,
            success=True,
            checksum_valid=True,
            processing_time=elapsed,
            output_file=output_file,
        )

    @classmethod
    def _md5_hexdigest(cls, path: str) -> str:
        """Return the hex MD5 digest of the file at ``path``, read in blocks."""
        md5 = hashlib.md5()
        with open(path, "rb") as f:
            for block in iter(lambda: f.read(cls._READ_BLOCK_SIZE), b""):
                md5.update(block)
        return md5.hexdigest()


class ChecksumProcessor(Processor):
    """
    Validates chunk metadata consistency.

    For time-based chunks, verifies the time range is valid.
    Raises ChunkChecksumError on invalid ranges.
    """

    def process(self, chunk: Chunk) -> ChunkResult:
        """
        Check that ``chunk`` describes a non-empty, forward time range.

        Args:
            chunk: Chunk providing ``duration``, ``start_time`` and
                ``end_time``.

        Returns:
            A successful ChunkResult with ``checksum_valid=True``.

        Raises:
            ChunkChecksumError: If ``duration`` is not positive or
                ``end_time`` is not strictly after ``start_time``.
        """
        start = time.monotonic()

        valid = chunk.duration > 0 and chunk.end_time > chunk.start_time
        if not valid:
            raise ChunkChecksumError(
                sequence=chunk.sequence,
                expected="valid time range",
                actual=f"{chunk.start_time}-{chunk.end_time}",
            )

        elapsed = time.monotonic() - start
        return ChunkResult(
            sequence=chunk.sequence,
            success=True,
            checksum_valid=True,
            processing_time=elapsed,
        )


class SimulatedDecodeProcessor(Processor):
    """
    Simulates decode work by sleeping proportional to chunk duration.

    Useful for demonstrating concurrency behavior without real FFmpeg.

    Args:
        ms_per_second: Milliseconds of simulated work per second of
            chunk duration (default: 100)
    """

    def __init__(self, ms_per_second: float = 100.0):
        self.ms_per_second = ms_per_second

    def process(self, chunk: Chunk) -> ChunkResult:
        """
        Sleep for ``ms_per_second * chunk.duration`` milliseconds.

        Args:
            chunk: Chunk providing ``duration`` and ``sequence``.

        Returns:
            A successful ChunkResult whose ``processing_time`` is the
            measured time spent sleeping.
        """
        start = time.monotonic()

        # time.sleep() raises ValueError for negative values, so a negative
        # duration or rate is treated as "no simulated work" instead.
        sleep_time = max(0.0, (self.ms_per_second * chunk.duration) / 1000.0)
        time.sleep(sleep_time)

        elapsed = time.monotonic() - start
        return ChunkResult(
            sequence=chunk.sequence,
            success=True,
            checksum_valid=True,
            processing_time=elapsed,
        )


class CompositeProcessor(Processor):
    """
    Chains multiple processors — runs each in sequence on the same chunk.

    Demonstrates OOP composition pattern.

    Args:
        processors: List of processors to chain

    Raises:
        ValueError: If ``processors`` is empty.
    """

    def __init__(self, processors: List[Processor]):
        # Materialise first: this makes the emptiness check reliable for any
        # iterable and decouples the chain from later mutation of the
        # caller's list.
        chain = list(processors) if processors else []
        if not chain:
            raise ValueError("CompositeProcessor requires at least one processor")
        self.processors = chain

    def process(self, chunk: Chunk) -> ChunkResult:
        """
        Run every chained processor on ``chunk`` in order.

        The chain stops at the first processor whose result has a falsy
        ``success``; that result is returned as-is. Exceptions raised by a
        chained processor propagate unchanged.

        Args:
            chunk: Chunk handed unchanged to each processor.

        Returns:
            The first unsuccessful result, or a successful ChunkResult where
            ``checksum_valid`` is True only if every stage reported it,
            ``processing_time`` is the total elapsed time for the chain, and
            ``output_file`` is the last non-empty output path any stage
            produced (left at the ChunkResult default if none did).
        """
        start = time.monotonic()

        checksum_valid = True
        output_file = None
        for proc in self.processors:
            result = proc.process(chunk)
            if not result.success:
                return result
            if not result.checksum_valid:
                checksum_valid = False
            # Not every result carries an output path; keep the most recent.
            produced = getattr(result, "output_file", None)
            if produced:
                output_file = produced

        elapsed = time.monotonic() - start

        # Only pass output_file when a stage produced one, so the
        # ChunkResult default still applies otherwise.
        extra = {"output_file": output_file} if output_file else {}
        return ChunkResult(
            sequence=chunk.sequence,
            success=True,
            checksum_valid=checksum_valid,
            processing_time=elapsed,
            **extra,
        )