Files
mediaproc/core/chunker/processor.py
2026-03-13 14:29:38 -03:00

174 lines
4.6 KiB
Python

"""
Processor ABC and concrete implementations.
Demonstrates: OOP design principles — ABC, inheritance, composition (Interview Topic 4).
"""
import hashlib
import time
from abc import ABC, abstractmethod
from pathlib import Path
from typing import List
from .exceptions import ChunkChecksumError
from .models import Chunk, ChunkResult
class Processor(ABC):
    """
    Abstract interface for per-chunk processing strategies.

    A concrete subclass implements ``process`` to describe the work done on
    one chunk. According to the original design note, the Worker calls
    ``processor.process(chunk)`` and is responsible for retries.
    """

    @abstractmethod
    def process(self, chunk: Chunk) -> ChunkResult:
        """
        Process a single chunk and return the result.

        Args:
            chunk: The chunk to operate on.

        Returns:
            A ChunkResult describing the outcome for ``chunk``.
        """
class FFmpegExtractProcessor(Processor):
    """
    Cuts one chunk's time range out of its source file via the transcode helper.

    Both the video and the audio codec are set to "copy", so streams are
    copied rather than re-encoded. Every chunk is written to its own
    ``chunk_<sequence>.mp4`` file inside ``output_dir``.

    Args:
        output_dir: Directory to write chunk mp4 files
    """

    def __init__(self, output_dir: str):
        self.output_dir = output_dir
        # Create the directory (and any missing parents) up front; an
        # already-existing directory is not an error.
        Path(output_dir).mkdir(parents=True, exist_ok=True)

    def process(self, chunk: Chunk) -> ChunkResult:
        """
        Extract ``chunk``'s time range into an mp4 file and report the result.

        Args:
            chunk: Supplies ``source_path``, ``start_time``, ``end_time`` and
                ``sequence`` for the extraction.

        Returns:
            A successful ChunkResult carrying the output file path and the
            wall-clock time spent (transcode plus hashing).

        Raises:
            OSError: If the output file cannot be opened for hashing.
            Any exception raised by ``transcode`` propagates unchanged.
        """
        # Imported at call time rather than at module import, as in the
        # original. NOTE(review): presumably to avoid an import cycle or an
        # import-time FFmpeg dependency - confirm.
        from core.ffmpeg.transcode import TranscodeConfig, transcode

        began = time.monotonic()

        # Zero-padded to four digits: chunk_0000.mp4, chunk_0001.mp4, ...
        target_name = f"chunk_{chunk.sequence:04d}.mp4"
        output_file = str(Path(self.output_dir) / target_name)

        transcode(
            TranscodeConfig(
                input_path=chunk.source_path,
                output_path=output_file,
                video_codec="copy",
                audio_codec="copy",
                trim_start=chunk.start_time,
                trim_end=chunk.end_time,
            )
        )

        # Hash the written file in 8 KiB blocks so it is never loaded whole.
        digest = hashlib.md5()
        with open(output_file, "rb") as produced:
            while True:
                block = produced.read(8192)
                if not block:
                    break
                digest.update(block)
        # NOTE(review): the hex digest is computed but not forwarded to the
        # ChunkResult below; checksum_valid is hard-coded to True. The read
        # itself still fails loudly if transcode produced no file.
        checksum = digest.hexdigest()

        elapsed = time.monotonic() - began
        return ChunkResult(
            sequence=chunk.sequence,
            success=True,
            checksum_valid=True,
            processing_time=elapsed,
            output_file=output_file,
        )
class ChecksumProcessor(Processor):
    """
    Sanity-checks a chunk's timing metadata.

    A chunk passes when its duration is positive and its end time lies
    strictly after its start time. No file content is read or hashed here.

    Raises ChunkChecksumError on invalid ranges.
    """

    def process(self, chunk: Chunk) -> ChunkResult:
        """
        Validate the time range of ``chunk``.

        Args:
            chunk: Supplies ``duration``, ``start_time``, ``end_time`` and
                ``sequence``.

        Returns:
            A successful ChunkResult with ``checksum_valid=True``.

        Raises:
            ChunkChecksumError: If the duration is not positive or the end
                time is not after the start time.
        """
        began = time.monotonic()

        # Written as negated ">" tests (not "<=") so that values which
        # compare False in both directions, such as NaN, are still rejected.
        if not (chunk.duration > 0) or not (chunk.end_time > chunk.start_time):
            raise ChunkChecksumError(
                sequence=chunk.sequence,
                expected="valid time range",
                actual=f"{chunk.start_time}-{chunk.end_time}",
            )

        elapsed = time.monotonic() - began
        return ChunkResult(
            sequence=chunk.sequence,
            success=True,
            checksum_valid=True,
            processing_time=elapsed,
        )
class SimulatedDecodeProcessor(Processor):
    """
    Stand-in for real decode work that sleeps in proportion to chunk duration.

    Lets concurrency behavior be demonstrated without invoking FFmpeg.

    Args:
        ms_per_second: Milliseconds of simulated work per second of chunk duration (default: 100)
    """

    def __init__(self, ms_per_second: float = 100.0):
        self.ms_per_second = ms_per_second

    def process(self, chunk: Chunk) -> ChunkResult:
        """
        Sleep for the simulated decode time of ``chunk`` and report success.

        Args:
            chunk: Supplies ``duration`` and ``sequence``.

        Returns:
            A successful ChunkResult whose ``processing_time`` is the
            measured wall-clock time, including the sleep.
        """
        began = time.monotonic()

        # ms-per-second times duration gives milliseconds; time.sleep takes
        # seconds, hence the division by 1000.
        simulated_ms = self.ms_per_second * chunk.duration
        time.sleep(simulated_ms / 1000.0)

        elapsed = time.monotonic() - began
        return ChunkResult(
            sequence=chunk.sequence,
            success=True,
            checksum_valid=True,
            processing_time=elapsed,
        )
class CompositeProcessor(Processor):
    """
    Chains multiple processors — runs each in sequence on the same chunk.

    Processors run in the order given. The first result whose ``success`` is
    falsy is returned as-is and the remaining processors are skipped;
    exceptions raised by a processor propagate unchanged.

    When every stage succeeds a combined ChunkResult is returned:
    ``processing_time`` covers the whole chain, ``checksum_valid`` is taken
    from the final stage, and ``output_file`` is carried over from the most
    recent stage that reported one.

    Args:
        processors: Processors to chain, in execution order.

    Raises:
        ValueError: If ``processors`` is empty.
    """

    def __init__(self, processors: List[Processor]):
        # Snapshot the input: later mutation of the caller's list can no
        # longer empty or reorder the chain, and a one-shot iterable is not
        # exhausted after the first process() call. "or ()" keeps None and
        # empty inputs on the ValueError path below.
        chain = list(processors or ())
        if not chain:
            raise ValueError("CompositeProcessor requires at least one processor")
        self.processors = chain

    def process(self, chunk: Chunk) -> ChunkResult:
        """
        Run every chained processor on ``chunk`` and merge the outcome.

        Args:
            chunk: The chunk handed, unchanged, to each processor in turn.

        Returns:
            The first unsuccessful stage result, or a combined successful
            ChunkResult when all stages succeed.
        """
        start = time.monotonic()
        last_result = None
        output_file = None
        for proc in self.processors:
            last_result = proc.process(chunk)
            if not last_result.success:
                # Short-circuit: hand the failing stage's own result back.
                return last_result
            # Remember the latest output path any stage produced; previously
            # the combined result silently dropped it. getattr keeps this
            # safe for result objects that lack the attribute.
            stage_output = getattr(last_result, "output_file", None)
            if stage_output:
                output_file = stage_output
        elapsed = time.monotonic() - start

        extra = {}
        if output_file:
            # Pass output_file only when a stage set one, so ChunkResult's
            # own default still applies otherwise.
            extra["output_file"] = output_file

        # Explicit None test: do not depend on the truthiness of the result
        # object itself.
        checksum_valid = last_result.checksum_valid if last_result is not None else True
        return ChunkResult(
            sequence=chunk.sequence,
            success=True,
            checksum_valid=checksum_valid,
            processing_time=elapsed,
            **extra,
        )