"""
|
|
Processor ABC and concrete implementations.
|
|
|
|
Demonstrates: OOP design principles — ABC, inheritance, composition (Interview Topic 4).
|
|
"""
|
|
|
|
import hashlib
|
|
import time
|
|
from abc import ABC, abstractmethod
|
|
from pathlib import Path
|
|
from typing import List
|
|
|
|
from .exceptions import ChunkChecksumError
|
|
from .models import Chunk, ChunkResult
|
|
|
|
|
|
class Processor(ABC):
|
|
"""
|
|
Abstract base class for chunk processors.
|
|
|
|
Each processor defines how a single chunk is processed.
|
|
The Worker calls processor.process(chunk) and handles retries.
|
|
"""
|
|
|
|
@abstractmethod
|
|
def process(self, chunk: Chunk) -> ChunkResult:
|
|
"""Process a single chunk and return the result."""
|
|
pass
|
|
|
|
|
|
class FFmpegExtractProcessor(Processor):
|
|
"""
|
|
Extracts a time segment from the source file using FFmpeg stream copy.
|
|
|
|
Produces a playable mp4 file per chunk — no re-encoding.
|
|
|
|
Args:
|
|
output_dir: Directory to write chunk mp4 files
|
|
"""
|
|
|
|
def __init__(self, output_dir: str):
|
|
self.output_dir = output_dir
|
|
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
|
|
|
def process(self, chunk: Chunk) -> ChunkResult:
|
|
from core.ffmpeg.transcode import TranscodeConfig, transcode
|
|
|
|
start = time.monotonic()
|
|
|
|
output_file = str(
|
|
Path(self.output_dir) / f"chunk_{chunk.sequence:04d}.mp4"
|
|
)
|
|
|
|
config = TranscodeConfig(
|
|
input_path=chunk.source_path,
|
|
output_path=output_file,
|
|
video_codec="copy",
|
|
audio_codec="copy",
|
|
trim_start=chunk.start_time,
|
|
trim_end=chunk.end_time,
|
|
)
|
|
|
|
transcode(config)
|
|
|
|
# Compute checksum of output file
|
|
md5 = hashlib.md5()
|
|
with open(output_file, "rb") as f:
|
|
for block in iter(lambda: f.read(8192), b""):
|
|
md5.update(block)
|
|
checksum = md5.hexdigest()
|
|
|
|
elapsed = time.monotonic() - start
|
|
|
|
return ChunkResult(
|
|
sequence=chunk.sequence,
|
|
success=True,
|
|
checksum_valid=True,
|
|
processing_time=elapsed,
|
|
output_file=output_file,
|
|
)
|
|
|
|
|
|
class ChecksumProcessor(Processor):
|
|
"""
|
|
Validates chunk metadata consistency.
|
|
|
|
For time-based chunks, verifies the time range is valid.
|
|
Raises ChunkChecksumError on invalid ranges.
|
|
"""
|
|
|
|
def process(self, chunk: Chunk) -> ChunkResult:
|
|
start = time.monotonic()
|
|
|
|
valid = chunk.duration > 0 and chunk.end_time > chunk.start_time
|
|
|
|
if not valid:
|
|
raise ChunkChecksumError(
|
|
sequence=chunk.sequence,
|
|
expected="valid time range",
|
|
actual=f"{chunk.start_time}-{chunk.end_time}",
|
|
)
|
|
|
|
elapsed = time.monotonic() - start
|
|
|
|
return ChunkResult(
|
|
sequence=chunk.sequence,
|
|
success=True,
|
|
checksum_valid=True,
|
|
processing_time=elapsed,
|
|
)
|
|
|
|
|
|
class SimulatedDecodeProcessor(Processor):
|
|
"""
|
|
Simulates decode work by sleeping proportional to chunk duration.
|
|
|
|
Useful for demonstrating concurrency behavior without real FFmpeg.
|
|
|
|
Args:
|
|
ms_per_second: Milliseconds of simulated work per second of chunk duration (default: 100)
|
|
"""
|
|
|
|
def __init__(self, ms_per_second: float = 100.0):
|
|
self.ms_per_second = ms_per_second
|
|
|
|
def process(self, chunk: Chunk) -> ChunkResult:
|
|
start = time.monotonic()
|
|
|
|
sleep_time = (self.ms_per_second * chunk.duration) / 1000.0
|
|
time.sleep(sleep_time)
|
|
|
|
elapsed = time.monotonic() - start
|
|
|
|
return ChunkResult(
|
|
sequence=chunk.sequence,
|
|
success=True,
|
|
checksum_valid=True,
|
|
processing_time=elapsed,
|
|
)
|
|
|
|
|
|
class CompositeProcessor(Processor):
|
|
"""
|
|
Chains multiple processors — runs each in sequence on the same chunk.
|
|
|
|
Demonstrates OOP composition pattern.
|
|
|
|
Args:
|
|
processors: List of processors to chain
|
|
"""
|
|
|
|
def __init__(self, processors: List[Processor]):
|
|
if not processors:
|
|
raise ValueError("CompositeProcessor requires at least one processor")
|
|
self.processors = processors
|
|
|
|
def process(self, chunk: Chunk) -> ChunkResult:
|
|
start = time.monotonic()
|
|
last_result = None
|
|
|
|
for proc in self.processors:
|
|
last_result = proc.process(chunk)
|
|
if not last_result.success:
|
|
return last_result
|
|
|
|
elapsed = time.monotonic() - start
|
|
|
|
return ChunkResult(
|
|
sequence=chunk.sequence,
|
|
success=True,
|
|
checksum_valid=last_result.checksum_valid if last_result else True,
|
|
processing_time=elapsed,
|
|
)
|