# core/chunker/processor.py
"""
|
||||
Processor ABC and concrete implementations.
|
||||
|
||||
Demonstrates: OOP design principles — ABC, inheritance, composition (Interview Topic 4).
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import time
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
from .exceptions import ChunkChecksumError
|
||||
from .models import Chunk, ChunkResult
|
||||
|
||||
|
||||
class Processor(ABC):
    """
    Abstract base class for chunk processors.

    A processor encapsulates the strategy for handling exactly one chunk.
    The Worker drives execution: it calls ``process(chunk)`` per chunk and
    owns retry behavior, so implementations may simply raise on failure.
    """

    @abstractmethod
    def process(self, chunk: Chunk) -> ChunkResult:
        """Process a single chunk and return the result."""
        ...


class FFmpegExtractProcessor(Processor):
    """
    Extracts a time segment from the source file using FFmpeg stream copy.

    Produces a playable mp4 file per chunk — no re-encoding (both codecs
    are set to "copy", so FFmpeg only remuxes the selected time range).

    Args:
        output_dir: Directory to write chunk mp4 files
    """

    def __init__(self, output_dir: str):
        self.output_dir = output_dir
        # Create the destination eagerly so process() can assume it exists.
        Path(output_dir).mkdir(parents=True, exist_ok=True)

    def process(self, chunk: Chunk) -> ChunkResult:
        """Cut the chunk's time range into its own mp4 and report timing.

        Propagates whatever ``transcode()`` raises on FFmpeg failure; the
        Worker is responsible for retries.
        """
        # Imported lazily so importing this module never requires the
        # ffmpeg subsystem for callers that use the other processors.
        from core.ffmpeg.transcode import TranscodeConfig, transcode

        start = time.monotonic()

        output_file = str(
            Path(self.output_dir) / f"chunk_{chunk.sequence:04d}.mp4"
        )

        config = TranscodeConfig(
            input_path=chunk.source_path,
            output_path=output_file,
            video_codec="copy",  # stream copy: no re-encode
            audio_codec="copy",
            trim_start=chunk.start_time,
            trim_end=chunk.end_time,
        )

        transcode(config)

        # Reading the file back also confirms the output exists and is readable.
        # NOTE(review): the digest is currently not propagated into ChunkResult
        # (checksum_valid below is hard-coded True) — confirm whether ChunkResult
        # should carry the checksum, or drop this computation entirely.
        checksum = self._md5_of(output_file)

        elapsed = time.monotonic() - start

        return ChunkResult(
            sequence=chunk.sequence,
            success=True,
            checksum_valid=True,
            processing_time=elapsed,
            output_file=output_file,
        )

    @staticmethod
    def _md5_of(path: str, block_size: int = 8192) -> str:
        """Return the hex MD5 digest of *path*, read in block_size pieces."""
        md5 = hashlib.md5()
        with open(path, "rb") as f:
            for block in iter(lambda: f.read(block_size), b""):
                md5.update(block)
        return md5.hexdigest()


class ChecksumProcessor(Processor):
    """
    Validates chunk metadata consistency.

    For time-based chunks, verifies the time range is valid.
    Raises ChunkChecksumError on invalid ranges.
    """

    def process(self, chunk: Chunk) -> ChunkResult:
        began = time.monotonic()

        # A chunk is well-formed only when it covers a positive duration
        # and its end strictly follows its start.
        has_positive_duration = chunk.duration > 0
        is_ordered = chunk.end_time > chunk.start_time

        if not (has_positive_duration and is_ordered):
            raise ChunkChecksumError(
                sequence=chunk.sequence,
                expected="valid time range",
                actual=f"{chunk.start_time}-{chunk.end_time}",
            )

        return ChunkResult(
            sequence=chunk.sequence,
            success=True,
            checksum_valid=True,
            processing_time=time.monotonic() - began,
        )


class SimulatedDecodeProcessor(Processor):
    """
    Simulates decode work by sleeping proportional to chunk duration.

    Useful for demonstrating concurrency behavior without real FFmpeg.

    Args:
        ms_per_second: Milliseconds of simulated work per second of chunk
            duration (default: 100)
    """

    def __init__(self, ms_per_second: float = 100.0):
        self.ms_per_second = ms_per_second

    def process(self, chunk: Chunk) -> ChunkResult:
        began = time.monotonic()

        # duration [s] * work rate [ms/s] / 1000 -> simulated seconds of work
        simulated_seconds = self.ms_per_second * chunk.duration / 1000.0
        time.sleep(simulated_seconds)

        return ChunkResult(
            sequence=chunk.sequence,
            success=True,
            checksum_valid=True,
            processing_time=time.monotonic() - began,
        )


class CompositeProcessor(Processor):
    """
    Chains multiple processors — runs each in sequence on the same chunk.

    Demonstrates OOP composition pattern. The first failing sub-result is
    returned as-is; on success, the composite result reports the total
    elapsed time across all sub-processors and carries forward the final
    sub-result's output file (so e.g. an FFmpeg step's mp4 path is not lost).

    Args:
        processors: List of processors to chain

    Raises:
        ValueError: If processors is empty.
    """

    def __init__(self, processors: List[Processor]):
        if not processors:
            raise ValueError("CompositeProcessor requires at least one processor")
        self.processors = processors

    def process(self, chunk: Chunk) -> ChunkResult:
        start = time.monotonic()
        last_result = None

        for proc in self.processors:
            last_result = proc.process(chunk)
            if not last_result.success:
                # Short-circuit: propagate the failing sub-result unchanged.
                return last_result

        elapsed = time.monotonic() - start

        # __init__ rejects an empty list, so last_result is always set here.
        return ChunkResult(
            sequence=chunk.sequence,
            success=True,
            checksum_valid=last_result.checksum_valid,
            processing_time=elapsed,
            # Preserve the last step's artifact path instead of dropping it.
            output_file=last_result.output_file,
        )