Add chunker job handlers and UI

This commit is contained in:
2026-03-13 14:29:38 -03:00
parent 3eeedebb15
commit ccc478fbaa
69 changed files with 6481 additions and 282 deletions

View File

@@ -0,0 +1,5 @@
"""Job handlers — type-specific execution logic."""
from .base import Handler
__all__ = ["Handler"]

View File

@@ -0,0 +1,33 @@
"""
Base Handler ABC — defines the interface for job-type-specific execution logic.
A Handler knows HOW to execute a specific kind of job (transcode, chunk, etc.).
The Executor decides WHERE to run it (local, Lambda, GCP).
"""
from abc import ABC, abstractmethod
from typing import Any, Callable, Dict, Optional
class Handler(ABC):
    """Interface for job-type-specific execution logic.

    A concrete handler knows HOW to run one kind of job (transcode,
    chunk, ...); the Executor decides WHERE it runs (local, Lambda, GCP).
    """

    @abstractmethod
    def process(
        self,
        job_id: str,
        payload: Dict[str, Any],
        progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
    ) -> Dict[str, Any]:
        """Run the job-specific work.

        Args:
            job_id: Unique job identifier.
            payload: Job-type-specific configuration.
            progress_callback: Invoked with (percent, details_dict).

        Returns:
            Result dict containing at least {"status": "completed"},
            or raises on failure.
        """

119
core/jobs/handlers/chunk.py Normal file
View File

@@ -0,0 +1,119 @@
"""
ChunkHandler — job handler that wraps the chunker Pipeline.
Downloads source from S3/MinIO, runs FFmpeg chunking pipeline,
uploads mp4 segments + manifest back to S3/MinIO.
"""
import logging
import os
import shutil
import tempfile
from typing import Any, Callable, Dict, Optional
from core.chunker import Pipeline
from core.storage import BUCKET_IN, BUCKET_OUT, download_to_temp, upload_file
from .base import Handler
logger = logging.getLogger(__name__)
class ChunkHandler(Handler):
    """
    Handles chunk processing jobs by delegating to the chunker Pipeline.
    Expected payload keys:
        source_key: str — S3 key of the source file in BUCKET_IN
        chunk_duration: float — seconds per chunk (default: 10.0)
        num_workers: int — concurrent workers (default: 4)
        max_retries: int — retries per chunk (default: 3)
        processor_type: str — "ffmpeg", "checksum", "simulated_decode", "composite"
        queue_size: int — max queue depth (default: 10)
    """

    def process(
        self,
        job_id: str,
        payload: Dict[str, Any],
        progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
    ) -> Dict[str, Any]:
        """Download the source, run the chunking pipeline, upload the results.

        Args:
            job_id: Unique job identifier; also used in the output S3 prefix.
            payload: Job configuration — see class docstring for keys.
            progress_callback: Invoked with (percent, details_dict).

        Returns:
            Summary dict with pipeline stats, the output prefix, and the
            uploaded S3 keys. "status" is "completed" or
            "completed_with_errors" when some chunks failed.
        """
        source_key = payload["source_key"]
        processor_type = payload.get("processor_type", "ffmpeg")
        logger.info(f"ChunkHandler starting job {job_id}: {source_key}")

        # Download source from S3/MinIO
        tmp_source = download_to_temp(BUCKET_IN, source_key)
        # Create temp output directory for chunks
        tmp_output_dir = tempfile.mkdtemp(prefix=f"chunks-{job_id}-")

        try:
            def event_bridge(event_type: str, data: Dict[str, Any]) -> None:
                """Bridge pipeline events to the job progress callback."""
                if progress_callback and event_type == "pipeline_complete":
                    progress_callback(100, data)
                elif progress_callback and event_type == "chunk_done":
                    total = data.get("total_chunks", 1)
                    if total > 0:
                        # Cap at 99% so only pipeline_complete reports 100%.
                        pct = min(int((data.get("sequence", 0) + 1) / total * 100), 99)
                        progress_callback(pct, data)

            pipeline = Pipeline(
                source=tmp_source,
                chunk_duration=payload.get("chunk_duration", 10.0),
                num_workers=payload.get("num_workers", 4),
                max_retries=payload.get("max_retries", 3),
                processor_type=processor_type,
                queue_size=payload.get("queue_size", 10),
                event_callback=event_bridge,
                # Only the ffmpeg processor writes real segment files to disk.
                output_dir=tmp_output_dir if processor_type == "ffmpeg" else None,
            )
            result = pipeline.run()

            # Upload chunks + manifest to S3/MinIO
            output_prefix = f"chunks/{job_id}"
            uploaded_files = []
            for chunk_file in result.chunk_files:
                filename = os.path.basename(chunk_file)
                # Fix: preserve each chunk's filename in the destination key;
                # previously a corrupted literal key made every upload
                # overwrite the same object.
                output_key = f"{output_prefix}/{filename}"
                upload_file(chunk_file, BUCKET_OUT, output_key)
                uploaded_files.append(output_key)
                logger.info(f"Uploaded {output_key}")

            # Upload manifest (only present when the pipeline produced one)
            manifest_path = os.path.join(tmp_output_dir, "manifest.json")
            if os.path.exists(manifest_path):
                manifest_key = f"{output_prefix}/manifest.json"
                upload_file(manifest_path, BUCKET_OUT, manifest_key)
                uploaded_files.append(manifest_key)
                logger.info(f"Uploaded {manifest_key}")

            return {
                "status": "completed" if result.failed == 0 else "completed_with_errors",
                "total_chunks": result.total_chunks,
                "processed": result.processed,
                "failed": result.failed,
                "retries": result.retries,
                "elapsed_time": result.elapsed_time,
                "throughput_mbps": result.throughput_mbps,
                "worker_stats": result.worker_stats,
                "errors": result.errors,
                "chunks_in_order": result.chunks_in_order,
                "output_prefix": output_prefix,
                "uploaded_files": uploaded_files,
            }
        finally:
            # Best-effort cleanup of temp files; never mask the real error.
            try:
                os.unlink(tmp_source)
            except OSError:
                pass
            # ignore_errors=True already swallows failures — no wrapper needed.
            shutil.rmtree(tmp_output_dir, ignore_errors=True)

View File

@@ -0,0 +1,104 @@
"""
TranscodeHandler — executes transcode/trim jobs using FFmpeg.
Extracted from the old tasks.py Celery task logic.
"""
import logging
import os
import tempfile
from pathlib import Path
from typing import Any, Callable, Dict, Optional
from core.ffmpeg.transcode import TranscodeConfig, transcode
from core.storage import BUCKET_IN, BUCKET_OUT, download_to_temp, upload_file
from .base import Handler
logger = logging.getLogger(__name__)
class TranscodeHandler(Handler):
    """Handle transcode and trim jobs via FFmpeg."""

    def process(
        self,
        job_id: str,
        payload: Dict[str, Any],
        progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
    ) -> Dict[str, Any]:
        """Download the source, transcode (or stream-copy) it, upload the result.

        Args:
            job_id: Unique job identifier, echoed back in the result.
            payload: Must contain "source_key" and "output_key"; optional
                keys are "preset", "trim_start", "trim_end", "duration".
            progress_callback: Invoked with (percent, details_dict).

        Returns:
            Dict with "status", "job_id", and "output_key".

        Raises:
            RuntimeError: If the underlying transcode reports failure.
        """
        source_key = payload["source_key"]
        output_key = payload["output_key"]
        preset = payload.get("preset")
        trim_start = payload.get("trim_start")
        trim_end = payload.get("trim_end")
        duration = payload.get("duration")
        logger.info(f"TranscodeHandler: {source_key} -> {output_key}")

        # Download source
        local_input = download_to_temp(BUCKET_IN, source_key)

        # Scratch output file keeps the destination's extension (default .mp4).
        suffix = Path(output_key).suffix or ".mp4"
        handle, local_output = tempfile.mkstemp(suffix=suffix)
        os.close(handle)

        try:
            # Fields shared by both the preset and the stream-copy paths.
            common = {
                "input_path": local_input,
                "output_path": local_output,
                "trim_start": trim_start,
                "trim_end": trim_end,
            }
            if preset is None:
                # No preset: remux only, copying both streams untouched.
                config = TranscodeConfig(
                    video_codec="copy",
                    audio_codec="copy",
                    **common,
                )
            else:
                config = TranscodeConfig(
                    video_codec=preset.get("video_codec", "libx264"),
                    video_bitrate=preset.get("video_bitrate"),
                    video_crf=preset.get("video_crf"),
                    video_preset=preset.get("video_preset"),
                    resolution=preset.get("resolution"),
                    framerate=preset.get("framerate"),
                    audio_codec=preset.get("audio_codec", "aac"),
                    audio_bitrate=preset.get("audio_bitrate"),
                    audio_channels=preset.get("audio_channels"),
                    audio_samplerate=preset.get("audio_samplerate"),
                    container=preset.get("container", "mp4"),
                    extra_args=preset.get("extra_args", []),
                    **common,
                )

            def report(percent: float, details: Dict[str, Any]) -> None:
                """Forward FFmpeg progress as an integer percentage."""
                if progress_callback:
                    progress_callback(int(percent), details)

            ok = transcode(
                config,
                duration=duration,
                progress_callback=report if progress_callback else None,
            )
            if not ok:
                raise RuntimeError("Transcode returned False")

            # Upload result
            logger.info(f"Uploading {output_key} to {BUCKET_OUT}")
            upload_file(local_output, BUCKET_OUT, output_key)
            return {
                "status": "completed",
                "job_id": job_id,
                "output_key": output_key,
            }
        finally:
            # Best-effort removal of both temp files.
            for path in (local_input, local_output):
                try:
                    os.unlink(path)
                except OSError:
                    pass