chunker and ui

2026-03-13 14:29:38 -03:00
parent 3eeedebb15
commit ccc478fbaa
69 changed files with 6481 additions and 282 deletions

15
core/jobs/__init__.py Normal file

@@ -0,0 +1,15 @@
"""
MPR Jobs Module
Provides executor abstraction and task dispatch for job processing.
"""
from .executor import Executor, LocalExecutor, get_executor
from .task import run_job
__all__ = [
"Executor",
"LocalExecutor",
"get_executor",
"run_job",
]

198
core/jobs/executor.py Normal file

@@ -0,0 +1,198 @@
"""
Executor abstraction for job processing.
Determines WHERE jobs run:
- LocalExecutor: delegates to a registered Handler (default)
- LambdaExecutor: dispatches to AWS Step Functions + Lambda
- GCPExecutor: triggers Google Cloud Run Jobs
"""
import os
from abc import ABC, abstractmethod
from typing import Any, Callable, Dict, Optional
# Configuration from environment
MPR_EXECUTOR = os.environ.get("MPR_EXECUTOR", "local")
class Executor(ABC):
"""Abstract base class for job executors."""
@abstractmethod
def run(
self,
job_type: str,
job_id: str,
payload: Dict[str, Any],
progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
) -> bool:
"""
Execute a job.
Args:
job_type: Type of job ("transcode", "chunk", etc.)
job_id: Unique job identifier
payload: Job-type-specific configuration dict
progress_callback: Called with (percent, details_dict)
Returns:
True if successful
"""
pass
class LocalExecutor(Executor):
"""Execute jobs locally using registered handlers."""
def run(
self,
job_type: str,
job_id: str,
payload: Dict[str, Any],
progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
) -> bool:
"""Execute job using the appropriate local handler."""
from .registry import get_handler
handler = get_handler(job_type)
result = handler.process(
job_id=job_id,
payload=payload,
progress_callback=progress_callback,
)
return result.get("status") == "completed"
class LambdaExecutor(Executor):
"""Execute jobs via AWS Step Functions + Lambda."""
def __init__(self):
import boto3
region = os.environ.get("AWS_REGION", "us-east-1")
self.sfn = boto3.client("stepfunctions", region_name=region)
self.state_machine_arn = os.environ["STEP_FUNCTION_ARN"]
self.callback_url = os.environ.get("CALLBACK_URL", "")
self.callback_api_key = os.environ.get("CALLBACK_API_KEY", "")
def run(
self,
job_type: str,
job_id: str,
payload: Dict[str, Any],
progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
) -> bool:
"""Start a Step Functions execution for this job."""
import json
sfn_payload = {
"job_type": job_type,
"job_id": job_id,
**payload,
"callback_url": self.callback_url,
"api_key": self.callback_api_key,
}
response = self.sfn.start_execution(
stateMachineArn=self.state_machine_arn,
name=f"mpr-{job_id}",
input=json.dumps(sfn_payload),
)
execution_arn = response["executionArn"]
try:
from core.db import update_job_fields
update_job_fields(job_id, execution_arn=execution_arn)
except Exception:
pass
return True
class GCPExecutor(Executor):
"""Execute jobs via Google Cloud Run Jobs."""
def __init__(self):
from google.cloud import run_v2
self.client = run_v2.JobsClient()
self.project_id = os.environ["GCP_PROJECT_ID"]
self.region = os.environ.get("GCP_REGION", "us-central1")
self.job_name = os.environ["CLOUD_RUN_JOB"]
self.callback_url = os.environ.get("CALLBACK_URL", "")
self.callback_api_key = os.environ.get("CALLBACK_API_KEY", "")
def run(
self,
job_type: str,
job_id: str,
payload: Dict[str, Any],
progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
) -> bool:
"""Trigger a Cloud Run Job execution for this job."""
import json
from google.cloud import run_v2
gcp_payload = {
"job_type": job_type,
"job_id": job_id,
**payload,
"callback_url": self.callback_url,
"api_key": self.callback_api_key,
}
job_path = (
f"projects/{self.project_id}/locations/{self.region}/jobs/{self.job_name}"
)
request = run_v2.RunJobRequest(
name=job_path,
overrides=run_v2.RunJobRequest.Overrides(
container_overrides=[
run_v2.RunJobRequest.Overrides.ContainerOverride(
env=[
run_v2.EnvVar(
name="MPR_JOB_PAYLOAD",
value=json.dumps(gcp_payload),
)
]
)
]
),
)
operation = self.client.run_job(request=request)
execution_name = operation.metadata.name
try:
from core.db import update_job_fields
update_job_fields(job_id, execution_arn=execution_name)
except Exception:
pass
return True
# Executor registry
_executors: Dict[str, type] = {
"local": LocalExecutor,
"lambda": LambdaExecutor,
"gcp": GCPExecutor,
}
_executor_instance: Optional[Executor] = None
def get_executor() -> Executor:
"""Get the configured executor instance."""
global _executor_instance
if _executor_instance is None:
executor_type = MPR_EXECUTOR.lower()
if executor_type not in _executors:
raise ValueError(f"Unknown executor type: {executor_type}")
_executor_instance = _executors[executor_type]()
return _executor_instance
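For reference, a minimal dispatch sketch against this interface — the job id and S3 keys are illustrative, and it assumes MPR_EXECUTOR is unset so LocalExecutor (with the handlers registered in core.jobs.registry) handles the job:

# Hypothetical dispatch through the configured executor (local by default).
from core.jobs import get_executor

def on_progress(percent, details):
    print(f"progress: {percent}% {details}")

ok = get_executor().run(
    job_type="transcode",
    job_id="job-123",                 # illustrative id and keys
    payload={"source_key": "uploads/in.mov", "output_key": "out/in.mp4"},
    progress_callback=on_progress,    # only honored by LocalExecutor
)
print("completed" if ok else "failed")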

121
core/jobs/gcp_handler.py Normal file

@@ -0,0 +1,121 @@
"""
Google Cloud Run Job handler for media transcoding.
Reads job payload from the MPR_JOB_PAYLOAD env var (injected by GCPExecutor),
downloads source from S3-compatible storage (GCS via HMAC + S3 API),
runs FFmpeg, uploads result, and calls back to the API.
Uses core/storage and core/ffmpeg — same modules as the Celery worker.
No cloud-provider SDK required here; storage goes through core.storage (boto3 + S3 compat).
Entry point: python -m core.jobs.gcp_handler (set as the Cloud Run Job command)
"""
import json
import logging
import os
import sys
import tempfile
from pathlib import Path
import requests
from core.ffmpeg.transcode import TranscodeConfig, transcode
from core.storage import BUCKET_IN, BUCKET_OUT, download_to_temp, upload_file
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def main() -> None:
raw = os.environ.get("MPR_JOB_PAYLOAD")
if not raw:
logger.error("MPR_JOB_PAYLOAD not set")
sys.exit(1)
event = json.loads(raw)
job_id = event["job_id"]
source_key = event["source_key"]
output_key = event["output_key"]
preset = event.get("preset")
trim_start = event.get("trim_start")
trim_end = event.get("trim_end")
duration = event.get("duration")
callback_url = event.get("callback_url", "")
api_key = event.get("api_key", "")
logger.info(f"Starting job {job_id}: {source_key} -> {output_key}")
tmp_source = download_to_temp(BUCKET_IN, source_key)
ext_out = Path(output_key).suffix or ".mp4"
fd, tmp_output = tempfile.mkstemp(suffix=ext_out)
os.close(fd)
try:
if preset:
config = TranscodeConfig(
input_path=tmp_source,
output_path=tmp_output,
video_codec=preset.get("video_codec", "libx264"),
video_bitrate=preset.get("video_bitrate"),
video_crf=preset.get("video_crf"),
video_preset=preset.get("video_preset"),
resolution=preset.get("resolution"),
framerate=preset.get("framerate"),
audio_codec=preset.get("audio_codec", "aac"),
audio_bitrate=preset.get("audio_bitrate"),
audio_channels=preset.get("audio_channels"),
audio_samplerate=preset.get("audio_samplerate"),
container=preset.get("container", "mp4"),
extra_args=preset.get("extra_args", []),
trim_start=trim_start,
trim_end=trim_end,
)
else:
config = TranscodeConfig(
input_path=tmp_source,
output_path=tmp_output,
video_codec="copy",
audio_codec="copy",
trim_start=trim_start,
trim_end=trim_end,
)
success = transcode(config, duration=duration)
if not success:
raise RuntimeError("Transcode returned False")
logger.info(f"Uploading to {BUCKET_OUT}/{output_key}")
upload_file(tmp_output, BUCKET_OUT, output_key)
_callback(callback_url, job_id, api_key, {"status": "completed"})
logger.info(f"Job {job_id} completed")
sys.exit(0)
except Exception as e:
logger.exception(f"Job {job_id} failed: {e}")
_callback(callback_url, job_id, api_key, {"status": "failed", "error": str(e)})
sys.exit(1)
finally:
for f in [tmp_source, tmp_output]:
try:
os.unlink(f)
except OSError:
pass
def _callback(callback_url: str, job_id: str, api_key: str, payload: dict) -> None:
if not callback_url:
return
try:
url = f"{callback_url}/jobs/{job_id}/callback"
headers = {"X-API-Key": api_key} if api_key else {}
resp = requests.post(url, json=payload, headers=headers, timeout=10)
logger.info(f"Callback response: {resp.status_code}")
except Exception as e:
logger.warning(f"Callback failed: {e}")
if __name__ == "__main__":
main()
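A rough local smoke test for this entry point (hypothetical payload values; assumes core.storage can reach the configured buckets from your shell):

# Hypothetical local smoke test: the same JSON GCPExecutor injects as an env
# override; callback_url is empty so no HTTP callback is attempted.
import json
import os
import subprocess

payload = {
    "job_id": "job-123",              # illustrative values throughout
    "source_key": "uploads/in.mov",
    "output_key": "out/in.mp4",
    "preset": None,                   # None -> stream copy
    "trim_start": None,
    "trim_end": None,
    "duration": None,
    "callback_url": "",
    "api_key": "",
}
env = {**os.environ, "MPR_JOB_PAYLOAD": json.dumps(payload)}
subprocess.run(["python", "-m", "core.jobs.gcp_handler"], env=env, check=True)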

5
core/jobs/handlers/__init__.py Normal file

@@ -0,0 +1,5 @@
"""Job handlers — type-specific execution logic."""
from .base import Handler
__all__ = ["Handler"]

33
core/jobs/handlers/base.py Normal file

@@ -0,0 +1,33 @@
"""
Base Handler ABC — defines the interface for job-type-specific execution logic.
A Handler knows HOW to execute a specific kind of job (transcode, chunk, etc.).
The Executor decides WHERE to run it (local, Lambda, GCP).
"""
from abc import ABC, abstractmethod
from typing import Any, Callable, Dict, Optional
class Handler(ABC):
"""Abstract base class for job handlers."""
@abstractmethod
def process(
self,
job_id: str,
payload: Dict[str, Any],
progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
) -> Dict[str, Any]:
"""
Execute job-specific logic.
Args:
job_id: Unique job identifier
payload: Job-type-specific configuration
progress_callback: Called with (percent, details_dict)
Returns:
Result dict with at least {"status": "completed"} or raises
"""
pass
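A minimal concrete handler, as a sketch of this contract — the handler name, progress value, and result keys beyond "status" are illustrative:

# Hypothetical minimal handler showing the contract; not part of this commit.
from typing import Any, Callable, Dict, Optional

from core.jobs.handlers.base import Handler

class EchoHandler(Handler):
    """Reports one progress tick and echoes its payload back."""

    def process(
        self,
        job_id: str,
        payload: Dict[str, Any],
        progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
    ) -> Dict[str, Any]:
        if progress_callback:
            progress_callback(50, {"stage": "echo"})  # illustrative midpoint
        return {"status": "completed", "job_id": job_id, "echo": payload}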

119
core/jobs/handlers/chunk.py Normal file

@@ -0,0 +1,119 @@
"""
ChunkHandler — job handler that wraps the chunker Pipeline.
Downloads source from S3/MinIO, runs FFmpeg chunking pipeline,
uploads mp4 segments + manifest back to S3/MinIO.
"""
import logging
import os
import shutil
import tempfile
from typing import Any, Callable, Dict, Optional
from core.chunker import Pipeline
from core.storage import BUCKET_IN, BUCKET_OUT, download_to_temp, upload_file
from .base import Handler
logger = logging.getLogger(__name__)
class ChunkHandler(Handler):
"""
Handles chunk processing jobs by delegating to the chunker Pipeline.
Expected payload keys:
source_key: str — S3 key of the source file in BUCKET_IN
chunk_duration: float — seconds per chunk (default: 10.0)
num_workers: int — concurrent workers (default: 4)
max_retries: int — retries per chunk (default: 3)
processor_type: str — "ffmpeg", "checksum", "simulated_decode", "composite"
queue_size: int — max queue depth (default: 10)
"""
def process(
self,
job_id: str,
payload: Dict[str, Any],
progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
) -> Dict[str, Any]:
source_key = payload["source_key"]
processor_type = payload.get("processor_type", "ffmpeg")
logger.info(f"ChunkHandler starting job {job_id}: {source_key}")
# Download source from S3/MinIO
tmp_source = download_to_temp(BUCKET_IN, source_key)
# Create temp output directory for chunks
tmp_output_dir = tempfile.mkdtemp(prefix=f"chunks-{job_id}-")
try:
def event_bridge(event_type: str, data: Dict[str, Any]) -> None:
"""Bridge pipeline events to the job progress callback."""
if progress_callback and event_type == "pipeline_complete":
progress_callback(100, data)
elif progress_callback and event_type == "chunk_done":
total = data.get("total_chunks", 1)
if total > 0:
pct = min(int((data.get("sequence", 0) + 1) / total * 100), 99)
progress_callback(pct, data)
pipeline = Pipeline(
source=tmp_source,
chunk_duration=payload.get("chunk_duration", 10.0),
num_workers=payload.get("num_workers", 4),
max_retries=payload.get("max_retries", 3),
processor_type=processor_type,
queue_size=payload.get("queue_size", 10),
event_callback=event_bridge,
output_dir=tmp_output_dir if processor_type == "ffmpeg" else None,
)
result = pipeline.run()
# Upload chunks + manifest to S3/MinIO
output_prefix = f"chunks/{job_id}"
uploaded_files = []
for chunk_file in result.chunk_files:
filename = os.path.basename(chunk_file)
output_key = f"{output_prefix}/{filename}"
upload_file(chunk_file, BUCKET_OUT, output_key)
uploaded_files.append(output_key)
logger.info(f"Uploaded {output_key}")
# Upload manifest
manifest_path = os.path.join(tmp_output_dir, "manifest.json")
if os.path.exists(manifest_path):
manifest_key = f"{output_prefix}/manifest.json"
upload_file(manifest_path, BUCKET_OUT, manifest_key)
uploaded_files.append(manifest_key)
logger.info(f"Uploaded {manifest_key}")
return {
"status": "completed" if result.failed == 0 else "completed_with_errors",
"total_chunks": result.total_chunks,
"processed": result.processed,
"failed": result.failed,
"retries": result.retries,
"elapsed_time": result.elapsed_time,
"throughput_mbps": result.throughput_mbps,
"worker_stats": result.worker_stats,
"errors": result.errors,
"chunks_in_order": result.chunks_in_order,
"output_prefix": output_prefix,
"uploaded_files": uploaded_files,
}
finally:
# Cleanup temp files
try:
os.unlink(tmp_source)
except OSError:
pass
try:
shutil.rmtree(tmp_output_dir, ignore_errors=True)
except OSError:
pass
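A sketch of dispatching a chunk job with the payload keys documented above (values illustrative; assumes the source object exists in BUCKET_IN):

# Hypothetical chunk-job dispatch; keys match ChunkHandler's payload contract.
from core.jobs import get_executor

payload = {
    "source_key": "uploads/in.mov",  # must already exist in BUCKET_IN
    "chunk_duration": 6.0,           # seconds per chunk
    "num_workers": 4,
    "max_retries": 3,
    "processor_type": "ffmpeg",      # emits mp4 segments + manifest.json
    "queue_size": 10,
}
ok = get_executor().run(job_type="chunk", job_id="job-456", payload=payload)
print("completed" if ok else "completed_with_errors or failed")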

104
core/jobs/handlers/transcode.py Normal file

@@ -0,0 +1,104 @@
"""
TranscodeHandler — executes transcode/trim jobs using FFmpeg.
Extracted from the old tasks.py Celery task logic.
"""
import logging
import os
import tempfile
from pathlib import Path
from typing import Any, Callable, Dict, Optional
from core.ffmpeg.transcode import TranscodeConfig, transcode
from core.storage import BUCKET_IN, BUCKET_OUT, download_to_temp, upload_file
from .base import Handler
logger = logging.getLogger(__name__)
class TranscodeHandler(Handler):
"""Handle transcode and trim jobs via FFmpeg."""
def process(
self,
job_id: str,
payload: Dict[str, Any],
progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
) -> Dict[str, Any]:
source_key = payload["source_key"]
output_key = payload["output_key"]
preset = payload.get("preset")
trim_start = payload.get("trim_start")
trim_end = payload.get("trim_end")
duration = payload.get("duration")
logger.info(f"TranscodeHandler: {source_key} -> {output_key}")
# Download source
tmp_source = download_to_temp(BUCKET_IN, source_key)
ext = Path(output_key).suffix or ".mp4"
fd, tmp_output = tempfile.mkstemp(suffix=ext)
os.close(fd)
try:
if preset:
config = TranscodeConfig(
input_path=tmp_source,
output_path=tmp_output,
video_codec=preset.get("video_codec", "libx264"),
video_bitrate=preset.get("video_bitrate"),
video_crf=preset.get("video_crf"),
video_preset=preset.get("video_preset"),
resolution=preset.get("resolution"),
framerate=preset.get("framerate"),
audio_codec=preset.get("audio_codec", "aac"),
audio_bitrate=preset.get("audio_bitrate"),
audio_channels=preset.get("audio_channels"),
audio_samplerate=preset.get("audio_samplerate"),
container=preset.get("container", "mp4"),
extra_args=preset.get("extra_args", []),
trim_start=trim_start,
trim_end=trim_end,
)
else:
config = TranscodeConfig(
input_path=tmp_source,
output_path=tmp_output,
video_codec="copy",
audio_codec="copy",
trim_start=trim_start,
trim_end=trim_end,
)
def wrapped_callback(percent: float, details: Dict[str, Any]) -> None:
if progress_callback:
progress_callback(int(percent), details)
success = transcode(
config,
duration=duration,
progress_callback=wrapped_callback if progress_callback else None,
)
if not success:
raise RuntimeError("Transcode returned False")
# Upload result
logger.info(f"Uploading {output_key} to {BUCKET_OUT}")
upload_file(tmp_output, BUCKET_OUT, output_key)
return {
"status": "completed",
"job_id": job_id,
"output_key": output_key,
}
finally:
for f in [tmp_source, tmp_output]:
try:
os.unlink(f)
except OSError:
pass
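For reference, a sketch of a preset dict shaped the way this handler consumes it — all keys are optional and fall back to the .get() defaults above; the concrete values are illustrative:

# Hypothetical 720p H.264 preset; key names mirror the .get() lookups above.
preset = {
    "video_codec": "libx264",
    "video_crf": 23,                  # constant quality; video_bitrate omitted
    "video_preset": "medium",
    "resolution": "1280x720",
    "framerate": 30,
    "audio_codec": "aac",
    "audio_bitrate": "128k",
    "container": "mp4",
    "extra_args": ["-movflags", "+faststart"],
}
payload = {
    "source_key": "uploads/in.mov",   # illustrative keys
    "output_key": "out/in-720p.mp4",
    "preset": preset,
    "duration": 120.0,                # source duration, drives progress percent
}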

148
core/jobs/lambda_handler.py Normal file

@@ -0,0 +1,148 @@
"""
AWS Lambda handler for media transcoding.
Receives a job payload from Step Functions, downloads source from S3,
runs FFmpeg, uploads result to S3, and calls back to the API.
Uses the same core/ffmpeg module as the local Celery worker.
"""
import json
import logging
import os
import tempfile
from pathlib import Path
import boto3
import requests
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# S3 config
S3_BUCKET_IN = os.environ.get("S3_BUCKET_IN", "mpr-media-in")
S3_BUCKET_OUT = os.environ.get("S3_BUCKET_OUT", "mpr-media-out")
AWS_REGION = os.environ.get("AWS_REGION", "us-east-1")
s3 = boto3.client("s3", region_name=AWS_REGION)
def handler(event, context):
"""
Lambda entry point.
Event payload (from Step Functions):
{
"job_id": "uuid",
"source_key": "path/to/source.mp4",
"output_key": "output_filename.mp4",
"preset": {...} or null,
"trim_start": float or null,
"trim_end": float or null,
"duration": float or null,
"callback_url": "https://mpr.mcrn.ar/api",
"api_key": "secret"
}
"""
job_id = event["job_id"]
source_key = event["source_key"]
output_key = event["output_key"]
preset = event.get("preset")
trim_start = event.get("trim_start")
trim_end = event.get("trim_end")
duration = event.get("duration")
callback_url = event.get("callback_url", "")
api_key = event.get("api_key", "")
logger.info(f"Starting job {job_id}: {source_key} -> {output_key}")
# Download source from S3
ext_in = Path(source_key).suffix or ".mp4"
# mkstemp (not the deprecated, race-prone mktemp), matching the other handlers
fd, tmp_source = tempfile.mkstemp(suffix=ext_in, dir="/tmp")
os.close(fd)
logger.info(f"Downloading s3://{S3_BUCKET_IN}/{source_key}")
s3.download_file(S3_BUCKET_IN, source_key, tmp_source)
# Prepare output temp file
ext_out = Path(output_key).suffix or ".mp4"
fd, tmp_output = tempfile.mkstemp(suffix=ext_out, dir="/tmp")
os.close(fd)
try:
# Import ffmpeg module (bundled in container)
from core.ffmpeg.transcode import TranscodeConfig, transcode
if preset:
config = TranscodeConfig(
input_path=tmp_source,
output_path=tmp_output,
video_codec=preset.get("video_codec", "libx264"),
video_bitrate=preset.get("video_bitrate"),
video_crf=preset.get("video_crf"),
video_preset=preset.get("video_preset"),
resolution=preset.get("resolution"),
framerate=preset.get("framerate"),
audio_codec=preset.get("audio_codec", "aac"),
audio_bitrate=preset.get("audio_bitrate"),
audio_channels=preset.get("audio_channels"),
audio_samplerate=preset.get("audio_samplerate"),
container=preset.get("container", "mp4"),
extra_args=preset.get("extra_args", []),
trim_start=trim_start,
trim_end=trim_end,
)
else:
config = TranscodeConfig(
input_path=tmp_source,
output_path=tmp_output,
video_codec="copy",
audio_codec="copy",
trim_start=trim_start,
trim_end=trim_end,
)
success = transcode(config, duration=duration)
if not success:
raise RuntimeError("Transcode returned False")
# Upload result to S3
logger.info(f"Uploading s3://{S3_BUCKET_OUT}/{output_key}")
s3.upload_file(tmp_output, S3_BUCKET_OUT, output_key)
result = {"status": "completed", "job_id": job_id, "output_key": output_key}
# Callback to API
_callback(callback_url, job_id, api_key, {"status": "completed"})
return result
except Exception as e:
logger.exception(f"Job {job_id} failed: {e}")
_callback(callback_url, job_id, api_key, {
"status": "failed",
"error": str(e),
})
return {"status": "failed", "job_id": job_id, "error": str(e)}
finally:
for f in [tmp_source, tmp_output]:
try:
os.unlink(f)
except OSError:
pass
def _callback(callback_url, job_id, api_key, payload):
"""Call back to API with job result."""
if not callback_url:
return
try:
url = f"{callback_url}/jobs/{job_id}/callback"
headers = {}
if api_key:
headers["X-API-Key"] = api_key
resp = requests.post(url, json=payload, headers=headers, timeout=10)
logger.info(f"Callback response: {resp.status_code}")
except Exception as e:
logger.warning(f"Callback failed: {e}")
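The handler can also be invoked directly, e.g. from a container shell, with a hand-built event (hypothetical values; assumes AWS credentials are available, since the module builds its S3 client at import time):

# Hypothetical direct invocation for debugging (bypasses Step Functions).
from core.jobs.lambda_handler import handler

event = {
    "job_id": "job-789",              # illustrative values throughout
    "source_key": "uploads/in.mov",
    "output_key": "out/in.mp4",
    "preset": None,                   # None -> stream copy
    "trim_start": 2.0,
    "trim_end": 10.0,
    "duration": 8.0,
    "callback_url": "",               # empty -> _callback is a no-op
    "api_key": "",
}
print(handler(event, None))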

33
core/jobs/registry.py Normal file

@@ -0,0 +1,33 @@
"""
Handler registry — maps job_type strings to Handler classes.
"""
from typing import Dict, Type
from .handlers.base import Handler
_handlers: Dict[str, Type[Handler]] = {}
def register_handler(job_type: str, handler_class: Type[Handler]) -> None:
"""Register a handler class for a job type."""
_handlers[job_type] = handler_class
def get_handler(job_type: str) -> Handler:
"""Get an instantiated handler for a job type."""
if job_type not in _handlers:
raise ValueError(f"Unknown job type: {job_type}")
return _handlers[job_type]()
def _register_defaults() -> None:
"""Register built-in handlers."""
from .handlers.chunk import ChunkHandler
from .handlers.transcode import TranscodeHandler
register_handler("transcode", TranscodeHandler)
register_handler("chunk", ChunkHandler)
_register_defaults()
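Registering an extra job type is a sketch like this (NoopHandler and the "noop" type are hypothetical):

# Hypothetical extra job type; NoopHandler is not part of this commit.
from core.jobs.handlers.base import Handler
from core.jobs.registry import get_handler, register_handler

class NoopHandler(Handler):
    """Completes immediately without doing any work."""

    def process(self, job_id, payload, progress_callback=None):
        return {"status": "completed", "job_id": job_id}

register_handler("noop", NoopHandler)
assert isinstance(get_handler("noop"), NoopHandler)  # fresh instance per call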

64
core/jobs/task.py Normal file

@@ -0,0 +1,64 @@
"""
Celery task for job processing.
Generic dispatcher — routes to the appropriate handler based on job_type.
"""
import logging
from typing import Any, Dict
from celery import shared_task
from core.rpc.server import update_job_progress
logger = logging.getLogger(__name__)
@shared_task(bind=True, max_retries=3, default_retry_delay=60)
def run_job(
self,
job_type: str,
job_id: str,
payload: Dict[str, Any],
) -> Dict[str, Any]:
"""
Generic Celery task — dispatches to the registered handler for job_type.
"""
logger.info(f"Starting {job_type} job {job_id}")
update_job_progress(job_id, progress=0, status="processing")
def progress_callback(percent: int, details: Dict[str, Any]) -> None:
update_job_progress(
job_id,
progress=percent,
current_time=details.get("time", 0.0),
status="processing",
)
try:
from .registry import get_handler
handler = get_handler(job_type)
result = handler.process(
job_id=job_id,
payload=payload,
progress_callback=progress_callback,
)
logger.info(f"Job {job_id} completed successfully")
update_job_progress(job_id, progress=100, status="completed")
return result
except Exception as e:
logger.exception(f"Job {job_id} failed: {e}")
update_job_progress(job_id, progress=0, status="failed", error=str(e))
if self.request.retries < self.max_retries:
raise self.retry(exc=e)
return {
"status": "failed",
"job_id": job_id,
"error": str(e),
}
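And a sketch of enqueuing through Celery (hypothetical values; assumes the worker imports core.jobs so _register_defaults() has run):

# Hypothetical enqueue from the API layer; runs on a Celery worker that has
# imported core.jobs, so the default handlers are registered.
from core.jobs import run_job

async_result = run_job.delay(
    job_type="transcode",
    job_id="job-123",
    payload={"source_key": "uploads/in.mov", "output_key": "out/in.mp4"},
)
print(async_result.id)  # Celery task id (distinct from the MPR job id)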