major refactor

This commit is contained in:
2026-03-13 01:07:02 -03:00
parent eaaf2ad60c
commit 3eeedebb15
61 changed files with 441 additions and 242 deletions

15
core/task/__init__.py Normal file
View File

@@ -0,0 +1,15 @@
"""
MPR Worker Module
Provides executor abstraction and Celery tasks for job processing.
"""
from .executor import Executor, LocalExecutor, get_executor
from .tasks import run_transcode_job
__all__ = [
"Executor",
"LocalExecutor",
"get_executor",
"run_transcode_job",
]

260
core/task/executor.py Normal file
View File

@@ -0,0 +1,260 @@
"""
Executor abstraction for job processing.
Supports different backends:
- LocalExecutor: FFmpeg via Celery (default)
- LambdaExecutor: AWS Lambda (future)
"""
import os
from abc import ABC, abstractmethod
from typing import Any, Callable, Dict, Optional
from core.ffmpeg.transcode import TranscodeConfig, transcode
# Configuration from environment
MPR_EXECUTOR = os.environ.get("MPR_EXECUTOR", "local")
class Executor(ABC):
    """Interface implemented by every job-execution backend.

    Concrete subclasses decide *where* the transcode work happens
    (local FFmpeg, AWS Step Functions, GCP Cloud Run); callers only
    ever interact with :meth:`run`.
    """

    @abstractmethod
    def run(
        self,
        job_id: str,
        source_path: str,
        output_path: str,
        preset: Optional[Dict[str, Any]] = None,
        trim_start: Optional[float] = None,
        trim_end: Optional[float] = None,
        duration: Optional[float] = None,
        progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
    ) -> bool:
        """Execute a single transcode/trim job.

        Args:
            job_id: Unique job identifier.
            source_path: Path to the source file.
            output_path: Path for the output file.
            preset: Transcode preset dict; ``None`` means trim-only.
            trim_start: Trim start time in seconds, if any.
            trim_end: Trim end time in seconds, if any.
            duration: Source duration in seconds, used for progress math.
            progress_callback: Invoked with ``(percent, details_dict)``.

        Returns:
            ``True`` on success.
        """
        ...
class LocalExecutor(Executor):
    """Run jobs on this machine by invoking FFmpeg directly."""

    def run(
        self,
        job_id: str,
        source_path: str,
        output_path: str,
        preset: Optional[Dict[str, Any]] = None,
        trim_start: Optional[float] = None,
        trim_end: Optional[float] = None,
        duration: Optional[float] = None,
        progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
    ) -> bool:
        """Transcode (or stream-copy trim) ``source_path`` into ``output_path``.

        With a ``preset``, its fields drive the encoder settings; without
        one, both streams are copied untouched so only the trim window
        applies.
        """
        if preset is None:
            # Trim-only job: no re-encode, just a stream copy.
            config = TranscodeConfig(
                input_path=source_path,
                output_path=output_path,
                video_codec="copy",
                audio_codec="copy",
                trim_start=trim_start,
                trim_end=trim_end,
            )
        else:
            config = TranscodeConfig(
                input_path=source_path,
                output_path=output_path,
                video_codec=preset.get("video_codec", "libx264"),
                video_bitrate=preset.get("video_bitrate"),
                video_crf=preset.get("video_crf"),
                video_preset=preset.get("video_preset"),
                resolution=preset.get("resolution"),
                framerate=preset.get("framerate"),
                audio_codec=preset.get("audio_codec", "aac"),
                audio_bitrate=preset.get("audio_bitrate"),
                audio_channels=preset.get("audio_channels"),
                audio_samplerate=preset.get("audio_samplerate"),
                container=preset.get("container", "mp4"),
                extra_args=preset.get("extra_args", []),
                trim_start=trim_start,
                trim_end=trim_end,
            )

        # transcode() reports float percentages; the public callback
        # contract is int, so adapt before forwarding.
        on_progress = None
        if progress_callback is not None:
            def on_progress(percent: float, details: Dict[str, Any]) -> None:
                progress_callback(int(percent), details)

        return transcode(config, duration=duration, progress_callback=on_progress)
class LambdaExecutor(Executor):
    """Dispatch jobs to AWS via Step Functions + Lambda.

    ``run`` is fire-and-forget: it starts a Step Functions execution and
    returns immediately; the Lambda reports completion through the HTTP
    callback, not through this process.
    """

    def __init__(self):
        # Imported lazily so deployments using only LocalExecutor do not
        # need boto3 installed.
        import boto3

        region = os.environ.get("AWS_REGION", "us-east-1")
        self.sfn = boto3.client("stepfunctions", region_name=region)
        # Required: there is no sensible default state machine.
        self.state_machine_arn = os.environ["STEP_FUNCTION_ARN"]
        self.callback_url = os.environ.get("CALLBACK_URL", "")
        self.callback_api_key = os.environ.get("CALLBACK_API_KEY", "")

    def run(
        self,
        job_id: str,
        source_path: str,
        output_path: str,
        preset: Optional[Dict[str, Any]] = None,
        trim_start: Optional[float] = None,
        trim_end: Optional[float] = None,
        duration: Optional[float] = None,
        progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
    ) -> bool:
        """Start a Step Functions execution for this job.

        Returns:
            True once the execution has been *started* (not completed).

        Note:
            ``progress_callback`` is ignored; progress is reported by the
            Lambda via the HTTP callback.
        """
        import json
        import logging
        import uuid

        payload = {
            "job_id": job_id,
            "source_key": source_path,
            "output_key": output_path,
            "preset": preset,
            "trim_start": trim_start,
            "trim_end": trim_end,
            "duration": duration,
            "callback_url": self.callback_url,
            "api_key": self.callback_api_key,
        }
        # Step Functions execution names must be unique per state machine
        # for 90 days; a bare job_id would raise ExecutionAlreadyExists if
        # the job is ever retried or re-run, so append a random suffix.
        response = self.sfn.start_execution(
            stateMachineArn=self.state_machine_arn,
            name=f"mpr-{job_id}-{uuid.uuid4().hex[:8]}",
            input=json.dumps(payload),
        )
        execution_arn = response["executionArn"]
        # Best-effort: persist the ARN on the job row so the API can track
        # the execution. A persistence failure must not fail the dispatch,
        # but it should not be silent either.
        try:
            from core.db import update_job_fields

            update_job_fields(job_id, execution_arn=execution_arn)
        except Exception:
            logging.getLogger(__name__).warning(
                "Could not store execution ARN for job %s", job_id, exc_info=True
            )
        return True
class GCPExecutor(Executor):
    """Dispatch jobs to Google Cloud Run Jobs.

    ``run`` is fire-and-forget: it triggers a Cloud Run Job execution and
    returns immediately; the handler container reports completion through
    the HTTP callback, not through this process.
    """

    def __init__(self):
        # Imported lazily so deployments without the GCP SDK can still use
        # the other executors.
        from google.cloud import run_v2

        self.client = run_v2.JobsClient()
        # Required: there is no sensible default project/job.
        self.project_id = os.environ["GCP_PROJECT_ID"]
        self.region = os.environ.get("GCP_REGION", "us-central1")
        self.job_name = os.environ["CLOUD_RUN_JOB"]
        self.callback_url = os.environ.get("CALLBACK_URL", "")
        self.callback_api_key = os.environ.get("CALLBACK_API_KEY", "")

    def run(
        self,
        job_id: str,
        source_path: str,
        output_path: str,
        preset: Optional[Dict[str, Any]] = None,
        trim_start: Optional[float] = None,
        trim_end: Optional[float] = None,
        duration: Optional[float] = None,
        progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
    ) -> bool:
        """Trigger a Cloud Run Job execution for this job.

        The payload is injected into the container through the
        ``MPR_JOB_PAYLOAD`` env var, which ``task.gcp_handler`` reads.

        Returns:
            True once the execution has been *triggered* (not completed).

        Note:
            ``progress_callback`` is ignored; progress is reported by the
            handler via the HTTP callback.
        """
        import json
        import logging

        from google.cloud import run_v2

        payload = {
            "job_id": job_id,
            "source_key": source_path,
            "output_key": output_path,
            "preset": preset,
            "trim_start": trim_start,
            "trim_end": trim_end,
            "duration": duration,
            "callback_url": self.callback_url,
            "api_key": self.callback_api_key,
        }
        job_path = (
            f"projects/{self.project_id}/locations/{self.region}/jobs/{self.job_name}"
        )
        request = run_v2.RunJobRequest(
            name=job_path,
            overrides=run_v2.RunJobRequest.Overrides(
                container_overrides=[
                    run_v2.RunJobRequest.Overrides.ContainerOverride(
                        env=[
                            run_v2.EnvVar(
                                name="MPR_JOB_PAYLOAD", value=json.dumps(payload)
                            )
                        ]
                    )
                ]
            ),
        )
        operation = self.client.run_job(request=request)
        execution_name = operation.metadata.name
        # Best-effort: record the execution name (stored in the job's
        # execution_arn field, shared with the AWS backend) so the API can
        # track it. A persistence failure must not fail the dispatch, but
        # it should not be silent either.
        try:
            from core.db import update_job_fields

            update_job_fields(job_id, execution_arn=execution_name)
        except Exception:
            logging.getLogger(__name__).warning(
                "Could not store execution name for job %s", job_id, exc_info=True
            )
        return True
# Registry mapping MPR_EXECUTOR values to their implementations.
_executors: Dict[str, type] = {
    "local": LocalExecutor,
    "lambda": LambdaExecutor,
    "gcp": GCPExecutor,
}
# Lazily-created singleton handed out by get_executor().
_executor_instance: Optional[Executor] = None


def get_executor() -> Executor:
    """Return the process-wide executor selected by ``MPR_EXECUTOR``.

    The instance is created on first use and cached for the lifetime of
    the process.

    Raises:
        ValueError: if ``MPR_EXECUTOR`` names an unknown backend.
    """
    global _executor_instance
    if _executor_instance is not None:
        return _executor_instance
    executor_type = MPR_EXECUTOR.lower()
    executor_cls = _executors.get(executor_type)
    if executor_cls is None:
        raise ValueError(f"Unknown executor type: {executor_type}")
    _executor_instance = executor_cls()
    return _executor_instance

121
core/task/gcp_handler.py Normal file
View File

@@ -0,0 +1,121 @@
"""
Google Cloud Run Job handler for media transcoding.
Reads job payload from the MPR_JOB_PAYLOAD env var (injected by GCPExecutor),
downloads source from S3-compatible storage (GCS via HMAC + S3 API),
runs FFmpeg, uploads result, and calls back to the API.
Uses core/storage and core/ffmpeg — same modules as the Celery worker.
No cloud-provider SDK required here; storage goes through core.storage (boto3 + S3 compat).
Entry point: python -m task.gcp_handler (set as Cloud Run Job command)
"""
import json
import logging
import os
import sys
import tempfile
from pathlib import Path
import requests
from core.ffmpeg.transcode import TranscodeConfig, transcode
from core.storage import BUCKET_IN, BUCKET_OUT, download_to_temp, upload_file
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def _config_from_event(
    tmp_source: str,
    tmp_output: str,
    preset,
    trim_start,
    trim_end,
) -> TranscodeConfig:
    """Build the FFmpeg config: full transcode when a preset is given,
    otherwise a stream-copy (trim-only) pass."""
    if not preset:
        return TranscodeConfig(
            input_path=tmp_source,
            output_path=tmp_output,
            video_codec="copy",
            audio_codec="copy",
            trim_start=trim_start,
            trim_end=trim_end,
        )
    return TranscodeConfig(
        input_path=tmp_source,
        output_path=tmp_output,
        video_codec=preset.get("video_codec", "libx264"),
        video_bitrate=preset.get("video_bitrate"),
        video_crf=preset.get("video_crf"),
        video_preset=preset.get("video_preset"),
        resolution=preset.get("resolution"),
        framerate=preset.get("framerate"),
        audio_codec=preset.get("audio_codec", "aac"),
        audio_bitrate=preset.get("audio_bitrate"),
        audio_channels=preset.get("audio_channels"),
        audio_samplerate=preset.get("audio_samplerate"),
        container=preset.get("container", "mp4"),
        extra_args=preset.get("extra_args", []),
        trim_start=trim_start,
        trim_end=trim_end,
    )


def main() -> None:
    """Cloud Run Job entry point.

    Reads the job payload from ``MPR_JOB_PAYLOAD``, downloads the source,
    transcodes it, uploads the result, reports the outcome to the API,
    and exits with status 0 on success / 1 on failure.
    """
    raw = os.environ.get("MPR_JOB_PAYLOAD")
    if not raw:
        logger.error("MPR_JOB_PAYLOAD not set")
        sys.exit(1)
    event = json.loads(raw)

    job_id = event["job_id"]
    source_key = event["source_key"]
    output_key = event["output_key"]
    callback_url = event.get("callback_url", "")
    api_key = event.get("api_key", "")
    logger.info(f"Starting job {job_id}: {source_key} -> {output_key}")

    tmp_source = download_to_temp(BUCKET_IN, source_key)
    # Output temp file keeps the destination extension so FFmpeg picks
    # the right container muxer.
    fd, tmp_output = tempfile.mkstemp(suffix=Path(output_key).suffix or ".mp4")
    os.close(fd)

    try:
        config = _config_from_event(
            tmp_source,
            tmp_output,
            event.get("preset"),
            event.get("trim_start"),
            event.get("trim_end"),
        )
        if not transcode(config, duration=event.get("duration")):
            raise RuntimeError("Transcode returned False")
        logger.info(f"Uploading to {BUCKET_OUT}/{output_key}")
        upload_file(tmp_output, BUCKET_OUT, output_key)
        _callback(callback_url, job_id, api_key, {"status": "completed"})
        logger.info(f"Job {job_id} completed")
        sys.exit(0)
    except Exception as e:
        logger.exception(f"Job {job_id} failed: {e}")
        _callback(callback_url, job_id, api_key, {"status": "failed", "error": str(e)})
        sys.exit(1)
    finally:
        # Always remove both temp files, whatever the outcome.
        for tmp in (tmp_source, tmp_output):
            try:
                os.unlink(tmp)
            except OSError:
                pass
def _callback(callback_url: str, job_id: str, api_key: str, payload: dict) -> None:
if not callback_url:
return
try:
url = f"{callback_url}/jobs/{job_id}/callback"
headers = {"X-API-Key": api_key} if api_key else {}
resp = requests.post(url, json=payload, headers=headers, timeout=10)
logger.info(f"Callback response: {resp.status_code}")
except Exception as e:
logger.warning(f"Callback failed: {e}")
if __name__ == "__main__":
main()

148
core/task/lambda_handler.py Normal file
View File

@@ -0,0 +1,148 @@
"""
AWS Lambda handler for media transcoding.
Receives a job payload from Step Functions, downloads source from S3,
runs FFmpeg, uploads result to S3, and calls back to the API.
Uses the same core/ffmpeg module as the local Celery worker.
"""
import json
import logging
import os
import tempfile
from pathlib import Path
import boto3
import requests
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# S3 configuration: bucket names and region come from the Lambda's
# environment, with defaults matching the dev deployment.
S3_BUCKET_IN = os.environ.get("S3_BUCKET_IN", "mpr-media-in")
S3_BUCKET_OUT = os.environ.get("S3_BUCKET_OUT", "mpr-media-out")
AWS_REGION = os.environ.get("AWS_REGION", "us-east-1")
# Module-level client so it is reused across warm Lambda invocations.
s3 = boto3.client("s3", region_name=AWS_REGION)
def handler(event, context):
    """
    Lambda entry point: download the source from S3, run FFmpeg, upload
    the result to S3, and report the outcome to the API.

    Event payload (from Step Functions):
    {
        "job_id": "uuid",
        "source_key": "path/to/source.mp4",
        "output_key": "output_filename.mp4",
        "preset": {...} or null,
        "trim_start": float or null,
        "trim_end": float or null,
        "duration": float or null,
        "callback_url": "https://mpr.mcrn.ar/api",
        "api_key": "secret"
    }

    Returns:
        dict with "status" ("completed" or "failed"), "job_id", and
        either "output_key" or "error".
    """
    job_id = event["job_id"]
    source_key = event["source_key"]
    output_key = event["output_key"]
    preset = event.get("preset")
    trim_start = event.get("trim_start")
    trim_end = event.get("trim_end")
    duration = event.get("duration")
    callback_url = event.get("callback_url", "")
    api_key = event.get("api_key", "")
    logger.info(f"Starting job {job_id}: {source_key} -> {output_key}")
    # Download source from S3. mkstemp (not the deprecated, race-prone
    # mktemp) creates the file securely; the S3 download then overwrites
    # its contents. This also matches the gcp_handler/tasks.py style.
    ext_in = Path(source_key).suffix or ".mp4"
    fd, tmp_source = tempfile.mkstemp(suffix=ext_in, dir="/tmp")
    os.close(fd)
    logger.info(f"Downloading s3://{S3_BUCKET_IN}/{source_key}")
    s3.download_file(S3_BUCKET_IN, source_key, tmp_source)
    # Prepare output temp file with the destination extension so FFmpeg
    # selects the right container muxer.
    ext_out = Path(output_key).suffix or ".mp4"
    fd, tmp_output = tempfile.mkstemp(suffix=ext_out, dir="/tmp")
    os.close(fd)
    try:
        # Imported here: the ffmpeg module is bundled in the container.
        from core.ffmpeg.transcode import TranscodeConfig, transcode
        if preset:
            config = TranscodeConfig(
                input_path=tmp_source,
                output_path=tmp_output,
                video_codec=preset.get("video_codec", "libx264"),
                video_bitrate=preset.get("video_bitrate"),
                video_crf=preset.get("video_crf"),
                video_preset=preset.get("video_preset"),
                resolution=preset.get("resolution"),
                framerate=preset.get("framerate"),
                audio_codec=preset.get("audio_codec", "aac"),
                audio_bitrate=preset.get("audio_bitrate"),
                audio_channels=preset.get("audio_channels"),
                audio_samplerate=preset.get("audio_samplerate"),
                container=preset.get("container", "mp4"),
                extra_args=preset.get("extra_args", []),
                trim_start=trim_start,
                trim_end=trim_end,
            )
        else:
            # No preset: trim-only job, stream-copy both tracks.
            config = TranscodeConfig(
                input_path=tmp_source,
                output_path=tmp_output,
                video_codec="copy",
                audio_codec="copy",
                trim_start=trim_start,
                trim_end=trim_end,
            )
        success = transcode(config, duration=duration)
        if not success:
            raise RuntimeError("Transcode returned False")
        # Upload result to S3
        logger.info(f"Uploading s3://{S3_BUCKET_OUT}/{output_key}")
        s3.upload_file(tmp_output, S3_BUCKET_OUT, output_key)
        result = {"status": "completed", "job_id": job_id, "output_key": output_key}
        # Callback to API
        _callback(callback_url, job_id, api_key, {"status": "completed"})
        return result
    except Exception as e:
        logger.exception(f"Job {job_id} failed: {e}")
        _callback(callback_url, job_id, api_key, {
            "status": "failed",
            "error": str(e),
        })
        return {"status": "failed", "job_id": job_id, "error": str(e)}
    finally:
        # Clean /tmp explicitly — it persists across warm invocations.
        for f in [tmp_source, tmp_output]:
            try:
                os.unlink(f)
            except OSError:
                pass
def _callback(callback_url, job_id, api_key, payload):
"""Call back to API with job result."""
if not callback_url:
return
try:
url = f"{callback_url}/jobs/{job_id}/callback"
headers = {}
if api_key:
headers["X-API-Key"] = api_key
resp = requests.post(url, json=payload, headers=headers, timeout=10)
logger.info(f"Callback response: {resp.status_code}")
except Exception as e:
logger.warning(f"Callback failed: {e}")

105
core/task/tasks.py Normal file
View File

@@ -0,0 +1,105 @@
"""
Celery tasks for job processing.
"""
import logging
import os
from typing import Any, Dict, Optional
from celery import shared_task
from core.storage import BUCKET_IN, BUCKET_OUT, download_to_temp, upload_file
from core.rpc.server import update_job_progress
from core.task.executor import get_executor
logger = logging.getLogger(__name__)
@shared_task(bind=True, queue="transcode", max_retries=3, default_retry_delay=60)
def run_transcode_job(
    self,
    job_id: str,
    source_key: str,
    output_key: str,
    preset: Optional[Dict[str, Any]] = None,
    trim_start: Optional[float] = None,
    trim_end: Optional[float] = None,
    duration: Optional[float] = None,
) -> Dict[str, Any]:
    """
    Celery task to run a transcode/trim job.
    Downloads source from S3, runs FFmpeg, uploads result to S3.

    Retries up to 3 times with a 60s delay (see the decorator); on each
    attempt the source is re-downloaded because the finally-block below
    deletes the temp files even when a retry is raised.

    Args:
        job_id: Unique job identifier used for progress updates.
        source_key: Object key of the source in BUCKET_IN.
        output_key: Object key for the result in BUCKET_OUT.
        preset: Transcode preset dict; None means trim-only (executor
            stream-copies).
        trim_start: Trim start in seconds, if any.
        trim_end: Trim end in seconds, if any.
        duration: Source duration in seconds, used for progress math.

    Returns:
        Dict with "status" ("completed"/"failed"), "job_id", and either
        "output_key" or "error". Note: when retries remain, a failure
        re-raises via self.retry() instead of returning.
    """
    logger.info(f"Starting job {job_id}: {source_key} -> {output_key}")
    # Mark the job as started before any slow work begins.
    update_job_progress(job_id, progress=0, status="processing")
    # Download source from S3 to temp file
    logger.info(f"Downloading {source_key} from {BUCKET_IN}")
    tmp_source = download_to_temp(BUCKET_IN, source_key)
    # Create temp output path with same extension so FFmpeg picks the
    # right container muxer.
    import tempfile
    from pathlib import Path
    ext = Path(output_key).suffix or ".mp4"
    fd, tmp_output = tempfile.mkstemp(suffix=ext)
    os.close(fd)
    def progress_callback(percent: int, details: Dict[str, Any]) -> None:
        # Forward FFmpeg progress to the job store; "time" is the current
        # media timestamp reported by the executor.
        update_job_progress(
            job_id,
            progress=percent,
            current_time=details.get("time", 0.0),
            status="processing",
        )
    try:
        executor = get_executor()
        success = executor.run(
            job_id=job_id,
            source_path=tmp_source,
            output_path=tmp_output,
            preset=preset,
            trim_start=trim_start,
            trim_end=trim_end,
            duration=duration,
            progress_callback=progress_callback,
        )
        if success:
            # Upload result to S3
            logger.info(f"Uploading {output_key} to {BUCKET_OUT}")
            upload_file(tmp_output, BUCKET_OUT, output_key)
            logger.info(f"Job {job_id} completed successfully")
            update_job_progress(job_id, progress=100, status="completed")
            return {
                "status": "completed",
                "job_id": job_id,
                "output_key": output_key,
            }
        else:
            raise RuntimeError("Executor returned False")
    except Exception as e:
        logger.exception(f"Job {job_id} failed: {e}")
        # Progress is reset to 0 on failure so the UI does not show a
        # stale partial percentage.
        update_job_progress(job_id, progress=0, status="failed", error=str(e))
        if self.request.retries < self.max_retries:
            # self.retry() raises celery.exceptions.Retry; the finally
            # block still runs and cleans the temp files first.
            raise self.retry(exc=e)
        return {
            "status": "failed",
            "job_id": job_id,
            "error": str(e),
        }
    finally:
        # Clean up temp files
        for f in [tmp_source, tmp_output]:
            try:
                os.unlink(f)
            except OSError:
                pass