Shoehorning GraphQL, Step Functions and Lambdas; AWS deployment scripts

This commit is contained in:
2026-02-06 18:25:42 -03:00
parent 013587d108
commit e642908abb
35 changed files with 2354 additions and 930 deletions

View File

@@ -110,7 +110,16 @@ class LocalExecutor(Executor):
class LambdaExecutor(Executor):
"""Execute jobs via AWS Lambda (future implementation)."""
"""Execute jobs via AWS Step Functions + Lambda."""
def __init__(self):
    """Configure the Step Functions client and callback settings from the environment."""
    import boto3

    self.sfn = boto3.client(
        "stepfunctions",
        region_name=os.environ.get("AWS_REGION", "us-east-1"),
    )
    # Required: executions are started on this state machine (KeyError if unset).
    self.state_machine_arn = os.environ["STEP_FUNCTION_ARN"]
    # Optional: forwarded to the Lambda payload so it can report results back.
    self.callback_url = os.environ.get("CALLBACK_URL", "")
    self.callback_api_key = os.environ.get("CALLBACK_API_KEY", "")
def run(
    self,
    job_id: str,
    source_path: str,
    output_path: str,
    preset: Optional[Dict[str, Any]] = None,
    trim_start: Optional[float] = None,
    trim_end: Optional[float] = None,
    duration: Optional[float] = None,
    progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
) -> bool:
    """Start a Step Functions execution for this job.

    Builds the job payload (source/output S3 keys, preset, trim window,
    callback settings) and starts an execution on the configured state
    machine. The Lambda reports completion asynchronously via the
    callback URL, so this returns as soon as the execution is started.

    Returns:
        True once the execution has been started.
    """
    import json

    payload = {
        "job_id": job_id,
        "source_key": source_path,
        "output_key": output_path,
        "preset": preset,
        "trim_start": trim_start,
        "trim_end": trim_end,
        "duration": duration,
        "callback_url": self.callback_url,
        "api_key": self.callback_api_key,
    }
    response = self.sfn.start_execution(
        stateMachineArn=self.state_machine_arn,
        name=f"mpr-{job_id}",
        input=json.dumps(payload),
    )
    # Record the execution ARN on the job row so it can be tracked later.
    # Best-effort: a failure here must not fail the already-started
    # execution — but it should be visible in the logs, not swallowed.
    execution_arn = response["executionArn"]
    try:
        from mpr.media_assets.models import TranscodeJob

        TranscodeJob.objects.filter(id=job_id).update(execution_arn=execution_arn)
    except Exception:
        import logging

        logging.getLogger(__name__).warning(
            "Could not record execution ARN %s for job %s",
            execution_arn,
            job_id,
            exc_info=True,
        )
    return True
# Executor registry

148
task/lambda_handler.py Normal file
View File

@@ -0,0 +1,148 @@
"""
AWS Lambda handler for media transcoding.
Receives a job payload from Step Functions, downloads source from S3,
runs FFmpeg, uploads result to S3, and calls back to the API.
Uses the same core/ffmpeg module as the local Celery worker.
"""
import json
import logging
import os
import tempfile
from pathlib import Path
import boto3
import requests
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# S3 config
S3_BUCKET_IN = os.environ.get("S3_BUCKET_IN", "mpr-media-in")
S3_BUCKET_OUT = os.environ.get("S3_BUCKET_OUT", "mpr-media-out")
AWS_REGION = os.environ.get("AWS_REGION", "us-east-1")
s3 = boto3.client("s3", region_name=AWS_REGION)
def handler(event, context):
    """
    Lambda entry point for media transcoding.

    Downloads the source object from S3, runs FFmpeg via the shared
    core/ffmpeg module, uploads the result to S3, and reports the outcome
    back to the API via _callback.

    Event payload (from Step Functions):
        {
            "job_id": "uuid",
            "source_key": "path/to/source.mp4",
            "output_key": "output_filename.mp4",
            "preset": {...} or null,
            "trim_start": float or null,
            "trim_end": float or null,
            "duration": float or null,
            "callback_url": "https://mpr.mcrn.ar/api",
            "api_key": "secret"
        }

    Returns:
        {"status": "completed", "job_id": ..., "output_key": ...} on success,
        {"status": "failed", "job_id": ..., "error": ...} on failure.
    """
    job_id = event["job_id"]
    source_key = event["source_key"]
    output_key = event["output_key"]
    preset = event.get("preset")
    trim_start = event.get("trim_start")
    trim_end = event.get("trim_end")
    duration = event.get("duration")
    callback_url = event.get("callback_url", "")
    api_key = event.get("api_key", "")
    logger.info(f"Starting job {job_id}: {source_key} -> {output_key}")
    # Use mkstemp, not the deprecated and race-prone tempfile.mktemp.
    # Close the fds immediately: boto3 and ffmpeg open the paths themselves.
    ext_in = Path(source_key).suffix or ".mp4"
    fd_in, tmp_source = tempfile.mkstemp(suffix=ext_in, dir="/tmp")
    os.close(fd_in)
    ext_out = Path(output_key).suffix or ".mp4"
    fd_out, tmp_output = tempfile.mkstemp(suffix=ext_out, dir="/tmp")
    os.close(fd_out)
    try:
        # Download inside the try so a failed download is also reported via
        # the callback and the temp files are still cleaned up.
        logger.info(f"Downloading s3://{S3_BUCKET_IN}/{source_key}")
        s3.download_file(S3_BUCKET_IN, source_key, tmp_source)
        # Import ffmpeg module (bundled in container)
        from core.ffmpeg.transcode import TranscodeConfig, transcode
        if preset:
            config = TranscodeConfig(
                input_path=tmp_source,
                output_path=tmp_output,
                video_codec=preset.get("video_codec", "libx264"),
                video_bitrate=preset.get("video_bitrate"),
                video_crf=preset.get("video_crf"),
                video_preset=preset.get("video_preset"),
                resolution=preset.get("resolution"),
                framerate=preset.get("framerate"),
                audio_codec=preset.get("audio_codec", "aac"),
                audio_bitrate=preset.get("audio_bitrate"),
                audio_channels=preset.get("audio_channels"),
                audio_samplerate=preset.get("audio_samplerate"),
                container=preset.get("container", "mp4"),
                extra_args=preset.get("extra_args", []),
                trim_start=trim_start,
                trim_end=trim_end,
            )
        else:
            # No preset: stream-copy both tracks (remux/trim without re-encoding).
            config = TranscodeConfig(
                input_path=tmp_source,
                output_path=tmp_output,
                video_codec="copy",
                audio_codec="copy",
                trim_start=trim_start,
                trim_end=trim_end,
            )
        success = transcode(config, duration=duration)
        if not success:
            raise RuntimeError("Transcode returned False")
        # Upload result to S3
        logger.info(f"Uploading s3://{S3_BUCKET_OUT}/{output_key}")
        s3.upload_file(tmp_output, S3_BUCKET_OUT, output_key)
        result = {"status": "completed", "job_id": job_id, "output_key": output_key}
        # Callback to API
        _callback(callback_url, job_id, api_key, {"status": "completed"})
        return result
    except Exception as e:
        logger.exception(f"Job {job_id} failed: {e}")
        _callback(callback_url, job_id, api_key, {
            "status": "failed",
            "error": str(e),
        })
        return {"status": "failed", "job_id": job_id, "error": str(e)}
    finally:
        # Always remove temp files: /tmp persists across warm Lambda invocations.
        for f in (tmp_source, tmp_output):
            try:
                os.unlink(f)
            except OSError:
                pass
def _callback(callback_url, job_id, api_key, payload):
"""Call back to API with job result."""
if not callback_url:
return
try:
url = f"{callback_url}/jobs/{job_id}/callback"
headers = {}
if api_key:
headers["X-API-Key"] = api_key
resp = requests.post(url, json=payload, headers=headers, timeout=10)
logger.info(f"Callback response: {resp.status_code}")
except Exception as e:
logger.warning(f"Callback failed: {e}")

View File

@@ -8,21 +8,19 @@ from typing import Any, Dict, Optional
from celery import shared_task
from core.storage import BUCKET_IN, BUCKET_OUT, download_to_temp, upload_file
from rpc.server import update_job_progress
from task.executor import get_executor
logger = logging.getLogger(__name__)
# Media paths from environment
MEDIA_ROOT = os.environ.get("MEDIA_ROOT", "/app/media")
@shared_task(bind=True, queue="transcode", max_retries=3, default_retry_delay=60)
def run_transcode_job(
self,
job_id: str,
source_path: str,
output_path: str,
source_key: str,
output_key: str,
preset: Optional[Dict[str, Any]] = None,
trim_start: Optional[float] = None,
trim_end: Optional[float] = None,
@@ -31,25 +29,25 @@ def run_transcode_job(
"""
Celery task to run a transcode/trim job.
Args:
job_id: Unique job identifier
source_path: Path to source file
output_path: Path for output file
preset: Transcode preset dict (optional)
trim_start: Trim start time in seconds (optional)
trim_end: Trim end time in seconds (optional)
duration: Source duration for progress calculation
Returns:
Result dict with status and output_path
Downloads source from S3, runs FFmpeg, uploads result to S3.
"""
logger.info(f"Starting job {job_id}: {source_path} -> {output_path}")
logger.info(f"Starting job {job_id}: {source_key} -> {output_key}")
# Update status to processing
update_job_progress(job_id, progress=0, status="processing")
# Download source from S3 to temp file
logger.info(f"Downloading {source_key} from {BUCKET_IN}")
tmp_source = download_to_temp(BUCKET_IN, source_key)
# Create temp output path with same extension
import tempfile
from pathlib import Path
ext = Path(output_key).suffix or ".mp4"
fd, tmp_output = tempfile.mkstemp(suffix=ext)
os.close(fd)
def progress_callback(percent: int, details: Dict[str, Any]) -> None:
"""Update gRPC progress state."""
update_job_progress(
job_id,
progress=percent,
@@ -61,8 +59,8 @@ def run_transcode_job(
executor = get_executor()
success = executor.run(
job_id=job_id,
source_path=source_path,
output_path=output_path,
source_path=tmp_source,
output_path=tmp_output,
preset=preset,
trim_start=trim_start,
trim_end=trim_end,
@@ -71,12 +69,16 @@ def run_transcode_job(
)
if success:
# Upload result to S3
logger.info(f"Uploading {output_key} to {BUCKET_OUT}")
upload_file(tmp_output, BUCKET_OUT, output_key)
logger.info(f"Job {job_id} completed successfully")
update_job_progress(job_id, progress=100, status="completed")
return {
"status": "completed",
"job_id": job_id,
"output_path": output_path,
"output_key": output_key,
}
else:
raise RuntimeError("Executor returned False")
@@ -85,7 +87,6 @@ def run_transcode_job(
logger.exception(f"Job {job_id} failed: {e}")
update_job_progress(job_id, progress=0, status="failed", error=str(e))
# Retry on transient errors
if self.request.retries < self.max_retries:
raise self.retry(exc=e)
@@ -94,3 +95,11 @@ def run_transcode_job(
"job_id": job_id,
"error": str(e),
}
finally:
# Clean up temp files
for f in [tmp_source, tmp_output]:
try:
os.unlink(f)
except OSError:
pass