major refactor

This commit is contained in:
2026-03-13 01:07:02 -03:00
parent eaaf2ad60c
commit 3eeedebb15
61 changed files with 441 additions and 242 deletions

15
core/task/__init__.py Normal file
View File

@@ -0,0 +1,15 @@
"""
MPR Worker Module
Provides executor abstraction and Celery tasks for job processing.
"""
from .executor import Executor, LocalExecutor, get_executor
from .tasks import run_transcode_job
__all__ = [
"Executor",
"LocalExecutor",
"get_executor",
"run_transcode_job",
]

260
core/task/executor.py Normal file
View File

@@ -0,0 +1,260 @@
"""
Executor abstraction for job processing.
Supports different backends:
- LocalExecutor: FFmpeg via Celery (default)
- LambdaExecutor: AWS Lambda (future)
"""
import os
from abc import ABC, abstractmethod
from typing import Any, Callable, Dict, Optional
from core.ffmpeg.transcode import TranscodeConfig, transcode
# Configuration from environment
MPR_EXECUTOR = os.environ.get("MPR_EXECUTOR", "local")
class Executor(ABC):
    """Interface implemented by every job-execution backend.

    Concrete subclasses decide *where* the transcode work happens
    (local FFmpeg, AWS Step Functions, GCP Cloud Run); callers only
    ever interact with :meth:`run`.
    """

    @abstractmethod
    def run(
        self,
        job_id: str,
        source_path: str,
        output_path: str,
        preset: Optional[Dict[str, Any]] = None,
        trim_start: Optional[float] = None,
        trim_end: Optional[float] = None,
        duration: Optional[float] = None,
        progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
    ) -> bool:
        """Execute a single transcode/trim job.

        Args:
            job_id: Unique job identifier.
            source_path: Path to the source file.
            output_path: Path for the output file.
            preset: Transcode preset dict; ``None`` means trim-only.
            trim_start: Trim start time in seconds, if any.
            trim_end: Trim end time in seconds, if any.
            duration: Source duration in seconds, used for progress math.
            progress_callback: Invoked with ``(percent, details_dict)``.

        Returns:
            ``True`` on success.
        """
        ...
class LocalExecutor(Executor):
    """Run jobs on this machine by invoking FFmpeg directly."""

    def run(
        self,
        job_id: str,
        source_path: str,
        output_path: str,
        preset: Optional[Dict[str, Any]] = None,
        trim_start: Optional[float] = None,
        trim_end: Optional[float] = None,
        duration: Optional[float] = None,
        progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
    ) -> bool:
        """Transcode (or stream-copy trim) ``source_path`` into ``output_path``.

        With a ``preset``, its fields drive the encoder settings; without
        one, both streams are copied untouched so only the trim window
        applies.
        """
        if preset is None:
            # Trim-only job: no re-encode, just a stream copy.
            config = TranscodeConfig(
                input_path=source_path,
                output_path=output_path,
                video_codec="copy",
                audio_codec="copy",
                trim_start=trim_start,
                trim_end=trim_end,
            )
        else:
            config = TranscodeConfig(
                input_path=source_path,
                output_path=output_path,
                video_codec=preset.get("video_codec", "libx264"),
                video_bitrate=preset.get("video_bitrate"),
                video_crf=preset.get("video_crf"),
                video_preset=preset.get("video_preset"),
                resolution=preset.get("resolution"),
                framerate=preset.get("framerate"),
                audio_codec=preset.get("audio_codec", "aac"),
                audio_bitrate=preset.get("audio_bitrate"),
                audio_channels=preset.get("audio_channels"),
                audio_samplerate=preset.get("audio_samplerate"),
                container=preset.get("container", "mp4"),
                extra_args=preset.get("extra_args", []),
                trim_start=trim_start,
                trim_end=trim_end,
            )

        # transcode() reports float percentages; the public callback
        # contract is int, so adapt before forwarding.
        on_progress = None
        if progress_callback is not None:
            def on_progress(percent: float, details: Dict[str, Any]) -> None:
                progress_callback(int(percent), details)

        return transcode(config, duration=duration, progress_callback=on_progress)
class LambdaExecutor(Executor):
    """Dispatch jobs to AWS via Step Functions + Lambda.

    ``run`` is fire-and-forget: it starts a Step Functions execution and
    returns immediately; the Lambda reports completion through the HTTP
    callback, not through this process.
    """

    def __init__(self):
        # Imported lazily so deployments using only LocalExecutor do not
        # need boto3 installed.
        import boto3

        region = os.environ.get("AWS_REGION", "us-east-1")
        self.sfn = boto3.client("stepfunctions", region_name=region)
        # Required: there is no sensible default state machine.
        self.state_machine_arn = os.environ["STEP_FUNCTION_ARN"]
        self.callback_url = os.environ.get("CALLBACK_URL", "")
        self.callback_api_key = os.environ.get("CALLBACK_API_KEY", "")

    def run(
        self,
        job_id: str,
        source_path: str,
        output_path: str,
        preset: Optional[Dict[str, Any]] = None,
        trim_start: Optional[float] = None,
        trim_end: Optional[float] = None,
        duration: Optional[float] = None,
        progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
    ) -> bool:
        """Start a Step Functions execution for this job.

        Returns:
            True once the execution has been *started* (not completed).

        Note:
            ``progress_callback`` is ignored; progress is reported by the
            Lambda via the HTTP callback.
        """
        import json
        import logging
        import uuid

        payload = {
            "job_id": job_id,
            "source_key": source_path,
            "output_key": output_path,
            "preset": preset,
            "trim_start": trim_start,
            "trim_end": trim_end,
            "duration": duration,
            "callback_url": self.callback_url,
            "api_key": self.callback_api_key,
        }
        # Step Functions execution names must be unique per state machine
        # for 90 days; a bare job_id would raise ExecutionAlreadyExists if
        # the job is ever retried or re-run, so append a random suffix.
        response = self.sfn.start_execution(
            stateMachineArn=self.state_machine_arn,
            name=f"mpr-{job_id}-{uuid.uuid4().hex[:8]}",
            input=json.dumps(payload),
        )
        execution_arn = response["executionArn"]
        # Best-effort: persist the ARN on the job row so the API can track
        # the execution. A persistence failure must not fail the dispatch,
        # but it should not be silent either.
        try:
            from core.db import update_job_fields

            update_job_fields(job_id, execution_arn=execution_arn)
        except Exception:
            logging.getLogger(__name__).warning(
                "Could not store execution ARN for job %s", job_id, exc_info=True
            )
        return True
class GCPExecutor(Executor):
    """Dispatch jobs to Google Cloud Run Jobs.

    ``run`` is fire-and-forget: it triggers a Cloud Run Job execution and
    returns immediately; the handler container reports completion through
    the HTTP callback, not through this process.
    """

    def __init__(self):
        # Imported lazily so deployments without the GCP SDK can still use
        # the other executors.
        from google.cloud import run_v2

        self.client = run_v2.JobsClient()
        # Required: there is no sensible default project/job.
        self.project_id = os.environ["GCP_PROJECT_ID"]
        self.region = os.environ.get("GCP_REGION", "us-central1")
        self.job_name = os.environ["CLOUD_RUN_JOB"]
        self.callback_url = os.environ.get("CALLBACK_URL", "")
        self.callback_api_key = os.environ.get("CALLBACK_API_KEY", "")

    def run(
        self,
        job_id: str,
        source_path: str,
        output_path: str,
        preset: Optional[Dict[str, Any]] = None,
        trim_start: Optional[float] = None,
        trim_end: Optional[float] = None,
        duration: Optional[float] = None,
        progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
    ) -> bool:
        """Trigger a Cloud Run Job execution for this job.

        The payload is injected into the container through the
        ``MPR_JOB_PAYLOAD`` env var, which ``task.gcp_handler`` reads.

        Returns:
            True once the execution has been *triggered* (not completed).

        Note:
            ``progress_callback`` is ignored; progress is reported by the
            handler via the HTTP callback.
        """
        import json
        import logging

        from google.cloud import run_v2

        payload = {
            "job_id": job_id,
            "source_key": source_path,
            "output_key": output_path,
            "preset": preset,
            "trim_start": trim_start,
            "trim_end": trim_end,
            "duration": duration,
            "callback_url": self.callback_url,
            "api_key": self.callback_api_key,
        }
        job_path = (
            f"projects/{self.project_id}/locations/{self.region}/jobs/{self.job_name}"
        )
        request = run_v2.RunJobRequest(
            name=job_path,
            overrides=run_v2.RunJobRequest.Overrides(
                container_overrides=[
                    run_v2.RunJobRequest.Overrides.ContainerOverride(
                        env=[
                            run_v2.EnvVar(
                                name="MPR_JOB_PAYLOAD", value=json.dumps(payload)
                            )
                        ]
                    )
                ]
            ),
        )
        operation = self.client.run_job(request=request)
        execution_name = operation.metadata.name
        # Best-effort: record the execution name (stored in the job's
        # execution_arn field, shared with the AWS backend) so the API can
        # track it. A persistence failure must not fail the dispatch, but
        # it should not be silent either.
        try:
            from core.db import update_job_fields

            update_job_fields(job_id, execution_arn=execution_name)
        except Exception:
            logging.getLogger(__name__).warning(
                "Could not store execution name for job %s", job_id, exc_info=True
            )
        return True
# Registry mapping MPR_EXECUTOR values to their implementations.
_executors: Dict[str, type] = {
    "local": LocalExecutor,
    "lambda": LambdaExecutor,
    "gcp": GCPExecutor,
}
# Lazily-created singleton handed out by get_executor().
_executor_instance: Optional[Executor] = None


def get_executor() -> Executor:
    """Return the process-wide executor selected by ``MPR_EXECUTOR``.

    The instance is created on first use and cached for the lifetime of
    the process.

    Raises:
        ValueError: if ``MPR_EXECUTOR`` names an unknown backend.
    """
    global _executor_instance
    if _executor_instance is not None:
        return _executor_instance
    executor_type = MPR_EXECUTOR.lower()
    executor_cls = _executors.get(executor_type)
    if executor_cls is None:
        raise ValueError(f"Unknown executor type: {executor_type}")
    _executor_instance = executor_cls()
    return _executor_instance

121
core/task/gcp_handler.py Normal file
View File

@@ -0,0 +1,121 @@
"""
Google Cloud Run Job handler for media transcoding.
Reads job payload from the MPR_JOB_PAYLOAD env var (injected by GCPExecutor),
downloads source from S3-compatible storage (GCS via HMAC + S3 API),
runs FFmpeg, uploads result, and calls back to the API.
Uses core/storage and core/ffmpeg — same modules as the Celery worker.
No cloud-provider SDK required here; storage goes through core.storage (boto3 + S3 compat).
Entry point: python -m task.gcp_handler (set as Cloud Run Job command)
"""
import json
import logging
import os
import sys
import tempfile
from pathlib import Path
import requests
from core.ffmpeg.transcode import TranscodeConfig, transcode
from core.storage import BUCKET_IN, BUCKET_OUT, download_to_temp, upload_file
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def _config_from_event(
    tmp_source: str,
    tmp_output: str,
    preset,
    trim_start,
    trim_end,
) -> TranscodeConfig:
    """Build the FFmpeg config: full transcode when a preset is given,
    otherwise a stream-copy (trim-only) pass."""
    if not preset:
        return TranscodeConfig(
            input_path=tmp_source,
            output_path=tmp_output,
            video_codec="copy",
            audio_codec="copy",
            trim_start=trim_start,
            trim_end=trim_end,
        )
    return TranscodeConfig(
        input_path=tmp_source,
        output_path=tmp_output,
        video_codec=preset.get("video_codec", "libx264"),
        video_bitrate=preset.get("video_bitrate"),
        video_crf=preset.get("video_crf"),
        video_preset=preset.get("video_preset"),
        resolution=preset.get("resolution"),
        framerate=preset.get("framerate"),
        audio_codec=preset.get("audio_codec", "aac"),
        audio_bitrate=preset.get("audio_bitrate"),
        audio_channels=preset.get("audio_channels"),
        audio_samplerate=preset.get("audio_samplerate"),
        container=preset.get("container", "mp4"),
        extra_args=preset.get("extra_args", []),
        trim_start=trim_start,
        trim_end=trim_end,
    )


def main() -> None:
    """Cloud Run Job entry point.

    Reads the job payload from ``MPR_JOB_PAYLOAD``, downloads the source,
    transcodes it, uploads the result, reports the outcome to the API,
    and exits with status 0 on success / 1 on failure.
    """
    raw = os.environ.get("MPR_JOB_PAYLOAD")
    if not raw:
        logger.error("MPR_JOB_PAYLOAD not set")
        sys.exit(1)
    event = json.loads(raw)

    job_id = event["job_id"]
    source_key = event["source_key"]
    output_key = event["output_key"]
    callback_url = event.get("callback_url", "")
    api_key = event.get("api_key", "")
    logger.info(f"Starting job {job_id}: {source_key} -> {output_key}")

    tmp_source = download_to_temp(BUCKET_IN, source_key)
    # Output temp file keeps the destination extension so FFmpeg picks
    # the right container muxer.
    fd, tmp_output = tempfile.mkstemp(suffix=Path(output_key).suffix or ".mp4")
    os.close(fd)

    try:
        config = _config_from_event(
            tmp_source,
            tmp_output,
            event.get("preset"),
            event.get("trim_start"),
            event.get("trim_end"),
        )
        if not transcode(config, duration=event.get("duration")):
            raise RuntimeError("Transcode returned False")
        logger.info(f"Uploading to {BUCKET_OUT}/{output_key}")
        upload_file(tmp_output, BUCKET_OUT, output_key)
        _callback(callback_url, job_id, api_key, {"status": "completed"})
        logger.info(f"Job {job_id} completed")
        sys.exit(0)
    except Exception as e:
        logger.exception(f"Job {job_id} failed: {e}")
        _callback(callback_url, job_id, api_key, {"status": "failed", "error": str(e)})
        sys.exit(1)
    finally:
        # Always remove both temp files, whatever the outcome.
        for tmp in (tmp_source, tmp_output):
            try:
                os.unlink(tmp)
            except OSError:
                pass
def _callback(callback_url: str, job_id: str, api_key: str, payload: dict) -> None:
if not callback_url:
return
try:
url = f"{callback_url}/jobs/{job_id}/callback"
headers = {"X-API-Key": api_key} if api_key else {}
resp = requests.post(url, json=payload, headers=headers, timeout=10)
logger.info(f"Callback response: {resp.status_code}")
except Exception as e:
logger.warning(f"Callback failed: {e}")
if __name__ == "__main__":
main()

148
core/task/lambda_handler.py Normal file
View File

@@ -0,0 +1,148 @@
"""
AWS Lambda handler for media transcoding.
Receives a job payload from Step Functions, downloads source from S3,
runs FFmpeg, uploads result to S3, and calls back to the API.
Uses the same core/ffmpeg module as the local Celery worker.
"""
import json
import logging
import os
import tempfile
from pathlib import Path
import boto3
import requests
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# S3 configuration: bucket names and region come from the Lambda's
# environment, with defaults matching the dev deployment.
S3_BUCKET_IN = os.environ.get("S3_BUCKET_IN", "mpr-media-in")
S3_BUCKET_OUT = os.environ.get("S3_BUCKET_OUT", "mpr-media-out")
AWS_REGION = os.environ.get("AWS_REGION", "us-east-1")
# Module-level client so it is reused across warm Lambda invocations.
s3 = boto3.client("s3", region_name=AWS_REGION)
def handler(event, context):
    """
    Lambda entry point: download the source from S3, run FFmpeg, upload
    the result to S3, and report the outcome to the API.

    Event payload (from Step Functions):
    {
        "job_id": "uuid",
        "source_key": "path/to/source.mp4",
        "output_key": "output_filename.mp4",
        "preset": {...} or null,
        "trim_start": float or null,
        "trim_end": float or null,
        "duration": float or null,
        "callback_url": "https://mpr.mcrn.ar/api",
        "api_key": "secret"
    }

    Returns:
        dict with "status" ("completed" or "failed"), "job_id", and
        either "output_key" or "error".
    """
    job_id = event["job_id"]
    source_key = event["source_key"]
    output_key = event["output_key"]
    preset = event.get("preset")
    trim_start = event.get("trim_start")
    trim_end = event.get("trim_end")
    duration = event.get("duration")
    callback_url = event.get("callback_url", "")
    api_key = event.get("api_key", "")
    logger.info(f"Starting job {job_id}: {source_key} -> {output_key}")
    # Download source from S3. mkstemp (not the deprecated, race-prone
    # mktemp) creates the file securely; the S3 download then overwrites
    # its contents. This also matches the gcp_handler/tasks.py style.
    ext_in = Path(source_key).suffix or ".mp4"
    fd, tmp_source = tempfile.mkstemp(suffix=ext_in, dir="/tmp")
    os.close(fd)
    logger.info(f"Downloading s3://{S3_BUCKET_IN}/{source_key}")
    s3.download_file(S3_BUCKET_IN, source_key, tmp_source)
    # Prepare output temp file with the destination extension so FFmpeg
    # selects the right container muxer.
    ext_out = Path(output_key).suffix or ".mp4"
    fd, tmp_output = tempfile.mkstemp(suffix=ext_out, dir="/tmp")
    os.close(fd)
    try:
        # Imported here: the ffmpeg module is bundled in the container.
        from core.ffmpeg.transcode import TranscodeConfig, transcode
        if preset:
            config = TranscodeConfig(
                input_path=tmp_source,
                output_path=tmp_output,
                video_codec=preset.get("video_codec", "libx264"),
                video_bitrate=preset.get("video_bitrate"),
                video_crf=preset.get("video_crf"),
                video_preset=preset.get("video_preset"),
                resolution=preset.get("resolution"),
                framerate=preset.get("framerate"),
                audio_codec=preset.get("audio_codec", "aac"),
                audio_bitrate=preset.get("audio_bitrate"),
                audio_channels=preset.get("audio_channels"),
                audio_samplerate=preset.get("audio_samplerate"),
                container=preset.get("container", "mp4"),
                extra_args=preset.get("extra_args", []),
                trim_start=trim_start,
                trim_end=trim_end,
            )
        else:
            # No preset: trim-only job, stream-copy both tracks.
            config = TranscodeConfig(
                input_path=tmp_source,
                output_path=tmp_output,
                video_codec="copy",
                audio_codec="copy",
                trim_start=trim_start,
                trim_end=trim_end,
            )
        success = transcode(config, duration=duration)
        if not success:
            raise RuntimeError("Transcode returned False")
        # Upload result to S3
        logger.info(f"Uploading s3://{S3_BUCKET_OUT}/{output_key}")
        s3.upload_file(tmp_output, S3_BUCKET_OUT, output_key)
        result = {"status": "completed", "job_id": job_id, "output_key": output_key}
        # Callback to API
        _callback(callback_url, job_id, api_key, {"status": "completed"})
        return result
    except Exception as e:
        logger.exception(f"Job {job_id} failed: {e}")
        _callback(callback_url, job_id, api_key, {
            "status": "failed",
            "error": str(e),
        })
        return {"status": "failed", "job_id": job_id, "error": str(e)}
    finally:
        # Clean /tmp explicitly — it persists across warm invocations.
        for f in [tmp_source, tmp_output]:
            try:
                os.unlink(f)
            except OSError:
                pass
def _callback(callback_url, job_id, api_key, payload):
"""Call back to API with job result."""
if not callback_url:
return
try:
url = f"{callback_url}/jobs/{job_id}/callback"
headers = {}
if api_key:
headers["X-API-Key"] = api_key
resp = requests.post(url, json=payload, headers=headers, timeout=10)
logger.info(f"Callback response: {resp.status_code}")
except Exception as e:
logger.warning(f"Callback failed: {e}")

105
core/task/tasks.py Normal file
View File

@@ -0,0 +1,105 @@
"""
Celery tasks for job processing.
"""
import logging
import os
from typing import Any, Dict, Optional
from celery import shared_task
from core.storage import BUCKET_IN, BUCKET_OUT, download_to_temp, upload_file
from core.rpc.server import update_job_progress
from core.task.executor import get_executor
logger = logging.getLogger(__name__)
@shared_task(bind=True, queue="transcode", max_retries=3, default_retry_delay=60)
def run_transcode_job(
    self,
    job_id: str,
    source_key: str,
    output_key: str,
    preset: Optional[Dict[str, Any]] = None,
    trim_start: Optional[float] = None,
    trim_end: Optional[float] = None,
    duration: Optional[float] = None,
) -> Dict[str, Any]:
    """
    Celery task to run a transcode/trim job.
    Downloads source from S3, runs FFmpeg, uploads result to S3.

    Retries up to 3 times with a 60s delay (see the decorator); on each
    attempt the source is re-downloaded because the finally-block below
    deletes the temp files even when a retry is raised.

    Args:
        job_id: Unique job identifier used for progress updates.
        source_key: Object key of the source in BUCKET_IN.
        output_key: Object key for the result in BUCKET_OUT.
        preset: Transcode preset dict; None means trim-only (executor
            stream-copies).
        trim_start: Trim start in seconds, if any.
        trim_end: Trim end in seconds, if any.
        duration: Source duration in seconds, used for progress math.

    Returns:
        Dict with "status" ("completed"/"failed"), "job_id", and either
        "output_key" or "error". Note: when retries remain, a failure
        re-raises via self.retry() instead of returning.
    """
    logger.info(f"Starting job {job_id}: {source_key} -> {output_key}")
    # Mark the job as started before any slow work begins.
    update_job_progress(job_id, progress=0, status="processing")
    # Download source from S3 to temp file
    logger.info(f"Downloading {source_key} from {BUCKET_IN}")
    tmp_source = download_to_temp(BUCKET_IN, source_key)
    # Create temp output path with same extension so FFmpeg picks the
    # right container muxer.
    import tempfile
    from pathlib import Path
    ext = Path(output_key).suffix or ".mp4"
    fd, tmp_output = tempfile.mkstemp(suffix=ext)
    os.close(fd)
    def progress_callback(percent: int, details: Dict[str, Any]) -> None:
        # Forward FFmpeg progress to the job store; "time" is the current
        # media timestamp reported by the executor.
        update_job_progress(
            job_id,
            progress=percent,
            current_time=details.get("time", 0.0),
            status="processing",
        )
    try:
        executor = get_executor()
        success = executor.run(
            job_id=job_id,
            source_path=tmp_source,
            output_path=tmp_output,
            preset=preset,
            trim_start=trim_start,
            trim_end=trim_end,
            duration=duration,
            progress_callback=progress_callback,
        )
        if success:
            # Upload result to S3
            logger.info(f"Uploading {output_key} to {BUCKET_OUT}")
            upload_file(tmp_output, BUCKET_OUT, output_key)
            logger.info(f"Job {job_id} completed successfully")
            update_job_progress(job_id, progress=100, status="completed")
            return {
                "status": "completed",
                "job_id": job_id,
                "output_key": output_key,
            }
        else:
            raise RuntimeError("Executor returned False")
    except Exception as e:
        logger.exception(f"Job {job_id} failed: {e}")
        # Progress is reset to 0 on failure so the UI does not show a
        # stale partial percentage.
        update_job_progress(job_id, progress=0, status="failed", error=str(e))
        if self.request.retries < self.max_retries:
            # self.retry() raises celery.exceptions.Retry; the finally
            # block still runs and cleans the temp files first.
            raise self.retry(exc=e)
        return {
            "status": "failed",
            "job_id": job_id,
            "error": str(e),
        }
    finally:
        # Clean up temp files
        for f in [tmp_source, tmp_output]:
            try:
                os.unlink(f)
            except OSError:
                pass