shoehorning graphql, step functions and lambdas. aws deployment scripts

This commit is contained in:
2026-02-06 18:25:42 -03:00
parent 013587d108
commit e642908abb
35 changed files with 2354 additions and 930 deletions

View File

@@ -9,45 +9,26 @@ from fastapi import APIRouter, Depends, HTTPException, Query
from api.deps import get_asset
from api.schemas import AssetCreate, AssetResponse, AssetUpdate
from core.storage import BUCKET_IN, list_objects
router = APIRouter(prefix="/assets", tags=["assets"])
# Supported media extensions
VIDEO_EXTS = {".mp4", ".mkv", ".avi", ".mov", ".webm", ".flv", ".wmv", ".m4v"}
AUDIO_EXTS = {".mp3", ".wav", ".flac", ".aac", ".ogg", ".m4a"}
MEDIA_EXTS = VIDEO_EXTS | AUDIO_EXTS
@router.post("/", response_model=AssetResponse, status_code=201)
def create_asset(data: AssetCreate):
"""
Register a media file as an asset.
The file must exist on disk. A probe task will be queued
to extract metadata asynchronously.
"""
from pathlib import Path
"""Register a media file as an asset."""
from mpr.media_assets.models import MediaAsset
# Create asset
asset = MediaAsset.objects.create(
filename=data.filename or data.file_path.split("/")[-1],
file_path=data.file_path,
file_size=data.file_size,
)
# TODO: Queue probe task via gRPC/Celery
return asset
@@ -61,10 +42,8 @@ def list_assets(
from mpr.media_assets.models import MediaAsset
qs = MediaAsset.objects.all()
if status:
qs = qs.filter(status=status)
return list(qs[offset : offset + limit])
@@ -102,62 +81,36 @@ def delete_asset(asset_id: UUID, asset=Depends(get_asset)):
@router.post("/scan", response_model=dict)
def scan_media_folder():
"""
Scan the S3 media-in bucket for new video/audio files and register them as assets.
Returns a summary of files found and registered.
"""
from mpr.media_assets.models import MediaAsset
# List objects from S3 bucket
objects = list_objects(BUCKET_IN, extensions=MEDIA_EXTS)
# Get existing filenames to avoid duplicates
existing_filenames = set(MediaAsset.objects.values_list("filename", flat=True))
registered_files = []
skipped_files = []
for obj in objects:
if obj["filename"] in existing_filenames:
skipped_files.append(obj["filename"])
continue
try:
MediaAsset.objects.create(
filename=obj["filename"],
file_path=obj["key"],
file_size=obj["size"],
)
registered_files.append(obj["filename"])
# TODO: Queue probe task to extract metadata
except Exception as e:
print(f"Error registering {obj['filename']}: {e}")
return {
"found": len(found_files),
"found": len(objects),
"registered": len(registered_files),
"skipped": len(skipped_files),
"files": registered_files,

View File

@@ -2,17 +2,20 @@
Job endpoints - transcode/trim job management.
"""
import json
import os
from pathlib import Path
from typing import Optional
from uuid import UUID
from fastapi import APIRouter, Depends, Header, HTTPException, Query
from api.deps import get_asset, get_job, get_preset
from api.schemas import JobCreate, JobResponse
router = APIRouter(prefix="/jobs", tags=["jobs"])
CALLBACK_API_KEY = os.environ.get("CALLBACK_API_KEY", "")
@router.post("/", response_model=JobResponse, status_code=201)
def create_job(data: JobCreate):
@@ -36,7 +39,6 @@ def create_job(data: JobCreate):
if data.preset_id:
try:
preset = TranscodePreset.objects.get(id=data.preset_id)
# Snapshot preset at job creation time
preset_snapshot = {
"name": preset.name,
"container": preset.container,
@@ -61,22 +63,13 @@ def create_job(data: JobCreate):
status_code=400, detail="Must specify preset_id or trim_start/trim_end"
)
# Generate output filename - stored as S3 key in output bucket
output_filename = data.output_filename
if not output_filename:
stem = Path(source.filename).stem
ext = preset_snapshot.get("container", "mp4") if preset else "mp4"
output_filename = f"{stem}_output.{ext}"
# Create job
job = TranscodeJob.objects.create(
source_asset_id=source.id,
@@ -85,26 +78,95 @@ def create_job(data: JobCreate):
trim_start=data.trim_start,
trim_end=data.trim_end,
output_filename=output_filename,
output_path=output_filename, # S3 key in output bucket
priority=data.priority or 0,
)
# Dispatch based on executor mode
executor_mode = os.environ.get("MPR_EXECUTOR", "local")
if executor_mode == "lambda":
_dispatch_lambda(job, source, preset_snapshot)
else:
_dispatch_celery(job, source, preset_snapshot)
return job
def _dispatch_celery(job, source, preset_snapshot):
"""Dispatch job to Celery worker."""
from task.tasks import run_transcode_job
result = run_transcode_job.delay(
job_id=str(job.id),
source_key=source.file_path,
output_key=job.output_filename,
preset=preset_snapshot or None,
trim_start=job.trim_start,
trim_end=job.trim_end,
duration=source.duration,
)
job.celery_task_id = result.id
job.save(update_fields=["celery_task_id"])
return job
def _dispatch_lambda(job, source, preset_snapshot):
"""Dispatch job to AWS Step Functions."""
from task.executor import get_executor
executor = get_executor()
executor.run(
job_id=str(job.id),
source_path=source.file_path,
output_path=job.output_filename,
preset=preset_snapshot or None,
trim_start=job.trim_start,
trim_end=job.trim_end,
duration=source.duration,
)
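_dispatch_lambda delegates to task.executor.get_executor(), which this diff does not show. A hedged sketch of one way that executor could start the Step Functions state machine via boto3; the STEP_FUNCTION_ARN variable name and the execution naming scheme are assumptions:

# Hypothetical sketch of task/executor.py -- not the actual implementation.
import json
import os

import boto3


class StepFunctionsExecutor:
    """Starts one Step Functions execution per transcode job."""

    def __init__(self):
        self.client = boto3.client("stepfunctions")
        # Assumed env var name for the state machine ARN.
        self.state_machine_arn = os.environ["STEP_FUNCTION_ARN"]

    def run(self, **payload):
        # The execution input mirrors the kwargs passed by _dispatch_lambda.
        self.client.start_execution(
            stateMachineArn=self.state_machine_arn,
            name=f"transcode-{payload['job_id']}",
            input=json.dumps(payload),
        )


def get_executor():
    return StepFunctionsExecutor()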
@router.post("/{job_id}/callback")
def job_callback(
job_id: UUID,
payload: dict,
x_api_key: Optional[str] = Header(None),
):
"""
Callback endpoint for Lambda to report job completion.
Protected by API key.
"""
if CALLBACK_API_KEY and x_api_key != CALLBACK_API_KEY:
raise HTTPException(status_code=403, detail="Invalid API key")
from django.utils import timezone
from mpr.media_assets.models import TranscodeJob
try:
job = TranscodeJob.objects.get(id=job_id)
except TranscodeJob.DoesNotExist:
raise HTTPException(status_code=404, detail="Job not found")
status = payload.get("status", "failed")
job.status = status
job.progress = 100.0 if status == "completed" else job.progress
update_fields = ["status", "progress"]
if payload.get("error"):
job.error_message = payload["error"]
update_fields.append("error_message")
if status == "completed":
job.completed_at = timezone.now()
update_fields.append("completed_at")
elif status == "failed":
job.completed_at = timezone.now()
update_fields.append("completed_at")
job.save(update_fields=update_fields)
return {"ok": True}
@router.get("/", response_model=list[JobResponse])
@@ -118,12 +180,10 @@ def list_jobs(
from mpr.media_assets.models import TranscodeJob
qs = TranscodeJob.objects.all()
if status:
qs = qs.filter(status=status)
if source_asset_id:
qs = qs.filter(source_asset_id=source_asset_id)
return list(qs[offset : offset + limit])
@@ -154,11 +214,8 @@ def cancel_job(job_id: UUID, job=Depends(get_job)):
status_code=400, detail=f"Cannot cancel job with status: {job.status}"
)
# TODO: Cancel via gRPC
job.status = "cancelled"
job.save(update_fields=["status"])
return job
@@ -173,6 +230,4 @@ def retry_job(job_id: UUID, job=Depends(get_job)):
job.error_message = None
job.save(update_fields=["status", "progress", "error_message"])
# TODO: Resubmit via gRPC
return job