shoehorning graphql, step functions and lambdas. aws deployment scripts

This commit is contained in:
2026-02-06 18:25:42 -03:00
parent 013587d108
commit e642908abb
35 changed files with 2354 additions and 930 deletions

251
api/graphql.py Normal file
View File

@@ -0,0 +1,251 @@
"""
GraphQL API using graphene, mounted on FastAPI/Starlette.
Provides the same data as the REST API but via GraphQL queries and mutations.
Uses Django ORM directly for data access.
Types are generated from schema/ via modelgen — see api/schemas/graphql_types.py.
"""
import os
import graphene
from api.schemas.graphql_types import (
CreateJobInput,
MediaAssetType,
ScanResultType,
SystemStatusType,
TranscodeJobType,
TranscodePresetType,
)
from core.storage import BUCKET_IN, list_objects
# Media extensions (same as assets route)
# NOTE(review): duplicated in api/routes/assets — consider moving to a shared
# constants module so the two lists cannot drift apart.
VIDEO_EXTS = {".mp4", ".mkv", ".avi", ".mov", ".webm", ".flv", ".wmv", ".m4v"}
AUDIO_EXTS = {".mp3", ".wav", ".flac", ".aac", ".ogg", ".m4a"}
MEDIA_EXTS = VIDEO_EXTS | AUDIO_EXTS  # everything eligible for registration
# ---------------------------------------------------------------------------
# Queries
# ---------------------------------------------------------------------------
class Query(graphene.ObjectType):
    """Read-only GraphQL entry points, backed directly by the Django ORM."""

    assets = graphene.List(
        MediaAssetType,
        status=graphene.String(),
        search=graphene.String(),
    )
    asset = graphene.Field(MediaAssetType, id=graphene.UUID(required=True))
    jobs = graphene.List(
        TranscodeJobType,
        status=graphene.String(),
        source_asset_id=graphene.UUID(),
    )
    job = graphene.Field(TranscodeJobType, id=graphene.UUID(required=True))
    presets = graphene.List(TranscodePresetType)
    system_status = graphene.Field(SystemStatusType)

    def resolve_assets(self, info, status=None, search=None):
        """List assets, optionally filtered by status and filename substring."""
        from mpr.media_assets.models import MediaAsset

        criteria = {}
        if status:
            criteria["status"] = status
        if search:
            criteria["filename__icontains"] = search
        return MediaAsset.objects.filter(**criteria)

    def resolve_asset(self, info, id):
        """Fetch one asset by primary key, or None if it does not exist."""
        from mpr.media_assets.models import MediaAsset

        return MediaAsset.objects.filter(id=id).first()

    def resolve_jobs(self, info, status=None, source_asset_id=None):
        """List transcode jobs, optionally filtered by status and source asset."""
        from mpr.media_assets.models import TranscodeJob

        criteria = {}
        if status:
            criteria["status"] = status
        if source_asset_id:
            criteria["source_asset_id"] = source_asset_id
        return TranscodeJob.objects.filter(**criteria)

    def resolve_job(self, info, id):
        """Fetch one job by primary key, or None if it does not exist."""
        from mpr.media_assets.models import TranscodeJob

        return TranscodeJob.objects.filter(id=id).first()

    def resolve_presets(self, info):
        """List every transcode preset."""
        from mpr.media_assets.models import TranscodePreset

        return TranscodePreset.objects.all()

    def resolve_system_status(self, info):
        """Static health/version payload; graphene resolves the dict keys."""
        return {"status": "ok", "version": "0.1.0"}
# ---------------------------------------------------------------------------
# Mutations
# ---------------------------------------------------------------------------
class ScanMediaFolder(graphene.Mutation):
    """Scan the S3 media-in bucket and register any new media files as assets.

    Mirrors POST /api/assets/scan: objects whose filename is already
    registered are skipped, and per-file creation errors are logged
    without aborting the rest of the scan.
    """

    class Arguments:
        pass

    Output = ScanResultType

    def mutate(self, info):
        from mpr.media_assets.models import MediaAsset

        objects = list_objects(BUCKET_IN, extensions=MEDIA_EXTS)
        # Dedupe by filename against already-registered assets.
        existing = set(MediaAsset.objects.values_list("filename", flat=True))
        registered = []
        skipped = []
        for obj in objects:
            if obj["filename"] in existing:
                skipped.append(obj["filename"])
                continue
            try:
                MediaAsset.objects.create(
                    filename=obj["filename"],
                    file_path=obj["key"],
                    file_size=obj["size"],
                )
                registered.append(obj["filename"])
            except Exception as e:
                # Log and continue, matching the REST scan endpoint; the
                # previous bare `pass` hid registration failures entirely.
                print(f"Error registering {obj['filename']}: {e}")
        return ScanResultType(
            found=len(objects),
            registered=len(registered),
            skipped=len(skipped),
            files=registered,
        )
class CreateJob(graphene.Mutation):
    """Create a transcode/trim job and dispatch it to the configured executor.

    Mirrors POST /api/jobs/: validates the source asset and optional preset,
    snapshots the preset settings onto the job, then dispatches either to the
    Lambda/Step Functions executor (MPR_EXECUTOR=lambda) or to Celery
    (default).
    """

    class Arguments:
        # Single input object mirroring the REST JobCreate schema.
        input = CreateJobInput(required=True)

    Output = TranscodeJobType

    def mutate(self, info, input):
        from pathlib import Path
        from mpr.media_assets.models import MediaAsset, TranscodeJob, TranscodePreset

        # Resolve the source asset; surface a GraphQL error if unknown.
        try:
            source = MediaAsset.objects.get(id=input.source_asset_id)
        except MediaAsset.DoesNotExist:
            raise Exception("Source asset not found")

        preset = None
        preset_snapshot = {}
        if input.preset_id:
            try:
                preset = TranscodePreset.objects.get(id=input.preset_id)
                # Snapshot the preset at creation time so later edits to the
                # preset do not change an already-queued job.
                preset_snapshot = {
                    "name": preset.name,
                    "container": preset.container,
                    "video_codec": preset.video_codec,
                    "audio_codec": preset.audio_codec,
                }
            except TranscodePreset.DoesNotExist:
                raise Exception("Preset not found")

        # NOTE(review): trim values of 0 are falsy and thus treated as "not
        # provided" here — confirm 0.0 is never a meaningful trim point.
        if not preset and not input.trim_start and not input.trim_end:
            raise Exception("Must specify preset_id or trim_start/trim_end")

        # Default output name: "<source stem>_output.<container>"; the same
        # value is stored as output_path (the key in the output bucket).
        output_filename = input.output_filename
        if not output_filename:
            stem = Path(source.filename).stem
            ext = preset_snapshot.get("container", "mp4") if preset else "mp4"
            output_filename = f"{stem}_output.{ext}"

        job = TranscodeJob.objects.create(
            source_asset_id=source.id,
            preset_id=preset.id if preset else None,
            preset_snapshot=preset_snapshot,
            trim_start=input.trim_start,
            trim_end=input.trim_end,
            output_filename=output_filename,
            output_path=output_filename,
            priority=input.priority or 0,
        )

        # Dispatch
        executor_mode = os.environ.get("MPR_EXECUTOR", "local")
        if executor_mode == "lambda":
            # Step Functions path: fire-and-forget; completion is reported
            # back through the jobs callback endpoint.
            from task.executor import get_executor
            get_executor().run(
                job_id=str(job.id),
                source_path=source.file_path,
                output_path=output_filename,
                preset=preset_snapshot or None,
                trim_start=input.trim_start,
                trim_end=input.trim_end,
                duration=source.duration,
            )
        else:
            # Celery path: persist the task id so the job can be tracked.
            from task.tasks import run_transcode_job
            result = run_transcode_job.delay(
                job_id=str(job.id),
                source_key=source.file_path,
                output_key=output_filename,
                preset=preset_snapshot or None,
                trim_start=input.trim_start,
                trim_end=input.trim_end,
                duration=source.duration,
            )
            job.celery_task_id = result.id
            job.save(update_fields=["celery_task_id"])
        return job
class CancelJob(graphene.Mutation):
    """Cancel a job that is still pending or processing."""

    class Arguments:
        id = graphene.UUID(required=True)

    Output = TranscodeJobType

    def mutate(self, info, id):
        from mpr.media_assets.models import TranscodeJob

        job = TranscodeJob.objects.filter(id=id).first()
        if job is None:
            raise Exception("Job not found")
        cancellable = {"pending", "processing"}
        if job.status not in cancellable:
            raise Exception(f"Cannot cancel job with status: {job.status}")
        job.status = "cancelled"
        job.save(update_fields=["status"])
        return job
class Mutation(graphene.ObjectType):
    """Root mutation type: bucket scanning plus job creation/cancellation."""
    scan_media_folder = ScanMediaFolder.Field()
    create_job = CreateJob.Field()
    cancel_job = CancelJob.Field()
# ---------------------------------------------------------------------------
# Schema
# ---------------------------------------------------------------------------
# Root schema consumed by the FastAPI app's /graphql mount.
schema = graphene.Schema(query=Query, mutation=Mutation)

View File

@@ -20,7 +20,9 @@ django.setup()
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from api.graphql import schema as graphql_schema
from api.routes import assets_router, jobs_router, presets_router, system_router
from starlette_graphene3 import GraphQLApp, make_graphiql_handler
app = FastAPI(
title="MPR API",
@@ -45,6 +47,9 @@ app.include_router(assets_router, prefix="/api")
app.include_router(presets_router, prefix="/api")
app.include_router(jobs_router, prefix="/api")
# GraphQL
app.mount("/graphql", GraphQLApp(schema=graphql_schema, on_get=make_graphiql_handler()))
@app.get("/")
def root():

View File

@@ -9,45 +9,26 @@ from fastapi import APIRouter, Depends, HTTPException, Query
from api.deps import get_asset
from api.schemas import AssetCreate, AssetResponse, AssetUpdate
from core.storage import BUCKET_IN, list_objects
router = APIRouter(prefix="/assets", tags=["assets"])
# Supported media extensions
VIDEO_EXTS = {".mp4", ".mkv", ".avi", ".mov", ".webm", ".flv", ".wmv", ".m4v"}
AUDIO_EXTS = {".mp3", ".wav", ".flac", ".aac", ".ogg", ".m4a"}
MEDIA_EXTS = VIDEO_EXTS | AUDIO_EXTS
@router.post("/", response_model=AssetResponse, status_code=201)
def create_asset(data: AssetCreate):
"""
Register a media file as an asset.
The file must exist on disk. A probe task will be queued
to extract metadata asynchronously.
"""
from pathlib import Path
"""Register a media file as an asset."""
from mpr.media_assets.models import MediaAsset
# Validate file exists
path = Path(data.file_path)
if not path.exists():
raise HTTPException(status_code=400, detail="File not found")
# Store path relative to media root
import os
media_root = Path(os.environ.get("MEDIA_IN", "/app/media/in"))
try:
rel_path = str(path.relative_to(media_root))
except ValueError:
rel_path = path.name
# Create asset
asset = MediaAsset.objects.create(
filename=data.filename or path.name,
file_path=rel_path,
file_size=path.stat().st_size,
filename=data.filename or data.file_path.split("/")[-1],
file_path=data.file_path,
file_size=data.file_size,
)
# TODO: Queue probe task via gRPC/Celery
return asset
@@ -61,10 +42,8 @@ def list_assets(
from mpr.media_assets.models import MediaAsset
qs = MediaAsset.objects.all()
if status:
qs = qs.filter(status=status)
return list(qs[offset : offset + limit])
@@ -102,62 +81,36 @@ def delete_asset(asset_id: UUID, asset=Depends(get_asset)):
@router.post("/scan", response_model=dict)
def scan_media_folder():
"""
Scan the media folder for new video/audio files and register them as assets.
Returns a summary of files found and registered.
Scan the S3 media-in bucket for new video/audio files and register them as assets.
"""
import os
from pathlib import Path
from mpr.media_assets.models import MediaAsset
# Get media input folder from environment
media_root = os.environ.get("MEDIA_IN", "/app/media/in")
media_path = Path(media_root)
if not media_path.exists():
raise HTTPException(
status_code=500, detail=f"Media folder not found: {media_root}"
)
# Supported video/audio extensions
video_exts = {".mp4", ".mkv", ".avi", ".mov", ".webm", ".flv", ".wmv", ".m4v"}
audio_exts = {".mp3", ".wav", ".flac", ".aac", ".ogg", ".m4a"}
supported_exts = video_exts | audio_exts
# List objects from S3 bucket
objects = list_objects(BUCKET_IN, extensions=MEDIA_EXTS)
# Get existing filenames to avoid duplicates
existing_filenames = set(MediaAsset.objects.values_list("filename", flat=True))
# Scan for media files
found_files = []
registered_files = []
skipped_files = []
for file_path in media_path.rglob("*"):
if file_path.is_file() and file_path.suffix.lower() in supported_exts:
found_files.append(str(file_path))
for obj in objects:
if obj["filename"] in existing_filenames:
skipped_files.append(obj["filename"])
continue
# Skip if already registered
if file_path.name in existing_filenames:
skipped_files.append(file_path.name)
continue
# Register new asset with path relative to media root
rel_path = str(file_path.relative_to(media_path))
try:
asset = MediaAsset.objects.create(
filename=file_path.name,
file_path=rel_path,
file_size=file_path.stat().st_size,
)
registered_files.append(file_path.name)
# TODO: Queue probe task to extract metadata
except Exception as e:
print(f"Error registering {file_path.name}: {e}")
try:
MediaAsset.objects.create(
filename=obj["filename"],
file_path=obj["key"],
file_size=obj["size"],
)
registered_files.append(obj["filename"])
except Exception as e:
print(f"Error registering {obj['filename']}: {e}")
return {
"found": len(found_files),
"found": len(objects),
"registered": len(registered_files),
"skipped": len(skipped_files),
"files": registered_files,

View File

@@ -2,17 +2,20 @@
Job endpoints - transcode/trim job management.
"""
import json
import os
from pathlib import Path
from typing import Optional
from uuid import UUID
from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi import APIRouter, Depends, Header, HTTPException, Query
from api.deps import get_asset, get_job, get_preset
from api.schemas import JobCreate, JobResponse
router = APIRouter(prefix="/jobs", tags=["jobs"])
CALLBACK_API_KEY = os.environ.get("CALLBACK_API_KEY", "")
@router.post("/", response_model=JobResponse, status_code=201)
def create_job(data: JobCreate):
@@ -36,7 +39,6 @@ def create_job(data: JobCreate):
if data.preset_id:
try:
preset = TranscodePreset.objects.get(id=data.preset_id)
# Snapshot preset at job creation time
preset_snapshot = {
"name": preset.name,
"container": preset.container,
@@ -61,22 +63,13 @@ def create_job(data: JobCreate):
status_code=400, detail="Must specify preset_id or trim_start/trim_end"
)
# Generate output filename and path
import os
from pathlib import Path
# Generate output filename - stored as S3 key in output bucket
output_filename = data.output_filename
if not output_filename:
stem = Path(source.filename).stem
ext = preset_snapshot.get("container", "mp4") if preset else "mp4"
output_filename = f"{stem}_output.{ext}"
media_out = os.environ.get("MEDIA_OUT", "/app/media/out")
output_path = str(Path(media_out) / output_filename)
media_in = os.environ.get("MEDIA_IN", "/app/media/in")
source_path = str(Path(media_in) / source.file_path)
# Create job
job = TranscodeJob.objects.create(
source_asset_id=source.id,
@@ -85,26 +78,95 @@ def create_job(data: JobCreate):
trim_start=data.trim_start,
trim_end=data.trim_end,
output_filename=output_filename,
output_path=output_path,
output_path=output_filename, # S3 key in output bucket
priority=data.priority or 0,
)
# Dispatch to Celery
# Dispatch based on executor mode
executor_mode = os.environ.get("MPR_EXECUTOR", "local")
if executor_mode == "lambda":
_dispatch_lambda(job, source, preset_snapshot)
else:
_dispatch_celery(job, source, preset_snapshot)
return job
def _dispatch_celery(job, source, preset_snapshot):
"""Dispatch job to Celery worker."""
from task.tasks import run_transcode_job
result = run_transcode_job.delay(
job_id=str(job.id),
source_path=source_path,
output_path=output_path,
source_key=source.file_path,
output_key=job.output_filename,
preset=preset_snapshot or None,
trim_start=data.trim_start,
trim_end=data.trim_end,
trim_start=job.trim_start,
trim_end=job.trim_end,
duration=source.duration,
)
job.celery_task_id = result.id
job.save(update_fields=["celery_task_id"])
return job
def _dispatch_lambda(job, source, preset_snapshot):
"""Dispatch job to AWS Step Functions."""
from task.executor import get_executor
executor = get_executor()
executor.run(
job_id=str(job.id),
source_path=source.file_path,
output_path=job.output_filename,
preset=preset_snapshot or None,
trim_start=job.trim_start,
trim_end=job.trim_end,
duration=source.duration,
)
@router.post("/{job_id}/callback")
def job_callback(
job_id: UUID,
payload: dict,
x_api_key: Optional[str] = Header(None),
):
"""
Callback endpoint for Lambda to report job completion.
Protected by API key.
"""
if CALLBACK_API_KEY and x_api_key != CALLBACK_API_KEY:
raise HTTPException(status_code=403, detail="Invalid API key")
from django.utils import timezone
from mpr.media_assets.models import TranscodeJob
try:
job = TranscodeJob.objects.get(id=job_id)
except TranscodeJob.DoesNotExist:
raise HTTPException(status_code=404, detail="Job not found")
status = payload.get("status", "failed")
job.status = status
job.progress = 100.0 if status == "completed" else job.progress
update_fields = ["status", "progress"]
if payload.get("error"):
job.error_message = payload["error"]
update_fields.append("error_message")
if status == "completed":
job.completed_at = timezone.now()
update_fields.append("completed_at")
elif status == "failed":
job.completed_at = timezone.now()
update_fields.append("completed_at")
job.save(update_fields=update_fields)
return {"ok": True}
@router.get("/", response_model=list[JobResponse])
@@ -118,12 +180,10 @@ def list_jobs(
from mpr.media_assets.models import TranscodeJob
qs = TranscodeJob.objects.all()
if status:
qs = qs.filter(status=status)
if source_asset_id:
qs = qs.filter(source_asset_id=source_asset_id)
return list(qs[offset : offset + limit])
@@ -154,11 +214,8 @@ def cancel_job(job_id: UUID, job=Depends(get_job)):
status_code=400, detail=f"Cannot cancel job with status: {job.status}"
)
# TODO: Cancel via gRPC
job.status = "cancelled"
job.save(update_fields=["status"])
return job
@@ -173,6 +230,4 @@ def retry_job(job_id: UUID, job=Depends(get_job)):
job.error_message = None
job.save(update_fields=["status", "progress", "error_message"])
# TODO: Resubmit via gRPC
return job

View File

@@ -0,0 +1,129 @@
"""
Graphene Types - GENERATED FILE
Do not edit directly. Regenerate using modelgen.
"""
import graphene
class AssetStatus(graphene.Enum):
    """Lifecycle status of a media asset."""
    PENDING = "pending"
    READY = "ready"
    ERROR = "error"
class JobStatus(graphene.Enum):
    """Lifecycle status of a transcode job."""
    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
class MediaAssetType(graphene.ObjectType):
    """A video/audio file registered in the system."""
    # Identity / storage
    id = graphene.UUID()
    filename = graphene.String()
    file_path = graphene.String()  # object key in the media-in bucket
    status = graphene.String()
    error_message = graphene.String()
    file_size = graphene.Int()
    # Probed media metadata (may be unset until the asset is processed)
    duration = graphene.Float()
    video_codec = graphene.String()
    audio_codec = graphene.String()
    width = graphene.Int()
    height = graphene.Int()
    framerate = graphene.Float()
    bitrate = graphene.Int()
    properties = graphene.JSONString()
    # User-supplied annotations
    comments = graphene.String()
    tags = graphene.List(graphene.String)
    created_at = graphene.DateTime()
    updated_at = graphene.DateTime()
class TranscodePresetType(graphene.ObjectType):
    """A reusable transcoding configuration (like Handbrake presets)."""
    id = graphene.UUID()
    name = graphene.String()
    description = graphene.String()
    is_builtin = graphene.Boolean()
    # Video settings
    container = graphene.String()
    video_codec = graphene.String()
    video_bitrate = graphene.String()
    video_crf = graphene.Int()
    video_preset = graphene.String()
    resolution = graphene.String()
    framerate = graphene.Float()
    # Audio settings
    audio_codec = graphene.String()
    audio_bitrate = graphene.String()
    audio_channels = graphene.Int()
    audio_samplerate = graphene.Int()
    # Extra command-line arguments appended verbatim
    extra_args = graphene.List(graphene.String)
    created_at = graphene.DateTime()
    updated_at = graphene.DateTime()
class TranscodeJobType(graphene.ObjectType):
    """A transcoding or trimming job in the queue."""
    id = graphene.UUID()
    source_asset_id = graphene.UUID()
    preset_id = graphene.UUID()
    # Preset settings frozen at job-creation time
    preset_snapshot = graphene.JSONString()
    trim_start = graphene.Float()
    trim_end = graphene.Float()
    output_filename = graphene.String()
    output_path = graphene.String()  # key in the output bucket
    output_asset_id = graphene.UUID()
    # Progress reporting
    status = graphene.String()
    progress = graphene.Float()
    current_frame = graphene.Int()
    current_time = graphene.Float()
    speed = graphene.String()
    error_message = graphene.String()
    # Executor handles: Celery task id or Step Functions execution ARN
    celery_task_id = graphene.String()
    execution_arn = graphene.String()
    priority = graphene.Int()
    created_at = graphene.DateTime()
    started_at = graphene.DateTime()
    completed_at = graphene.DateTime()
class CreateJobInput(graphene.InputObjectType):
    """Request body for creating a transcode/trim job."""
    source_asset_id = graphene.UUID(required=True)
    # At least one of preset_id or trim_start/trim_end must be supplied
    preset_id = graphene.UUID()
    trim_start = graphene.Float()
    trim_end = graphene.Float()
    # Defaults to "<source stem>_output.<container>" when omitted
    output_filename = graphene.String()
    priority = graphene.Int(default_value=0)
class SystemStatusType(graphene.ObjectType):
    """System status response."""
    status = graphene.String()   # e.g. "ok"
    version = graphene.String()  # API version string
class ScanResultType(graphene.ObjectType):
    """Result of scanning the media input bucket."""
    found = graphene.Int()       # total media objects seen in the bucket
    registered = graphene.Int()  # newly created assets
    skipped = graphene.Int()     # filenames already registered
    files = graphene.List(graphene.String)  # filenames that were registered
class WorkerStatusType(graphene.ObjectType):
    """Worker health and capabilities."""
    available = graphene.Boolean()
    active_jobs = graphene.Int()
    supported_codecs = graphene.List(graphene.String)
    gpu_available = graphene.Boolean()

View File

@@ -83,6 +83,7 @@ class TranscodeJob(BaseModel):
speed: Optional[str] = None
error_message: Optional[str] = None
celery_task_id: Optional[str] = None
execution_arn: Optional[str] = None
priority: int = 0
created_at: Optional[datetime] = None
started_at: Optional[datetime] = None