major refactor

2026-03-13 01:07:02 -03:00
parent eaaf2ad60c
commit 3eeedebb15
61 changed files with 441 additions and 242 deletions

273
core/api/graphql.py Normal file

@@ -0,0 +1,273 @@
"""
GraphQL API using strawberry, served via FastAPI.
Primary API for MPR — all client interactions go through GraphQL.
Uses core.db for data access.
Types are generated from schema/ via modelgen — see api/schema/graphql.py.
"""
import os
from typing import List, Optional
from uuid import UUID
import strawberry
from strawberry.schema.config import StrawberryConfig
from strawberry.types import Info
from core.api.schema.graphql import (
CreateJobInput,
DeleteResultType,
MediaAssetType,
ScanResultType,
SystemStatusType,
TranscodeJobType,
TranscodePresetType,
UpdateAssetInput,
)
from core.storage import BUCKET_IN, list_objects
VIDEO_EXTS = {".mp4", ".mkv", ".avi", ".mov", ".webm", ".flv", ".wmv", ".m4v"}
AUDIO_EXTS = {".mp3", ".wav", ".flac", ".aac", ".ogg", ".m4a"}
MEDIA_EXTS = VIDEO_EXTS | AUDIO_EXTS
# ---------------------------------------------------------------------------
# Queries
# ---------------------------------------------------------------------------
@strawberry.type
class Query:
@strawberry.field
def assets(
self,
info: Info,
status: Optional[str] = None,
search: Optional[str] = None,
) -> List[MediaAssetType]:
from core.db import list_assets
return list_assets(status=status, search=search)
@strawberry.field
def asset(self, info: Info, id: UUID) -> Optional[MediaAssetType]:
from core.db import get_asset
try:
return get_asset(id)
except Exception:
return None
@strawberry.field
def jobs(
self,
info: Info,
status: Optional[str] = None,
source_asset_id: Optional[UUID] = None,
) -> List[TranscodeJobType]:
from core.db import list_jobs
return list_jobs(status=status, source_asset_id=source_asset_id)
@strawberry.field
def job(self, info: Info, id: UUID) -> Optional[TranscodeJobType]:
from core.db import get_job
try:
return get_job(id)
except Exception:
return None
@strawberry.field
def presets(self, info: Info) -> List[TranscodePresetType]:
from core.db import list_presets
return list_presets()
@strawberry.field
def system_status(self, info: Info) -> SystemStatusType:
return SystemStatusType(status="ok", version="0.1.0")
# ---------------------------------------------------------------------------
# Mutations
# ---------------------------------------------------------------------------
@strawberry.type
class Mutation:
@strawberry.mutation
def scan_media_folder(self, info: Info) -> ScanResultType:
from core.db import create_asset, get_asset_filenames
objects = list_objects(BUCKET_IN, extensions=MEDIA_EXTS)
existing = get_asset_filenames()
registered = []
skipped = []
for obj in objects:
if obj["filename"] in existing:
skipped.append(obj["filename"])
continue
try:
create_asset(
filename=obj["filename"],
file_path=obj["key"],
file_size=obj["size"],
)
registered.append(obj["filename"])
except Exception:
pass
return ScanResultType(
found=len(objects),
registered=len(registered),
skipped=len(skipped),
files=registered,
)
@strawberry.mutation
def create_job(self, info: Info, input: CreateJobInput) -> TranscodeJobType:
from pathlib import Path
from core.db import create_job, get_asset, get_preset
try:
source = get_asset(input.source_asset_id)
except Exception:
raise Exception("Source asset not found")
preset = None
preset_snapshot = {}
if input.preset_id:
try:
preset = get_preset(input.preset_id)
preset_snapshot = {
"name": preset.name,
"container": preset.container,
"video_codec": preset.video_codec,
"audio_codec": preset.audio_codec,
}
except Exception:
raise Exception("Preset not found")
        if not preset and input.trim_start is None and input.trim_end is None:
            raise Exception("Must specify preset_id or trim_start/trim_end")
        output_filename = input.output_filename
        if not output_filename:
            stem = Path(source.filename).stem
            ext = preset_snapshot.get("container", "mp4") if preset else "mp4"
            output_filename = f"{stem}_output.{ext}"
        job = create_job(
            source_asset_id=source.id,
            preset_id=preset.id if preset else None,
            preset_snapshot=preset_snapshot,
            trim_start=input.trim_start,
            trim_end=input.trim_end,
            output_filename=output_filename,
            output_path=output_filename,
            priority=input.priority or 0,
        )
        executor_mode = os.environ.get("MPR_EXECUTOR", "local")
        if executor_mode in ("lambda", "gcp"):
            from core.task.executor import get_executor

            get_executor().run(
                job_id=str(job.id),
                source_path=source.file_path,
                output_path=output_filename,
                preset=preset_snapshot or None,
                trim_start=input.trim_start,
                trim_end=input.trim_end,
                duration=source.duration,
            )
        else:
            from core.task.tasks import run_transcode_job

            result = run_transcode_job.delay(
                job_id=str(job.id),
                source_key=source.file_path,
                output_key=output_filename,
                preset=preset_snapshot or None,
                trim_start=input.trim_start,
                trim_end=input.trim_end,
                duration=source.duration,
            )
            job.celery_task_id = result.id
            job.save(update_fields=["celery_task_id"])
        return job

    @strawberry.mutation
    def cancel_job(self, info: Info, id: UUID) -> TranscodeJobType:
        from core.db import get_job, update_job

        try:
            job = get_job(id)
        except Exception:
            raise Exception("Job not found")
        if job.status not in ("pending", "processing"):
            raise Exception(f"Cannot cancel job with status: {job.status}")
        return update_job(job, status="cancelled")

    @strawberry.mutation
    def retry_job(self, info: Info, id: UUID) -> TranscodeJobType:
        from core.db import get_job, update_job

        try:
            job = get_job(id)
        except Exception:
            raise Exception("Job not found")
        if job.status != "failed":
            raise Exception("Only failed jobs can be retried")
        return update_job(job, status="pending", progress=0, error_message=None)

    @strawberry.mutation
    def update_asset(self, info: Info, id: UUID, input: UpdateAssetInput) -> MediaAssetType:
        from core.db import get_asset, update_asset

        try:
            asset = get_asset(id)
        except Exception:
            raise Exception("Asset not found")
        fields = {}
        if input.comments is not None:
            fields["comments"] = input.comments
        if input.tags is not None:
            fields["tags"] = input.tags
        if fields:
            asset = update_asset(asset, **fields)
        return asset

    @strawberry.mutation
    def delete_asset(self, info: Info, id: UUID) -> DeleteResultType:
        from core.db import delete_asset, get_asset

        try:
            asset = get_asset(id)
        except Exception:
            raise Exception("Asset not found")
        delete_asset(asset)
        return DeleteResultType(ok=True)

# ---------------------------------------------------------------------------
# Schema
# ---------------------------------------------------------------------------
schema = strawberry.Schema(
    query=Query,
    mutation=Mutation,
    config=StrawberryConfig(auto_camel_case=False),
)

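Because the schema is built with auto_camel_case=False, clients query snake_case field names directly. A minimal sketch of exercising the endpoint; only the /graphql prefix comes from main.py below, the host and port are assumptions:

# Smoke test against the GraphQL API; assumes the FastAPI app from
# core/api/main.py is running on localhost:8000 (hypothetical host/port).
import requests

QUERY = """
query {
  assets(status: "ready") {
    id
    filename
    duration
  }
}
"""

resp = requests.post("http://localhost:8000/graphql", json={"query": QUERY})
resp.raise_for_status()
print(resp.json()["data"]["assets"])
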
98
core/api/main.py Normal file

@@ -0,0 +1,98 @@
"""
MPR FastAPI Application
Serves GraphQL API and Lambda callback endpoint.
"""
import os
import sys
from typing import Optional
from uuid import UUID
# Add project root to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
# Initialize Django before importing models
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "admin.mpr.settings")
import django
django.setup()
from fastapi import FastAPI, Header, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from strawberry.fastapi import GraphQLRouter
from core.api.graphql import schema as graphql_schema
CALLBACK_API_KEY = os.environ.get("CALLBACK_API_KEY", "")
app = FastAPI(
title="MPR API",
description="Media Processor — GraphQL API",
version="0.1.0",
docs_url="/docs",
redoc_url="/redoc",
)
# CORS
app.add_middleware(
CORSMiddleware,
allow_origins=["http://mpr.local.ar", "http://localhost:5173"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# GraphQL
graphql_router = GraphQLRouter(schema=graphql_schema, graphql_ide="graphiql")
app.include_router(graphql_router, prefix="/graphql")
@app.get("/")
def root():
"""API root."""
return {
"name": "MPR API",
"version": "0.1.0",
"graphql": "/graphql",
}
@app.post("/api/jobs/{job_id}/callback")
def job_callback(
job_id: UUID,
payload: dict,
x_api_key: Optional[str] = Header(None),
):
"""
Callback endpoint for Lambda to report job completion.
Protected by API key.
"""
if CALLBACK_API_KEY and x_api_key != CALLBACK_API_KEY:
raise HTTPException(status_code=403, detail="Invalid API key")
from django.utils import timezone
from core.db import get_job, update_job
try:
job = get_job(job_id)
except Exception:
raise HTTPException(status_code=404, detail="Job not found")
status = payload.get("status", "failed")
fields = {
"status": status,
"progress": 100.0 if status == "completed" else job.progress,
}
if payload.get("error"):
fields["error_message"] = payload["error"]
if status in ("completed", "failed"):
fields["completed_at"] = timezone.now()
update_job(job, **fields)
return {"ok": True}

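FastAPI maps the x_api_key parameter onto an X-API-Key request header. A sketch of the callback a Lambda (or the GCP handler) would issue on completion; host, key, and job id are placeholders:

# Hypothetical completion callback, mirroring what job_callback expects.
import requests

job_id = "00000000-0000-0000-0000-000000000000"  # placeholder job UUID
requests.post(
    f"http://localhost:8000/api/jobs/{job_id}/callback",
    json={"status": "completed"},        # or {"status": "failed", "error": "..."}
    headers={"X-API-Key": "change-me"},  # must match CALLBACK_API_KEY
    timeout=10,
)
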
158
core/api/schema/graphql.py Normal file

@@ -0,0 +1,158 @@
"""
Strawberry Types - GENERATED FILE
Do not edit directly. Regenerate using modelgen.
"""
import strawberry
from enum import Enum
from typing import List, Optional
from uuid import UUID
from datetime import datetime
from strawberry.scalars import JSON
@strawberry.enum
class AssetStatus(Enum):
PENDING = "pending"
READY = "ready"
ERROR = "error"
@strawberry.enum
class JobStatus(Enum):
PENDING = "pending"
PROCESSING = "processing"
COMPLETED = "completed"
FAILED = "failed"
CANCELLED = "cancelled"
@strawberry.type
class MediaAssetType:
"""A video/audio file registered in the system."""
id: Optional[UUID] = None
filename: Optional[str] = None
file_path: Optional[str] = None
status: Optional[str] = None
error_message: Optional[str] = None
file_size: Optional[int] = None
duration: Optional[float] = None
video_codec: Optional[str] = None
audio_codec: Optional[str] = None
width: Optional[int] = None
height: Optional[int] = None
framerate: Optional[float] = None
bitrate: Optional[int] = None
properties: Optional[JSON] = None
comments: Optional[str] = None
tags: Optional[List[str]] = None
created_at: Optional[datetime] = None
updated_at: Optional[datetime] = None
@strawberry.type
class TranscodePresetType:
"""A reusable transcoding configuration (like Handbrake presets)."""
id: Optional[UUID] = None
name: Optional[str] = None
description: Optional[str] = None
is_builtin: Optional[bool] = None
container: Optional[str] = None
video_codec: Optional[str] = None
video_bitrate: Optional[str] = None
video_crf: Optional[int] = None
video_preset: Optional[str] = None
resolution: Optional[str] = None
framerate: Optional[float] = None
audio_codec: Optional[str] = None
audio_bitrate: Optional[str] = None
audio_channels: Optional[int] = None
audio_samplerate: Optional[int] = None
extra_args: Optional[List[str]] = None
created_at: Optional[datetime] = None
updated_at: Optional[datetime] = None
@strawberry.type
class TranscodeJobType:
"""A transcoding or trimming job in the queue."""
id: Optional[UUID] = None
source_asset_id: Optional[UUID] = None
preset_id: Optional[UUID] = None
preset_snapshot: Optional[JSON] = None
trim_start: Optional[float] = None
trim_end: Optional[float] = None
output_filename: Optional[str] = None
output_path: Optional[str] = None
output_asset_id: Optional[UUID] = None
status: Optional[str] = None
progress: Optional[float] = None
current_frame: Optional[int] = None
current_time: Optional[float] = None
speed: Optional[str] = None
error_message: Optional[str] = None
celery_task_id: Optional[str] = None
execution_arn: Optional[str] = None
priority: Optional[int] = None
created_at: Optional[datetime] = None
started_at: Optional[datetime] = None
completed_at: Optional[datetime] = None
@strawberry.input
class CreateJobInput:
"""Request body for creating a transcode/trim job."""
source_asset_id: UUID
preset_id: Optional[UUID] = None
trim_start: Optional[float] = None
trim_end: Optional[float] = None
output_filename: Optional[str] = None
priority: int = 0
@strawberry.input
class UpdateAssetInput:
"""Request body for updating asset metadata."""
comments: Optional[str] = None
tags: Optional[List[str]] = None
@strawberry.type
class SystemStatusType:
"""System status response."""
status: Optional[str] = None
version: Optional[str] = None
@strawberry.type
class ScanResultType:
"""Result of scanning the media input bucket."""
found: Optional[int] = None
registered: Optional[int] = None
skipped: Optional[int] = None
files: Optional[List[str]] = None
@strawberry.type
class DeleteResultType:
"""Result of a delete operation."""
ok: Optional[bool] = None
@strawberry.type
class WorkerStatusType:
"""Worker health and capabilities."""
available: Optional[bool] = None
active_jobs: Optional[int] = None
supported_codecs: Optional[List[str]] = None
gpu_available: Optional[bool] = None

19
core/db/__init__.py Normal file

@@ -0,0 +1,19 @@
from .assets import (
    create_asset,
    delete_asset,
    get_asset,
    get_asset_filenames,
    list_assets,
    update_asset,
)
from .jobs import (
    create_job,
    get_job,
    list_jobs,
    update_job,
    update_job_fields,
)
from .presets import (
    get_preset,
    list_presets,
)

48
core/db/assets.py Normal file

@@ -0,0 +1,48 @@
"""Database operations for MediaAsset."""
from typing import Optional
from uuid import UUID
def list_assets(status: Optional[str] = None, search: Optional[str] = None):
from admin.mpr.media_assets.models import MediaAsset
qs = MediaAsset.objects.all()
if status:
qs = qs.filter(status=status)
if search:
qs = qs.filter(filename__icontains=search)
return list(qs)
def get_asset(id: UUID):
from admin.mpr.media_assets.models import MediaAsset
return MediaAsset.objects.get(id=id)
def get_asset_filenames() -> set[str]:
from admin.mpr.media_assets.models import MediaAsset
return set(MediaAsset.objects.values_list("filename", flat=True))
def create_asset(*, filename: str, file_path: str, file_size: int):
from admin.mpr.media_assets.models import MediaAsset
return MediaAsset.objects.create(
filename=filename,
file_path=file_path,
file_size=file_size,
)
def update_asset(asset, **fields):
for key, value in fields.items():
setattr(asset, key, value)
asset.save(update_fields=list(fields.keys()))
return asset
def delete_asset(asset):
asset.delete()

40
core/db/jobs.py Normal file

@@ -0,0 +1,40 @@
"""Database operations for TranscodeJob."""
from typing import Optional
from uuid import UUID
def list_jobs(status: Optional[str] = None, source_asset_id: Optional[UUID] = None):
from admin.mpr.media_assets.models import TranscodeJob
qs = TranscodeJob.objects.all()
if status:
qs = qs.filter(status=status)
if source_asset_id:
qs = qs.filter(source_asset_id=source_asset_id)
return list(qs)
def get_job(id: UUID):
from admin.mpr.media_assets.models import TranscodeJob
return TranscodeJob.objects.get(id=id)
def create_job(**fields):
from admin.mpr.media_assets.models import TranscodeJob
return TranscodeJob.objects.create(**fields)
def update_job(job, **fields):
for key, value in fields.items():
setattr(job, key, value)
job.save(update_fields=list(fields.keys()))
return job
def update_job_fields(job_id, **fields):
from admin.mpr.media_assets.models import TranscodeJob
TranscodeJob.objects.filter(id=job_id).update(**fields)

15
core/db/presets.py Normal file

@@ -0,0 +1,15 @@
"""Database operations for TranscodePreset."""
from uuid import UUID
def list_presets():
from admin.mpr.media_assets.models import TranscodePreset
return list(TranscodePreset.objects.all())
def get_preset(id: UUID):
from admin.mpr.media_assets.models import TranscodePreset
return TranscodePreset.objects.get(id=id)

10
core/rpc/__init__.py Normal file

@@ -0,0 +1,10 @@
"""
MPR gRPC Module
Provides gRPC server and client for worker communication.
Generated stubs (worker_pb2.py, worker_pb2_grpc.py) are created by:
python schema/generate.py --proto
Requires: grpcio, grpcio-tools
"""

204
core/rpc/client.py Normal file

@@ -0,0 +1,204 @@
"""
gRPC Client - Used by FastAPI to communicate with workers.
"""
import json
import logging
import os
from typing import Callable, Iterator, Optional
import grpc
# Generated stubs - run `python schema/generate.py --proto` if missing
from . import worker_pb2, worker_pb2_grpc
logger = logging.getLogger(__name__)
# Configuration from environment
GRPC_HOST = os.environ.get("GRPC_HOST", "grpc")
GRPC_PORT = int(os.environ.get("GRPC_PORT", "50051"))
class WorkerClient:
"""gRPC client for worker communication."""

    def __init__(self, host: Optional[str] = None, port: Optional[int] = None):
"""
Initialize the client.
Args:
host: gRPC server host (defaults to GRPC_HOST env var)
port: gRPC server port (defaults to GRPC_PORT env var)
"""
self.host = host or GRPC_HOST
self.port = port or GRPC_PORT
self.address = f"{self.host}:{self.port}"
self._channel: Optional[grpc.Channel] = None
self._stub: Optional[worker_pb2_grpc.WorkerServiceStub] = None
def _ensure_connected(self) -> worker_pb2_grpc.WorkerServiceStub:
"""Ensure channel is connected and return stub."""
if self._channel is None:
self._channel = grpc.insecure_channel(self.address)
self._stub = worker_pb2_grpc.WorkerServiceStub(self._channel)
return self._stub
def close(self) -> None:
"""Close the channel."""
if self._channel:
self._channel.close()
self._channel = None
self._stub = None
def __enter__(self):
self._ensure_connected()
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.close()
def submit_job(
self,
job_id: str,
source_path: str,
output_path: str,
preset: Optional[dict] = None,
trim_start: Optional[float] = None,
trim_end: Optional[float] = None,
) -> tuple[bool, str]:
"""
Submit a job to the worker.
Args:
job_id: Unique job identifier
source_path: Path to source file
output_path: Path for output file
preset: Transcode preset dict (optional)
trim_start: Trim start time in seconds (optional)
trim_end: Trim end time in seconds (optional)
Returns:
Tuple of (accepted: bool, message: str)
"""
stub = self._ensure_connected()
request = worker_pb2.JobRequest(
job_id=job_id,
source_path=source_path,
output_path=output_path,
preset_json=json.dumps(preset) if preset else "",
)
if trim_start is not None:
request.trim_start = trim_start
if trim_end is not None:
request.trim_end = trim_end
try:
response = stub.SubmitJob(request)
return response.accepted, response.message
except grpc.RpcError as e:
logger.error(f"SubmitJob RPC failed: {e}")
return False, str(e)
def stream_progress(
self,
job_id: str,
callback: Optional[Callable[[dict], None]] = None,
) -> Iterator[dict]:
"""
Stream progress updates for a job.
Args:
job_id: Job identifier
callback: Optional callback for each update
Yields:
Progress update dicts
"""
stub = self._ensure_connected()
request = worker_pb2.ProgressRequest(job_id=job_id)
try:
for update in stub.StreamProgress(request):
progress = {
"job_id": update.job_id,
"progress": update.progress,
"current_frame": update.current_frame,
"current_time": update.current_time,
"speed": update.speed,
"status": update.status,
"error": update.error if update.HasField("error") else None,
}
if callback:
callback(progress)
yield progress
if update.status in ("completed", "failed", "cancelled"):
break
except grpc.RpcError as e:
logger.error(f"StreamProgress RPC failed: {e}")
yield {
"job_id": job_id,
"progress": 0,
"status": "error",
"error": str(e),
}
def cancel_job(self, job_id: str) -> tuple[bool, str]:
"""
Cancel a running job.
Args:
job_id: Job identifier
Returns:
Tuple of (cancelled: bool, message: str)
"""
stub = self._ensure_connected()
request = worker_pb2.CancelRequest(job_id=job_id)
try:
response = stub.CancelJob(request)
return response.cancelled, response.message
except grpc.RpcError as e:
logger.error(f"CancelJob RPC failed: {e}")
return False, str(e)
def get_worker_status(self) -> Optional[dict]:
"""
Get worker status and capabilities.
Returns:
Status dict or None on error
"""
stub = self._ensure_connected()
try:
response = stub.GetWorkerStatus(worker_pb2.Empty())
return {
"available": response.available,
"active_jobs": response.active_jobs,
"supported_codecs": list(response.supported_codecs),
"gpu_available": response.gpu_available,
}
except grpc.RpcError as e:
logger.error(f"GetWorkerStatus RPC failed: {e}")
return None
# Singleton client instance
_client: Optional[WorkerClient] = None
def get_client() -> WorkerClient:
"""Get or create the singleton client (uses env vars for config)."""
global _client
if _client is None:
_client = WorkerClient()
return _client

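A quick sketch of driving the client end to end; the host override and all identifiers are placeholders, and it assumes a worker is serving gRPC on the default port:

# Illustrative only; paths and job id are placeholders.
from core.rpc.client import WorkerClient

with WorkerClient(host="localhost") as client:
    accepted, message = client.submit_job(
        job_id="demo-job",
        source_path="in/source.mp4",
        output_path="out/source_output.mp4",
        trim_start=0.0,
        trim_end=10.0,
    )
    if accepted:
        for update in client.stream_progress("demo-job"):
            print(update["progress"], update["status"])
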
64
core/rpc/protos/worker.proto Normal file

@@ -0,0 +1,64 @@
// Protocol Buffer Definitions - GENERATED FILE
//
// Do not edit directly. Regenerate using modelgen.
syntax = "proto3";

package mpr.worker;

service WorkerService {
  rpc SubmitJob(JobRequest) returns (JobResponse);
  rpc StreamProgress(ProgressRequest) returns (stream ProgressUpdate);
  rpc CancelJob(CancelRequest) returns (CancelResponse);
  rpc GetWorkerStatus(Empty) returns (WorkerStatus);
}

message JobRequest {
  string job_id = 1;
  string source_path = 2;
  string output_path = 3;
  string preset_json = 4;
  optional float trim_start = 5;
  optional float trim_end = 6;
}

message JobResponse {
  string job_id = 1;
  bool accepted = 2;
  string message = 3;
}

message ProgressRequest {
  string job_id = 1;
}

message ProgressUpdate {
  string job_id = 1;
  int32 progress = 2;
  int32 current_frame = 3;
  float current_time = 4;
  float speed = 5;
  string status = 6;
  optional string error = 7;
}

message CancelRequest {
  string job_id = 1;
}

message CancelResponse {
  string job_id = 1;
  bool cancelled = 2;
  string message = 3;
}

message WorkerStatus {
  bool available = 1;
  int32 active_jobs = 2;
  repeated string supported_codecs = 3;
  bool gpu_available = 4;
}

message Empty {
  // Empty
}

273
core/rpc/server.py Normal file

@@ -0,0 +1,273 @@
"""
gRPC Server - Worker Service Implementation
Runs in the worker process to handle job submissions and progress streaming.
"""
import json
import logging
import os
import time
from concurrent import futures
from typing import Iterator
import grpc
# Configuration from environment
GRPC_PORT = int(os.environ.get("GRPC_PORT", "50051"))
GRPC_MAX_WORKERS = int(os.environ.get("GRPC_MAX_WORKERS", "10"))
# Generated stubs - run `python schema/generate.py --proto` if missing
from . import worker_pb2, worker_pb2_grpc
logger = logging.getLogger(__name__)
# Active jobs progress tracking (shared state for streaming)
_active_jobs: dict[str, dict] = {}
class WorkerServicer(worker_pb2_grpc.WorkerServiceServicer):
"""gRPC service implementation for worker operations."""
def __init__(self, celery_app=None):
"""
Initialize the servicer.
Args:
celery_app: Optional Celery app for task dispatch
"""
self.celery_app = celery_app
def SubmitJob(self, request, context):
"""Submit a transcode/trim job to the worker."""
job_id = request.job_id
logger.info(f"SubmitJob: {job_id}")
try:
# Parse preset
preset = json.loads(request.preset_json) if request.preset_json else None
# Initialize progress tracking
_active_jobs[job_id] = {
"status": "pending",
"progress": 0,
"current_frame": 0,
"current_time": 0.0,
"speed": 0.0,
"error": None,
}
# Dispatch to Celery if available
if self.celery_app:
from core.task.tasks import run_transcode_job
task = run_transcode_job.delay(
job_id=job_id,
source_path=request.source_path,
output_path=request.output_path,
preset=preset,
trim_start=request.trim_start
if request.HasField("trim_start")
else None,
trim_end=request.trim_end if request.HasField("trim_end") else None,
)
_active_jobs[job_id]["celery_task_id"] = task.id
return worker_pb2.JobResponse(
job_id=job_id,
accepted=True,
message="Job submitted",
)
except Exception as e:
logger.exception(f"SubmitJob failed: {e}")
return worker_pb2.JobResponse(
job_id=job_id,
accepted=False,
message=str(e),
)
def StreamProgress(self, request, context) -> Iterator[worker_pb2.ProgressUpdate]:
"""Stream progress updates for a job."""
job_id = request.job_id
logger.info(f"StreamProgress: {job_id}")
# Check if job exists
if job_id not in _active_jobs:
yield worker_pb2.ProgressUpdate(
job_id=job_id,
progress=0,
status="not_found",
error="Job not found",
)
return
# Stream updates until job completes
last_progress = -1
while True:
if context.cancelled():
logger.info(f"StreamProgress cancelled: {job_id}")
break
job_state = _active_jobs.get(job_id)
if not job_state:
break
# Only yield if progress changed
if job_state["progress"] != last_progress:
last_progress = job_state["progress"]
yield worker_pb2.ProgressUpdate(
job_id=job_id,
progress=job_state["progress"],
current_frame=job_state.get("current_frame", 0),
current_time=job_state.get("current_time", 0.0),
speed=job_state.get("speed", 0.0),
status=job_state["status"],
error=job_state.get("error"),
)
# Exit if job is done
if job_state["status"] in ("completed", "failed", "cancelled"):
break
# Small delay to avoid busy loop
time.sleep(0.1)
# Cleanup completed jobs
if job_id in _active_jobs:
status = _active_jobs[job_id].get("status")
if status in ("completed", "failed", "cancelled"):
_active_jobs.pop(job_id, None)
def CancelJob(self, request, context):
"""Cancel a running job."""
job_id = request.job_id
logger.info(f"CancelJob: {job_id}")
if job_id in _active_jobs:
_active_jobs[job_id]["status"] = "cancelled"
# Revoke Celery task if available
if self.celery_app:
task_id = _active_jobs[job_id].get("celery_task_id")
if task_id:
self.celery_app.control.revoke(task_id, terminate=True)
return worker_pb2.CancelResponse(
job_id=job_id,
cancelled=True,
message="Job cancelled",
)
return worker_pb2.CancelResponse(
job_id=job_id,
cancelled=False,
message="Job not found",
)
def GetWorkerStatus(self, request, context):
"""Get worker health and capabilities."""
try:
from core.ffmpeg import get_encoders
encoders = get_encoders()
codec_names = [e["name"] for e in encoders.get("video", [])]
except Exception:
codec_names = []
# Check for GPU encoders
gpu_available = any(
"nvenc" in name or "vaapi" in name or "qsv" in name for name in codec_names
)
return worker_pb2.WorkerStatus(
available=True,
active_jobs=len(_active_jobs),
supported_codecs=codec_names[:20], # Limit to 20
gpu_available=gpu_available,
)
def update_job_progress(
job_id: str,
progress: int,
current_frame: int = 0,
current_time: float = 0.0,
speed: float = 0.0,
status: str = "processing",
error: str = None,
) -> None:
"""
Update job progress (called from worker tasks).
Updates both the in-memory gRPC state and the Django database.
"""
if job_id in _active_jobs:
_active_jobs[job_id].update(
{
"progress": progress,
"current_frame": current_frame,
"current_time": current_time,
"speed": speed,
"status": status,
"error": error,
}
)
# Update Django database
try:
from django.utils import timezone
from core.db import update_job_fields
updates = {
"progress": progress,
"current_frame": current_frame,
"current_time": current_time,
"speed": str(speed),
"status": status,
}
if error:
updates["error_message"] = error
if status == "processing":
updates["started_at"] = timezone.now()
elif status in ("completed", "failed"):
updates["completed_at"] = timezone.now()
update_job_fields(job_id, **updates)
except Exception as e:
logger.warning(f"Failed to update job {job_id} in DB: {e}")
def serve(port: int = None, celery_app=None) -> grpc.Server:
"""
Start the gRPC server.
Args:
port: Port to listen on (defaults to GRPC_PORT env var)
celery_app: Optional Celery app for task dispatch
Returns:
The running gRPC server
"""
if port is None:
port = GRPC_PORT
server = grpc.server(futures.ThreadPoolExecutor(max_workers=GRPC_MAX_WORKERS))
worker_pb2_grpc.add_WorkerServiceServicer_to_server(
WorkerServicer(celery_app=celery_app),
server,
)
server.add_insecure_port(f"[::]:{port}")
server.start()
logger.info(f"gRPC server started on port {port}")
return server
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
server = serve()
server.wait_for_termination()

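Worker tasks are expected to feed update_job_progress as FFmpeg reports progress. Since run_transcode_job itself is not part of this diff, the wiring below is only a hedged sketch; the details keys are assumptions:

# Hypothetical progress callback inside a worker task; treat as a sketch.
from core.rpc.server import update_job_progress

def on_progress(percent: int, details: dict) -> None:
    update_job_progress(
        job_id="demo-job",  # placeholder; the task would pass its real job id
        progress=percent,
        current_frame=details.get("frame", 0),
        current_time=details.get("time", 0.0),
        speed=details.get("speed", 0.0),
        status="processing",
    )
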
52
core/rpc/worker_pb2.py Normal file

@@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# NO CHECKED-IN PROTOBUF GENCODE
# source: worker.proto
# Protobuf Python Version: 6.31.1
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import runtime_version as _runtime_version
from google.protobuf import symbol_database as _symbol_database
from google.protobuf.internal import builder as _builder
_runtime_version.ValidateProtobufRuntimeVersion(
    _runtime_version.Domain.PUBLIC,
    6,
    31,
    1,
    '',
    'worker.proto'
)
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0cworker.proto\x12\nmpr.worker\"\xa7\x01\n\nJobRequest\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12\x13\n\x0bsource_path\x18\x02 \x01(\t\x12\x13\n\x0boutput_path\x18\x03 \x01(\t\x12\x13\n\x0bpreset_json\x18\x04 \x01(\t\x12\x17\n\ntrim_start\x18\x05 \x01(\x02H\x00\x88\x01\x01\x12\x15\n\x08trim_end\x18\x06 \x01(\x02H\x01\x88\x01\x01\x42\r\n\x0b_trim_startB\x0b\n\t_trim_end\"@\n\x0bJobResponse\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12\x10\n\x08\x61\x63\x63\x65pted\x18\x02 \x01(\x08\x12\x0f\n\x07message\x18\x03 \x01(\t\"!\n\x0fProgressRequest\x12\x0e\n\x06job_id\x18\x01 \x01(\t\"\x9c\x01\n\x0eProgressUpdate\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12\x10\n\x08progress\x18\x02 \x01(\x05\x12\x15\n\rcurrent_frame\x18\x03 \x01(\x05\x12\x14\n\x0c\x63urrent_time\x18\x04 \x01(\x02\x12\r\n\x05speed\x18\x05 \x01(\x02\x12\x0e\n\x06status\x18\x06 \x01(\t\x12\x12\n\x05\x65rror\x18\x07 \x01(\tH\x00\x88\x01\x01\x42\x08\n\x06_error\"\x1f\n\rCancelRequest\x12\x0e\n\x06job_id\x18\x01 \x01(\t\"D\n\x0e\x43\x61ncelResponse\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12\x11\n\tcancelled\x18\x02 \x01(\x08\x12\x0f\n\x07message\x18\x03 \x01(\t\"g\n\x0cWorkerStatus\x12\x11\n\tavailable\x18\x01 \x01(\x08\x12\x13\n\x0b\x61\x63tive_jobs\x18\x02 \x01(\x05\x12\x18\n\x10supported_codecs\x18\x03 \x03(\t\x12\x15\n\rgpu_available\x18\x04 \x01(\x08\"\x07\n\x05\x45mpty2\x9e\x02\n\rWorkerService\x12<\n\tSubmitJob\x12\x16.mpr.worker.JobRequest\x1a\x17.mpr.worker.JobResponse\x12K\n\x0eStreamProgress\x12\x1b.mpr.worker.ProgressRequest\x1a\x1a.mpr.worker.ProgressUpdate0\x01\x12\x42\n\tCancelJob\x12\x19.mpr.worker.CancelRequest\x1a\x1a.mpr.worker.CancelResponse\x12>\n\x0fGetWorkerStatus\x12\x11.mpr.worker.Empty\x1a\x18.mpr.worker.WorkerStatusb\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'worker_pb2', _globals)
if not _descriptor._USE_C_DESCRIPTORS:
  DESCRIPTOR._loaded_options = None
  _globals['_JOBREQUEST']._serialized_start=29
  _globals['_JOBREQUEST']._serialized_end=196
  _globals['_JOBRESPONSE']._serialized_start=198
  _globals['_JOBRESPONSE']._serialized_end=262
  _globals['_PROGRESSREQUEST']._serialized_start=264
  _globals['_PROGRESSREQUEST']._serialized_end=297
  _globals['_PROGRESSUPDATE']._serialized_start=300
  _globals['_PROGRESSUPDATE']._serialized_end=456
  _globals['_CANCELREQUEST']._serialized_start=458
  _globals['_CANCELREQUEST']._serialized_end=489
  _globals['_CANCELRESPONSE']._serialized_start=491
  _globals['_CANCELRESPONSE']._serialized_end=559
  _globals['_WORKERSTATUS']._serialized_start=561
  _globals['_WORKERSTATUS']._serialized_end=664
  _globals['_EMPTY']._serialized_start=666
  _globals['_EMPTY']._serialized_end=673
  _globals['_WORKERSERVICE']._serialized_start=676
  _globals['_WORKERSERVICE']._serialized_end=962
# @@protoc_insertion_point(module_scope)

226
core/rpc/worker_pb2_grpc.py Normal file

@@ -0,0 +1,226 @@
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
"""Client and server classes corresponding to protobuf-defined services."""
import grpc
import warnings

from . import worker_pb2 as worker__pb2

GRPC_GENERATED_VERSION = '1.76.0'
GRPC_VERSION = grpc.__version__
_version_not_supported = False

try:
    from grpc._utilities import first_version_is_lower
    _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION)
except ImportError:
    _version_not_supported = True

if _version_not_supported:
    raise RuntimeError(
        f'The grpc package installed is at version {GRPC_VERSION},'
        + ' but the generated code in worker_pb2_grpc.py depends on'
        + f' grpcio>={GRPC_GENERATED_VERSION}.'
        + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
        + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'
    )

class WorkerServiceStub(object):
    """Missing associated documentation comment in .proto file."""

    def __init__(self, channel):
        """Constructor.

        Args:
            channel: A grpc.Channel.
        """
        self.SubmitJob = channel.unary_unary(
                '/mpr.worker.WorkerService/SubmitJob',
                request_serializer=worker__pb2.JobRequest.SerializeToString,
                response_deserializer=worker__pb2.JobResponse.FromString,
                _registered_method=True)
        self.StreamProgress = channel.unary_stream(
                '/mpr.worker.WorkerService/StreamProgress',
                request_serializer=worker__pb2.ProgressRequest.SerializeToString,
                response_deserializer=worker__pb2.ProgressUpdate.FromString,
                _registered_method=True)
        self.CancelJob = channel.unary_unary(
                '/mpr.worker.WorkerService/CancelJob',
                request_serializer=worker__pb2.CancelRequest.SerializeToString,
                response_deserializer=worker__pb2.CancelResponse.FromString,
                _registered_method=True)
        self.GetWorkerStatus = channel.unary_unary(
                '/mpr.worker.WorkerService/GetWorkerStatus',
                request_serializer=worker__pb2.Empty.SerializeToString,
                response_deserializer=worker__pb2.WorkerStatus.FromString,
                _registered_method=True)

class WorkerServiceServicer(object):
    """Missing associated documentation comment in .proto file."""

    def SubmitJob(self, request, context):
        """Missing associated documentation comment in .proto file."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def StreamProgress(self, request, context):
        """Missing associated documentation comment in .proto file."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def CancelJob(self, request, context):
        """Missing associated documentation comment in .proto file."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def GetWorkerStatus(self, request, context):
        """Missing associated documentation comment in .proto file."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

def add_WorkerServiceServicer_to_server(servicer, server):
    rpc_method_handlers = {
            'SubmitJob': grpc.unary_unary_rpc_method_handler(
                    servicer.SubmitJob,
                    request_deserializer=worker__pb2.JobRequest.FromString,
                    response_serializer=worker__pb2.JobResponse.SerializeToString,
            ),
            'StreamProgress': grpc.unary_stream_rpc_method_handler(
                    servicer.StreamProgress,
                    request_deserializer=worker__pb2.ProgressRequest.FromString,
                    response_serializer=worker__pb2.ProgressUpdate.SerializeToString,
            ),
            'CancelJob': grpc.unary_unary_rpc_method_handler(
                    servicer.CancelJob,
                    request_deserializer=worker__pb2.CancelRequest.FromString,
                    response_serializer=worker__pb2.CancelResponse.SerializeToString,
            ),
            'GetWorkerStatus': grpc.unary_unary_rpc_method_handler(
                    servicer.GetWorkerStatus,
                    request_deserializer=worker__pb2.Empty.FromString,
                    response_serializer=worker__pb2.WorkerStatus.SerializeToString,
            ),
    }
    generic_handler = grpc.method_handlers_generic_handler(
            'mpr.worker.WorkerService', rpc_method_handlers)
    server.add_generic_rpc_handlers((generic_handler,))
    server.add_registered_method_handlers('mpr.worker.WorkerService', rpc_method_handlers)

# This class is part of an EXPERIMENTAL API.
class WorkerService(object):
    """Missing associated documentation comment in .proto file."""

    @staticmethod
    def SubmitJob(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(
            request,
            target,
            '/mpr.worker.WorkerService/SubmitJob',
            worker__pb2.JobRequest.SerializeToString,
            worker__pb2.JobResponse.FromString,
            options,
            channel_credentials,
            insecure,
            call_credentials,
            compression,
            wait_for_ready,
            timeout,
            metadata,
            _registered_method=True)

    @staticmethod
    def StreamProgress(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_stream(
            request,
            target,
            '/mpr.worker.WorkerService/StreamProgress',
            worker__pb2.ProgressRequest.SerializeToString,
            worker__pb2.ProgressUpdate.FromString,
            options,
            channel_credentials,
            insecure,
            call_credentials,
            compression,
            wait_for_ready,
            timeout,
            metadata,
            _registered_method=True)

    @staticmethod
    def CancelJob(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(
            request,
            target,
            '/mpr.worker.WorkerService/CancelJob',
            worker__pb2.CancelRequest.SerializeToString,
            worker__pb2.CancelResponse.FromString,
            options,
            channel_credentials,
            insecure,
            call_credentials,
            compression,
            wait_for_ready,
            timeout,
            metadata,
            _registered_method=True)

    @staticmethod
    def GetWorkerStatus(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(
            request,
            target,
            '/mpr.worker.WorkerService/GetWorkerStatus',
            worker__pb2.Empty.SerializeToString,
            worker__pb2.WorkerStatus.FromString,
            options,
            channel_credentials,
            insecure,
            call_credentials,
            compression,
            wait_for_ready,
            timeout,
            metadata,
            _registered_method=True)

58
core/schema/__init__.py Normal file

@@ -0,0 +1,58 @@
"""
MPR Schema Definitions - Source of Truth
This package defines the core data models as Python dataclasses.
These definitions are used to generate:
- Django ORM models (mpr/media_assets/models.py)
- Pydantic schemas (api/schema/*.py)
- TypeScript types (ui/timeline/src/types.ts)
- Protobuf definitions (grpc/protos/worker.proto)
Run `python schema/generate.py` to regenerate all targets.
"""
from .models import (
    BUILTIN_PRESETS,
    # For generator
    DATACLASSES,
    ENUMS,
    GRPC_MESSAGES,
    # gRPC
    GRPC_SERVICE,
    # Enums
    AssetStatus,
    CancelRequest,
    CancelResponse,
    Empty,
    JobRequest,
    JobResponse,
    JobStatus,
    # Models
    MediaAsset,
    ProgressRequest,
    ProgressUpdate,
    TranscodeJob,
    TranscodePreset,
    WorkerStatus,
)

__all__ = [
    "MediaAsset",
    "TranscodePreset",
    "TranscodeJob",
    "AssetStatus",
    "JobStatus",
    "GRPC_SERVICE",
    "JobRequest",
    "JobResponse",
    "ProgressRequest",
    "ProgressUpdate",
    "CancelRequest",
    "CancelResponse",
    "WorkerStatus",
    "Empty",
    "DATACLASSES",
    "ENUMS",
    "GRPC_MESSAGES",
    "BUILTIN_PRESETS",
]

25
core/schema/modelgen.json Normal file

@@ -0,0 +1,25 @@
{
  "schema": "core/schema/models",
  "targets": [
    {
      "target": "django",
      "output": "admin/mpr/media_assets/models.py",
      "include": ["dataclasses", "enums"]
    },
    {
      "target": "graphene",
      "output": "core/api/schema/graphql.py",
      "include": ["dataclasses", "enums", "api"]
    },
    {
      "target": "typescript",
      "output": "ui/timeline/src/types.ts",
      "include": ["dataclasses", "enums", "api"]
    },
    {
      "target": "protobuf",
      "output": "core/rpc/protos/worker.proto",
      "include": ["grpc"]
    }
  ]
}

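The generator itself (schema/generate.py) is not part of this diff, but it presumably reads this config and dispatches one emitter per target; a hedged sketch of that dispatch loop, with no real emitters attached:

# Sketch of the dispatch loop schema/generate.py might implement;
# it only prints what would be generated, nothing more.
import json
from pathlib import Path

def generate(config_path: str = "core/schema/modelgen.json") -> None:
    config = json.loads(Path(config_path).read_text())
    for target in config["targets"]:
        print(f"would generate {target['target']} -> {target['output']} "
              f"(sections: {', '.join(target['include'])})")
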
89
core/schema/models/__init__.py Normal file

@@ -0,0 +1,89 @@
"""
MPR Schema Models
This module exports all dataclasses, enums, and constants that the generator
should process. Add new models here to have them included in generation.
"""
from .api import (
CreateJobRequest,
DeleteResult,
ScanResult,
SystemStatus,
UpdateAssetRequest,
)
from .grpc import (
GRPC_SERVICE,
CancelRequest,
CancelResponse,
Empty,
JobRequest,
JobResponse,
ProgressRequest,
ProgressUpdate,
WorkerStatus,
)
from .jobs import JobStatus, TranscodeJob
from .media import AssetStatus, MediaAsset
from .presets import BUILTIN_PRESETS, TranscodePreset
# Core domain models - generates Django, Pydantic, TypeScript
DATACLASSES = [MediaAsset, TranscodePreset, TranscodeJob]
# API request/response models - generates TypeScript only (no Django)
# WorkerStatus from grpc.py is reused here
API_MODELS = [
CreateJobRequest,
UpdateAssetRequest,
SystemStatus,
ScanResult,
DeleteResult,
WorkerStatus,
]
# Status enums - included in generated code
ENUMS = [AssetStatus, JobStatus]
# gRPC messages - generates Proto
GRPC_MESSAGES = [
JobRequest,
JobResponse,
ProgressRequest,
ProgressUpdate,
CancelRequest,
CancelResponse,
WorkerStatus,
Empty,
]
__all__ = [
# Models
"MediaAsset",
"TranscodePreset",
"TranscodeJob",
# API Models
"CreateJobRequest",
"UpdateAssetRequest",
"DeleteResult",
"ScanResult",
"SystemStatus",
# Enums
"AssetStatus",
"JobStatus",
# gRPC
"GRPC_SERVICE",
"JobRequest",
"JobResponse",
"ProgressRequest",
"ProgressUpdate",
"CancelRequest",
"CancelResponse",
"WorkerStatus",
"Empty",
# For generator
"DATACLASSES",
"API_MODELS",
"ENUMS",
"GRPC_MESSAGES",
"BUILTIN_PRESETS",
]

58
core/schema/models/api.py Normal file

@@ -0,0 +1,58 @@
"""
API Request/Response Schema Definitions
These are separate from the main domain models and represent
the shape of data sent to/from the API endpoints.
"""
from dataclasses import dataclass, field
from typing import List, Optional
from uuid import UUID
@dataclass
class CreateJobRequest:
"""Request body for creating a transcode/trim job."""
source_asset_id: UUID
preset_id: Optional[UUID] = None
trim_start: Optional[float] = None # seconds
trim_end: Optional[float] = None # seconds
output_filename: Optional[str] = None
priority: int = 0
@dataclass
class SystemStatus:
"""System status response."""
status: str
version: str
@dataclass
class ScanResult:
"""Result of scanning the media input bucket."""
found: int = 0
registered: int = 0
skipped: int = 0
files: List[str] = field(default_factory=list)
@dataclass
class UpdateAssetRequest:
"""Request body for updating asset metadata."""
comments: Optional[str] = None
tags: Optional[List[str]] = None
@dataclass
class DeleteResult:
"""Result of a delete operation."""
ok: bool = False
# Note: WorkerStatus is defined in grpc.py and reused here

130
core/schema/models/grpc.py Normal file

@@ -0,0 +1,130 @@
"""
gRPC message definitions for MPR worker communication.
This is the source of truth for gRPC messages. The generator creates:
- rpc/protos/worker.proto (protobuf definition)
- rpc/worker_pb2.py (generated Python classes)
- rpc/worker_pb2_grpc.py (generated gRPC stubs)
"""
from dataclasses import dataclass
from typing import Optional
# -----------------------------------------------------------------------------
# Request Messages
# -----------------------------------------------------------------------------
@dataclass
class JobRequest:
"""Request to submit a transcode/trim job."""
job_id: str
source_path: str
output_path: str
preset_json: str # Serialized TranscodePreset
trim_start: Optional[float] = None
trim_end: Optional[float] = None
@dataclass
class ProgressRequest:
"""Request to stream progress updates for a job."""
job_id: str
@dataclass
class CancelRequest:
"""Request to cancel a running job."""
job_id: str
@dataclass
class Empty:
"""Empty message for requests with no parameters."""
pass
# -----------------------------------------------------------------------------
# Response Messages
# -----------------------------------------------------------------------------
@dataclass
class JobResponse:
"""Response after submitting a job."""
job_id: str
accepted: bool
message: str
@dataclass
class ProgressUpdate:
"""Streaming progress update from worker."""
job_id: str
progress: int # 0-100
current_frame: int
current_time: float
speed: float # e.g., 2.5x
status: str # pending, processing, completed, failed, cancelled
error: Optional[str] = None
@dataclass
class CancelResponse:
"""Response after cancel request."""
job_id: str
cancelled: bool
message: str
@dataclass
class WorkerStatus:
"""Worker health and capabilities."""
available: bool
active_jobs: int
supported_codecs: list[str]
gpu_available: bool
# -----------------------------------------------------------------------------
# Service Definition (for documentation, generator uses this)
# -----------------------------------------------------------------------------
GRPC_SERVICE = {
"name": "WorkerService",
"package": "mpr.worker",
"methods": [
{
"name": "SubmitJob",
"request": JobRequest,
"response": JobResponse,
"stream_response": False,
},
{
"name": "StreamProgress",
"request": ProgressRequest,
"response": ProgressUpdate,
"stream_response": True, # Server streaming
},
{
"name": "CancelJob",
"request": CancelRequest,
"response": CancelResponse,
"stream_response": False,
},
{
"name": "GetWorkerStatus",
"request": Empty,
"response": WorkerStatus,
"stream_response": False,
},
],
}

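GRPC_SERVICE is plain data, so emitting the service block of worker.proto is a walk over its methods. A minimal sketch, assuming each dataclass name doubles as the proto message name (which matches the generated worker.proto above):

# Minimal proto 'service' emitter over GRPC_SERVICE; illustrative only.
from core.schema.models.grpc import GRPC_SERVICE

lines = [f"service {GRPC_SERVICE['name']} {{"]
for m in GRPC_SERVICE["methods"]:
    stream = "stream " if m["stream_response"] else ""
    lines.append(
        f"  rpc {m['name']}({m['request'].__name__}) "
        f"returns ({stream}{m['response'].__name__});"
    )
lines.append("}")
print("\n".join(lines))  # prints the same rpc lines as worker.proto
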
79
core/schema/models/jobs.py Normal file

@@ -0,0 +1,79 @@
"""
TranscodeJob Schema Definition
Source of truth for job data model.
"""
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Dict, Optional
from uuid import UUID
class JobStatus(str, Enum):
"""Status of a transcode/trim job."""
PENDING = "pending"
PROCESSING = "processing"
COMPLETED = "completed"
FAILED = "failed"
CANCELLED = "cancelled"
@dataclass
class TranscodeJob:
"""
A transcoding or trimming job in the queue.
Jobs can either:
- Transcode using a preset (full re-encode)
- Trim only (stream copy with -c:v copy -c:a copy)
A trim-only job has no preset and uses stream copy.
"""
id: UUID
# Input
source_asset_id: UUID
# Configuration
preset_id: Optional[UUID] = None
preset_snapshot: Dict[str, Any] = field(
default_factory=dict
) # Copy at creation time
# Trimming (optional)
trim_start: Optional[float] = None # seconds
trim_end: Optional[float] = None # seconds
# Output
output_filename: str = ""
output_path: Optional[str] = None
output_asset_id: Optional[UUID] = None
# Status & Progress
status: JobStatus = JobStatus.PENDING
progress: float = 0.0 # 0.0 to 100.0
current_frame: Optional[int] = None
current_time: Optional[float] = None # seconds processed
speed: Optional[str] = None # "2.5x"
error_message: Optional[str] = None
# Worker tracking
celery_task_id: Optional[str] = None
execution_arn: Optional[str] = None # AWS Step Functions execution ARN
priority: int = 0 # Lower = higher priority
# Timestamps
created_at: Optional[datetime] = None
started_at: Optional[datetime] = None
completed_at: Optional[datetime] = None
@property
def is_trim_only(self) -> bool:
"""Check if this is a trim-only job (stream copy, no transcode)."""
return self.preset_id is None and (
self.trim_start is not None or self.trim_end is not None
)

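A quick check of the is_trim_only rule; the UUIDs are placeholders:

# Placeholder UUIDs; demonstrates the trim-only rule only.
from uuid import uuid4
from core.schema.models.jobs import TranscodeJob

trim = TranscodeJob(id=uuid4(), source_asset_id=uuid4(), trim_start=1.0, trim_end=5.0)
full = TranscodeJob(id=uuid4(), source_asset_id=uuid4(), preset_id=uuid4())

assert trim.is_trim_only      # no preset, has trim points -> stream copy
assert not full.is_trim_only  # preset present -> full re-encode
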
59
core/schema/models/media.py Normal file

@@ -0,0 +1,59 @@
"""
MediaAsset Schema Definition
Source of truth for media asset data model.
"""
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional
from uuid import UUID
class AssetStatus(str, Enum):
"""Status of a media asset after probing."""
PENDING = "pending"
READY = "ready"
ERROR = "error"
@dataclass
class MediaAsset:
"""
A video/audio file registered in the system.
Metadata is populated asynchronously via ffprobe after registration.
"""
id: UUID
filename: str
file_path: str
# Status
status: AssetStatus = AssetStatus.PENDING
error_message: Optional[str] = None
# File info
file_size: Optional[int] = None
# Media metadata (populated by ffprobe)
duration: Optional[float] = None # seconds
video_codec: Optional[str] = None
audio_codec: Optional[str] = None
width: Optional[int] = None
height: Optional[int] = None
framerate: Optional[float] = None
bitrate: Optional[int] = None # bits per second
# Full ffprobe output and custom metadata
properties: Dict[str, Any] = field(default_factory=dict)
# User annotations
comments: str = ""
tags: List[str] = field(default_factory=list)
# Timestamps
created_at: Optional[datetime] = None
updated_at: Optional[datetime] = None

128
core/schema/models/presets.py Normal file

@@ -0,0 +1,128 @@
"""
TranscodePreset Schema Definition
Source of truth for preset data model.
"""
from dataclasses import dataclass, field
from datetime import datetime
from typing import List, Optional
from uuid import UUID
@dataclass
class TranscodePreset:
"""
A reusable transcoding configuration (like Handbrake presets).
Presets can be builtin (shipped with the app) or user-created.
"""
id: UUID
name: str
description: str = ""
is_builtin: bool = False
# Output container
container: str = "mp4" # mp4, mkv, webm, mov, avi
# Video settings
video_codec: str = "libx264"
video_bitrate: Optional[str] = None # "2M", "5000k"
video_crf: Optional[int] = None # Quality-based (0-51 for x264)
video_preset: Optional[str] = None # ultrafast...veryslow
resolution: Optional[str] = None # "1920x1080", "1280x720"
framerate: Optional[float] = None
# Audio settings
audio_codec: str = "aac"
audio_bitrate: Optional[str] = None # "128k", "320k"
audio_channels: Optional[int] = None # 2 for stereo
audio_samplerate: Optional[int] = None # 44100, 48000
# Advanced: extra FFmpeg arguments
extra_args: List[str] = field(default_factory=list)
# Timestamps
created_at: Optional[datetime] = None
updated_at: Optional[datetime] = None
BUILTIN_PRESETS = [
{
"name": "DaVinci Resolve",
"description": "MPEG-4 (xvid) + MP3 - Compatible with DaVinci Resolve Free",
"container": "avi",
"video_codec": "mpeg4",
"video_crf": 5,
"audio_codec": "libmp3lame",
"audio_bitrate": "320k",
"audio_samplerate": 48000,
"extra_args": ["-vtag", "xvid", "-pix_fmt", "yuv420p"],
},
{
"name": "Copy (Trim Only)",
"description": "Stream copy - No transcoding, fast trimming only",
"container": "mp4",
"video_codec": "copy",
"audio_codec": "copy",
},
{
"name": "Web H.264",
"description": "H.264 + AAC - General web playback",
"container": "mp4",
"video_codec": "libx264",
"video_crf": 23,
"video_preset": "medium",
"audio_codec": "aac",
"audio_bitrate": "128k",
},
{
"name": "Web H.265",
"description": "HEVC + AAC - Smaller files, modern browsers",
"container": "mp4",
"video_codec": "libx265",
"video_crf": 28,
"video_preset": "medium",
"audio_codec": "aac",
"audio_bitrate": "128k",
},
{
"name": "DNxHR HQ",
"description": "DNxHR High Quality - Professional editing",
"container": "mov",
"video_codec": "dnxhd",
"audio_codec": "pcm_s16le",
"audio_samplerate": 48000,
"extra_args": ["-profile:v", "dnxhr_hq", "-pix_fmt", "yuv422p"],
},
{
"name": "H.264 NVENC",
"description": "NVIDIA GPU encoding - Fast H.264",
"container": "mp4",
"video_codec": "h264_nvenc",
"video_bitrate": "10M",
"audio_codec": "aac",
"audio_bitrate": "192k",
"extra_args": ["-preset", "p4", "-rc", "vbr", "-cq", "19"],
},
{
"name": "HEVC NVENC",
"description": "NVIDIA GPU encoding - HEVC/H.265",
"container": "mp4",
"video_codec": "hevc_nvenc",
"video_bitrate": "8M",
"audio_codec": "aac",
"audio_bitrate": "192k",
"extra_args": ["-preset", "p4", "-rc", "vbr", "-cq", "23"],
},
{
"name": "Archive ProRes",
"description": "Apple ProRes 422 HQ - Archival quality",
"container": "mov",
"video_codec": "prores_ks",
"audio_codec": "pcm_s16le",
"audio_samplerate": 48000,
"extra_args": ["-profile:v", "3"], # ProRes 422 HQ
},
]

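For intuition, a preset dict maps fairly directly onto FFmpeg flags (-c:v, -crf, -preset, -c:a, -b:a). The real mapping lives in core/ffmpeg/transcode.py, which is not part of this diff, so the following is only a hedged sketch of a subset of that translation:

# Hedged sketch of mapping a preset dict to FFmpeg args; the actual logic
# lives in core/ffmpeg/transcode.py (not shown here) and covers more fields.
def preset_to_args(preset: dict, input_path: str, output_path: str) -> list[str]:
    args = ["ffmpeg", "-y", "-i", input_path]
    args += ["-c:v", preset.get("video_codec", "libx264")]
    if preset.get("video_crf") is not None:
        args += ["-crf", str(preset["video_crf"])]
    if preset.get("video_preset"):
        args += ["-preset", preset["video_preset"]]
    args += ["-c:a", preset.get("audio_codec", "aac")]
    if preset.get("audio_bitrate"):
        args += ["-b:a", preset["audio_bitrate"]]
    args += preset.get("extra_args", [])
    args.append(output_path)
    return args

# e.g. preset_to_args(BUILTIN_PRESETS[2], "in.mkv", "out.mp4")  # "Web H.264"
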
10
core/storage/__init__.py Normal file

@@ -0,0 +1,10 @@
from .s3 import (
    BUCKET_IN,
    BUCKET_OUT,
    download_file,
    download_to_temp,
    get_presigned_url,
    get_s3_client,
    list_objects,
    upload_file,
)

1
core/storage/gcp.py Normal file

@@ -0,0 +1 @@
"""GCP Cloud Storage backend (placeholder)."""

1
core/storage/local.py Normal file

@@ -0,0 +1 @@
"""Local filesystem storage backend (placeholder)."""

15
core/task/__init__.py Normal file

@@ -0,0 +1,15 @@
"""
MPR Worker Module
Provides executor abstraction and Celery tasks for job processing.
"""
from .executor import Executor, LocalExecutor, get_executor
from .tasks import run_transcode_job
__all__ = [
"Executor",
"LocalExecutor",
"get_executor",
"run_transcode_job",
]

260
core/task/executor.py Normal file

@@ -0,0 +1,260 @@
"""
Executor abstraction for job processing.
Supports different backends:
- LocalExecutor: FFmpeg via Celery (default)
- LambdaExecutor: AWS Lambda (future)
"""
import os
from abc import ABC, abstractmethod
from typing import Any, Callable, Dict, Optional
from core.ffmpeg.transcode import TranscodeConfig, transcode
# Configuration from environment
MPR_EXECUTOR = os.environ.get("MPR_EXECUTOR", "local")
class Executor(ABC):
"""Abstract base class for job executors."""
@abstractmethod
def run(
self,
job_id: str,
source_path: str,
output_path: str,
preset: Optional[Dict[str, Any]] = None,
trim_start: Optional[float] = None,
trim_end: Optional[float] = None,
duration: Optional[float] = None,
progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
) -> bool:
"""
Execute a transcode/trim job.
Args:
job_id: Unique job identifier
source_path: Path to source file
output_path: Path for output file
preset: Transcode preset dict (optional, None = trim only)
trim_start: Trim start time in seconds (optional)
trim_end: Trim end time in seconds (optional)
duration: Source duration in seconds (for progress calculation)
progress_callback: Called with (percent, details_dict)
Returns:
True if successful
"""
pass
class LocalExecutor(Executor):
"""Execute jobs locally using FFmpeg."""
def run(
self,
job_id: str,
source_path: str,
output_path: str,
preset: Optional[Dict[str, Any]] = None,
trim_start: Optional[float] = None,
trim_end: Optional[float] = None,
duration: Optional[float] = None,
progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
) -> bool:
"""Execute job using local FFmpeg."""
# Build config from preset or use stream copy for trim-only
if preset:
config = TranscodeConfig(
input_path=source_path,
output_path=output_path,
video_codec=preset.get("video_codec", "libx264"),
video_bitrate=preset.get("video_bitrate"),
video_crf=preset.get("video_crf"),
video_preset=preset.get("video_preset"),
resolution=preset.get("resolution"),
framerate=preset.get("framerate"),
audio_codec=preset.get("audio_codec", "aac"),
audio_bitrate=preset.get("audio_bitrate"),
audio_channels=preset.get("audio_channels"),
audio_samplerate=preset.get("audio_samplerate"),
container=preset.get("container", "mp4"),
extra_args=preset.get("extra_args", []),
trim_start=trim_start,
trim_end=trim_end,
)
else:
# Trim-only: stream copy
config = TranscodeConfig(
input_path=source_path,
output_path=output_path,
video_codec="copy",
audio_codec="copy",
trim_start=trim_start,
trim_end=trim_end,
)
# Wrapper to convert float percent to int
def wrapped_callback(percent: float, details: Dict[str, Any]) -> None:
if progress_callback:
progress_callback(int(percent), details)
return transcode(
config,
duration=duration,
progress_callback=wrapped_callback if progress_callback else None,
)
class LambdaExecutor(Executor):
"""Execute jobs via AWS Step Functions + Lambda."""
def __init__(self):
import boto3
region = os.environ.get("AWS_REGION", "us-east-1")
self.sfn = boto3.client("stepfunctions", region_name=region)
self.state_machine_arn = os.environ["STEP_FUNCTION_ARN"]
self.callback_url = os.environ.get("CALLBACK_URL", "")
self.callback_api_key = os.environ.get("CALLBACK_API_KEY", "")
def run(
self,
job_id: str,
source_path: str,
output_path: str,
preset: Optional[Dict[str, Any]] = None,
trim_start: Optional[float] = None,
trim_end: Optional[float] = None,
duration: Optional[float] = None,
progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
) -> bool:
"""Start a Step Functions execution for this job."""
import json
payload = {
"job_id": job_id,
"source_key": source_path,
"output_key": output_path,
"preset": preset,
"trim_start": trim_start,
"trim_end": trim_end,
"duration": duration,
"callback_url": self.callback_url,
"api_key": self.callback_api_key,
}
response = self.sfn.start_execution(
stateMachineArn=self.state_machine_arn,
name=f"mpr-{job_id}",
input=json.dumps(payload),
)
# Store execution ARN on the job
execution_arn = response["executionArn"]
try:
from core.db import update_job_fields
update_job_fields(job_id, execution_arn=execution_arn)
except Exception:
pass
return True
class GCPExecutor(Executor):
"""Execute jobs via Google Cloud Run Jobs."""
def __init__(self):
from google.cloud import run_v2
self.client = run_v2.JobsClient()
self.project_id = os.environ["GCP_PROJECT_ID"]
self.region = os.environ.get("GCP_REGION", "us-central1")
self.job_name = os.environ["CLOUD_RUN_JOB"]
self.callback_url = os.environ.get("CALLBACK_URL", "")
self.callback_api_key = os.environ.get("CALLBACK_API_KEY", "")
def run(
self,
job_id: str,
source_path: str,
output_path: str,
preset: Optional[Dict[str, Any]] = None,
trim_start: Optional[float] = None,
trim_end: Optional[float] = None,
duration: Optional[float] = None,
progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
) -> bool:
"""Trigger a Cloud Run Job execution for this job."""
import json
from google.cloud import run_v2
payload = {
"job_id": job_id,
"source_key": source_path,
"output_key": output_path,
"preset": preset,
"trim_start": trim_start,
"trim_end": trim_end,
"duration": duration,
"callback_url": self.callback_url,
"api_key": self.callback_api_key,
}
job_path = (
f"projects/{self.project_id}/locations/{self.region}/jobs/{self.job_name}"
)
request = run_v2.RunJobRequest(
name=job_path,
overrides=run_v2.RunJobRequest.Overrides(
container_overrides=[
run_v2.RunJobRequest.Overrides.ContainerOverride(
env=[
run_v2.EnvVar(
name="MPR_JOB_PAYLOAD", value=json.dumps(payload)
)
]
)
]
),
)
operation = self.client.run_job(request=request)
execution_name = operation.metadata.name
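        # Cloud Run has no ARN; best-effort, the execution name is persisted
        # in the same execution_arn field used by the Lambda executor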
try:
from core.db import update_job_fields
update_job_fields(job_id, execution_arn=execution_name)
except Exception:
pass
return True

# Executor registry
_executors: Dict[str, type] = {
"local": LocalExecutor,
"lambda": LambdaExecutor,
"gcp": GCPExecutor,
}
_executor_instance: Optional[Executor] = None

def get_executor() -> Executor:
"""Get the configured executor instance."""
global _executor_instance
if _executor_instance is None:
executor_type = MPR_EXECUTOR.lower()
if executor_type not in _executors:
raise ValueError(f"Unknown executor type: {executor_type}")
_executor_instance = _executors[executor_type]()
return _executor_instance
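
The registry keys double as the accepted MPR_EXECUTOR values, and get_executor() caches one instance per process. A minimal sketch of plugging in a custom backend (DockerExecutor is hypothetical, not part of this commit; MPR_EXECUTOR appears to be read from the environment at import time, so export it before the process starts):

from core.task.executor import Executor, _executors, get_executor

class DockerExecutor(Executor):
    """Hypothetical backend: would shell out to a sibling FFmpeg container."""
    def run(self, job_id, source_path, output_path, preset=None,
            trim_start=None, trim_end=None, duration=None,
            progress_callback=None) -> bool:
        raise NotImplementedError  # sketch only

_executors["docker"] = DockerExecutor   # register under a new key
print(type(get_executor()).__name__)    # "DockerExecutor" once MPR_EXECUTOR=docker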

121
core/task/gcp_handler.py Normal file
View File

@@ -0,0 +1,121 @@
"""
Google Cloud Run Job handler for media transcoding.
Reads job payload from the MPR_JOB_PAYLOAD env var (injected by GCPExecutor),
downloads source from S3-compatible storage (GCS via HMAC + S3 API),
runs FFmpeg, uploads result, and calls back to the API.
Uses core/storage and core/ffmpeg — same modules as the Celery worker.
No cloud-provider SDK required here; storage goes through core.storage (boto3 + S3 compat).
Entry point: python -m task.gcp_handler (set as Cloud Run Job command)
"""
import json
import logging
import os
import sys
import tempfile
from pathlib import Path
import requests
from core.ffmpeg.transcode import TranscodeConfig, transcode
from core.storage import BUCKET_IN, BUCKET_OUT, download_to_temp, upload_file
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def main() -> None:
raw = os.environ.get("MPR_JOB_PAYLOAD")
if not raw:
logger.error("MPR_JOB_PAYLOAD not set")
sys.exit(1)
event = json.loads(raw)
job_id = event["job_id"]
source_key = event["source_key"]
output_key = event["output_key"]
preset = event.get("preset")
trim_start = event.get("trim_start")
trim_end = event.get("trim_end")
duration = event.get("duration")
callback_url = event.get("callback_url", "")
api_key = event.get("api_key", "")
logger.info(f"Starting job {job_id}: {source_key} -> {output_key}")
tmp_source = download_to_temp(BUCKET_IN, source_key)
ext_out = Path(output_key).suffix or ".mp4"
fd, tmp_output = tempfile.mkstemp(suffix=ext_out)
os.close(fd)
try:
if preset:
config = TranscodeConfig(
input_path=tmp_source,
output_path=tmp_output,
video_codec=preset.get("video_codec", "libx264"),
video_bitrate=preset.get("video_bitrate"),
video_crf=preset.get("video_crf"),
video_preset=preset.get("video_preset"),
resolution=preset.get("resolution"),
framerate=preset.get("framerate"),
audio_codec=preset.get("audio_codec", "aac"),
audio_bitrate=preset.get("audio_bitrate"),
audio_channels=preset.get("audio_channels"),
audio_samplerate=preset.get("audio_samplerate"),
container=preset.get("container", "mp4"),
extra_args=preset.get("extra_args", []),
trim_start=trim_start,
trim_end=trim_end,
)
else:
config = TranscodeConfig(
input_path=tmp_source,
output_path=tmp_output,
video_codec="copy",
audio_codec="copy",
trim_start=trim_start,
trim_end=trim_end,
)
success = transcode(config, duration=duration)
if not success:
raise RuntimeError("Transcode returned False")
logger.info(f"Uploading to {BUCKET_OUT}/{output_key}")
upload_file(tmp_output, BUCKET_OUT, output_key)
_callback(callback_url, job_id, api_key, {"status": "completed"})
logger.info(f"Job {job_id} completed")
sys.exit(0)
except Exception as e:
logger.exception(f"Job {job_id} failed: {e}")
_callback(callback_url, job_id, api_key, {"status": "failed", "error": str(e)})
sys.exit(1)
finally:
for f in [tmp_source, tmp_output]:
try:
os.unlink(f)
except OSError:
pass

def _callback(callback_url: str, job_id: str, api_key: str, payload: dict) -> None:
if not callback_url:
return
try:
url = f"{callback_url}/jobs/{job_id}/callback"
headers = {"X-API-Key": api_key} if api_key else {}
resp = requests.post(url, json=payload, headers=headers, timeout=10)
logger.info(f"Callback response: {resp.status_code}")
except Exception as e:
logger.warning(f"Callback failed: {e}")

if __name__ == "__main__":
main()
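
Because the handler reads everything from MPR_JOB_PAYLOAD and core.storage, it can be smoke-tested without Cloud Run. A sketch, assuming the S3-compatible storage env vars are already set; every key and ID below is a placeholder:

import json, os, subprocess

payload = {
    "job_id": "00000000-0000-0000-0000-000000000000",
    "source_key": "clips/input.mp4",
    "output_key": "clips/output.mp4",
    "preset": None,          # None -> stream-copy trim path
    "trim_start": 1.0,
    "trim_end": 5.0,
    "duration": 4.0,
    "callback_url": "",      # empty -> callback is skipped
    "api_key": "",
}
env = {**os.environ, "MPR_JOB_PAYLOAD": json.dumps(payload)}
subprocess.run(["python", "-m", "task.gcp_handler"], env=env, check=True)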

148
core/task/lambda_handler.py Normal file
View File

@@ -0,0 +1,148 @@
"""
AWS Lambda handler for media transcoding.
Receives a job payload from Step Functions, downloads source from S3,
runs FFmpeg, uploads result to S3, and calls back to the API.
Uses the same core/ffmpeg module as the local Celery worker.
"""
import json
import logging
import os
import tempfile
from pathlib import Path
import boto3
import requests
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# S3 config
S3_BUCKET_IN = os.environ.get("S3_BUCKET_IN", "mpr-media-in")
S3_BUCKET_OUT = os.environ.get("S3_BUCKET_OUT", "mpr-media-out")
AWS_REGION = os.environ.get("AWS_REGION", "us-east-1")
s3 = boto3.client("s3", region_name=AWS_REGION)

def handler(event, context):
"""
Lambda entry point.
Event payload (from Step Functions):
{
"job_id": "uuid",
"source_key": "path/to/source.mp4",
"output_key": "output_filename.mp4",
"preset": {...} or null,
"trim_start": float or null,
"trim_end": float or null,
"duration": float or null,
"callback_url": "https://mpr.mcrn.ar/api",
"api_key": "secret"
}
"""
job_id = event["job_id"]
source_key = event["source_key"]
output_key = event["output_key"]
preset = event.get("preset")
trim_start = event.get("trim_start")
trim_end = event.get("trim_end")
duration = event.get("duration")
callback_url = event.get("callback_url", "")
api_key = event.get("api_key", "")
logger.info(f"Starting job {job_id}: {source_key} -> {output_key}")
# Download source from S3
ext_in = Path(source_key).suffix or ".mp4"
tmp_source = tempfile.mktemp(suffix=ext_in, dir="/tmp")
logger.info(f"Downloading s3://{S3_BUCKET_IN}/{source_key}")
s3.download_file(S3_BUCKET_IN, source_key, tmp_source)
# Prepare output temp file
ext_out = Path(output_key).suffix or ".mp4"
tmp_output = tempfile.mktemp(suffix=ext_out, dir="/tmp")
try:
# Import ffmpeg module (bundled in container)
from core.ffmpeg.transcode import TranscodeConfig, transcode
if preset:
config = TranscodeConfig(
input_path=tmp_source,
output_path=tmp_output,
video_codec=preset.get("video_codec", "libx264"),
video_bitrate=preset.get("video_bitrate"),
video_crf=preset.get("video_crf"),
video_preset=preset.get("video_preset"),
resolution=preset.get("resolution"),
framerate=preset.get("framerate"),
audio_codec=preset.get("audio_codec", "aac"),
audio_bitrate=preset.get("audio_bitrate"),
audio_channels=preset.get("audio_channels"),
audio_samplerate=preset.get("audio_samplerate"),
container=preset.get("container", "mp4"),
extra_args=preset.get("extra_args", []),
trim_start=trim_start,
trim_end=trim_end,
)
else:
config = TranscodeConfig(
input_path=tmp_source,
output_path=tmp_output,
video_codec="copy",
audio_codec="copy",
trim_start=trim_start,
trim_end=trim_end,
)
success = transcode(config, duration=duration)
if not success:
raise RuntimeError("Transcode returned False")
# Upload result to S3
logger.info(f"Uploading s3://{S3_BUCKET_OUT}/{output_key}")
s3.upload_file(tmp_output, S3_BUCKET_OUT, output_key)
result = {"status": "completed", "job_id": job_id, "output_key": output_key}
# Callback to API
_callback(callback_url, job_id, api_key, {"status": "completed"})
return result
except Exception as e:
logger.exception(f"Job {job_id} failed: {e}")
_callback(callback_url, job_id, api_key, {
"status": "failed",
"error": str(e),
})
return {"status": "failed", "job_id": job_id, "error": str(e)}
finally:
for f in [tmp_source, tmp_output]:
try:
os.unlink(f)
except OSError:
pass

def _callback(callback_url: str, job_id: str, api_key: str, payload: dict) -> None:
"""Call back to API with job result."""
if not callback_url:
return
try:
url = f"{callback_url}/jobs/{job_id}/callback"
headers = {}
if api_key:
headers["X-API-Key"] = api_key
resp = requests.post(url, json=payload, headers=headers, timeout=10)
logger.info(f"Callback response: {resp.status_code}")
except Exception as e:
logger.warning(f"Callback failed: {e}")
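
The handler is plain Python, so the payload shape documented in its docstring can be fed to it directly for a smoke test. A sketch, assuming the module imports as core.task.lambda_handler and the bucket env vars point at real buckets; all keys are placeholders:

from core.task.lambda_handler import handler

event = {
    "job_id": "test-123",
    "source_key": "clips/input.mp4",
    "output_key": "clips/output.mp4",
    "preset": {"video_codec": "libx264", "video_crf": 23, "container": "mp4"},
    "trim_start": None,
    "trim_end": None,
    "duration": 120.0,
    "callback_url": "",   # empty -> callback is skipped
    "api_key": "",
}
print(handler(event, context=None))  # context is unused by this handler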

105
core/task/tasks.py Normal file
View File

@@ -0,0 +1,105 @@
"""
Celery tasks for job processing.
"""
import logging
import os
from typing import Any, Dict, Optional
from celery import shared_task
from core.storage import BUCKET_IN, BUCKET_OUT, download_to_temp, upload_file
from core.rpc.server import update_job_progress
from core.task.executor import get_executor
logger = logging.getLogger(__name__)

@shared_task(bind=True, queue="transcode", max_retries=3, default_retry_delay=60)
def run_transcode_job(
self,
job_id: str,
source_key: str,
output_key: str,
preset: Optional[Dict[str, Any]] = None,
trim_start: Optional[float] = None,
trim_end: Optional[float] = None,
duration: Optional[float] = None,
) -> Dict[str, Any]:
"""
Celery task to run a transcode/trim job.
Downloads source from S3, runs FFmpeg, uploads result to S3.
"""
logger.info(f"Starting job {job_id}: {source_key} -> {output_key}")
update_job_progress(job_id, progress=0, status="processing")
# Download source from S3 to temp file
logger.info(f"Downloading {source_key} from {BUCKET_IN}")
tmp_source = download_to_temp(BUCKET_IN, source_key)
# Create temp output path with same extension
import tempfile
from pathlib import Path
ext = Path(output_key).suffix or ".mp4"
fd, tmp_output = tempfile.mkstemp(suffix=ext)
os.close(fd)
def progress_callback(percent: int, details: Dict[str, Any]) -> None:
update_job_progress(
job_id,
progress=percent,
current_time=details.get("time", 0.0),
status="processing",
)
try:
executor = get_executor()
success = executor.run(
job_id=job_id,
source_path=tmp_source,
output_path=tmp_output,
preset=preset,
trim_start=trim_start,
trim_end=trim_end,
duration=duration,
progress_callback=progress_callback,
)
if success:
# Upload result to S3
logger.info(f"Uploading {output_key} to {BUCKET_OUT}")
upload_file(tmp_output, BUCKET_OUT, output_key)
logger.info(f"Job {job_id} completed successfully")
update_job_progress(job_id, progress=100, status="completed")
return {
"status": "completed",
"job_id": job_id,
"output_key": output_key,
}
else:
raise RuntimeError("Executor returned False")
except Exception as e:
logger.exception(f"Job {job_id} failed: {e}")
update_job_progress(job_id, progress=0, status="failed", error=str(e))
if self.request.retries < self.max_retries:
raise self.retry(exc=e)
return {
"status": "failed",
"job_id": job_id,
"error": str(e),
}
finally:
# Clean up temp files
for f in [tmp_source, tmp_output]:
try:
os.unlink(f)
except OSError:
pass
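
A sketch of how a caller (for instance the create_job mutation) might enqueue this task; every argument value below is a placeholder:

from core.task.tasks import run_transcode_job

result = run_transcode_job.delay(
    job_id="00000000-0000-0000-0000-000000000000",  # TranscodeJob UUID
    source_key="uploads/talk.mkv",                  # object key in BUCKET_IN
    output_key="renders/talk.mp4",                  # object key in BUCKET_OUT
    preset={"video_codec": "libx264", "audio_codec": "aac", "container": "mp4"},
    duration=3600.0,   # source duration in seconds; drives progress percent
)
# Lands on the "transcode" queue; on failure Celery retries up to 3 times, 60s apart.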