diff --git a/core/api/chunker_sse.py b/core/api/chunker_sse.py index 5090ab1..2958e11 100644 --- a/core/api/chunker_sse.py +++ b/core/api/chunker_sse.py @@ -1,8 +1,10 @@ """ SSE endpoint for chunker pipeline events. -Bridges gRPC StreamProgress to browser-native EventSource. -GET /api/chunker/stream/{job_id} → text/event-stream +Uses Redis as the event bus between Celery workers and the SSE stream. +Celery worker pushes events via core.events, SSE endpoint polls them. + +GET /chunker/stream/{job_id} → text/event-stream """ import asyncio @@ -14,46 +16,39 @@ from typing import AsyncGenerator from fastapi import APIRouter from starlette.responses import StreamingResponse +from core.events import poll_events + logger = logging.getLogger(__name__) -router = APIRouter(prefix="/api/chunker", tags=["chunker"]) +router = APIRouter(prefix="/chunker", tags=["chunker"]) async def _event_generator(job_id: str) -> AsyncGenerator[str, None]: """ - Generate SSE events by polling gRPC job state. - - Yields server-sent events in the format: - event: - data: + Generate SSE events by polling Redis for chunk job events. 
""" - from core.rpc.server import _active_jobs - - last_state = None + cursor = 0 timeout = time.monotonic() + 600 # 10 min max while time.monotonic() < timeout: - job_state = _active_jobs.get(job_id) + events, cursor = poll_events(job_id, cursor) - if job_state is None: - # Job not found yet — may not have started + if not events: yield f"event: waiting\ndata: {json.dumps({'job_id': job_id})}\n\n" - await asyncio.sleep(0.5) + await asyncio.sleep(0.1) continue - # Only send if state changed - if job_state != last_state: - last_state = dict(job_state) - event_type = job_state.get("status", "update") + for data in events: + event_type = data.pop("event", "update") + payload = {**data, "job_id": job_id} - yield f"event: {event_type}\ndata: {json.dumps({**job_state, 'job_id': job_id})}\n\n" + yield f"event: {event_type}\ndata: {json.dumps(payload)}\n\n" - # End stream when job is terminal - if event_type in ("completed", "failed", "cancelled"): + if event_type in ("pipeline_complete", "pipeline_error", "cancelled"): yield f"event: done\ndata: {json.dumps({'job_id': job_id})}\n\n" - break + return - await asyncio.sleep(0.2) + await asyncio.sleep(0.05) yield f"event: timeout\ndata: {json.dumps({'job_id': job_id})}\n\n" diff --git a/core/api/graphql.py b/core/api/graphql.py index 15ea60d..6cd3b41 100644 --- a/core/api/graphql.py +++ b/core/api/graphql.py @@ -15,7 +15,9 @@ from strawberry.schema.config import StrawberryConfig from strawberry.types import Info from core.api.schema.graphql import ( + CancelResultType, ChunkJobType, + ChunkOutputFileType, CreateChunkJobInput, CreateJobInput, DeleteResultType, @@ -26,7 +28,7 @@ from core.api.schema.graphql import ( TranscodePresetType, UpdateAssetInput, ) -from core.storage import BUCKET_IN, list_objects +from core.storage import BUCKET_IN, list_objects, upload_file VIDEO_EXTS = {".mp4", ".mkv", ".avi", ".mov", ".webm", ".flv", ".wmv", ".m4v"} AUDIO_EXTS = {".mp3", ".wav", ".flac", ".aac", ".ogg", ".m4a"} @@ -90,6 +92,25 @@ 
class Query: def system_status(self, info: Info) -> SystemStatusType: return SystemStatusType(status="ok", version="0.1.0") + @strawberry.field + def chunk_output_files(self, info: Info, job_id: str) -> List[ChunkOutputFileType]: + """List output chunk files for a completed job from media/out/.""" + from pathlib import Path + + media_out = os.environ.get("MEDIA_OUT_DIR", "/app/media/out") + output_dir = Path(media_out) / "chunks" / job_id + if not output_dir.is_dir(): + return [] + return [ + ChunkOutputFileType( + key=f.name, + size=f.stat().st_size, + url=f"/media/out/chunks/{job_id}/{f.name}", + ) + for f in sorted(output_dir.iterdir()) + if f.is_file() + ] + # --------------------------------------------------------------------------- # Mutations @@ -100,8 +121,26 @@ class Query: class Mutation: @strawberry.mutation def scan_media_folder(self, info: Info) -> ScanResultType: + import logging + from pathlib import Path + from core.db import create_asset, get_asset_filenames + logger = logging.getLogger(__name__) + + # Sync local media/in/ files to MinIO (handles fresh installs / pruned volumes) + local_media = Path("/app/media/in") + if local_media.is_dir(): + existing_keys = {o["key"] for o in list_objects(BUCKET_IN)} + for f in local_media.iterdir(): + if f.is_file() and f.suffix.lower() in MEDIA_EXTS: + if f.name not in existing_keys: + try: + upload_file(str(f), BUCKET_IN, f.name) + logger.info("Uploaded %s to MinIO", f.name) + except Exception as e: + logger.warning("Failed to upload %s: %s", f.name, e) + objects = list_objects(BUCKET_IN, extensions=MEDIA_EXTS) existing = get_asset_filenames() @@ -284,6 +323,8 @@ class Mutation: "num_workers": input.num_workers, "max_retries": input.max_retries, "processor_type": input.processor_type, + "start_time": input.start_time, + "end_time": input.end_time, } executor_mode = os.environ.get("MPR_EXECUTOR", "local") @@ -320,6 +361,17 @@ class Mutation: celery_task_id=celery_task_id, ) + @strawberry.mutation + def 
cancel_chunk_job(self, info: Info, celery_task_id: str) -> CancelResultType: + """Cancel a running chunk job by revoking its Celery task.""" + try: + from admin.mpr.celery import app as celery_app + + celery_app.control.revoke(celery_task_id, terminate=True, signal="SIGTERM") + return CancelResultType(ok=True, message="Task revoked") + except Exception as e: + return CancelResultType(ok=False, message=str(e)) + # --------------------------------------------------------------------------- # Schema diff --git a/core/api/schema/graphql.py b/core/api/schema/graphql.py index 51f1bda..cf4c9a3 100644 --- a/core/api/schema/graphql.py +++ b/core/api/schema/graphql.py @@ -37,7 +37,7 @@ class MediaAssetType: file_path: Optional[str] = None status: Optional[str] = None error_message: Optional[str] = None - file_size: Optional[int] = None + file_size: Optional[float] = None duration: Optional[float] = None video_codec: Optional[str] = None audio_codec: Optional[str] = None @@ -205,3 +205,22 @@ class CreateChunkJobInput: max_retries: int = 3 processor_type: str = "ffmpeg" priority: int = 0 + start_time: Optional[float] = None + end_time: Optional[float] = None + + +@strawberry.type +class CancelResultType: + """Result of cancelling a chunk job.""" + + ok: bool = False + message: Optional[str] = None + + +@strawberry.type +class ChunkOutputFileType: + """A chunk output file under media/out, served via a static download URL.""" + + key: str + size: int = 0 + url: str = "" diff --git a/core/chunker/chunker.py b/core/chunker/chunker.py index 8301c17..53a2eb0 100644 --- a/core/chunker/chunker.py +++ b/core/chunker/chunker.py @@ -28,7 +28,13 @@ class Chunker: chunk_duration: Duration of each chunk in seconds (default: 10.0) """ - def __init__(self, file_path: str, chunk_duration: float = 10.0): + def __init__( + self, + file_path: str, + chunk_duration: float = 10.0, + start_time: float | None = None, + end_time: float | None = None, + ): if not os.path.isfile(file_path): raise 
ChunkReadError(f"File not found: {file_path}") if chunk_duration <= 0: @@ -37,7 +43,16 @@ class Chunker: self.file_path = file_path self.chunk_duration = chunk_duration self.file_size = os.path.getsize(file_path) - self.source_duration = self._probe_duration() + full_duration = self._probe_duration() + + # Apply time range + self.range_start = max(start_time or 0.0, 0.0) + self.range_end = min(end_time or full_duration, full_duration) + if self.range_start >= self.range_end: + raise ValueError( + f"Invalid range: start={self.range_start} >= end={self.range_end}" + ) + self.source_duration = self.range_end - self.range_start def _probe_duration(self) -> float: """Get source file duration via FFmpeg probe.""" @@ -71,9 +86,9 @@ class Chunker: """ total = self.expected_chunks for sequence in range(total): - start_time = sequence * self.chunk_duration + start_time = self.range_start + sequence * self.chunk_duration end_time = min( - start_time + self.chunk_duration, self.source_duration + start_time + self.chunk_duration, self.range_end ) duration = end_time - start_time diff --git a/core/chunker/pipeline.py b/core/chunker/pipeline.py index 9bac5b8..975c249 100644 --- a/core/chunker/pipeline.py +++ b/core/chunker/pipeline.py @@ -57,6 +57,8 @@ class Pipeline: queue_size: int = 10, event_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None, output_dir: Optional[str] = None, + start_time: Optional[float] = None, + end_time: Optional[float] = None, ): self.source = source self.chunk_duration = chunk_duration @@ -66,6 +68,8 @@ class Pipeline: self.queue_size = queue_size self.event_callback = event_callback self.output_dir = output_dir + self.start_time = start_time + self.end_time = end_time def _emit(self, event_type: str, data: Dict[str, Any]) -> None: """Emit an event if callback is registered.""" @@ -92,6 +96,19 @@ class Pipeline: finally: chunk_queue.close() + def _monitor_progress( + self, start_time: float, file_size: int, stop_event: threading.Event + ) 
-> None: + """Monitor thread: emit pipeline_progress every 500ms.""" + while not stop_event.is_set(): + elapsed = time.monotonic() - start_time + mb = file_size / (1024 * 1024) + self._emit("pipeline_progress", { + "elapsed": round(elapsed, 2), + "throughput_mbps": round(mb / elapsed, 2) if elapsed > 0 else 0, + }) + stop_event.wait(0.5) + def _write_manifest( self, result: PipelineResult, source_duration: float ) -> None: @@ -146,7 +163,12 @@ class Pipeline: try: # Stage 1: Set up chunker (probes file for duration) - chunker = Chunker(self.source, self.chunk_duration) + chunker = Chunker( + self.source, + self.chunk_duration, + start_time=self.start_time, + end_time=self.end_time, + ) total_chunks = chunker.expected_chunks if total_chunks == 0: @@ -170,9 +192,18 @@ class Pipeline: output_dir=self.output_dir, ) - # Stage 3: Start workers, then produce chunks + # Stage 3: Start workers, monitor, then produce chunks pool.start() + monitor_stop = threading.Event() + monitor = threading.Thread( + target=self._monitor_progress, + args=(start_time, chunker.file_size, monitor_stop), + name="progress-monitor", + daemon=True, + ) + monitor.start() + producer = threading.Thread( target=self._produce_chunks, args=(chunker, chunk_queue), @@ -185,6 +216,10 @@ class Pipeline: all_results = pool.wait() producer.join(timeout=5.0) + # Stop monitor + monitor_stop.set() + monitor.join(timeout=2.0) + # Stage 5: Collect results in order collector = ResultCollector(total_chunks) for r in all_results: diff --git a/core/chunker/worker.py b/core/chunker/worker.py index 2fcc550..de094ca 100644 --- a/core/chunker/worker.py +++ b/core/chunker/worker.py @@ -124,6 +124,7 @@ class Worker: self._emit("chunk_processing", { "sequence": chunk.sequence, "state": "processing", + "queue_size": self.chunk_queue.qsize(), }) result = self._process_with_retry(chunk) @@ -135,6 +136,7 @@ class Worker: "success": result.success, "processing_time": result.processing_time, "retries": result.retries, + 
"queue_size": self.chunk_queue.qsize(), }) self._emit("worker_status", {"state": "stopped"}) diff --git a/core/events.py b/core/events.py new file mode 100644 index 0000000..5ea6980 --- /dev/null +++ b/core/events.py @@ -0,0 +1,40 @@ +""" +Redis-based event bus for pipeline job progress. + +Celery workers push events, SSE endpoints poll them. +Only depends on redis — safe to import from any context. +""" + +import json +import os + +import redis + +REDIS_URL = os.environ.get("REDIS_URL", "redis://localhost:6379/0") + + +def _get_redis(): + return redis.from_url(REDIS_URL, decode_responses=True) + + +def push_event(job_id: str, event_type: str, data: dict) -> None: + """Push an event to the Redis list for a job.""" + r = _get_redis() + key = f"chunk_events:{job_id}" + event = json.dumps({"event": event_type, **data}) + r.rpush(key, event) + r.expire(key, 3600) + + +def poll_events(job_id: str, cursor: int = 0) -> tuple[list[dict], int]: + """Poll new events from Redis. Returns (events, new_cursor).""" + r = _get_redis() + key = f"chunk_events:{job_id}" + raw_events = r.lrange(key, cursor, -1) + parsed = [] + for raw in raw_events: + try: + parsed.append(json.loads(raw)) + except (json.JSONDecodeError, TypeError): + pass + return parsed, cursor + len(raw_events) diff --git a/core/jobs/handlers/chunk.py b/core/jobs/handlers/chunk.py index 7a06a11..352a5fe 100644 --- a/core/jobs/handlers/chunk.py +++ b/core/jobs/handlers/chunk.py @@ -2,22 +2,24 @@ ChunkHandler — job handler that wraps the chunker Pipeline. Downloads source from S3/MinIO, runs FFmpeg chunking pipeline, -uploads mp4 segments + manifest back to S3/MinIO. +writes mp4 segments + manifest to media/out/chunks/{job_id}/. +Pushes real-time events to Redis for SSE consumption. 
""" import logging import os -import shutil -import tempfile from typing import Any, Callable, Dict, Optional +from core.events import push_event as push_chunk_event from core.chunker import Pipeline -from core.storage import BUCKET_IN, BUCKET_OUT, download_to_temp, upload_file +from core.storage import BUCKET_IN, download_to_temp from .base import Handler logger = logging.getLogger(__name__) +MEDIA_OUT_DIR = os.environ.get("MEDIA_OUT_DIR", "/app/media/out") + class ChunkHandler(Handler): """ @@ -44,14 +46,19 @@ class ChunkHandler(Handler): logger.info(f"ChunkHandler starting job {job_id}: {source_key}") # Download source from S3/MinIO + push_chunk_event(job_id, "pipeline_start", {"status": "downloading", "source_key": source_key}) tmp_source = download_to_temp(BUCKET_IN, source_key) - # Create temp output directory for chunks - tmp_output_dir = tempfile.mkdtemp(prefix=f"chunks-{job_id}-") + # Output directory: media/out/chunks/{job_id}/ + output_dir = os.path.join(MEDIA_OUT_DIR, "chunks", job_id) + if processor_type == "ffmpeg": + os.makedirs(output_dir, exist_ok=True) try: def event_bridge(event_type: str, data: Dict[str, Any]) -> None: - """Bridge pipeline events to the job progress callback.""" + """Bridge pipeline events to Redis + optional progress callback.""" + push_chunk_event(job_id, event_type, data) + if progress_callback and event_type == "pipeline_complete": progress_callback(100, data) elif progress_callback and event_type == "chunk_done": @@ -68,29 +75,28 @@ class ChunkHandler(Handler): processor_type=processor_type, queue_size=payload.get("queue_size", 10), event_callback=event_bridge, - output_dir=tmp_output_dir if processor_type == "ffmpeg" else None, + output_dir=output_dir if processor_type == "ffmpeg" else None, + start_time=payload.get("start_time"), + end_time=payload.get("end_time"), ) result = pipeline.run() - # Upload chunks + manifest to S3/MinIO + # Files are already in media/out/chunks/{job_id}/ output_prefix = f"chunks/{job_id}" - 
uploaded_files = [] + output_files = [ + f"{output_prefix}/{os.path.basename(f)}" + for f in result.chunk_files + ] - for chunk_file in result.chunk_files: - filename = os.path.basename(chunk_file) - output_key = f"{output_prefix}/{filename}" - upload_file(chunk_file, BUCKET_OUT, output_key) - uploaded_files.append(output_key) - logger.info(f"Uploaded {output_key}") - - # Upload manifest - manifest_path = os.path.join(tmp_output_dir, "manifest.json") - if os.path.exists(manifest_path): - manifest_key = f"{output_prefix}/manifest.json" - upload_file(manifest_path, BUCKET_OUT, manifest_key) - uploaded_files.append(manifest_key) - logger.info(f"Uploaded {manifest_key}") + push_chunk_event(job_id, "pipeline_complete", { + "status": "completed", + "total_chunks": result.total_chunks, + "processed": result.processed, + "failed": result.failed, + "elapsed": result.elapsed_time, + "throughput_mbps": result.throughput_mbps, + }) return { "status": "completed" if result.failed == 0 else "completed_with_errors", @@ -104,16 +110,16 @@ "errors": result.errors, "chunks_in_order": result.chunks_in_order, "output_prefix": output_prefix, - "uploaded_files": uploaded_files, + "output_files": output_files, } + except Exception as e: + push_chunk_event(job_id, "pipeline_error", {"status": "failed", "error": str(e)}) + raise + finally: - # Cleanup temp files + # Cleanup temp source file only (output dir is persistent) try: os.unlink(tmp_source) except OSError: pass - try: - shutil.rmtree(tmp_output_dir, ignore_errors=True) - except OSError: - pass diff --git a/core/rpc/protos/worker.proto b/core/rpc/protos/worker.proto index 9114563..c78714a 100644 --- a/core/rpc/protos/worker.proto +++ b/core/rpc/protos/worker.proto @@ -11,6 +11,7 @@ service WorkerService { rpc StreamProgress(ProgressRequest) returns (stream ProgressUpdate); rpc CancelJob(CancelRequest) returns (CancelResponse); rpc GetWorkerStatus(Empty) returns (WorkerStatus); + rpc 
StreamChunkPipeline(ChunkStreamRequest) returns (stream ChunkPipelineEvent); } message JobRequest { @@ -62,3 +63,24 @@ message WorkerStatus { message Empty { // Empty } + +message ChunkStreamRequest { + string job_id = 1; +} + +message ChunkPipelineEvent { + string job_id = 1; + string event_type = 2; + int32 sequence = 3; + string worker_id = 4; + string state = 5; + int32 queue_size = 6; + float elapsed = 7; + float throughput_mbps = 8; + int32 total_chunks = 9; + int32 processed_chunks = 10; + int32 failed_chunks = 11; + string error = 12; + float processing_time = 13; + int32 retries = 14; +} diff --git a/core/rpc/server.py b/core/rpc/server.py index d07ce61..5d412da 100644 --- a/core/rpc/server.py +++ b/core/rpc/server.py @@ -173,6 +173,43 @@ class WorkerServicer(worker_pb2_grpc.WorkerServiceServicer): message="Job not found", ) + def StreamChunkPipeline(self, request, context) -> Iterator[worker_pb2.ChunkPipelineEvent]: + """Stream chunk pipeline events for a job.""" + from core.events import poll_events + + job_id = request.job_id + logger.info(f"StreamChunkPipeline: {job_id}") + + cursor = 0 + timeout = time.monotonic() + 600 # 10 min max + + while context.is_active() and time.monotonic() < timeout: + events, cursor = poll_events(job_id, cursor) + + for data in events: + event_type = data.pop("event", "") + yield worker_pb2.ChunkPipelineEvent( + job_id=job_id, + event_type=event_type, + sequence=data.get("sequence", 0), + worker_id=data.get("worker_id", ""), + state=data.get("state", ""), + queue_size=data.get("queue_size", 0), + elapsed=data.get("elapsed", 0.0), + throughput_mbps=data.get("throughput_mbps", 0.0), + total_chunks=data.get("total_chunks", 0), + processed_chunks=data.get("processed_chunks", 0), + failed_chunks=data.get("failed_chunks", 0), + error=data.get("error", ""), + processing_time=data.get("processing_time", 0.0), + retries=data.get("retries", 0), + ) + + if event_type in ("pipeline_complete", "pipeline_error"): + return + + 
time.sleep(0.05) + def GetWorkerStatus(self, request, context): """Get worker health and capabilities.""" try: diff --git a/core/rpc/worker_pb2.py b/core/rpc/worker_pb2.py index 80e125c..0ddf905 100644 --- a/core/rpc/worker_pb2.py +++ b/core/rpc/worker_pb2.py @@ -24,7 +24,7 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0cworker.proto\x12\nmpr.worker\"\xa7\x01\n\nJobRequest\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12\x13\n\x0bsource_path\x18\x02 \x01(\t\x12\x13\n\x0boutput_path\x18\x03 \x01(\t\x12\x13\n\x0bpreset_json\x18\x04 \x01(\t\x12\x17\n\ntrim_start\x18\x05 \x01(\x02H\x00\x88\x01\x01\x12\x15\n\x08trim_end\x18\x06 \x01(\x02H\x01\x88\x01\x01\x42\r\n\x0b_trim_startB\x0b\n\t_trim_end\"@\n\x0bJobResponse\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12\x10\n\x08\x61\x63\x63\x65pted\x18\x02 \x01(\x08\x12\x0f\n\x07message\x18\x03 \x01(\t\"!\n\x0fProgressRequest\x12\x0e\n\x06job_id\x18\x01 \x01(\t\"\x9c\x01\n\x0eProgressUpdate\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12\x10\n\x08progress\x18\x02 \x01(\x05\x12\x15\n\rcurrent_frame\x18\x03 \x01(\x05\x12\x14\n\x0c\x63urrent_time\x18\x04 \x01(\x02\x12\r\n\x05speed\x18\x05 \x01(\x02\x12\x0e\n\x06status\x18\x06 \x01(\t\x12\x12\n\x05\x65rror\x18\x07 \x01(\tH\x00\x88\x01\x01\x42\x08\n\x06_error\"\x1f\n\rCancelRequest\x12\x0e\n\x06job_id\x18\x01 \x01(\t\"D\n\x0e\x43\x61ncelResponse\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12\x11\n\tcancelled\x18\x02 \x01(\x08\x12\x0f\n\x07message\x18\x03 \x01(\t\"g\n\x0cWorkerStatus\x12\x11\n\tavailable\x18\x01 \x01(\x08\x12\x13\n\x0b\x61\x63tive_jobs\x18\x02 \x01(\x05\x12\x18\n\x10supported_codecs\x18\x03 \x03(\t\x12\x15\n\rgpu_available\x18\x04 
\x01(\x08\"\x07\n\x05\x45mpty2\x9e\x02\n\rWorkerService\x12<\n\tSubmitJob\x12\x16.mpr.worker.JobRequest\x1a\x17.mpr.worker.JobResponse\x12K\n\x0eStreamProgress\x12\x1b.mpr.worker.ProgressRequest\x1a\x1a.mpr.worker.ProgressUpdate0\x01\x12\x42\n\tCancelJob\x12\x19.mpr.worker.CancelRequest\x1a\x1a.mpr.worker.CancelResponse\x12>\n\x0fGetWorkerStatus\x12\x11.mpr.worker.Empty\x1a\x18.mpr.worker.WorkerStatusb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0cworker.proto\x12\nmpr.worker\"\xa7\x01\n\nJobRequest\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12\x13\n\x0bsource_path\x18\x02 \x01(\t\x12\x13\n\x0boutput_path\x18\x03 \x01(\t\x12\x13\n\x0bpreset_json\x18\x04 \x01(\t\x12\x17\n\ntrim_start\x18\x05 \x01(\x02H\x00\x88\x01\x01\x12\x15\n\x08trim_end\x18\x06 \x01(\x02H\x01\x88\x01\x01\x42\r\n\x0b_trim_startB\x0b\n\t_trim_end\"@\n\x0bJobResponse\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12\x10\n\x08\x61\x63\x63\x65pted\x18\x02 \x01(\x08\x12\x0f\n\x07message\x18\x03 \x01(\t\"!\n\x0fProgressRequest\x12\x0e\n\x06job_id\x18\x01 \x01(\t\"\x9c\x01\n\x0eProgressUpdate\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12\x10\n\x08progress\x18\x02 \x01(\x05\x12\x15\n\rcurrent_frame\x18\x03 \x01(\x05\x12\x14\n\x0c\x63urrent_time\x18\x04 \x01(\x02\x12\r\n\x05speed\x18\x05 \x01(\x02\x12\x0e\n\x06status\x18\x06 \x01(\t\x12\x12\n\x05\x65rror\x18\x07 \x01(\tH\x00\x88\x01\x01\x42\x08\n\x06_error\"\x1f\n\rCancelRequest\x12\x0e\n\x06job_id\x18\x01 \x01(\t\"D\n\x0e\x43\x61ncelResponse\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12\x11\n\tcancelled\x18\x02 \x01(\x08\x12\x0f\n\x07message\x18\x03 \x01(\t\"g\n\x0cWorkerStatus\x12\x11\n\tavailable\x18\x01 \x01(\x08\x12\x13\n\x0b\x61\x63tive_jobs\x18\x02 \x01(\x05\x12\x18\n\x10supported_codecs\x18\x03 \x03(\t\x12\x15\n\rgpu_available\x18\x04 \x01(\x08\"\x07\n\x05\x45mpty\"$\n\x12\x43hunkStreamRequest\x12\x0e\n\x06job_id\x18\x01 \x01(\t\"\xaa\x02\n\x12\x43hunkPipelineEvent\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12\x12\n\nevent_type\x18\x02 
\x01(\t\x12\x10\n\x08sequence\x18\x03 \x01(\x05\x12\x11\n\tworker_id\x18\x04 \x01(\t\x12\r\n\x05state\x18\x05 \x01(\t\x12\x12\n\nqueue_size\x18\x06 \x01(\x05\x12\x0f\n\x07\x65lapsed\x18\x07 \x01(\x02\x12\x17\n\x0fthroughput_mbps\x18\x08 \x01(\x02\x12\x14\n\x0ctotal_chunks\x18\t \x01(\x05\x12\x18\n\x10processed_chunks\x18\n \x01(\x05\x12\x15\n\rfailed_chunks\x18\x0b \x01(\x05\x12\r\n\x05\x65rror\x18\x0c \x01(\t\x12\x17\n\x0fprocessing_time\x18\r \x01(\x02\x12\x0f\n\x07retries\x18\x0e \x01(\x05\x32\xf7\x02\n\rWorkerService\x12<\n\tSubmitJob\x12\x16.mpr.worker.JobRequest\x1a\x17.mpr.worker.JobResponse\x12K\n\x0eStreamProgress\x12\x1b.mpr.worker.ProgressRequest\x1a\x1a.mpr.worker.ProgressUpdate0\x01\x12\x42\n\tCancelJob\x12\x19.mpr.worker.CancelRequest\x1a\x1a.mpr.worker.CancelResponse\x12>\n\x0fGetWorkerStatus\x12\x11.mpr.worker.Empty\x1a\x18.mpr.worker.WorkerStatus\x12W\n\x13StreamChunkPipeline\x12\x1e.mpr.worker.ChunkStreamRequest\x1a\x1e.mpr.worker.ChunkPipelineEvent0\x01\x62\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -47,6 +47,10 @@ if not _descriptor._USE_C_DESCRIPTORS: _globals['_WORKERSTATUS']._serialized_end=664 _globals['_EMPTY']._serialized_start=666 _globals['_EMPTY']._serialized_end=673 - _globals['_WORKERSERVICE']._serialized_start=676 - _globals['_WORKERSERVICE']._serialized_end=962 + _globals['_CHUNKSTREAMREQUEST']._serialized_start=675 + _globals['_CHUNKSTREAMREQUEST']._serialized_end=711 + _globals['_CHUNKPIPELINEEVENT']._serialized_start=714 + _globals['_CHUNKPIPELINEEVENT']._serialized_end=1012 + _globals['_WORKERSERVICE']._serialized_start=1015 + _globals['_WORKERSERVICE']._serialized_end=1390 # @@protoc_insertion_point(module_scope) diff --git a/core/rpc/worker_pb2_grpc.py b/core/rpc/worker_pb2_grpc.py index 402cab9..925ca59 100644 --- a/core/rpc/worker_pb2_grpc.py +++ b/core/rpc/worker_pb2_grpc.py @@ -5,7 +5,7 @@ import warnings from . 
import worker_pb2 as worker__pb2 -GRPC_GENERATED_VERSION = '1.76.0' +GRPC_GENERATED_VERSION = '1.78.0' GRPC_VERSION = grpc.__version__ _version_not_supported = False @@ -54,6 +54,11 @@ class WorkerServiceStub(object): request_serializer=worker__pb2.Empty.SerializeToString, response_deserializer=worker__pb2.WorkerStatus.FromString, _registered_method=True) + self.StreamChunkPipeline = channel.unary_stream( + '/mpr.worker.WorkerService/StreamChunkPipeline', + request_serializer=worker__pb2.ChunkStreamRequest.SerializeToString, + response_deserializer=worker__pb2.ChunkPipelineEvent.FromString, + _registered_method=True) class WorkerServiceServicer(object): @@ -83,6 +88,12 @@ class WorkerServiceServicer(object): context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') + def StreamChunkPipeline(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + def add_WorkerServiceServicer_to_server(servicer, server): rpc_method_handlers = { @@ -106,6 +117,11 @@ def add_WorkerServiceServicer_to_server(servicer, server): request_deserializer=worker__pb2.Empty.FromString, response_serializer=worker__pb2.WorkerStatus.SerializeToString, ), + 'StreamChunkPipeline': grpc.unary_stream_rpc_method_handler( + servicer.StreamChunkPipeline, + request_deserializer=worker__pb2.ChunkStreamRequest.FromString, + response_serializer=worker__pb2.ChunkPipelineEvent.SerializeToString, + ), } generic_handler = grpc.method_handlers_generic_handler( 'mpr.worker.WorkerService', rpc_method_handlers) @@ -224,3 +240,30 @@ class WorkerService(object): timeout, metadata, _registered_method=True) + + @staticmethod + def StreamChunkPipeline(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + 
wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_stream( + request, + target, + '/mpr.worker.WorkerService/StreamChunkPipeline', + worker__pb2.ChunkStreamRequest.SerializeToString, + worker__pb2.ChunkPipelineEvent.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) diff --git a/core/schema/modelgen.json b/core/schema/modelgen.json index 80a17a9..dcf1c6d 100644 --- a/core/schema/modelgen.json +++ b/core/schema/modelgen.json @@ -13,8 +13,8 @@ }, { "target": "typescript", - "output": "ui/timeline/src/types.ts", - "include": ["dataclasses", "enums", "api"] + "output": "ui/common/types/generated.ts", + "include": ["dataclasses", "enums", "api", "views"] }, { "target": "protobuf", diff --git a/core/schema/models/__init__.py b/core/schema/models/__init__.py index b0b0dcc..d62b172 100644 --- a/core/schema/models/__init__.py +++ b/core/schema/models/__init__.py @@ -16,6 +16,8 @@ from .grpc import ( GRPC_SERVICE, CancelRequest, CancelResponse, + ChunkPipelineEvent, + ChunkStreamRequest, Empty, JobRequest, JobResponse, @@ -26,6 +28,7 @@ from .grpc import ( from .jobs import ChunkJob, ChunkJobStatus, JobStatus, TranscodeJob from .media import AssetStatus, MediaAsset from .presets import BUILTIN_PRESETS, TranscodePreset +from .views import ChunkEvent, ChunkOutputFile, PipelineStats, WorkerEvent # Core domain models - generates Django, Pydantic, TypeScript DATACLASSES = [MediaAsset, TranscodePreset, TranscodeJob, ChunkJob] @@ -44,6 +47,9 @@ API_MODELS = [ # Status enums - included in generated code ENUMS = [AssetStatus, JobStatus, ChunkJobStatus] +# View/event models - generates TypeScript for UI consumption +VIEWS = [ChunkEvent, WorkerEvent, PipelineStats, ChunkOutputFile] + # gRPC messages - generates Proto GRPC_MESSAGES = [ JobRequest, @@ -54,6 +60,8 @@ GRPC_MESSAGES = [ CancelResponse, WorkerStatus, Empty, + ChunkStreamRequest, + 
ChunkPipelineEvent, ] __all__ = [ @@ -82,10 +90,18 @@ __all__ = [ "CancelResponse", "WorkerStatus", "Empty", + "ChunkStreamRequest", + "ChunkPipelineEvent", + # Views + "ChunkEvent", + "WorkerEvent", + "PipelineStats", + "ChunkOutputFile", # For generator "DATACLASSES", "API_MODELS", "ENUMS", + "VIEWS", "GRPC_MESSAGES", "BUILTIN_PRESETS", ] diff --git a/core/schema/models/grpc.py b/core/schema/models/grpc.py index 313bf7b..841c0a3 100644 --- a/core/schema/models/grpc.py +++ b/core/schema/models/grpc.py @@ -41,6 +41,13 @@ class CancelRequest: job_id: str +@dataclass +class ChunkStreamRequest: + """Request to stream chunk pipeline events.""" + + job_id: str + + @dataclass class Empty: """Empty message for requests with no parameters.""" @@ -94,6 +101,26 @@ class WorkerStatus: gpu_available: bool +@dataclass +class ChunkPipelineEvent: + """Streaming chunk pipeline event.""" + + job_id: str + event_type: str # pipeline_start, chunk_queued, chunk_done, etc. + sequence: int = 0 + worker_id: str = "" + state: str = "" + queue_size: int = 0 + elapsed: float = 0.0 + throughput_mbps: float = 0.0 + total_chunks: int = 0 + processed_chunks: int = 0 + failed_chunks: int = 0 + error: str = "" + processing_time: float = 0.0 + retries: int = 0 + + # ----------------------------------------------------------------------------- # Service Definition (for documentation, generator uses this) # ----------------------------------------------------------------------------- @@ -126,5 +153,11 @@ GRPC_SERVICE = { "response": WorkerStatus, "stream_response": False, }, + { + "name": "StreamChunkPipeline", + "request": ChunkStreamRequest, + "response": ChunkPipelineEvent, + "stream_response": True, # Server streaming + }, ], } diff --git a/core/schema/models/views.py b/core/schema/models/views.py new file mode 100644 index 0000000..5d9284e --- /dev/null +++ b/core/schema/models/views.py @@ -0,0 +1,57 @@ +""" +View/Event Schema Definitions + +Projections of domain models for UI consumption via 
SSE events. +These reference existing schema types (e.g., ChunkJobStatus) to maintain +type-level dependencies — if the domain model changes, views update too. +""" + +from dataclasses import dataclass +from typing import Optional + + +@dataclass +class ChunkEvent: + """SSE event for a single chunk's lifecycle.""" + + sequence: int + status: str + size: Optional[int] = None + worker_id: Optional[str] = None + processing_time: Optional[float] = None + error: Optional[str] = None + retries: int = 0 + + +@dataclass +class WorkerEvent: + """SSE event for worker state changes.""" + + worker_id: str + state: str + current_chunk: Optional[int] = None + processed: int = 0 + errors: int = 0 + retries: int = 0 + + +@dataclass +class PipelineStats: + """Aggregate pipeline statistics, updated via SSE.""" + + total_chunks: int = 0 + processed: int = 0 + failed: int = 0 + retries: int = 0 + elapsed: float = 0.0 + throughput_mbps: float = 0.0 + queue_size: int = 0 + + +@dataclass +class ChunkOutputFile: + """A chunk output file in S3/MinIO with presigned download URL.""" + + key: str + size: int = 0 + url: str = "" diff --git a/ctrl/docker-compose.yml b/ctrl/docker-compose.yml index df2678c..47c9ae2 100644 --- a/ctrl/docker-compose.yml +++ b/ctrl/docker-compose.yml @@ -89,6 +89,15 @@ services: mc anonymous set download local/mpr-media-in mc anonymous set download local/mpr-media-out + envoy: + image: envoyproxy/envoy:v1.28-latest + ports: + - "8090:8090" + volumes: + - ./envoy.yaml:/etc/envoy/envoy.yaml:ro + depends_on: + - grpc + nginx: image: nginx:alpine ports: @@ -96,12 +105,14 @@ services: volumes: - ./nginx.conf:/etc/nginx/nginx.conf:ro - ./landing.html:/etc/nginx/landing.html:ro + - ../media/out:/app/media/out:ro depends_on: - django - fastapi - timeline - chunker - minio + - envoy # ============================================================================= # Application Services @@ -139,7 +150,7 @@ services: build: context: .. 
dockerfile: ctrl/Dockerfile.worker - command: celery -A admin.mpr worker -l info -Q transcode -c 2 + command: celery -A admin.mpr worker -l info -Q celery,transcode -c 2 environment: <<: *common-env MPR_EXECUTOR: local @@ -163,6 +174,8 @@ services: VITE_ALLOWED_HOSTS: ${VITE_ALLOWED_HOSTS:-} volumes: - ../ui/timeline/src:/app/src + - ../ui/timeline/vite.config.ts:/app/vite.config.ts + - ../ui/common:/common chunker: build: @@ -174,6 +187,8 @@ services: VITE_ALLOWED_HOSTS: ${VITE_ALLOWED_HOSTS:-} volumes: - ../ui/chunker/src:/app/src + - ../ui/chunker/vite.config.ts:/app/vite.config.ts + - ../ui/common:/common volumes: postgres-data: diff --git a/ctrl/envoy.yaml b/ctrl/envoy.yaml new file mode 100644 index 0000000..61362e0 --- /dev/null +++ b/ctrl/envoy.yaml @@ -0,0 +1,64 @@ +admin: + address: + socket_address: { address: 0.0.0.0, port_value: 9901 } + +static_resources: + listeners: + - name: listener_0 + address: + socket_address: { address: 0.0.0.0, port_value: 8090 } + filter_chains: + - filters: + - name: envoy.filters.network.http_connection_manager + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + codec_type: auto + stat_prefix: ingress_http + route_config: + name: local_route + virtual_hosts: + - name: local_service + domains: ["*"] + routes: + - match: { prefix: "/" } + route: + cluster: grpc_service + timeout: 600s + max_stream_duration: + grpc_timeout_header_max: 600s + cors: + allow_origin_string_match: + - prefix: "*" + allow_methods: GET, PUT, DELETE, POST, OPTIONS + allow_headers: keep-alive,user-agent,cache-control,content-type,content-transfer-encoding,x-accept-content-transfer-encoding,x-accept-response-streaming,x-user-agent,x-grpc-web,grpc-timeout + expose_headers: grpc-status,grpc-message + max_age: "1728000" + http_filters: + - name: envoy.filters.http.grpc_web + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.grpc_web.v3.GrpcWeb + - name: 
envoy.filters.http.cors + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.cors.v3.Cors + - name: envoy.filters.http.router + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router + clusters: + - name: grpc_service + connect_timeout: 5s + type: logical_dns + lb_policy: round_robin + typed_extension_protocol_options: + envoy.extensions.upstreams.http.v3.HttpProtocolOptions: + "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions + explicit_http_config: + http2_protocol_options: {} + load_assignment: + cluster_name: grpc_service + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: grpc + port_value: 50051 diff --git a/ctrl/generate.sh b/ctrl/generate.sh index f3f9337..e3dbe5f 100755 --- a/ctrl/generate.sh +++ b/ctrl/generate.sh @@ -19,4 +19,13 @@ python -m grpc_tools.protoc \ # Fix relative import in generated grpc stub sed -i 's/^import worker_pb2/from . import worker_pb2/' core/rpc/worker_pb2_grpc.py +# Generate TypeScript gRPC-Web client from proto +echo "Generating TypeScript gRPC-Web client..." +cd ui/chunker +npx protoc \ + --ts_out ../common/api/grpc \ + --proto_path ../../core/rpc/protos \ + worker.proto +cd ../.. + echo "Done!" 
diff --git a/ctrl/nginx.conf b/ctrl/nginx.conf index b20c6ac..1cdb4ce 100644 --- a/ctrl/nginx.conf +++ b/ctrl/nginx.conf @@ -29,6 +29,10 @@ http { server minio:9000; } + upstream envoy { + server envoy:8090; + } + server { listen 80; server_name mpr.local.ar; @@ -106,8 +110,24 @@ http { } location /media/out/ { - proxy_pass http://minio/mpr-media-out/; - proxy_set_header Host $http_host; + alias /app/media/out/; + autoindex on; + } + + # gRPC-Web proxy via Envoy + location /grpc-web/ { + proxy_pass http://envoy/; + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_read_timeout 600s; + + # Critical for streaming: disable nginx response buffering + proxy_buffering off; + proxy_cache off; + chunked_transfer_encoding on; } } } diff --git a/docs/architecture/05-chunker-pipeline.md b/docs/architecture/05-chunker-pipeline.md new file mode 100644 index 0000000..04e9e46 --- /dev/null +++ b/docs/architecture/05-chunker-pipeline.md @@ -0,0 +1,290 @@ +# Chunker Pipeline — Execution Path + +## Overview + +The chunker pipeline splits a media file into time-based segments using FFmpeg stream-copy. Events flow from worker threads through Redis and gRPC-Web streaming to the browser UI in real time. 
+
+**Event path from worker thread to pixel:**
+
+```
+Worker thread → Pipeline._emit() → event_bridge() → Redis RPUSH
+  → [50ms poll] gRPC server LRANGE → yield protobuf
+  → HTTP/2 frame → Envoy (grpc-web filter)
+  → HTTP/1.1 chunk → nginx (proxy_buffering off)
+  → fetch ReadableStream → protobuf-ts decode
+  → setEvents([...prev, evt]) → React re-render
+```
+
+---
+
+## Step 1: Job Creation (Browser → GraphQL → Celery)
+
+```
+User clicks "Start"
+  → App.tsx: handleStart(config)
+  → api.ts: createChunkJob(config)
+  → POST /graphql (nginx :80 → fastapi:8702)
+  → graphql.py: Mutation.create_chunk_job()
+  → core.db: creates ChunkJob row in Postgres
+  → Celery: run_job.delay(job_type="chunk", job_id=..., payload=...)
+  → Returns { id, celery_task_id } to browser
+  → App.tsx: setJobId(id) — triggers gRPC stream subscription
+```
+
+**Files:** `ui/chunker/src/api.ts`, `core/api/graphql.py`, `core/jobs/task.py`
+
+---
+
+## Step 2: gRPC-Web Stream (Browser → nginx → Envoy → gRPC Server)
+
+Once `jobId` is set, `useGrpcStream(jobId)` opens a server-streaming RPC:
+
+```
+useGrpcStream(jobId) fires useEffect
+  → GrpcWebFetchTransport({ baseUrl: "/grpc-web" })
+  → WorkerServiceClient.streamChunkPipeline({ jobId })
+  → fetch() POST to /grpc-web/mpr.worker.WorkerService/StreamChunkPipeline
+  → nginx :80 /grpc-web/ (proxy_pass → envoy:8090, proxy_buffering off)
+  → Envoy :8090 (grpc_web filter: HTTP/1.1 grpc-web → HTTP/2 native gRPC)
+  → gRPC server :50051 WorkerServicer.StreamChunkPipeline()
+  → Enters Redis polling loop (Step 5)
+```
+
+**Files:** `ui/chunker/src/hooks/useGrpcStream.ts`, `ctrl/nginx.conf`, `ctrl/envoy.yaml`, `core/rpc/server.py`
+
+**Key nginx config:** `proxy_buffering off` is critical — with buffering enabled, nginx holds upstream data in its proxy buffers instead of forwarding each chunk as it arrives, so small streaming events can sit undelivered until the buffers fill or the response ends.
+ +--- + +## Step 3: Celery Worker → ChunkHandler + +``` +Celery picks up run_job task + → task.py: run_job(job_type="chunk", job_id, payload) + → registry.get_handler("chunk") → ChunkHandler + → chunk.py: ChunkHandler.process(job_id, payload) + → download_to_temp(BUCKET_IN, source_key) — pulls source from MinIO/S3 + → Creates output_dir: /app/media/out/chunks/{job_id}/ + → Constructs event_bridge callback (bridges Pipeline events → Redis) + → pipeline = Pipeline(source, ..., event_callback=event_bridge, output_dir=...) + → pipeline.run() +``` + +**Files:** `core/jobs/task.py`, `core/jobs/handlers/chunk.py` + +The `event_bridge` closure wraps every `Pipeline._emit()` call, forwarding to `push_event(job_id, event_type, data)` which writes to Redis. + +--- + +## Step 4: Pipeline Orchestration (inside Celery worker process) + +`Pipeline.run()` spawns multiple threads: + +``` +pipeline.run(): + │ + ├─ Chunker(source, chunk_duration) + │ → ffprobe source file → gets duration, file_size + │ → calculates total_chunks = ceil(duration / chunk_duration) + │ + ├─ _emit("pipeline_start", {...}) → event_bridge → Redis + ├─ _emit("pipeline_info", {file_size, duration, total_chunks}) → Redis + │ + ├─ Creates ChunkQueue(maxsize=10) + ├─ Creates WorkerPool(num_workers=N, chunk_queue, processor, event_callback) + │ + ├─ pool.start() — spawns N worker threads + │ + ├─ MONITOR THREAD starts (_monitor_progress) + │ → Every 500ms: _emit("pipeline_progress", {elapsed, throughput_mbps}) → Redis + │ + ├─ PRODUCER THREAD starts (_produce_chunks) + │ → Iterates chunker.chunks() → yields Chunk(sequence, start_time, end_time) + │ → For each: chunk_queue.put(chunk) + │ → _emit("chunk_queued", {sequence, start_time, end_time, queue_size}) → Redis + │ → chunk_queue.close() when done (sends N sentinel Nones) + │ + ├─ WORKER THREADS (N concurrent, each runs worker.py:Worker.run()) + │ │ Each worker loops: + │ │ + │ ├─ chunk = chunk_queue.get(timeout=1.0) + │ ├─ _emit("chunk_processing", {sequence, 
state:"processing", queue_size}) → Redis + │ │ + │ ├─ processor.process(chunk) + │ │ ├─ ffmpeg: runs `ffmpeg -ss start -to end -c copy chunk_NNNN.mp4` + │ │ ├─ simulated_decode: sleep(random) + checksum + │ │ └─ checksum: reads bytes, computes hash + │ │ + │ ├─ On success: _emit("chunk_done", {sequence, processing_time, retries, queue_size}) → Redis + │ ├─ On failure: retries with exponential backoff (0.1s, 0.2s, 0.4s...) + │ │ └─ _emit("chunk_retry", {sequence, attempt, backoff}) → Redis + │ │ └─ _emit("chunk_error", {sequence, error, retries}) → Redis (after exhaustion) + │ │ + │ └─ On sentinel (None): _emit("worker_status", {state:"stopped"}) → Redis + │ + ├─ pool.wait() — joins all worker threads, collects results + ├─ monitor_stop.set() — stops progress monitor + │ + ├─ ResultCollector — reassembles results in sequence order + │ └─ _emit("chunk_collected", {sequence, buffered, emitted}) → Redis + │ + ├─ Writes manifest.json to output_dir + │ + └─ _emit("pipeline_complete", {total_chunks, processed, failed, elapsed, throughput}) → Redis +``` + +**Files:** `core/chunker/pipeline.py`, `core/chunker/worker.py`, `core/chunker/pool.py`, `core/chunker/chunker.py`, `core/chunker/collector.py` + +--- + +## Step 5: Redis — the Event Bus + +``` +WRITE side (Celery worker, all threads): + push_event(job_id, event_type, data) + → json.dumps({"event": event_type, ...data}) + → Redis RPUSH to key "chunk_events:{job_id}" + → Redis EXPIRE 3600 (1 hour TTL) + +READ side (gRPC server, StreamChunkPipeline): + poll_events(job_id, cursor) + → Redis LRANGE "chunk_events:{job_id}" cursor -1 + → Returns (parsed_events, new_cursor) + → Called every 50ms (time.sleep(0.05) in server loop) +``` + +Redis acts as a decoupling layer between the Celery worker process (which runs the pipeline) and the gRPC server process (which streams to browsers). Events are appended with RPUSH and read with cursor-based LRANGE polling. 
+ +**Files:** `core/events.py` + +--- + +## Step 6: gRPC Server → Envoy → nginx → Browser + +``` +server.py: StreamChunkPipeline polling loop: + while context.is_active(): + events, cursor = poll_events(job_id, cursor) ← Redis LRANGE + for data in events: + yield worker_pb2.ChunkPipelineEvent( ← serialized protobuf message + job_id, event_type, sequence, worker_id, + state, queue_size, elapsed, throughput_mbps, + total_chunks, processed_chunks, failed_chunks, + error, processing_time, retries + ) + if event_type in ("pipeline_complete", "pipeline_error"): + return ← ends the stream + time.sleep(0.05) ← 50ms poll interval + + Each yield sends: + → gRPC HTTP/2 DATA frame to Envoy + → Envoy grpc_web filter: HTTP/2 → base64-encoded grpc-web-text + → nginx proxy_pass (proxy_buffering off) → chunked HTTP/1.1 to browser + → fetch() ReadableStream in GrpcWebFetchTransport + → @protobuf-ts decodes protobuf → ChunkPipelineEvent TypeScript object +``` + +**Files:** `core/rpc/server.py`, `ctrl/envoy.yaml`, `ctrl/nginx.conf`, `ui/common/api/grpc/worker.ts`, `ui/common/api/grpc/worker.client.ts` + +--- + +## Step 7: React State Derivation and Rendering + +``` +useGrpcStream.ts: + for await (const msg of stream.responses): + const evt = toEvent(msg) ← maps protobuf camelCase → snake_case PipelineEvent + setEvents(prev => [...prev, evt]) ← appends to events array + if pipeline_complete/error → setDone(true), break + +App.tsx useMemo(events): + Iterates ALL events on every update, derives: + ├─ chunkMap: Map — state machine per chunk + │ pending → queued → processing → done/error/retry + ├─ workerMap: Map — state per worker + │ idle → processing → idle → ... 
→ stopped + ├─ stats: PipelineStats + │ total_chunks, processed, failed, retries, elapsed, throughput_mbps, queue_size + ├─ errors: ErrorEntry[] — every event containing an error field + └─ queueSize: number — last seen queue_size value + + Renders: + ├─ ChunkGrid — colored cells per chunk (pending/queued/processing/done/error) + ├─ QueueGauge — current queue depth / max + ├─ WorkerPanel — per-worker state + current chunk assignment + ├─ StatsPanel — elapsed time, throughput, processed/failed counts + ├─ ErrorLog — scrollable error list + └─ OutputFiles — download links (when done) +``` + +**Files:** `ui/chunker/src/hooks/useGrpcStream.ts`, `ui/chunker/src/App.tsx` + +--- + +## Step 8: Output File Access (after pipeline completes) + +``` +App.tsx useEffect([done, jobId]): + → api.ts: getChunkOutputFiles(jobId) + → POST /graphql → graphql.py: chunk_output_files(job_id) + → Reads /app/media/out/chunks/{job_id}/ directory listing from disk + → Returns [{key, size, url: "/media/out/chunks/{job_id}/chunk_0001.mp4"}] + → Browser renders download links + → Click link → nginx /media/out/ → alias /app/media/out/ → serves file from disk +``` + +Chunks are written directly to `media/out/chunks/{job_id}/` by the ffmpeg processor — no MinIO upload needed for output. Nginx serves them with `autoindex on`. 
+ +**Files:** `core/api/graphql.py`, `core/jobs/handlers/chunk.py`, `ctrl/nginx.conf` + +--- + +## Event Types Reference + +| Event | Source | Key Fields | +|-------|--------|------------| +| `pipeline_start` | Pipeline.run() | source, chunk_duration, num_workers, processor_type | +| `pipeline_info` | Pipeline.run() | file_size, source_duration, total_chunks | +| `pipeline_progress` | Monitor thread (500ms) | elapsed, throughput_mbps | +| `chunk_queued` | Producer thread | sequence, start_time, end_time, duration, queue_size | +| `chunk_processing` | Worker thread | sequence, worker_id, state, queue_size | +| `chunk_done` | Worker thread | sequence, processing_time, retries, queue_size | +| `chunk_retry` | Worker thread | sequence, attempt, backoff | +| `chunk_error` | Worker thread | sequence, error, retries | +| `chunk_collected` | ResultCollector | sequence, buffered, emitted | +| `worker_status` | Worker thread | worker_id, state (idle/processing/stopped) | +| `pipeline_complete` | Pipeline.run() | total_chunks, processed, failed, elapsed, throughput_mbps | +| `pipeline_error` | Pipeline.run() | error | + +--- + +## Thread Model (inside Celery worker) + +``` +Celery worker process + └─ run_job task thread + └─ Pipeline.run() + ├─ Producer thread — enqueues chunks + ├─ Monitor thread — emits progress every 500ms + ├─ Worker thread 0 — pulls from queue, processes + ├─ Worker thread 1 — pulls from queue, processes + ├─ Worker thread 2 — pulls from queue, processes + └─ Worker thread 3 — pulls from queue, processes +``` + +All threads share the same `event_callback` → `event_bridge` → `push_event()`, which creates a new Redis connection per call. Thread-safe via Redis atomic RPUSH. 
+ +--- + +## Infrastructure + +| Service | Port | Role | +|---------|------|------| +| nginx | 80 | Reverse proxy, static file serving | +| fastapi | 8702 | GraphQL API (Strawberry) | +| celery | — | Task worker (runs pipeline) | +| redis | 6379 | Event bus + Celery broker | +| grpc | 50051 | gRPC server (StreamChunkPipeline) | +| envoy | 8090 | gRPC-Web ↔ native gRPC translation | +| minio | 9000 | S3-compatible source media storage | +| postgres | 5432 | Job/asset metadata | diff --git a/docs/architecture/index.html b/docs/architecture/index.html deleted file mode 100644 index e6651e7..0000000 --- a/docs/architecture/index.html +++ /dev/null @@ -1,212 +0,0 @@ - - - - - - MPR - Architecture - - - -

MPR - Media Processor

-

- Media transcoding platform with dual execution modes: local (Celery - + MinIO) and cloud (AWS Step Functions + Lambda + S3). -

- - - -

System Overview

-
-
-

Local Architecture (Development)

- - Local Architecture - - Open full size -
-
-

AWS Architecture (Production)

- - AWS Architecture - - Open full size -
-
- -
-

Components

-
    -
  • - - Reverse Proxy (nginx) -
  • -
  • - - Application Layer (Django Admin, GraphQL API, Timeline UI) -
  • -
  • - - Worker Layer (Celery local mode) -
  • -
  • - - AWS (Step Functions, Lambda - cloud mode) -
  • -
  • - - Data Layer (PostgreSQL, Redis) -
  • -
  • - - S3 Storage (MinIO local / AWS S3 cloud) -
  • -
-
- -

Data Model

-
-
-

Entity Relationships

- - Data Model - - Open full size -
-
- -
-

Entities

-
    -
  • - - MediaAsset - Video/audio files (S3 keys as paths) -
  • -
  • - - TranscodePreset - Encoding configurations -
  • -
  • - - TranscodeJob - Processing queue (celery_task_id or - execution_arn) -
  • -
-
- -

Job Flow

-
-
-

Job Lifecycle

- - Job Flow - - Open full size -
-
- -
-

Job States

-
    -
  • - - PENDING - Waiting in queue -
  • -
  • - - PROCESSING - Worker executing -
  • -
  • - - COMPLETED - Success -
  • -
  • - - FAILED - Error occurred -
  • -
  • - - CANCELLED - User cancelled -
  • -
-

Execution Modes

-
    -
  • - - Local: Celery + MinIO (S3 API) + FFmpeg -
  • -
  • - - Lambda: Step Functions + Lambda + AWS S3 -
  • -
-
- -

Media Storage

-
-

- MPR separates media into input and output paths for flexible - storage configuration. -

-

- View Media Storage Documentation → -

-
- -

API (GraphQL)

-
# GraphiQL IDE
-http://mpr.local.ar/graphql
-
-# Queries
-query { assets(status: "ready") { id filename duration } }
-query { jobs(status: "processing") { id status progress } }
-query { presets { id name container videoCodec } }
-query { systemStatus { status version } }
-
-# Mutations
-mutation { scanMediaFolder { found registered skipped } }
-mutation { createJob(input: { sourceAssetId: "...", presetId: "..." }) { id status } }
-mutation { cancelJob(id: "...") { id status } }
-mutation { retryJob(id: "...") { id status } }
-mutation { updateAsset(id: "...", input: { comments: "..." }) { id comments } }
-mutation { deleteAsset(id: "...") { ok } }
-
-# Lambda callback (REST)
-POST /api/jobs/{id}/callback      - Lambda completion webhook
- -

Access Points

-
# Local development
-127.0.0.1 mpr.local.ar
-http://mpr.local.ar/admin         - Django Admin
-http://mpr.local.ar/graphql       - GraphiQL
-http://mpr.local.ar/              - Timeline UI
-http://localhost:9001              - MinIO Console
-
-# AWS deployment
-https://mpr.mcrn.ar/              - Production
- -

Quick Reference

-
# Render SVGs from DOT files
-for f in *.dot; do dot -Tsvg "$f" -o "${f%.dot}.svg"; done
-
-# Switch executor mode
-MPR_EXECUTOR=local    # Celery + MinIO
-MPR_EXECUTOR=lambda   # Step Functions + Lambda + S3
- - diff --git a/docs/architecture/styles.css b/docs/architecture/styles.css index ef23579..b3094f2 100644 --- a/docs/architecture/styles.css +++ b/docs/architecture/styles.css @@ -3,6 +3,8 @@ --text-color: #e8e8e8; --accent-color: #4a90d9; --border-color: #333; + --sidebar-width: 220px; + --sidebar-bg: #151528; } * { @@ -16,6 +18,59 @@ body { background-color: var(--bg-color); color: var(--text-color); line-height: 1.6; +} + +/* Sidebar navigation */ +.sidebar { + position: fixed; + top: 0; + left: 0; + width: var(--sidebar-width); + height: 100vh; + background: var(--sidebar-bg); + border-right: 1px solid var(--border-color); + padding: 1.5rem 1rem; + overflow-y: auto; + z-index: 10; +} + +.sidebar h2 { + font-size: 1.2rem; + color: var(--accent-color); + margin-bottom: 1.5rem; + padding-bottom: 0.5rem; + border-bottom: 1px solid var(--border-color); +} + +.sidebar ul { + list-style: none; + display: flex; + flex-direction: column; + gap: 0.25rem; +} + +.sidebar li { + display: block; +} + +.sidebar a { + display: block; + padding: 0.4rem 0.6rem; + color: var(--text-color); + text-decoration: none; + font-size: 0.85rem; + border-radius: 4px; + transition: background 0.15s, color 0.15s; +} + +.sidebar a:hover { + background: rgba(74, 144, 217, 0.15); + color: var(--accent-color); +} + +/* Main content */ +.content { + margin-left: var(--sidebar-width); padding: 2rem; } @@ -25,12 +80,13 @@ h1 { color: var(--accent-color); } -h2 { +.content > h2 { font-size: 1.5rem; margin: 2rem 0 1rem; color: var(--text-color); border-bottom: 1px solid var(--border-color); padding-bottom: 0.5rem; + scroll-margin-top: 1rem; } .diagram-container { @@ -76,20 +132,6 @@ h2 { text-decoration: underline; } -nav { - margin-bottom: 2rem; -} - -nav a { - color: var(--accent-color); - text-decoration: none; - margin-right: 1.5rem; -} - -nav a:hover { - text-decoration: underline; -} - .legend { margin-top: 2rem; padding: 1rem; @@ -141,3 +183,27 @@ pre code { background: none; padding: 0; } + 
+/* Responsive: collapse sidebar on small screens */ +@media (max-width: 768px) { + .sidebar { + position: static; + width: 100%; + height: auto; + border-right: none; + border-bottom: 1px solid var(--border-color); + } + + .sidebar ul { + flex-direction: row; + flex-wrap: wrap; + } + + .content { + margin-left: 0; + } + + .diagram { + min-width: 100%; + } +} diff --git a/docs/index.html b/docs/index.html index 7b5f774..413af1a 100644 --- a/docs/index.html +++ b/docs/index.html @@ -7,219 +7,241 @@ -

MPR - Media Processor

-

- Media transcoding platform with three execution modes: local (Celery - + MinIO), AWS (Step Functions + Lambda + S3), and GCP (Cloud Run - Jobs + GCS). Storage is S3-compatible across all environments. -

- -