merge chunker

This commit is contained in:
2026-03-23 02:56:13 -03:00
102 changed files with 11232 additions and 936 deletions

View File

@@ -7,4 +7,4 @@ os.environ.setdefault("DJANGO_SETTINGS_MODULE", "admin.mpr.settings")
app = Celery("mpr")
app.config_from_object("django.conf:settings", namespace="CELERY")
app.autodiscover_tasks()
app.autodiscover_tasks(["core.task"])
app.autodiscover_tasks(["core.jobs"])

View File

@@ -19,6 +19,15 @@ class JobStatus(models.TextChoices):
FAILED = "failed", "Failed"
CANCELLED = "cancelled", "Cancelled"
class ChunkJobStatus(models.TextChoices):
    """Status values for a chunk job; used as the choices of ChunkJob.status."""

    # Each member is (stored value, human-readable label).
    PENDING = "pending", "Pending"
    CHUNKING = "chunking", "Chunking"
    PROCESSING = "processing", "Processing"
    COLLECTING = "collecting", "Collecting"
    COMPLETED = "completed", "Completed"
    FAILED = "failed", "Failed"
    CANCELLED = "cancelled", "Cancelled"
class MediaAsset(models.Model):
"""A video/audio file registered in the system."""
@@ -108,3 +117,34 @@ class TranscodeJob(models.Model):
def __str__(self):
return str(self.id)
class ChunkJob(models.Model):
    """A chunk pipeline job — splits a media file into chunks and processes them"""

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    # Plain UUID column (not a ForeignKey), so the database does not enforce
    # that the referenced asset exists.
    source_asset_id = models.UUIDField()

    # Job configuration.
    chunk_duration = models.FloatField(default=10.0)
    num_workers = models.IntegerField(default=4)
    max_retries = models.IntegerField(default=3)
    processor_type = models.CharField(max_length=255)

    # Lifecycle state; allowed values come from ChunkJobStatus.
    status = models.CharField(max_length=20, choices=ChunkJobStatus.choices, default=ChunkJobStatus.PENDING)
    progress = models.FloatField(default=0.0)

    # Counters; all start at zero.
    total_chunks = models.IntegerField(default=0)
    processed_chunks = models.IntegerField(default=0)
    failed_chunks = models.IntegerField(default=0)
    retry_count = models.IntegerField(default=0)
    error_message = models.TextField(blank=True, default='')

    # Nullable metrics: NULL until a value is recorded.
    throughput_mbps = models.FloatField(null=True, blank=True, default=None)
    elapsed_seconds = models.FloatField(null=True, blank=True, default=None)

    celery_task_id = models.CharField(max_length=255, null=True, blank=True)
    priority = models.IntegerField(default=0)

    # created_at is set automatically on insert; the other timestamps are
    # nullable and must be set explicitly.
    created_at = models.DateTimeField(auto_now_add=True)
    started_at = models.DateTimeField(null=True, blank=True)
    completed_at = models.DateTimeField(null=True, blank=True)

    class Meta:
        # Newest jobs first by default.
        ordering = ["-created_at"]

    def __str__(self):
        """Return the job's UUID as a string."""
        return str(self.id)

73
core/api/chunker_sse.py Normal file
View File

@@ -0,0 +1,73 @@
"""
SSE endpoint for chunker pipeline events.
Uses Redis as the event bus between Celery workers and the SSE stream.
Celery worker pushes events via core.events, SSE endpoint polls them.
GET /chunker/stream/{job_id} → text/event-stream
"""
import asyncio
import json
import logging
import time
from typing import AsyncGenerator
from fastapi import APIRouter
from starlette.responses import StreamingResponse
from core.events import poll_events
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/chunker", tags=["chunker"])
async def _event_generator(job_id: str) -> AsyncGenerator[str, None]:
    """
    Generate SSE frames by polling for chunk job events.

    Yields a ``waiting`` frame whenever a poll returns no events, one frame
    per polled event otherwise, a ``done`` frame right after a terminal event
    (``pipeline_complete``, ``pipeline_error`` or ``cancelled``), and a
    ``timeout`` frame if no terminal event arrives within 10 minutes.

    Args:
        job_id: Identifier of the chunk job to stream; added to every payload.

    Yields:
        Text frames in ``event: <type>\\ndata: <json>\\n\\n`` form.
    """
    terminal_events = ("pipeline_complete", "pipeline_error", "cancelled")
    cursor = 0
    deadline = time.monotonic() + 600  # 10 min max

    while time.monotonic() < deadline:
        # poll_events is a plain (non-awaitable) call. Run it in a worker
        # thread so a slow poll cannot stall the event loop, and with it every
        # other open stream served by this process.
        events, cursor = await asyncio.to_thread(poll_events, job_id, cursor)

        if not events:
            yield f"event: waiting\ndata: {json.dumps({'job_id': job_id})}\n\n"
            await asyncio.sleep(0.1)
            continue

        for data in events:
            # Build the payload without mutating the dict handed back by
            # poll_events; the caller's job_id always wins over any in data.
            event_type = data.get("event", "update")
            payload = {key: value for key, value in data.items() if key != "event"}
            payload["job_id"] = job_id
            yield f"event: {event_type}\ndata: {json.dumps(payload)}\n\n"

            if event_type in terminal_events:
                yield f"event: done\ndata: {json.dumps({'job_id': job_id})}\n\n"
                return

        await asyncio.sleep(0.05)

    yield f"event: timeout\ndata: {json.dumps({'job_id': job_id})}\n\n"
@router.get("/stream/{job_id}")
async def stream_chunk_job(job_id: str):
    """
    SSE stream for a chunk pipeline job.

    The UI connects via native EventSource:

        const es = new EventSource('/api/chunker/stream/<job_id>');
        es.addEventListener('processing', (e) => { ... });
    """
    # Headers that ask clients and intermediaries not to cache or buffer the
    # stream and to keep the connection open.
    sse_headers = {
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "X-Accel-Buffering": "no",
    }
    frames = _event_generator(job_id)
    return StreamingResponse(
        frames,
        media_type="text/event-stream",
        headers=sse_headers,
    )

View File

@@ -15,6 +15,10 @@ from strawberry.schema.config import StrawberryConfig
from strawberry.types import Info
from core.api.schema.graphql import (
CancelResultType,
ChunkJobType,
ChunkOutputFileType,
CreateChunkJobInput,
CreateJobInput,
DeleteResultType,
MediaAssetType,
@@ -24,7 +28,7 @@ from core.api.schema.graphql import (
TranscodePresetType,
UpdateAssetInput,
)
from core.storage import BUCKET_IN, list_objects
from core.storage import BUCKET_IN, list_objects, upload_file
VIDEO_EXTS = {".mp4", ".mkv", ".avi", ".mov", ".webm", ".flv", ".wmv", ".m4v"}
AUDIO_EXTS = {".mp3", ".wav", ".flac", ".aac", ".ogg", ".m4a"}
@@ -88,6 +92,25 @@ class Query:
def system_status(self, info: Info) -> SystemStatusType:
return SystemStatusType(status="ok", version="0.1.0")
@strawberry.field
def chunk_output_files(self, info: Info, job_id: str) -> List[ChunkOutputFileType]:
    """List output chunk files for a job from the local media output directory.

    Looks in ``<MEDIA_OUT_DIR>/chunks/<job_id>`` (``MEDIA_OUT_DIR`` defaults to
    ``/app/media/out``) and returns one entry per regular file, sorted by path.

    Args:
        info: GraphQL resolver info (unused).
        job_id: Job identifier; must be a single plain directory name.

    Returns:
        One ChunkOutputFileType per file, or an empty list when ``job_id`` is
        not a plain name or the directory does not exist.
    """
    from pathlib import Path

    # job_id is supplied by the API client and becomes a path component below.
    # Accept only a single plain name so values such as "../.." or "/etc"
    # cannot make this resolver list directories outside the chunks folder.
    if job_id in ("", ".", "..") or Path(job_id).name != job_id:
        return []

    media_out = os.environ.get("MEDIA_OUT_DIR", "/app/media/out")
    output_dir = Path(media_out) / "chunks" / job_id
    if not output_dir.is_dir():
        return []

    return [
        ChunkOutputFileType(
            key=f.name,
            size=f.stat().st_size,
            url=f"/media/out/chunks/{job_id}/{f.name}",
        )
        for f in sorted(output_dir.iterdir())
        if f.is_file()
    ]
# ---------------------------------------------------------------------------
# Mutations
@@ -98,8 +121,26 @@ class Query:
class Mutation:
@strawberry.mutation
def scan_media_folder(self, info: Info) -> ScanResultType:
import logging
from pathlib import Path
from core.db import create_asset, get_asset_filenames
logger = logging.getLogger(__name__)
# Sync local media/in/ files to MinIO (handles fresh installs / pruned volumes)
local_media = Path("/app/media/in")
if local_media.is_dir():
existing_keys = {o["key"] for o in list_objects(BUCKET_IN)}
for f in local_media.iterdir():
if f.is_file() and f.suffix.lower() in MEDIA_EXTS:
if f.name not in existing_keys:
try:
upload_file(str(f), BUCKET_IN, f.name)
logger.info("Uploaded %s to MinIO", f.name)
except Exception as e:
logger.warning("Failed to upload %s: %s", f.name, e)
objects = list_objects(BUCKET_IN, extensions=MEDIA_EXTS)
existing = get_asset_filenames()
@@ -172,30 +213,31 @@ class Mutation:
priority=input.priority or 0,
)
payload = {
"source_key": source.file_path,
"output_key": output_filename,
"preset": preset_snapshot or None,
"trim_start": input.trim_start,
"trim_end": input.trim_end,
"duration": source.duration,
}
executor_mode = os.environ.get("MPR_EXECUTOR", "local")
if executor_mode in ("lambda", "gcp"):
from core.task.executor import get_executor
from core.jobs.executor import get_executor
get_executor().run(
job_type="transcode",
job_id=str(job.id),
source_path=source.file_path,
output_path=output_filename,
preset=preset_snapshot or None,
trim_start=input.trim_start,
trim_end=input.trim_end,
duration=source.duration,
payload=payload,
)
else:
from core.task.tasks import run_transcode_job
from core.jobs.task import run_job
result = run_transcode_job.delay(
result = run_job.delay(
job_type="transcode",
job_id=str(job.id),
source_key=source.file_path,
output_key=output_filename,
preset=preset_snapshot or None,
trim_start=input.trim_start,
trim_end=input.trim_end,
duration=source.duration,
payload=payload,
)
job.celery_task_id = result.id
job.save(update_fields=["celery_task_id"])
@@ -261,6 +303,75 @@ class Mutation:
except Exception:
raise Exception("Asset not found")
@strawberry.mutation
def create_chunk_job(self, info: Info, input: CreateChunkJobInput) -> ChunkJobType:
    """Create and dispatch a chunk pipeline job.

    Looks up the source asset, generates a fresh job id, and hands the job
    either to the configured executor (``MPR_EXECUTOR`` of ``lambda`` or
    ``gcp``) or to the Celery ``run_job`` task. The returned object reflects
    the request with status ``pending``; ``celery_task_id`` is only set on
    the Celery path.

    Raises:
        Exception: "Source asset not found" when the asset lookup fails.
    """
    import uuid

    from core.db import get_asset

    try:
        source = get_asset(input.source_asset_id)
    except Exception:
        raise Exception("Source asset not found")

    new_id = uuid.uuid4()

    # Both dispatch paths take exactly the same keyword arguments.
    dispatch_kwargs = {
        "job_type": "chunk",
        "job_id": str(new_id),
        "payload": {
            "source_key": source.file_path,
            "chunk_duration": input.chunk_duration,
            "num_workers": input.num_workers,
            "max_retries": input.max_retries,
            "processor_type": input.processor_type,
            "start_time": input.start_time,
            "end_time": input.end_time,
        },
    }

    celery_task_id = None
    if os.environ.get("MPR_EXECUTOR", "local") in ("lambda", "gcp"):
        from core.jobs.executor import get_executor

        get_executor().run(**dispatch_kwargs)
    else:
        from core.jobs.task import run_job

        celery_task_id = run_job.delay(**dispatch_kwargs).id

    return ChunkJobType(
        id=new_id,
        source_asset_id=input.source_asset_id,
        chunk_duration=input.chunk_duration,
        num_workers=input.num_workers,
        max_retries=input.max_retries,
        processor_type=input.processor_type,
        status="pending",
        progress=0.0,
        priority=input.priority,
        celery_task_id=celery_task_id,
    )
@strawberry.mutation
def cancel_chunk_job(self, info: Info, celery_task_id: str) -> CancelResultType:
    """Cancel a running chunk job by revoking its Celery task.

    Any failure (including a failed import of the Celery app) is reported in
    the result rather than raised.
    """
    try:
        from admin.mpr.celery import app as celery_app

        control = celery_app.control
        control.revoke(celery_task_id, terminate=True, signal="SIGTERM")
        return CancelResultType(ok=True, message="Task revoked")
    except Exception as exc:
        failure_text = str(exc)
        return CancelResultType(ok=False, message=failure_text)
# ---------------------------------------------------------------------------
# Schema

View File

@@ -23,6 +23,7 @@ from fastapi import FastAPI, Header, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from strawberry.fastapi import GraphQLRouter
from core.api.chunker_sse import router as chunker_router
from core.api.graphql import schema as graphql_schema
CALLBACK_API_KEY = os.environ.get("CALLBACK_API_KEY", "")
@@ -48,6 +49,9 @@ app.add_middleware(
graphql_router = GraphQLRouter(schema=graphql_schema, graphql_ide="graphiql")
app.include_router(graphql_router, prefix="/graphql")
# Chunker SSE
app.include_router(chunker_router)
@app.get("/")
def root():

View File

@@ -37,7 +37,7 @@ class MediaAssetType:
file_path: Optional[str] = None
status: Optional[str] = None
error_message: Optional[str] = None
file_size: Optional[int] = None
file_size: Optional[float] = None
duration: Optional[float] = None
video_codec: Optional[str] = None
audio_codec: Optional[str] = None
@@ -156,3 +156,71 @@ class WorkerStatusType:
active_jobs: Optional[int] = None
supported_codecs: Optional[List[str]] = None
gpu_available: Optional[bool] = None
@strawberry.enum
class ChunkJobStatus(Enum):
    """Status values of a chunk pipeline job, exposed as a GraphQL enum."""

    PENDING = "pending"
    CHUNKING = "chunking"
    PROCESSING = "processing"
    COLLECTING = "collecting"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
@strawberry.type
class ChunkJobType:
    """A chunk pipeline job."""

    # Every field is optional and defaults to None, so callers may populate
    # only the subset they know about.
    id: Optional[UUID] = None
    source_asset_id: Optional[UUID] = None
    chunk_duration: Optional[float] = None
    num_workers: Optional[int] = None
    max_retries: Optional[int] = None
    processor_type: Optional[str] = None
    # Typed as a plain string here, not as the ChunkJobStatus enum.
    status: Optional[str] = None
    progress: Optional[float] = None
    total_chunks: Optional[int] = None
    processed_chunks: Optional[int] = None
    failed_chunks: Optional[int] = None
    retry_count: Optional[int] = None
    error_message: Optional[str] = None
    throughput_mbps: Optional[float] = None
    elapsed_seconds: Optional[float] = None
    celery_task_id: Optional[str] = None
    priority: Optional[int] = None
    created_at: Optional[datetime] = None
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
@strawberry.input
class CreateChunkJobInput:
    """Request body for creating a chunk pipeline job."""

    # The only required field; everything else has a default.
    source_asset_id: UUID
    chunk_duration: float = 10.0
    num_workers: int = 4
    max_retries: int = 3
    processor_type: str = "ffmpeg"
    priority: int = 0
    # Optional time range; None means the value was not supplied.
    start_time: Optional[float] = None
    end_time: Optional[float] = None
@strawberry.type
class CancelResultType:
    """Result of cancelling a chunk job."""

    # True when the cancel request succeeded; defaults to False.
    ok: bool = False
    # Human-readable outcome or error text.
    message: Optional[str] = None
@strawberry.type
class ChunkOutputFileType:
    """A chunk output file with its size and download URL."""

    # NOTE(review): the previous docstring described S3/MinIO objects with
    # presigned URLs; confirm which storage backend callers actually use to
    # populate this type before relying on the URL format.
    key: str
    size: int = 0
    url: str = ""

64
core/chunker/__init__.py Normal file
View File

@@ -0,0 +1,64 @@
"""
Chunker pipeline — splits files into chunks, processes concurrently, reassembles in order.
Public API:
Pipeline — orchestrates the full pipeline
PipelineResult — aggregate result dataclass
Chunker — file → Chunk generator
ChunkQueue — bounded thread-safe queue
WorkerPool — manages N worker threads
ResultCollector — heapq-based ordered reassembly
"""
from .chunker import Chunker
from .collector import ResultCollector
from .exceptions import (
ChunkChecksumError,
ChunkError,
ChunkReadError,
PipelineError,
ProcessingError,
ProcessorFailureError,
ProcessorTimeoutError,
ReassemblyError,
)
from .models import Chunk, ChunkResult, PipelineResult
from .pipeline import Pipeline
from .pool import WorkerPool
from .processor import (
ChecksumProcessor,
CompositeProcessor,
FFmpegExtractProcessor,
Processor,
SimulatedDecodeProcessor,
)
from .queue import ChunkQueue
__all__ = [
# Core
"Pipeline",
"PipelineResult",
# Components
"Chunker",
"ChunkQueue",
"WorkerPool",
"ResultCollector",
# Models
"Chunk",
"ChunkResult",
# Processors
"Processor",
"ChecksumProcessor",
"SimulatedDecodeProcessor",
"CompositeProcessor",
"FFmpegExtractProcessor",
# Exceptions
"PipelineError",
"ChunkError",
"ChunkReadError",
"ChunkChecksumError",
"ProcessingError",
"ProcessorFailureError",
"ProcessorTimeoutError",
"ReassemblyError",
]

101
core/chunker/chunker.py Normal file
View File

@@ -0,0 +1,101 @@
"""
Chunker — probes a media file and yields time-based Chunk objects.
Demonstrates:
- Function parameters and defaults (Interview Topic 1)
- List comprehensions and efficient iteration / generators (Interview Topic 3)
"""
import math
import os
from typing import Generator
from core.ffmpeg.probe import probe_file
from .exceptions import ChunkReadError
from .models import Chunk
class Chunker:
    """
    Splits a media file into time-based chunks via a generator.

    Uses FFmpeg probe to get duration, then yields Chunk objects
    representing time segments (no data read — extraction happens in the processor).

    Args:
        file_path: Path to the source media file
        chunk_duration: Duration of each chunk in seconds (default: 10.0)
        start_time: Optional start of the range to chunk, in seconds;
            None means the beginning of the file. Negative values clamp to 0.
        end_time: Optional end of the range to chunk, in seconds;
            None means the end of the file. Values past the end are clamped.

    Raises:
        ChunkReadError: If the file does not exist or cannot be probed.
        ValueError: If chunk_duration is not positive or the range is empty.
    """

    # Tolerance applied to the duration/chunk ratio before rounding up, so
    # float noise (e.g. 1.1 / 0.1 == 11.000000000000002) does not add a
    # phantom zero-length trailing chunk.
    _RATIO_TOLERANCE = 1e-9

    def __init__(
        self,
        file_path: str,
        chunk_duration: float = 10.0,
        start_time: float | None = None,
        end_time: float | None = None,
    ):
        if not os.path.isfile(file_path):
            raise ChunkReadError(f"File not found: {file_path}")
        if chunk_duration <= 0:
            raise ValueError("chunk_duration must be positive")

        self.file_path = file_path
        self.chunk_duration = chunk_duration
        self.file_size = os.path.getsize(file_path)

        full_duration = self._probe_duration()

        # Apply time range. Compare against None explicitly: an end_time of
        # 0.0 is a (degenerate) requested range, not "use the whole file".
        requested_start = 0.0 if start_time is None else start_time
        requested_end = full_duration if end_time is None else end_time
        self.range_start = max(requested_start, 0.0)
        self.range_end = min(requested_end, full_duration)
        if self.range_start >= self.range_end:
            raise ValueError(
                f"Invalid range: start={self.range_start} >= end={self.range_end}"
            )
        self.source_duration = self.range_end - self.range_start

    def _probe_duration(self) -> float:
        """Get source file duration via FFmpeg probe.

        Raises:
            ChunkReadError: If probing fails or reports no positive duration.
        """
        try:
            result = probe_file(self.file_path)
            if result.duration is None or result.duration <= 0:
                raise ChunkReadError(
                    f"Cannot determine duration for {self.file_path}"
                )
            return result.duration
        except ChunkReadError:
            raise
        except Exception as e:
            raise ChunkReadError(
                f"Failed to probe {self.file_path}: {e}"
            ) from e

    @property
    def expected_chunks(self) -> int:
        """Calculate expected number of chunks (last chunk may be shorter)."""
        if self.source_duration <= 0:
            return 0
        ratio = self.source_duration / self.chunk_duration
        # A positive duration always yields at least one chunk.
        return max(1, math.ceil(ratio - self._RATIO_TOLERANCE))

    def chunks(self) -> Generator[Chunk, None, None]:
        """
        Yield Chunk objects representing time segments of the source file.

        Generator-based: chunks are yielded on demand.
        Each chunk defines a time range — actual extraction is done by the processor.
        """
        total = self.expected_chunks
        for sequence in range(total):
            start_time = self.range_start + sequence * self.chunk_duration
            # The final chunk is clipped to the end of the requested range.
            end_time = min(start_time + self.chunk_duration, self.range_end)
            yield Chunk(
                sequence=sequence,
                start_time=start_time,
                end_time=end_time,
                source_path=self.file_path,
                duration=end_time - start_time,
            )

98
core/chunker/collector.py Normal file
View File

@@ -0,0 +1,98 @@
"""
ResultCollector — reassembles chunk results in sequence order using a min-heap.
Demonstrates:
- Algorithms and sorting (Interview Topic 6) — heapq for ordered reassembly
- Core data structures (Interview Topic 5) — heap, deque
"""
import heapq
from collections import deque
from typing import List
from .exceptions import ReassemblyError
from .models import ChunkResult
class ResultCollector:
    """
    Buffers ChunkResults that arrive in any order and releases them in
    sequence order.

    Results are kept in a min-heap keyed on sequence number; a result is
    released only once every earlier sequence has been released.

    Args:
        total_chunks: Expected total number of chunks
    """

    def __init__(self, total_chunks: int):
        self.total_chunks = total_chunks
        self._heap: List[tuple[int, ChunkResult]] = []
        self._next_sequence = 0
        self._emitted: List[ChunkResult] = []
        self._seen_sequences: set[int] = set()
        # Sliding window of the most recent processing times (at most 50)
        self._recent_times: deque[float] = deque(maxlen=50)

    def add(self, result: ChunkResult) -> List[ChunkResult]:
        """
        Register one result and hand back whatever can now be released.

        Args:
            result: A ChunkResult (may arrive out of order)

        Returns:
            The results released by this call, in sequence order; empty
            while an earlier sequence is still missing.

        Raises:
            ReassemblyError: If a duplicate sequence is received
        """
        seq = result.sequence
        if seq in self._seen_sequences:
            raise ReassemblyError(
                f"Duplicate sequence number: {seq}"
            )
        self._seen_sequences.add(seq)

        # Only positive processing times feed the sliding window
        if result.processing_time > 0:
            self._recent_times.append(result.processing_time)

        heapq.heappush(self._heap, (seq, result))

        # Drain the heap for as long as its smallest entry is the next one due
        ready: List[ChunkResult] = []
        while self._heap and self._heap[0][0] == self._next_sequence:
            ready.append(heapq.heappop(self._heap)[1])
            self._next_sequence += 1
        self._emitted.extend(ready)
        return ready

    @property
    def is_complete(self) -> bool:
        """True if all expected chunks have been emitted in order."""
        return self.total_chunks == self._next_sequence

    @property
    def buffered_count(self) -> int:
        """Number of results still held in the heap (arrived out of order)."""
        return len(self._heap)

    @property
    def emitted_count(self) -> int:
        """Number of results released so far."""
        return len(self._emitted)

    @property
    def avg_processing_time(self) -> float:
        """Mean processing time over the sliding window (0.0 when empty)."""
        window = self._recent_times
        return sum(window) / len(window) if window else 0.0

    def get_ordered_results(self) -> List[ChunkResult]:
        """Return a copy of all released results, in sequence order."""
        return self._emitted[:]

View File

@@ -0,0 +1,64 @@
"""
Chunker exception hierarchy.
Demonstrates: Managing exceptions and writing resilient code (Interview Topic 7).
"""
class PipelineError(Exception):
    """Root of the chunker exception hierarchy."""


class ChunkError(PipelineError):
    """Raised for problems creating or validating a chunk."""


class ChunkReadError(ChunkError):
    """Raised when chunk data cannot be read from the source file."""


class ChunkChecksumError(ChunkError):
    """Raised when a chunk fails its integrity check.

    Attributes:
        sequence: Sequence number of the offending chunk.
        expected: Checksum that was expected.
        actual: Checksum that was computed.
    """

    def __init__(self, sequence: int, expected: str, actual: str):
        self.sequence = sequence
        self.expected = expected
        self.actual = actual
        message = (
            f"Chunk {sequence}: checksum mismatch "
            f"(expected={expected}, actual={actual})"
        )
        super().__init__(message)


class ProcessingError(PipelineError):
    """Raised for failures while workers process a chunk."""


class ProcessorTimeoutError(ProcessingError):
    """Raised when a processor exceeds its time budget for a chunk.

    Attributes:
        sequence: Sequence number of the chunk being processed.
        timeout: The time limit, in seconds, that was exceeded.
    """

    def __init__(self, sequence: int, timeout: float):
        self.sequence = sequence
        self.timeout = timeout
        message = f"Chunk {sequence}: processor timed out after {timeout}s"
        super().__init__(message)


class ProcessorFailureError(ProcessingError):
    """Raised when a chunk still fails after all retries.

    Attributes:
        sequence: Sequence number of the chunk that failed.
        retries: Number of retries that were attempted.
        original_error: The exception behind the failure.
    """

    def __init__(self, sequence: int, retries: int, original_error: Exception):
        self.sequence = sequence
        self.retries = retries
        self.original_error = original_error
        message = (
            f"Chunk {sequence}: failed after {retries} retries — {original_error}"
        )
        super().__init__(message)


class ReassemblyError(PipelineError):
    """Raised for failures while collecting and ordering results."""

54
core/chunker/models.py Normal file
View File

@@ -0,0 +1,54 @@
"""
Internal data models for the chunker pipeline.
These are pipeline-internal dataclasses, not schema models.
Schema-level ChunkJob is in core/schema/models/jobs.py.
Demonstrates: Core data structures (Interview Topic 5).
"""
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
@dataclass
class Chunk:
    """One time-based slice of the source media file.

    Times are in seconds. ``checksum`` starts empty and is meant to be
    filled in after extraction.
    """

    sequence: int
    # Slice boundaries, in seconds
    start_time: float
    end_time: float
    # Path to the source file this slice refers to
    source_path: str
    # Length of the slice (end_time - start_time)
    duration: float
    # Computed after extraction
    checksum: str = ""
@dataclass
class ChunkResult:
    """Outcome of processing one chunk.

    Only ``sequence`` and ``success`` are required; the remaining fields
    default to "valid checksum, no time recorded, no error, no retries,
    no worker, no output file".
    """

    sequence: int
    success: bool
    checksum_valid: bool = True
    processing_time: float = 0.0
    error: Optional[str] = None
    retries: int = 0
    worker_id: Optional[str] = None
    output_file: Optional[str] = None
@dataclass
class PipelineResult:
    """Summary of one complete pipeline run.

    Every field has a default, so an empty result can be built with no
    arguments. Mutable fields get a fresh container per instance.
    """

    total_chunks: int = 0
    processed: int = 0
    failed: int = 0
    retries: int = 0
    elapsed_time: float = 0.0
    throughput_mbps: float = 0.0
    worker_stats: Dict[str, Any] = field(default_factory=dict)
    errors: List[str] = field(default_factory=list)
    chunks_in_order: bool = True
    output_dir: Optional[str] = None
    chunk_files: List[str] = field(default_factory=list)

279
core/chunker/pipeline.py Normal file
View File

@@ -0,0 +1,279 @@
"""
Pipeline — orchestrates the entire chunker pipeline.
Wires: Chunker → ChunkQueue → WorkerPool → ResultCollector → PipelineResult
Demonstrates:
- Function parameters and defaults (Interview Topic 1) — configurable pipeline
- Concurrency (Interview Topic 2) — producer thread + worker pool
- OOP design (Interview Topic 4) — composition of pipeline components
- Exception handling (Interview Topic 7) — graceful error propagation
"""
import json
import logging
import threading
import time
from pathlib import Path
from typing import Any, Callable, Dict, Optional
from .chunker import Chunker
from .collector import ResultCollector
from .exceptions import PipelineError
from .models import PipelineResult
from .pool import WorkerPool
from .queue import ChunkQueue
logger = logging.getLogger(__name__)
class Pipeline:
    """
    Orchestrates the chunk processing pipeline.

    The pipeline runs in three stages:
    1. Producer thread: Chunker probes file → pushes time-based chunks to ChunkQueue
    2. Worker pool: N workers pull from queue → extract mp4 segments → emit results
    3. Collector: ResultCollector reassembles results in sequence order

    Args:
        source: Path to the source media file
        chunk_duration: Duration of each chunk in seconds (default: 10.0)
        num_workers: Number of concurrent worker threads (default: 4)
        max_retries: Max retry attempts per chunk (default: 3)
        processor_type: Processor to use — "ffmpeg", "checksum", "simulated_decode", "composite"
        queue_size: Max chunks buffered in queue (default: 10)
        event_callback: Optional callback for real-time events
        output_dir: Directory for output chunk files (required for "ffmpeg" processor)
        start_time: Optional start of the time range to process, in seconds
        end_time: Optional end of the time range to process, in seconds
    """

    def __init__(
        self,
        source: str,
        chunk_duration: float = 10.0,
        num_workers: int = 4,
        max_retries: int = 3,
        processor_type: str = "checksum",
        queue_size: int = 10,
        event_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
        output_dir: Optional[str] = None,
        start_time: Optional[float] = None,
        end_time: Optional[float] = None,
    ):
        self.source = source
        self.chunk_duration = chunk_duration
        self.num_workers = num_workers
        self.max_retries = max_retries
        self.processor_type = processor_type
        self.queue_size = queue_size
        self.event_callback = event_callback
        self.output_dir = output_dir
        self.start_time = start_time
        self.end_time = end_time

    def _emit(self, event_type: str, data: Dict[str, Any]) -> None:
        """Emit an event if callback is registered."""
        if self.event_callback:
            self.event_callback(event_type, data)

    def _produce_chunks(
        self, chunker: Chunker, chunk_queue: ChunkQueue
    ) -> None:
        """Producer thread: enqueue time-based chunks, then close the queue.

        Errors are logged and reported as a ``producer_error`` event rather
        than raised; the queue is closed in every case so consumers can stop.
        """
        try:
            for chunk in chunker.chunks():
                chunk_queue.put(chunk, timeout=30.0)
                self._emit("chunk_queued", {
                    "sequence": chunk.sequence,
                    "start_time": chunk.start_time,
                    "end_time": chunk.end_time,
                    "duration": chunk.duration,
                    "queue_size": chunk_queue.qsize(),
                })
        except Exception as e:
            logger.error("Producer error: %s", e)
            self._emit("producer_error", {"error": str(e)})
        finally:
            chunk_queue.close()

    def _monitor_progress(
        self, start_time: float, file_size: int, stop_event: threading.Event
    ) -> None:
        """Monitor thread: emit pipeline_progress every 500ms until stopped.

        The reported throughput is the whole file size divided by the elapsed
        time, not the amount of data processed so far.
        """
        mb = file_size / (1024 * 1024)
        while not stop_event.is_set():
            elapsed = time.monotonic() - start_time
            self._emit("pipeline_progress", {
                "elapsed": round(elapsed, 2),
                "throughput_mbps": round(mb / elapsed, 2) if elapsed > 0 else 0,
            })
            stop_event.wait(0.5)

    def _write_manifest(
        self, result: PipelineResult, source_duration: float
    ) -> None:
        """Write manifest.json to output_dir with segment metadata.

        Does nothing when no output_dir is configured. One segment entry is
        written per expected chunk, whether or not that chunk succeeded.
        """
        if not self.output_dir:
            return

        # NOTE(review): segment start/end are offsets from the beginning of
        # the processed range, not absolute positions in the source file.
        # Confirm that is what manifest consumers expect when start_time is set.
        segments = [
            {
                "sequence": i,
                "file": f"chunk_{i:04d}.mp4",
                "start": i * self.chunk_duration,
                "end": min((i + 1) * self.chunk_duration, source_duration),
            }
            for i in range(result.total_chunks)
        ]
        manifest = {
            "source": self.source,
            "source_duration": source_duration,
            "chunk_duration": self.chunk_duration,
            "total_chunks": result.total_chunks,
            "processed": result.processed,
            "failed": result.failed,
            "elapsed_time": result.elapsed_time,
            "throughput_mbps": result.throughput_mbps,
            "segments": segments,
        }

        manifest_path = Path(self.output_dir) / "manifest.json"
        manifest_path.write_text(json.dumps(manifest, indent=2))
        logger.info("Manifest written to %s", manifest_path)

    def run(self) -> PipelineResult:
        """
        Execute the full pipeline.

        Returns:
            PipelineResult with aggregate stats

        Raises:
            PipelineError: If the pipeline fails catastrophically
        """
        start_time = time.monotonic()

        self._emit("pipeline_start", {
            "source": self.source,
            "chunk_duration": self.chunk_duration,
            "num_workers": self.num_workers,
            "processor_type": self.processor_type,
        })

        try:
            # Stage 1: Set up chunker (probes file for duration)
            chunker = Chunker(
                self.source,
                self.chunk_duration,
                start_time=self.start_time,
                end_time=self.end_time,
            )
            total_chunks = chunker.expected_chunks

            if total_chunks == 0:
                self._emit("pipeline_complete", {"total_chunks": 0})
                return PipelineResult(chunks_in_order=True)

            self._emit("pipeline_info", {
                "file_size": chunker.file_size,
                "source_duration": chunker.source_duration,
                "total_chunks": total_chunks,
            })

            # Stage 2: Set up queue and worker pool
            chunk_queue = ChunkQueue(maxsize=self.queue_size)
            pool = WorkerPool(
                num_workers=self.num_workers,
                chunk_queue=chunk_queue,
                processor_type=self.processor_type,
                max_retries=self.max_retries,
                event_callback=self.event_callback,
                output_dir=self.output_dir,
            )
            monitor_stop = threading.Event()

            try:
                # Stage 3: Start workers, monitor, then produce chunks
                pool.start()

                monitor = threading.Thread(
                    target=self._monitor_progress,
                    args=(start_time, chunker.file_size, monitor_stop),
                    name="progress-monitor",
                    daemon=True,
                )
                monitor.start()

                producer = threading.Thread(
                    target=self._produce_chunks,
                    args=(chunker, chunk_queue),
                    name="chunk-producer",
                    daemon=True,
                )
                producer.start()

                # Stage 4: Wait for all workers to finish
                all_results = pool.wait()
                producer.join(timeout=5.0)

                # Stop monitor
                monitor_stop.set()
                monitor.join(timeout=2.0)

                # Stage 5: Collect results in order
                collector = ResultCollector(total_chunks)
                for r in all_results:
                    collector.add(r)
                    self._emit("chunk_collected", {
                        "sequence": r.sequence,
                        "success": r.success,
                        "buffered": collector.buffered_count,
                        "emitted": collector.emitted_count,
                    })

                # Build result
                elapsed = time.monotonic() - start_time
                file_size_mb = chunker.file_size / (1024 * 1024)
                throughput = file_size_mb / elapsed if elapsed > 0 else 0.0

                failed_results = [r for r in all_results if not r.success]
                total_retries = sum(r.retries for r in all_results)
                chunk_files = [
                    r.output_file for r in all_results
                    if r.success and r.output_file
                ]

                result = PipelineResult(
                    total_chunks=total_chunks,
                    processed=len(all_results),
                    failed=len(failed_results),
                    retries=total_retries,
                    elapsed_time=elapsed,
                    throughput_mbps=throughput,
                    worker_stats=pool.get_worker_stats(),
                    errors=[r.error for r in failed_results if r.error],
                    chunks_in_order=collector.is_complete,
                    output_dir=self.output_dir,
                    chunk_files=chunk_files,
                )

                # Write manifest if output_dir is set
                self._write_manifest(result, chunker.source_duration)
            finally:
                # Runs on success and on failure. Without it, an exception
                # after pool.start() would leave the monitor thread emitting
                # progress events indefinitely and the executor never shut down.
                monitor_stop.set()
                pool.shutdown()

            self._emit("pipeline_complete", {
                "total_chunks": result.total_chunks,
                "processed": result.processed,
                "failed": result.failed,
                "elapsed": result.elapsed_time,
                "throughput_mbps": result.throughput_mbps,
            })

            return result

        except PipelineError:
            raise
        except Exception as e:
            self._emit("pipeline_error", {"error": str(e)})
            raise PipelineError(f"Pipeline failed: {e}") from e

125
core/chunker/pool.py Normal file
View File

@@ -0,0 +1,125 @@
"""
WorkerPool — manages N worker threads via ThreadPoolExecutor.
Demonstrates: Python concurrency — threading (Interview Topic 2).
"""
import logging
import threading
from concurrent.futures import Future, ThreadPoolExecutor
from typing import Any, Callable, Dict, List, Optional
from .models import ChunkResult
from .processor import (
ChecksumProcessor,
CompositeProcessor,
FFmpegExtractProcessor,
Processor,
SimulatedDecodeProcessor,
)
from .queue import ChunkQueue
from .worker import Worker
logger = logging.getLogger(__name__)
def create_processor(
    processor_type: str = "checksum",
    output_dir: Optional[str] = None,
) -> Processor:
    """Build a new processor for the given type name.

    Args:
        processor_type: One of "ffmpeg", "checksum", "simulated_decode"
            or "composite".
        output_dir: Output directory; required for the "ffmpeg" type.

    Raises:
        ValueError: For an unknown type, or "ffmpeg" without output_dir.
    """
    if processor_type == "ffmpeg":
        if output_dir:
            return FFmpegExtractProcessor(output_dir=output_dir)
        raise ValueError("output_dir required for ffmpeg processor")

    if processor_type == "checksum":
        return ChecksumProcessor()

    if processor_type == "simulated_decode":
        return SimulatedDecodeProcessor()

    if processor_type == "composite":
        # Checksum stage first, then a simulated decode stage
        stages = [
            ChecksumProcessor(),
            SimulatedDecodeProcessor(ms_per_second=50.0),
        ]
        return CompositeProcessor(stages)

    raise ValueError(f"Unknown processor type: {processor_type}")
class WorkerPool:
    """
    Manages N worker threads that process chunks concurrently.

    Args:
        num_workers: Number of concurrent worker threads (default: 4)
        chunk_queue: Shared queue to pull chunks from; a new ChunkQueue is
            created when None is passed
        processor_type: Type of processor for each worker (default: "checksum")
        max_retries: Max retry attempts per chunk (default: 3)
        event_callback: Optional callback for real-time events
        output_dir: Output directory handed to each worker's processor
    """

    def __init__(
        self,
        num_workers: int = 4,
        chunk_queue: Optional[ChunkQueue] = None,
        processor_type: str = "checksum",
        max_retries: int = 3,
        event_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
        output_dir: Optional[str] = None,
    ):
        self.num_workers = num_workers
        # Compare against None rather than truthiness: a caller-supplied queue
        # that evaluates falsy (e.g. one reporting itself empty) must not be
        # swapped for a fresh one, or producer and workers would end up on
        # different queues.
        self.chunk_queue = ChunkQueue() if chunk_queue is None else chunk_queue
        self.processor_type = processor_type
        self.max_retries = max_retries
        self.event_callback = event_callback
        self.output_dir = output_dir
        self.shutdown_event = threading.Event()
        self._executor: Optional[ThreadPoolExecutor] = None
        self._futures: List[Future] = []
        self._workers: List[Worker] = []

    def start(self) -> None:
        """Start all worker threads, each with its own processor instance."""
        self._executor = ThreadPoolExecutor(
            max_workers=self.num_workers,
            thread_name_prefix="chunk-worker",
        )

        for i in range(self.num_workers):
            worker = Worker(
                worker_id=f"worker-{i}",
                chunk_queue=self.chunk_queue,
                processor=create_processor(self.processor_type, output_dir=self.output_dir),
                max_retries=self.max_retries,
                event_callback=self.event_callback,
            )
            self._workers.append(worker)
            self._futures.append(self._executor.submit(worker.run))

        logger.info(f"WorkerPool started with {self.num_workers} workers")

    def wait(self) -> List[ChunkResult]:
        """Wait for all workers to finish and collect results.

        Results are concatenated per worker in submission order, so they are
        not globally ordered by sequence. An exception raised inside a worker
        propagates from here.
        """
        all_results = []
        for future in self._futures:
            all_results.extend(future.result())
        return all_results

    def shutdown(self) -> None:
        """Signal shutdown, close the queue and wait for the executor to stop."""
        self.shutdown_event.set()
        self.chunk_queue.close()
        if self._executor:
            self._executor.shutdown(wait=True)

    def get_worker_stats(self) -> Dict[str, Any]:
        """Get per-worker statistics, keyed by worker id."""
        return {
            w.worker_id: {
                "processed": w.processed_count,
                "errors": w.error_count,
                "retries": w.retry_count,
            }
            for w in self._workers
        }

173
core/chunker/processor.py Normal file
View File

@@ -0,0 +1,173 @@
"""
Processor ABC and concrete implementations.
Demonstrates: OOP design principles — ABC, inheritance, composition (Interview Topic 4).
"""
import hashlib
import time
from abc import ABC, abstractmethod
from pathlib import Path
from typing import List
from .exceptions import ChunkChecksumError
from .models import Chunk, ChunkResult
class Processor(ABC):
    """
    Interface implemented by every chunk processor.

    A concrete processor turns one Chunk into one ChunkResult in process().
    Retrying is not a processor concern; the caller decides what to do when
    process() raises.
    """

    @abstractmethod
    def process(self, chunk: Chunk) -> ChunkResult:
        """Handle a single chunk and return its ChunkResult."""
class FFmpegExtractProcessor(Processor):
    """
    Extracts a time segment from the source file using FFmpeg stream copy.

    Each chunk is written to ``<output_dir>/chunk_<sequence>.mp4`` with both
    codecs set to "copy", i.e. no re-encoding is requested.

    Args:
        output_dir: Directory to write chunk mp4 files (created if missing)
    """

    def __init__(self, output_dir: str):
        self.output_dir = output_dir
        Path(output_dir).mkdir(parents=True, exist_ok=True)

    def process(self, chunk: Chunk) -> ChunkResult:
        """
        Cut ``chunk.start_time``..``chunk.end_time`` out of ``chunk.source_path``.

        Returns:
            A successful ChunkResult carrying the output file path.

        Raises:
            RuntimeError: If transcode() reports failure by returning False.
            OSError: If the output file cannot be read back afterwards.
        """
        # Imported lazily so importing this module does not pull in the
        # FFmpeg wrapper.
        from core.ffmpeg.transcode import TranscodeConfig, transcode

        start = time.monotonic()

        output_file = str(
            Path(self.output_dir) / f"chunk_{chunk.sequence:04d}.mp4"
        )

        config = TranscodeConfig(
            input_path=chunk.source_path,
            output_path=output_file,
            video_codec="copy",
            audio_codec="copy",
            trim_start=chunk.start_time,
            trim_end=chunk.end_time,
        )

        # transcode() signals failure with a False return elsewhere in this
        # project; ignoring it here would report a broken chunk as success.
        # Only an explicit False is treated as failure so that a None return
        # (if any code path produces one) keeps its previous meaning.
        if transcode(config) is False:
            raise RuntimeError(
                f"FFmpeg extraction failed for chunk {chunk.sequence} "
                f"({chunk.start_time}-{chunk.end_time})"
            )

        # Read the output back: this fails loudly if FFmpeg produced no file.
        # NOTE(review): the digest is computed but not attached to the result;
        # confirm whether ChunkResult has a field that should carry it.
        md5 = hashlib.md5()
        with open(output_file, "rb") as f:
            for block in iter(lambda: f.read(8192), b""):
                md5.update(block)
        checksum = md5.hexdigest()  # noqa: F841 - see NOTE above

        elapsed = time.monotonic() - start
        return ChunkResult(
            sequence=chunk.sequence,
            success=True,
            checksum_valid=True,
            processing_time=elapsed,
            output_file=output_file,
        )
class ChecksumProcessor(Processor):
    """
    Sanity-checks a chunk's time range.

    A chunk passes when its duration is positive and its end time lies after
    its start time; otherwise ChunkChecksumError is raised.
    """

    def process(self, chunk: Chunk) -> ChunkResult:
        """Return a successful ChunkResult, or raise ChunkChecksumError on a bad range."""
        began = time.monotonic()

        range_ok = chunk.duration > 0 and chunk.end_time > chunk.start_time
        if range_ok:
            elapsed = time.monotonic() - began
            return ChunkResult(
                sequence=chunk.sequence,
                success=True,
                checksum_valid=True,
                processing_time=elapsed,
            )

        raise ChunkChecksumError(
            sequence=chunk.sequence,
            expected="valid time range",
            actual=f"{chunk.start_time}-{chunk.end_time}",
        )
class SimulatedDecodeProcessor(Processor):
    """
    Stand-in processor that sleeps instead of doing real work.

    The sleep length scales linearly with the chunk's duration, which makes
    it handy for exercising the concurrent pipeline without FFmpeg.

    Args:
        ms_per_second: Milliseconds to sleep per second of chunk duration (default: 100)
    """

    def __init__(self, ms_per_second: float = 100.0):
        self.ms_per_second = ms_per_second

    def process(self, chunk: Chunk) -> ChunkResult:
        """Sleep for the simulated decode time and report success."""
        began = time.monotonic()

        # ms_per_second * seconds gives milliseconds; convert to seconds.
        pause_seconds = (self.ms_per_second * chunk.duration) / 1000.0
        time.sleep(pause_seconds)

        took = time.monotonic() - began
        return ChunkResult(
            sequence=chunk.sequence,
            success=True,
            checksum_valid=True,
            processing_time=took,
        )
class CompositeProcessor(Processor):
    """
    Runs several processors one after another on the same chunk.

    The chain stops at the first processor whose result is not successful
    and that result is returned unchanged. When every stage succeeds a new
    ChunkResult is built that covers the total elapsed time.

    Args:
        processors: Non-empty list of processors to run in order

    Raises:
        ValueError: If ``processors`` is empty
    """

    def __init__(self, processors: List[Processor]):
        if not processors:
            raise ValueError("CompositeProcessor requires at least one processor")
        self.processors = processors

    def process(self, chunk: Chunk) -> ChunkResult:
        """Run the chain; return the first failing result or a combined success."""
        began = time.monotonic()

        outcome = None
        for stage in self.processors:
            outcome = stage.process(chunk)
            if not outcome.success:
                # Short-circuit: later stages never see a failed chunk.
                return outcome

        took = time.monotonic() - began

        # Only the final stage's checksum flag is carried over.
        if outcome:
            checksum_flag = outcome.checksum_valid
        else:
            checksum_flag = True

        return ChunkResult(
            sequence=chunk.sequence,
            success=True,
            checksum_valid=checksum_flag,
            processing_time=took,
        )

76
core/chunker/queue.py Normal file
View File

@@ -0,0 +1,76 @@
"""
ChunkQueue — bounded, thread-safe queue with sentinel-based shutdown.
Demonstrates: Core data structures — queue.Queue (Interview Topic 5).
"""
import queue
from typing import Optional
from .models import Chunk
# Unique marker enqueued by close(); consumers recognise it by identity.
_SENTINEL = object()


class ChunkQueue:
    """
    Bounded chunk queue built on ``queue.Queue`` with sentinel-based shutdown.

    Provides backpressure: producers block when the queue is full,
    preventing unbounded memory usage.

    Args:
        maxsize: Maximum number of items in the queue (default: 10)
    """

    def __init__(self, maxsize: int = 10):
        self._queue: queue.Queue = queue.Queue(maxsize=maxsize)
        self._closed = False
        self.maxsize = maxsize

    def put(self, chunk: Chunk, timeout: Optional[float] = None) -> None:
        """
        Add a chunk to the queue. Blocks if full (backpressure).

        The closed flag is not checked here: a chunk put after close() is
        still enqueued.

        Args:
            chunk: The chunk to enqueue
            timeout: Max seconds to wait (None = block forever)

        Raises:
            queue.Full: If timeout expires while queue is full
        """
        self._queue.put(chunk, timeout=timeout)

    def get(self, timeout: Optional[float] = None) -> Optional[Chunk]:
        """
        Get the next chunk from the queue.

        Args:
            timeout: Max seconds to wait (None = block forever)

        Returns:
            Chunk, or None once the shutdown sentinel is reached

        Raises:
            queue.Empty: If timeout expires while queue is empty
        """
        item = self._queue.get(timeout=timeout)
        if item is _SENTINEL:
            # Put the sentinel back so every other consumer also sees it.
            self._queue.put(_SENTINEL)
            return None
        return item

    def close(self) -> None:
        """
        Signal all consumers to stop by inserting a single sentinel.

        Calling close() again is a no-op. Without that guard a second call
        would stack another sentinel and, on a ``maxsize=1`` queue whose only
        slot already holds the sentinel, block forever.
        """
        if self._closed:
            return
        self._closed = True
        self._queue.put(_SENTINEL)

    @property
    def is_closed(self) -> bool:
        """True once close() has been called."""
        return self._closed

    def qsize(self) -> int:
        """Current number of items in the queue (approximate)."""
        return self._queue.qsize()

143
core/chunker/worker.py Normal file
View File

@@ -0,0 +1,143 @@
"""
Worker — pulls chunks from queue, processes with retry logic.
Demonstrates:
- Exception handling and resilient code (Interview Topic 7)
- Concurrency (Interview Topic 2) — workers run in thread pool
"""
import logging
import queue
import time
from typing import Any, Callable, Dict, Optional
from .exceptions import ProcessorFailureError
from .models import Chunk, ChunkResult
from .processor import Processor
from .queue import ChunkQueue
logger = logging.getLogger(__name__)
class Worker:
    """
    Processes chunks from a queue with retry and exponential backoff.

    Args:
        worker_id: Identifier for this worker (e.g. "worker-0")
        chunk_queue: Source queue to pull chunks from
        processor: Processor instance to use
        max_retries: Maximum retry attempts per chunk (default: 3)
        event_callback: Optional callback for real-time status updates,
            called as ``callback(event_type, data)``
    """

    def __init__(
        self,
        worker_id: str,
        chunk_queue: ChunkQueue,
        processor: Processor,
        max_retries: int = 3,
        event_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
    ):
        self.worker_id = worker_id
        self.chunk_queue = chunk_queue
        self.processor = processor
        self.max_retries = max_retries
        self.event_callback = event_callback
        # Counters read by the pool's statistics.
        self.processed_count = 0
        self.error_count = 0
        self.retry_count = 0

    def _emit(self, event_type: str, data: Dict[str, Any]) -> None:
        """
        Emit an event if a callback is registered.

        Events are best-effort telemetry: an exception raised by the callback
        is logged and swallowed so that it cannot abort the worker loop and
        discard the results collected so far.
        """
        if not self.event_callback:
            return
        try:
            self.event_callback(event_type, {"worker_id": self.worker_id, **data})
        except Exception:
            logger.exception(
                "%s: event callback failed for %r", self.worker_id, event_type
            )

    def _process_with_retry(self, chunk: Chunk) -> ChunkResult:
        """
        Process a chunk with exponential backoff retry.

        Makes up to ``max_retries + 1`` attempts.
        Retry delays: 0.1s, 0.2s, 0.4s, ... (doubles each attempt)

        Returns:
            The processor's result (annotated with retries and worker id), or
            a failed ChunkResult carrying the last error once every attempt
            has raised.
        """
        last_error = None

        for attempt in range(self.max_retries + 1):
            try:
                if attempt > 0:
                    backoff = 0.1 * (2 ** (attempt - 1))
                    self._emit("chunk_retry", {
                        "sequence": chunk.sequence,
                        "attempt": attempt,
                        "backoff": backoff,
                    })
                    time.sleep(backoff)
                    self.retry_count += 1

                result = self.processor.process(chunk)
                result.retries = attempt
                result.worker_id = self.worker_id
                return result

            except Exception as e:
                last_error = e
                logger.warning(
                    "%s: chunk %s attempt %d/%d failed: %s",
                    self.worker_id,
                    chunk.sequence,
                    attempt + 1,
                    self.max_retries + 1,
                    e,
                )

        # All retries exhausted
        self.error_count += 1
        self._emit("chunk_error", {
            "sequence": chunk.sequence,
            "error": str(last_error),
            "retries": self.max_retries,
        })

        return ChunkResult(
            sequence=chunk.sequence,
            success=False,
            processing_time=0.0,
            error=str(last_error),
            retries=self.max_retries,
            worker_id=self.worker_id,
        )

    def run(self) -> list[ChunkResult]:
        """
        Main worker loop — pull chunks and process until queue is closed.

        Returns:
            List of ChunkResults processed by this worker (failed chunks
            included)
        """
        results = []
        self._emit("worker_status", {"state": "idle"})

        while True:
            try:
                chunk = self.chunk_queue.get(timeout=1.0)
            except queue.Empty:
                # Nothing available yet; keep polling until the queue closes.
                continue

            if chunk is None:  # Sentinel received
                break

            self._emit("chunk_processing", {
                "sequence": chunk.sequence,
                "state": "processing",
                "queue_size": self.chunk_queue.qsize(),
            })

            result = self._process_with_retry(chunk)
            results.append(result)
            # Counts every handled chunk, successful or not.
            self.processed_count += 1

            self._emit("chunk_done", {
                "sequence": chunk.sequence,
                "success": result.success,
                "processing_time": result.processing_time,
                "retries": result.retries,
                "queue_size": self.chunk_queue.qsize(),
            })

        self._emit("worker_status", {"state": "stopped"})
        return results

40
core/events.py Normal file
View File

@@ -0,0 +1,40 @@
"""
Redis-based event bus for pipeline job progress.
Celery workers push events, SSE endpoints poll them.
Only depends on redis — safe to import from any context.
"""
import json
import os
import redis
REDIS_URL = os.environ.get("REDIS_URL", "redis://localhost:6379/0")
# Client shared by every caller in this process; created on first use.
_redis_client = None


def _get_redis():
    """
    Return the process-wide Redis client, creating it on first use.

    Building a client per call also builds a new connection pool per call,
    which is wasteful for callers that poll in a tight loop. Two threads
    racing on the first call may each build a client; the last assignment
    wins and the other is simply discarded.
    """
    global _redis_client
    if _redis_client is None:
        _redis_client = redis.from_url(REDIS_URL, decode_responses=True)
    return _redis_client
def push_event(job_id: str, event_type: str, data: dict) -> None:
    """
    Append an event to the Redis list ``chunk_events:<job_id>``.

    The stored JSON object is ``data`` plus an ``"event"`` key holding
    ``event_type``. The list's expiry is reset to one hour on every push.

    Args:
        job_id: Job whose event list receives the entry
        event_type: Value stored under the "event" key
        data: JSON-serialisable payload merged into the stored object
    """
    r = _get_redis()
    key = f"chunk_events:{job_id}"

    payload = {"event": event_type, **data}
    # The explicit argument must win even if data carries its own "event" key.
    payload["event"] = event_type

    # Send both commands together so the key never exists without a TTL.
    pipe = r.pipeline()
    pipe.rpush(key, json.dumps(payload))
    pipe.expire(key, 3600)
    pipe.execute()
def poll_events(job_id: str, cursor: int = 0) -> tuple[list[dict], int]:
    """
    Read every event stored at or after ``cursor`` for a job.

    Entries that are not valid JSON are skipped, but they still advance the
    cursor so they are not fetched again.

    Returns:
        ``(events, new_cursor)`` where ``new_cursor`` is ``cursor`` plus the
        number of raw entries read.
    """
    client = _get_redis()
    entries = client.lrange(f"chunk_events:{job_id}", cursor, -1)

    events = []
    for entry in entries:
        try:
            decoded = json.loads(entry)
        except (json.JSONDecodeError, TypeError):
            continue
        events.append(decoded)

    return events, cursor + len(entries)

15
core/jobs/__init__.py Normal file
View File

@@ -0,0 +1,15 @@
"""
MPR Jobs Module
Provides executor abstraction and task dispatch for job processing.
"""
from .executor import Executor, LocalExecutor, get_executor
from .task import run_job
__all__ = [
"Executor",
"LocalExecutor",
"get_executor",
"run_job",
]

View File

@@ -1,17 +1,16 @@
"""
Executor abstraction for job processing.
Supports different backends:
- LocalExecutor: FFmpeg via Celery (default)
- LambdaExecutor: AWS Lambda (future)
Determines WHERE jobs run:
- LocalExecutor: delegates to registered Handler (default)
- LambdaExecutor: AWS Step Functions
- GCPExecutor: Google Cloud Run Jobs
"""
import os
from abc import ABC, abstractmethod
from typing import Any, Callable, Dict, Optional
from core.ffmpeg.transcode import TranscodeConfig, transcode
# Configuration from environment
MPR_EXECUTOR = os.environ.get("MPR_EXECUTOR", "local")
@@ -22,26 +21,18 @@ class Executor(ABC):
@abstractmethod
def run(
self,
job_type: str,
job_id: str,
source_path: str,
output_path: str,
preset: Optional[Dict[str, Any]] = None,
trim_start: Optional[float] = None,
trim_end: Optional[float] = None,
duration: Optional[float] = None,
payload: Dict[str, Any],
progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
) -> bool:
"""
Execute a transcode/trim job.
Execute a job.
Args:
job_type: Type of job ("transcode", "chunk", etc.)
job_id: Unique job identifier
source_path: Path to source file
output_path: Path for output file
preset: Transcode preset dict (optional, None = trim only)
trim_start: Trim start time in seconds (optional)
trim_end: Trim end time in seconds (optional)
duration: Source duration in seconds (for progress calculation)
payload: Job-type-specific configuration dict
progress_callback: Called with (percent, details_dict)
Returns:
@@ -51,62 +42,25 @@ class Executor(ABC):
class LocalExecutor(Executor):
"""Execute jobs locally using FFmpeg."""
"""Execute jobs locally using registered handlers."""
def run(
self,
job_type: str,
job_id: str,
source_path: str,
output_path: str,
preset: Optional[Dict[str, Any]] = None,
trim_start: Optional[float] = None,
trim_end: Optional[float] = None,
duration: Optional[float] = None,
payload: Dict[str, Any],
progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
) -> bool:
"""Execute job using local FFmpeg."""
"""Execute job using the appropriate local handler."""
from .registry import get_handler
# Build config from preset or use stream copy for trim-only
if preset:
config = TranscodeConfig(
input_path=source_path,
output_path=output_path,
video_codec=preset.get("video_codec", "libx264"),
video_bitrate=preset.get("video_bitrate"),
video_crf=preset.get("video_crf"),
video_preset=preset.get("video_preset"),
resolution=preset.get("resolution"),
framerate=preset.get("framerate"),
audio_codec=preset.get("audio_codec", "aac"),
audio_bitrate=preset.get("audio_bitrate"),
audio_channels=preset.get("audio_channels"),
audio_samplerate=preset.get("audio_samplerate"),
container=preset.get("container", "mp4"),
extra_args=preset.get("extra_args", []),
trim_start=trim_start,
trim_end=trim_end,
)
else:
# Trim-only: stream copy
config = TranscodeConfig(
input_path=source_path,
output_path=output_path,
video_codec="copy",
audio_codec="copy",
trim_start=trim_start,
trim_end=trim_end,
)
# Wrapper to convert float percent to int
def wrapped_callback(percent: float, details: Dict[str, Any]) -> None:
if progress_callback:
progress_callback(int(percent), details)
return transcode(
config,
duration=duration,
progress_callback=wrapped_callback if progress_callback else None,
handler = get_handler(job_type)
result = handler.process(
job_id=job_id,
payload=payload,
progress_callback=progress_callback,
)
return result.get("status") == "completed"
class LambdaExecutor(Executor):
@@ -123,26 +77,18 @@ class LambdaExecutor(Executor):
def run(
self,
job_type: str,
job_id: str,
source_path: str,
output_path: str,
preset: Optional[Dict[str, Any]] = None,
trim_start: Optional[float] = None,
trim_end: Optional[float] = None,
duration: Optional[float] = None,
payload: Dict[str, Any],
progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
) -> bool:
"""Start a Step Functions execution for this job."""
import json
payload = {
sfn_payload = {
"job_type": job_type,
"job_id": job_id,
"source_key": source_path,
"output_key": output_path,
"preset": preset,
"trim_start": trim_start,
"trim_end": trim_end,
"duration": duration,
**payload,
"callback_url": self.callback_url,
"api_key": self.callback_api_key,
}
@@ -150,10 +96,9 @@ class LambdaExecutor(Executor):
response = self.sfn.start_execution(
stateMachineArn=self.state_machine_arn,
name=f"mpr-{job_id}",
input=json.dumps(payload),
input=json.dumps(sfn_payload),
)
# Store execution ARN on the job
execution_arn = response["executionArn"]
try:
from core.db import update_job_fields
@@ -179,13 +124,9 @@ class GCPExecutor(Executor):
def run(
self,
job_type: str,
job_id: str,
source_path: str,
output_path: str,
preset: Optional[Dict[str, Any]] = None,
trim_start: Optional[float] = None,
trim_end: Optional[float] = None,
duration: Optional[float] = None,
payload: Dict[str, Any],
progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
) -> bool:
"""Trigger a Cloud Run Job execution for this job."""
@@ -193,14 +134,10 @@ class GCPExecutor(Executor):
from google.cloud import run_v2
payload = {
gcp_payload = {
"job_type": job_type,
"job_id": job_id,
"source_key": source_path,
"output_key": output_path,
"preset": preset,
"trim_start": trim_start,
"trim_end": trim_end,
"duration": duration,
**payload,
"callback_url": self.callback_url,
"api_key": self.callback_api_key,
}
@@ -216,7 +153,8 @@ class GCPExecutor(Executor):
run_v2.RunJobRequest.Overrides.ContainerOverride(
env=[
run_v2.EnvVar(
name="MPR_JOB_PAYLOAD", value=json.dumps(payload)
name="MPR_JOB_PAYLOAD",
value=json.dumps(gcp_payload),
)
]
)

View File

@@ -0,0 +1,5 @@
"""Job handlers — type-specific execution logic."""
from .base import Handler
__all__ = ["Handler"]

View File

@@ -0,0 +1,33 @@
"""
Base Handler ABC — defines the interface for job-type-specific execution logic.
A Handler knows HOW to execute a specific kind of job (transcode, chunk, etc.).
The Executor decides WHERE to run it (local, Lambda, GCP).
"""
from abc import ABC, abstractmethod
from typing import Any, Callable, Dict, Optional
class Handler(ABC):
    """Interface for job-type-specific execution logic."""

    @abstractmethod
    def process(
        self,
        job_id: str,
        payload: Dict[str, Any],
        progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
    ) -> Dict[str, Any]:
        """
        Run one job of the type this handler is responsible for.

        Args:
            job_id: Unique job identifier
            payload: Configuration specific to the job type
            progress_callback: Optional callable invoked as
                ``callback(percent, details_dict)``

        Returns:
            A result dict; on success it contains at least
            ``{"status": "completed"}``. Failures are reported by raising.
        """

125
core/jobs/handlers/chunk.py Normal file
View File

@@ -0,0 +1,125 @@
"""
ChunkHandler — job handler that wraps the chunker Pipeline.
Downloads source from S3/MinIO, runs FFmpeg chunking pipeline,
writes mp4 segments + manifest to media/out/chunks/{job_id}/.
Pushes real-time events to Redis for SSE consumption.
"""
import logging
import os
from typing import Any, Callable, Dict, Optional
from core.events import push_event as push_chunk_event
from core.chunker import Pipeline
from core.storage import BUCKET_IN, download_to_temp
from .base import Handler
logger = logging.getLogger(__name__)
MEDIA_OUT_DIR = os.environ.get("MEDIA_OUT_DIR", "/app/media/out")
class ChunkHandler(Handler):
    """
    Handles chunk processing jobs by delegating to the chunker Pipeline.

    Expected payload keys:
        source_key: str — S3 key of the source file in BUCKET_IN
        chunk_duration: float — seconds per chunk (default: 10.0)
        num_workers: int — concurrent workers (default: 4)
        max_retries: int — retries per chunk (default: 3)
        processor_type: str — "ffmpeg", "checksum", "simulated_decode", "composite"
            (default: "ffmpeg")
        queue_size: int — max queue depth (default: 10)
        start_time / end_time: optional, forwarded to the Pipeline unchanged
    """

    def process(
        self,
        job_id: str,
        payload: Dict[str, Any],
        progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
    ) -> Dict[str, Any]:
        """
        Download the source, run the Pipeline and return a summary dict.

        Every pipeline event is pushed to the job's event list. Any failure
        after the initial "pipeline_start" event — including a failed
        download — pushes a "pipeline_error" event before re-raising, so
        stream consumers always receive a terminal event.

        Returns:
            Dict whose "status" is "completed" when no chunk failed and
            "completed_with_errors" otherwise, plus the pipeline statistics
            and the relative paths of the produced chunk files.
        """
        source_key = payload["source_key"]
        processor_type = payload.get("processor_type", "ffmpeg")

        logger.info(f"ChunkHandler starting job {job_id}: {source_key}")

        push_chunk_event(job_id, "pipeline_start", {"status": "downloading", "source_key": source_key})

        # Output directory: media/out/chunks/{job_id}/
        output_dir = os.path.join(MEDIA_OUT_DIR, "chunks", job_id)

        # Stays None until the download succeeds so cleanup knows what exists.
        tmp_source = None

        try:
            # Download source from S3/MinIO. Done inside the try so that a
            # failure here is reported as "pipeline_error" like any other.
            tmp_source = download_to_temp(BUCKET_IN, source_key)

            if processor_type == "ffmpeg":
                os.makedirs(output_dir, exist_ok=True)

            def event_bridge(event_type: str, data: Dict[str, Any]) -> None:
                """Bridge pipeline events to Redis + optional progress callback."""
                push_chunk_event(job_id, event_type, data)

                if progress_callback and event_type == "pipeline_complete":
                    progress_callback(100, data)
                elif progress_callback and event_type == "chunk_done":
                    # NOTE(review): falls back to 1 when the event carries no
                    # "total_chunks", which pins progress at 99 — confirm the
                    # Pipeline adds total_chunks to chunk_done events.
                    total = data.get("total_chunks", 1)
                    if total > 0:
                        # Capped at 99; 100 is reserved for pipeline_complete.
                        pct = min(int((data.get("sequence", 0) + 1) / total * 100), 99)
                        progress_callback(pct, data)

            pipeline = Pipeline(
                source=tmp_source,
                chunk_duration=payload.get("chunk_duration", 10.0),
                num_workers=payload.get("num_workers", 4),
                max_retries=payload.get("max_retries", 3),
                processor_type=processor_type,
                queue_size=payload.get("queue_size", 10),
                event_callback=event_bridge,
                output_dir=output_dir if processor_type == "ffmpeg" else None,
                start_time=payload.get("start_time"),
                end_time=payload.get("end_time"),
            )

            result = pipeline.run()

            # Files are already in media/out/chunks/{job_id}/
            output_prefix = f"chunks/{job_id}"
            output_files = [
                f"{output_prefix}/{os.path.basename(f)}"
                for f in result.chunk_files
            ]

            push_chunk_event(job_id, "pipeline_complete", {
                "status": "completed",
                "total_chunks": result.total_chunks,
                "processed": result.processed,
                "failed": result.failed,
                "elapsed": result.elapsed_time,
                "throughput_mbps": result.throughput_mbps,
            })

            return {
                "status": "completed" if result.failed == 0 else "completed_with_errors",
                "total_chunks": result.total_chunks,
                "processed": result.processed,
                "failed": result.failed,
                "retries": result.retries,
                "elapsed_time": result.elapsed_time,
                "throughput_mbps": result.throughput_mbps,
                "worker_stats": result.worker_stats,
                "errors": result.errors,
                "chunks_in_order": result.chunks_in_order,
                "output_prefix": output_prefix,
                "output_files": output_files,
            }

        except Exception as e:
            push_chunk_event(job_id, "pipeline_error", {"status": "failed", "error": str(e)})
            raise

        finally:
            # Cleanup temp source file only (output dir is persistent)
            if tmp_source is not None:
                try:
                    os.unlink(tmp_source)
                except OSError:
                    pass

View File

@@ -0,0 +1,104 @@
"""
TranscodeHandler — executes transcode/trim jobs using FFmpeg.
Extracted from the old tasks.py Celery task logic.
"""
import logging
import os
import tempfile
from pathlib import Path
from typing import Any, Callable, Dict, Optional
from core.ffmpeg.transcode import TranscodeConfig, transcode
from core.storage import BUCKET_IN, BUCKET_OUT, download_to_temp, upload_file
from .base import Handler
logger = logging.getLogger(__name__)
class TranscodeHandler(Handler):
    """Handle transcode and trim jobs via FFmpeg."""

    def process(
        self,
        job_id: str,
        payload: Dict[str, Any],
        progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
    ) -> Dict[str, Any]:
        """
        Download the source, run FFmpeg and upload the result.

        Payload keys:
            source_key: Key of the source object in BUCKET_IN (required)
            output_key: Key to upload the result under in BUCKET_OUT (required)
            preset: Optional dict of encoder settings; when absent or empty
                the job is a stream-copy trim
            trim_start / trim_end: Optional trim bounds passed to FFmpeg
            duration: Optional source duration passed to transcode()

        Returns:
            ``{"status": "completed", "job_id": ..., "output_key": ...}``

        Raises:
            RuntimeError: If transcode() returns a falsy value.
        """
        source_key = payload["source_key"]
        output_key = payload["output_key"]
        preset = payload.get("preset")
        trim_start = payload.get("trim_start")
        trim_end = payload.get("trim_end")
        duration = payload.get("duration")

        logger.info(f"TranscodeHandler: {source_key} -> {output_key}")

        # Download source
        tmp_source = download_to_temp(BUCKET_IN, source_key)
        tmp_output = None

        try:
            # Created inside the try so the downloaded source is still removed
            # if the output temp file cannot be created.
            ext = Path(output_key).suffix or ".mp4"
            fd, tmp_output = tempfile.mkstemp(suffix=ext)
            os.close(fd)

            if preset:
                codec_options = {
                    "video_codec": preset.get("video_codec", "libx264"),
                    "video_bitrate": preset.get("video_bitrate"),
                    "video_crf": preset.get("video_crf"),
                    "video_preset": preset.get("video_preset"),
                    "resolution": preset.get("resolution"),
                    "framerate": preset.get("framerate"),
                    "audio_codec": preset.get("audio_codec", "aac"),
                    "audio_bitrate": preset.get("audio_bitrate"),
                    "audio_channels": preset.get("audio_channels"),
                    "audio_samplerate": preset.get("audio_samplerate"),
                    "container": preset.get("container", "mp4"),
                    "extra_args": preset.get("extra_args", []),
                }
            else:
                # Trim-only: stream copy
                codec_options = {"video_codec": "copy", "audio_codec": "copy"}

            config = TranscodeConfig(
                input_path=tmp_source,
                output_path=tmp_output,
                trim_start=trim_start,
                trim_end=trim_end,
                **codec_options,
            )

            wrapped_callback = None
            if progress_callback:
                # transcode() reports a float percent; callers expect an int.
                def wrapped_callback(percent: float, details: Dict[str, Any]) -> None:
                    progress_callback(int(percent), details)

            success = transcode(
                config,
                duration=duration,
                progress_callback=wrapped_callback,
            )

            if not success:
                raise RuntimeError("Transcode returned False")

            # Upload result
            logger.info(f"Uploading {output_key} to {BUCKET_OUT}")
            upload_file(tmp_output, BUCKET_OUT, output_key)

            return {
                "status": "completed",
                "job_id": job_id,
                "output_key": output_key,
            }

        finally:
            # Best-effort removal of both temp files.
            for path in (tmp_source, tmp_output):
                if path is None:
                    continue
                try:
                    os.unlink(path)
                except OSError:
                    pass

33
core/jobs/registry.py Normal file
View File

@@ -0,0 +1,33 @@
"""
Handler registry — maps job_type strings to Handler classes.
"""
from typing import Dict, Type
from .handlers.base import Handler
# job_type -> handler class; filled by register_handler().
_handlers: Dict[str, Type[Handler]] = {}


def register_handler(job_type: str, handler_class: Type[Handler]) -> None:
    """Map ``job_type`` to ``handler_class``, replacing any earlier mapping."""
    _handlers.update({job_type: handler_class})
def get_handler(job_type: str) -> Handler:
    """
    Build a new handler instance for ``job_type``.

    Raises:
        ValueError: If no handler class is registered under ``job_type``.
    """
    if job_type in _handlers:
        return _handlers[job_type]()
    raise ValueError(f"Unknown job type: {job_type}")
def _register_defaults() -> None:
    """Register the built-in transcode and chunk handlers."""
    # Imported here rather than at module top, so the handler modules are
    # only loaded when this function runs.
    from .handlers.chunk import ChunkHandler
    from .handlers.transcode import TranscodeHandler

    builtin_handlers = (
        ("transcode", TranscodeHandler),
        ("chunk", ChunkHandler),
    )
    for job_type, handler_class in builtin_handlers:
        register_handler(job_type, handler_class)


# Populate the registry as soon as this module is imported.
_register_defaults()

64
core/jobs/task.py Normal file
View File

@@ -0,0 +1,64 @@
"""
Celery task for job processing.
Generic dispatcher — routes to the appropriate handler based on job_type.
"""
import logging
from typing import Any, Dict
from celery import shared_task
from core.rpc.server import update_job_progress
logger = logging.getLogger(__name__)
@shared_task(bind=True, max_retries=3, default_retry_delay=60)
def run_job(
    self,
    job_type: str,
    job_id: str,
    payload: Dict[str, Any],
) -> Dict[str, Any]:
    """
    Celery entry point: look up the handler for ``job_type`` and run it.

    Progress is reported through update_job_progress. On failure the job is
    marked "failed" and the task is retried while retries remain; once they
    are used up a failure dict is returned instead of raising.
    """
    logger.info(f"Starting {job_type} job {job_id}")

    update_job_progress(job_id, progress=0, status="processing")

    def report_progress(percent: int, details: Dict[str, Any]) -> None:
        """Forward handler progress to the job record."""
        update_job_progress(
            job_id,
            progress=percent,
            current_time=details.get("time", 0.0),
            status="processing",
        )

    try:
        # Kept inside the try so a registry import error is handled like any
        # other job failure.
        from .registry import get_handler

        outcome = get_handler(job_type).process(
            job_id=job_id,
            payload=payload,
            progress_callback=report_progress,
        )

        logger.info(f"Job {job_id} completed successfully")
        update_job_progress(job_id, progress=100, status="completed")
        return outcome

    except Exception as exc:
        logger.exception(f"Job {job_id} failed: {exc}")
        update_job_progress(job_id, progress=0, status="failed", error=str(exc))

        retries_left = self.request.retries < self.max_retries
        if not retries_left:
            return {
                "status": "failed",
                "job_id": job_id,
                "error": str(exc),
            }

        raise self.retry(exc=exc)

View File

@@ -11,6 +11,7 @@ service WorkerService {
rpc StreamProgress(ProgressRequest) returns (stream ProgressUpdate);
rpc CancelJob(CancelRequest) returns (CancelResponse);
rpc GetWorkerStatus(Empty) returns (WorkerStatus);
rpc StreamChunkPipeline(ChunkStreamRequest) returns (stream ChunkPipelineEvent);
}
message JobRequest {
@@ -62,3 +63,24 @@ message WorkerStatus {
message Empty {
// Empty
}
message ChunkStreamRequest {
string job_id = 1;
}
message ChunkPipelineEvent {
string job_id = 1;
string event_type = 2;
int32 sequence = 3;
string worker_id = 4;
string state = 5;
int32 queue_size = 6;
float elapsed = 7;
float throughput_mbps = 8;
int32 total_chunks = 9;
int32 processed_chunks = 10;
int32 failed_chunks = 11;
string error = 12;
float processing_time = 13;
int32 retries = 14;
}

View File

@@ -59,17 +59,24 @@ class WorkerServicer(worker_pb2_grpc.WorkerServiceServicer):
# Dispatch to Celery if available
if self.celery_app:
from core.task.tasks import run_transcode_job
from core.jobs.task import run_job
task = run_transcode_job.delay(
job_id=job_id,
source_path=request.source_path,
output_path=request.output_path,
preset=preset,
trim_start=request.trim_start
payload = {
"source_key": request.source_path,
"output_key": request.output_path,
"preset": preset,
"trim_start": request.trim_start
if request.HasField("trim_start")
else None,
trim_end=request.trim_end if request.HasField("trim_end") else None,
"trim_end": request.trim_end
if request.HasField("trim_end")
else None,
}
task = run_job.delay(
job_type="transcode",
job_id=job_id,
payload=payload,
)
_active_jobs[job_id]["celery_task_id"] = task.id
@@ -166,6 +173,43 @@ class WorkerServicer(worker_pb2_grpc.WorkerServiceServicer):
message="Job not found",
)
def StreamChunkPipeline(self, request, context) -> Iterator[worker_pb2.ChunkPipelineEvent]:
    """
    Stream chunk pipeline events for a job.

    Polls the job's stored events every 50 ms and yields one
    ChunkPipelineEvent per event. The stream ends after a
    "pipeline_complete" or "pipeline_error" event has been sent, when the
    client context is no longer active, or after 10 minutes.
    """
    from core.events import poll_events

    job_id = request.job_id
    logger.info(f"StreamChunkPipeline: {job_id}")

    terminal_events = ("pipeline_complete", "pipeline_error")
    cursor = 0
    deadline = time.monotonic() + 600  # 10 min max

    while context.is_active() and time.monotonic() < deadline:
        batch, cursor = poll_events(job_id, cursor)

        for payload in batch:
            event_type = payload.pop("event", "")

            # Keys missing from the payload fall back to neutral defaults.
            message = worker_pb2.ChunkPipelineEvent(
                job_id=job_id,
                event_type=event_type,
                sequence=payload.get("sequence", 0),
                worker_id=payload.get("worker_id", ""),
                state=payload.get("state", ""),
                queue_size=payload.get("queue_size", 0),
                elapsed=payload.get("elapsed", 0.0),
                throughput_mbps=payload.get("throughput_mbps", 0.0),
                total_chunks=payload.get("total_chunks", 0),
                processed_chunks=payload.get("processed_chunks", 0),
                failed_chunks=payload.get("failed_chunks", 0),
                error=payload.get("error", ""),
                processing_time=payload.get("processing_time", 0.0),
                retries=payload.get("retries", 0),
            )
            yield message

            if event_type in terminal_events:
                return

        time.sleep(0.05)
def GetWorkerStatus(self, request, context):
"""Get worker health and capabilities."""
try:
@@ -197,11 +241,14 @@ def update_job_progress(
speed: float = 0.0,
status: str = "processing",
error: str = None,
**extra,
) -> None:
"""
Update job progress (called from worker tasks).
Updates both the in-memory gRPC state and the Django database.
Extra kwargs are stored for chunker-specific fields (total_chunks,
processed_chunks, failed_chunks, throughput_mbps, etc.).
"""
if job_id in _active_jobs:
_active_jobs[job_id].update(
@@ -212,6 +259,7 @@ def update_job_progress(
"speed": speed,
"status": status,
"error": error,
**extra,
}
)

View File

@@ -24,7 +24,7 @@ _sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0cworker.proto\x12\nmpr.worker\"\xa7\x01\n\nJobRequest\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12\x13\n\x0bsource_path\x18\x02 \x01(\t\x12\x13\n\x0boutput_path\x18\x03 \x01(\t\x12\x13\n\x0bpreset_json\x18\x04 \x01(\t\x12\x17\n\ntrim_start\x18\x05 \x01(\x02H\x00\x88\x01\x01\x12\x15\n\x08trim_end\x18\x06 \x01(\x02H\x01\x88\x01\x01\x42\r\n\x0b_trim_startB\x0b\n\t_trim_end\"@\n\x0bJobResponse\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12\x10\n\x08\x61\x63\x63\x65pted\x18\x02 \x01(\x08\x12\x0f\n\x07message\x18\x03 \x01(\t\"!\n\x0fProgressRequest\x12\x0e\n\x06job_id\x18\x01 \x01(\t\"\x9c\x01\n\x0eProgressUpdate\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12\x10\n\x08progress\x18\x02 \x01(\x05\x12\x15\n\rcurrent_frame\x18\x03 \x01(\x05\x12\x14\n\x0c\x63urrent_time\x18\x04 \x01(\x02\x12\r\n\x05speed\x18\x05 \x01(\x02\x12\x0e\n\x06status\x18\x06 \x01(\t\x12\x12\n\x05\x65rror\x18\x07 \x01(\tH\x00\x88\x01\x01\x42\x08\n\x06_error\"\x1f\n\rCancelRequest\x12\x0e\n\x06job_id\x18\x01 \x01(\t\"D\n\x0e\x43\x61ncelResponse\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12\x11\n\tcancelled\x18\x02 \x01(\x08\x12\x0f\n\x07message\x18\x03 \x01(\t\"g\n\x0cWorkerStatus\x12\x11\n\tavailable\x18\x01 \x01(\x08\x12\x13\n\x0b\x61\x63tive_jobs\x18\x02 \x01(\x05\x12\x18\n\x10supported_codecs\x18\x03 \x03(\t\x12\x15\n\rgpu_available\x18\x04 \x01(\x08\"\x07\n\x05\x45mpty2\x9e\x02\n\rWorkerService\x12<\n\tSubmitJob\x12\x16.mpr.worker.JobRequest\x1a\x17.mpr.worker.JobResponse\x12K\n\x0eStreamProgress\x12\x1b.mpr.worker.ProgressRequest\x1a\x1a.mpr.worker.ProgressUpdate0\x01\x12\x42\n\tCancelJob\x12\x19.mpr.worker.CancelRequest\x1a\x1a.mpr.worker.CancelResponse\x12>\n\x0fGetWorkerStatus\x12\x11.mpr.worker.Empty\x1a\x18.mpr.worker.WorkerStatusb\x06proto3')
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0cworker.proto\x12\nmpr.worker\"\xa7\x01\n\nJobRequest\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12\x13\n\x0bsource_path\x18\x02 \x01(\t\x12\x13\n\x0boutput_path\x18\x03 \x01(\t\x12\x13\n\x0bpreset_json\x18\x04 \x01(\t\x12\x17\n\ntrim_start\x18\x05 \x01(\x02H\x00\x88\x01\x01\x12\x15\n\x08trim_end\x18\x06 \x01(\x02H\x01\x88\x01\x01\x42\r\n\x0b_trim_startB\x0b\n\t_trim_end\"@\n\x0bJobResponse\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12\x10\n\x08\x61\x63\x63\x65pted\x18\x02 \x01(\x08\x12\x0f\n\x07message\x18\x03 \x01(\t\"!\n\x0fProgressRequest\x12\x0e\n\x06job_id\x18\x01 \x01(\t\"\x9c\x01\n\x0eProgressUpdate\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12\x10\n\x08progress\x18\x02 \x01(\x05\x12\x15\n\rcurrent_frame\x18\x03 \x01(\x05\x12\x14\n\x0c\x63urrent_time\x18\x04 \x01(\x02\x12\r\n\x05speed\x18\x05 \x01(\x02\x12\x0e\n\x06status\x18\x06 \x01(\t\x12\x12\n\x05\x65rror\x18\x07 \x01(\tH\x00\x88\x01\x01\x42\x08\n\x06_error\"\x1f\n\rCancelRequest\x12\x0e\n\x06job_id\x18\x01 \x01(\t\"D\n\x0e\x43\x61ncelResponse\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12\x11\n\tcancelled\x18\x02 \x01(\x08\x12\x0f\n\x07message\x18\x03 \x01(\t\"g\n\x0cWorkerStatus\x12\x11\n\tavailable\x18\x01 \x01(\x08\x12\x13\n\x0b\x61\x63tive_jobs\x18\x02 \x01(\x05\x12\x18\n\x10supported_codecs\x18\x03 \x03(\t\x12\x15\n\rgpu_available\x18\x04 \x01(\x08\"\x07\n\x05\x45mpty\"$\n\x12\x43hunkStreamRequest\x12\x0e\n\x06job_id\x18\x01 \x01(\t\"\xaa\x02\n\x12\x43hunkPipelineEvent\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12\x12\n\nevent_type\x18\x02 \x01(\t\x12\x10\n\x08sequence\x18\x03 \x01(\x05\x12\x11\n\tworker_id\x18\x04 \x01(\t\x12\r\n\x05state\x18\x05 \x01(\t\x12\x12\n\nqueue_size\x18\x06 \x01(\x05\x12\x0f\n\x07\x65lapsed\x18\x07 \x01(\x02\x12\x17\n\x0fthroughput_mbps\x18\x08 \x01(\x02\x12\x14\n\x0ctotal_chunks\x18\t \x01(\x05\x12\x18\n\x10processed_chunks\x18\n \x01(\x05\x12\x15\n\rfailed_chunks\x18\x0b \x01(\x05\x12\r\n\x05\x65rror\x18\x0c 
\x01(\t\x12\x17\n\x0fprocessing_time\x18\r \x01(\x02\x12\x0f\n\x07retries\x18\x0e \x01(\x05\x32\xf7\x02\n\rWorkerService\x12<\n\tSubmitJob\x12\x16.mpr.worker.JobRequest\x1a\x17.mpr.worker.JobResponse\x12K\n\x0eStreamProgress\x12\x1b.mpr.worker.ProgressRequest\x1a\x1a.mpr.worker.ProgressUpdate0\x01\x12\x42\n\tCancelJob\x12\x19.mpr.worker.CancelRequest\x1a\x1a.mpr.worker.CancelResponse\x12>\n\x0fGetWorkerStatus\x12\x11.mpr.worker.Empty\x1a\x18.mpr.worker.WorkerStatus\x12W\n\x13StreamChunkPipeline\x12\x1e.mpr.worker.ChunkStreamRequest\x1a\x1e.mpr.worker.ChunkPipelineEvent0\x01\x62\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
@@ -47,6 +47,10 @@ if not _descriptor._USE_C_DESCRIPTORS:
_globals['_WORKERSTATUS']._serialized_end=664
_globals['_EMPTY']._serialized_start=666
_globals['_EMPTY']._serialized_end=673
_globals['_WORKERSERVICE']._serialized_start=676
_globals['_WORKERSERVICE']._serialized_end=962
_globals['_CHUNKSTREAMREQUEST']._serialized_start=675
_globals['_CHUNKSTREAMREQUEST']._serialized_end=711
_globals['_CHUNKPIPELINEEVENT']._serialized_start=714
_globals['_CHUNKPIPELINEEVENT']._serialized_end=1012
_globals['_WORKERSERVICE']._serialized_start=1015
_globals['_WORKERSERVICE']._serialized_end=1390
# @@protoc_insertion_point(module_scope)

View File

@@ -5,7 +5,7 @@ import warnings
from . import worker_pb2 as worker__pb2
GRPC_GENERATED_VERSION = '1.76.0'
GRPC_GENERATED_VERSION = '1.78.0'
GRPC_VERSION = grpc.__version__
_version_not_supported = False
@@ -54,6 +54,11 @@ class WorkerServiceStub(object):
request_serializer=worker__pb2.Empty.SerializeToString,
response_deserializer=worker__pb2.WorkerStatus.FromString,
_registered_method=True)
self.StreamChunkPipeline = channel.unary_stream(
'/mpr.worker.WorkerService/StreamChunkPipeline',
request_serializer=worker__pb2.ChunkStreamRequest.SerializeToString,
response_deserializer=worker__pb2.ChunkPipelineEvent.FromString,
_registered_method=True)
class WorkerServiceServicer(object):
@@ -83,6 +88,12 @@ class WorkerServiceServicer(object):
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def StreamChunkPipeline(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def add_WorkerServiceServicer_to_server(servicer, server):
rpc_method_handlers = {
@@ -106,6 +117,11 @@ def add_WorkerServiceServicer_to_server(servicer, server):
request_deserializer=worker__pb2.Empty.FromString,
response_serializer=worker__pb2.WorkerStatus.SerializeToString,
),
'StreamChunkPipeline': grpc.unary_stream_rpc_method_handler(
servicer.StreamChunkPipeline,
request_deserializer=worker__pb2.ChunkStreamRequest.FromString,
response_serializer=worker__pb2.ChunkPipelineEvent.SerializeToString,
),
}
generic_handler = grpc.method_handlers_generic_handler(
'mpr.worker.WorkerService', rpc_method_handlers)
@@ -224,3 +240,30 @@ class WorkerService(object):
timeout,
metadata,
_registered_method=True)
@staticmethod
def StreamChunkPipeline(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_stream(
request,
target,
'/mpr.worker.WorkerService/StreamChunkPipeline',
worker__pb2.ChunkStreamRequest.SerializeToString,
worker__pb2.ChunkPipelineEvent.FromString,
options,
channel_credentials,
insecure,
call_credentials,
compression,
wait_for_ready,
timeout,
metadata,
_registered_method=True)

View File

@@ -13,8 +13,8 @@
},
{
"target": "typescript",
"output": "ui/timeline/src/types.ts",
"include": ["dataclasses", "enums", "api"]
"output": "ui/common/types/generated.ts",
"include": ["dataclasses", "enums", "api", "views"]
},
{
"target": "protobuf",

View File

@@ -16,6 +16,8 @@ from .grpc import (
GRPC_SERVICE,
CancelRequest,
CancelResponse,
ChunkPipelineEvent,
ChunkStreamRequest,
Empty,
JobRequest,
JobResponse,
@@ -23,12 +25,13 @@ from .grpc import (
ProgressUpdate,
WorkerStatus,
)
from .jobs import JobStatus, TranscodeJob
from .jobs import ChunkJob, ChunkJobStatus, JobStatus, TranscodeJob
from .media import AssetStatus, MediaAsset
from .presets import BUILTIN_PRESETS, TranscodePreset
from .views import ChunkEvent, ChunkOutputFile, PipelineStats, WorkerEvent
# Core domain models - generates Django, Pydantic, TypeScript
DATACLASSES = [MediaAsset, TranscodePreset, TranscodeJob]
DATACLASSES = [MediaAsset, TranscodePreset, TranscodeJob, ChunkJob]
# API request/response models - generates TypeScript only (no Django)
# WorkerStatus from grpc.py is reused here
@@ -42,7 +45,10 @@ API_MODELS = [
]
# Status enums - included in generated code
ENUMS = [AssetStatus, JobStatus]
ENUMS = [AssetStatus, JobStatus, ChunkJobStatus]
# View/event models - generates TypeScript for UI consumption
VIEWS = [ChunkEvent, WorkerEvent, PipelineStats, ChunkOutputFile]
# gRPC messages - generates Proto
GRPC_MESSAGES = [
@@ -54,6 +60,8 @@ GRPC_MESSAGES = [
CancelResponse,
WorkerStatus,
Empty,
ChunkStreamRequest,
ChunkPipelineEvent,
]
__all__ = [
@@ -61,6 +69,7 @@ __all__ = [
"MediaAsset",
"TranscodePreset",
"TranscodeJob",
"ChunkJob",
# API Models
"CreateJobRequest",
"UpdateAssetRequest",
@@ -70,6 +79,7 @@ __all__ = [
# Enums
"AssetStatus",
"JobStatus",
"ChunkJobStatus",
# gRPC
"GRPC_SERVICE",
"JobRequest",
@@ -80,10 +90,18 @@ __all__ = [
"CancelResponse",
"WorkerStatus",
"Empty",
"ChunkStreamRequest",
"ChunkPipelineEvent",
# Views
"ChunkEvent",
"WorkerEvent",
"PipelineStats",
"ChunkOutputFile",
# For generator
"DATACLASSES",
"API_MODELS",
"ENUMS",
"VIEWS",
"GRPC_MESSAGES",
"BUILTIN_PRESETS",
]

View File

@@ -41,6 +41,13 @@ class CancelRequest:
job_id: str
@dataclass
class ChunkStreamRequest:
    """Request message for subscribing to a job's chunk pipeline event stream."""

    # Identifier of the job whose events should be streamed.
    job_id: str
@dataclass
class Empty:
"""Empty message for requests with no parameters."""
@@ -94,6 +101,26 @@ class WorkerStatus:
gpu_available: bool
@dataclass
class ChunkPipelineEvent:
    """
    One event in a chunk pipeline stream.

    Only ``job_id`` and ``event_type`` are required; every other field has a
    zero/empty default, so an event carries just the fields relevant to its type.
    """

    job_id: str
    event_type: str  # pipeline_start, chunk_queued, chunk_done, etc.

    # Chunk / worker identification
    sequence: int = 0
    worker_id: str = ""
    state: str = ""

    # Pipeline-level counters and timing
    queue_size: int = 0
    elapsed: float = 0.0
    throughput_mbps: float = 0.0
    total_chunks: int = 0
    processed_chunks: int = 0
    failed_chunks: int = 0

    # Per-chunk outcome details
    error: str = ""
    processing_time: float = 0.0
    retries: int = 0
# -----------------------------------------------------------------------------
# Service Definition (for documentation, generator uses this)
# -----------------------------------------------------------------------------
@@ -126,5 +153,11 @@ GRPC_SERVICE = {
"response": WorkerStatus,
"stream_response": False,
},
{
"name": "StreamChunkPipeline",
"request": ChunkStreamRequest,
"response": ChunkPipelineEvent,
"stream_response": True, # Server streaming
},
],
}

View File

@@ -1,13 +1,14 @@
"""
TranscodeJob Schema Definition
Job Schema Definitions
Source of truth for job data model.
Source of truth for job data models.
TranscodeJob and ChunkJob share common lifecycle fields by convention.
"""
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Dict, Optional
from typing import Any, Dict, List, Optional
from uuid import UUID
@@ -77,3 +78,56 @@ class TranscodeJob:
return self.preset_id is None and (
self.trim_start is not None or self.trim_end is not None
)
class ChunkJobStatus(str, Enum):
    """Lifecycle states of a chunk pipeline job (string-valued enum)."""

    # Not yet started
    PENDING = "pending"

    # In-progress stages
    CHUNKING = "chunking"
    PROCESSING = "processing"
    COLLECTING = "collecting"

    # End states
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
@dataclass
class ChunkJob:
    """
    Schema for a chunk pipeline job.

    A chunk job splits one source media asset into chunks and runs them
    through a concurrent worker pool. Only ``id`` and ``source_asset_id`` are
    required; everything else has a default.
    """

    id: UUID

    # --- Input ---------------------------------------------------------------
    source_asset_id: UUID

    # --- Configuration -------------------------------------------------------
    chunk_duration: float = 10.0  # seconds
    num_workers: int = 4
    max_retries: int = 3
    processor_type: str = "ffmpeg"  # "ffmpeg", "checksum", "simulated_decode", "composite"

    # --- Status & progress ---------------------------------------------------
    status: ChunkJobStatus = ChunkJobStatus.PENDING
    progress: float = 0.0  # 0.0 to 100.0
    total_chunks: int = 0
    processed_chunks: int = 0
    failed_chunks: int = 0
    retry_count: int = 0
    error_message: Optional[str] = None

    # --- Result stats --------------------------------------------------------
    throughput_mbps: Optional[float] = None
    elapsed_seconds: Optional[float] = None

    # --- Worker tracking -----------------------------------------------------
    celery_task_id: Optional[str] = None
    priority: int = 0  # Lower = higher priority

    # --- Timestamps ----------------------------------------------------------
    created_at: Optional[datetime] = None
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None

View File

@@ -0,0 +1,57 @@
"""
View/Event Schema Definitions
Projections of domain models for UI consumption via SSE events.
These mirror fields of the domain models (e.g., ChunkJob) using plain
primitive types (e.g., `status: str`) — if the domain model changes, update these views to match.
"""
from dataclasses import dataclass
from typing import Optional
@dataclass
class ChunkEvent:
    """View model for an SSE event describing one chunk's lifecycle step."""

    # Required: which chunk, and its status string.
    sequence: int
    status: str

    # Optional details; None when the event does not carry them.
    size: Optional[int] = None
    worker_id: Optional[str] = None
    processing_time: Optional[float] = None
    error: Optional[str] = None

    retries: int = 0
@dataclass
class WorkerEvent:
    """View model for an SSE event reporting a worker's state change."""

    # Required: which worker, and its state string.
    worker_id: str
    state: str

    # Chunk the worker is on, if the event carries one.
    current_chunk: Optional[int] = None

    # Per-worker counters.
    processed: int = 0
    errors: int = 0
    retries: int = 0
@dataclass
class PipelineStats:
    """View model for pipeline-wide aggregate statistics delivered over SSE."""

    # Chunk counters
    total_chunks: int = 0
    processed: int = 0
    failed: int = 0
    retries: int = 0

    # Timing and throughput
    elapsed: float = 0.0
    throughput_mbps: float = 0.0

    # Queue depth
    queue_size: int = 0
@dataclass
class ChunkOutputFile:
    """View model for one chunk output file in S3/MinIO, with its presigned download URL."""

    key: str

    # Default to an empty size/URL when not supplied.
    size: int = 0
    url: str = ""

View File

@@ -1,15 +0,0 @@
"""
MPR Worker Module
Provides executor abstraction and Celery tasks for job processing.
"""
from .executor import Executor, LocalExecutor, get_executor
from .tasks import run_transcode_job
__all__ = [
"Executor",
"LocalExecutor",
"get_executor",
"run_transcode_job",
]

View File

@@ -1,105 +0,0 @@
"""
Celery tasks for job processing.
"""
import logging
import os
from typing import Any, Dict, Optional
from celery import shared_task
from core.storage import BUCKET_IN, BUCKET_OUT, download_to_temp, upload_file
from core.rpc.server import update_job_progress
from core.task.executor import get_executor
logger = logging.getLogger(__name__)
@shared_task(bind=True, queue="transcode", max_retries=3, default_retry_delay=60)
def run_transcode_job(
    self,
    job_id: str,
    source_key: str,
    output_key: str,
    preset: Optional[Dict[str, Any]] = None,
    trim_start: Optional[float] = None,
    trim_end: Optional[float] = None,
    duration: Optional[float] = None,
) -> Dict[str, Any]:
    """
    Run a transcode/trim job as a Celery task.

    The source object is downloaded from the input bucket to a temp file, the
    configured executor processes it into a temp output file, and on success
    the output is uploaded to the output bucket. Progress and terminal status
    are reported through ``update_job_progress``.

    Returns a dict with ``status`` and ``job_id``, plus ``output_key`` on
    success or ``error`` on final failure. While retries remain, a failure
    re-raises via ``self.retry`` instead of returning. Both temp files are
    removed in all cases.
    """
    logger.info(f"Starting job {job_id}: {source_key} -> {output_key}")
    update_job_progress(job_id, progress=0, status="processing")

    # Fetch the source object into a local temp file.
    logger.info(f"Downloading {source_key} from {BUCKET_IN}")
    tmp_source = download_to_temp(BUCKET_IN, source_key)

    # Reserve a temp output path that keeps the requested extension.
    import tempfile
    from pathlib import Path

    suffix = Path(output_key).suffix or ".mp4"
    handle, tmp_output = tempfile.mkstemp(suffix=suffix)
    os.close(handle)

    def report_progress(percent: int, details: Dict[str, Any]) -> None:
        # Forward executor progress to the shared job-progress tracker.
        update_job_progress(
            job_id,
            progress=percent,
            current_time=details.get("time", 0.0),
            status="processing",
        )

    try:
        succeeded = get_executor().run(
            job_id=job_id,
            source_path=tmp_source,
            output_path=tmp_output,
            preset=preset,
            trim_start=trim_start,
            trim_end=trim_end,
            duration=duration,
            progress_callback=report_progress,
        )
        if not succeeded:
            # Routed through the handler below like any other failure.
            raise RuntimeError("Executor returned False")

        # Publish the result to the output bucket.
        logger.info(f"Uploading {output_key} to {BUCKET_OUT}")
        upload_file(tmp_output, BUCKET_OUT, output_key)

        logger.info(f"Job {job_id} completed successfully")
        update_job_progress(job_id, progress=100, status="completed")
        return {
            "status": "completed",
            "job_id": job_id,
            "output_key": output_key,
        }
    except Exception as e:
        logger.exception(f"Job {job_id} failed: {e}")
        update_job_progress(job_id, progress=0, status="failed", error=str(e))

        if self.request.retries < self.max_retries:
            raise self.retry(exc=e)

        return {
            "status": "failed",
            "job_id": job_id,
            "error": str(e),
        }
    finally:
        # Best-effort removal of both temp files; a missing file is not an error.
        for leftover in (tmp_source, tmp_output):
            try:
                os.unlink(leftover)
            except OSError:
                pass

View File

@@ -6,5 +6,6 @@ COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# No COPY . . — code is volume-mounted in dev (..:/app)
# This image only provides the Python runtime + dependencies
CMD ["python", "admin/manage.py", "runserver", "0.0.0.0:8000"]

View File

@@ -10,5 +10,6 @@ COPY requirements.txt requirements-worker.txt ./
RUN pip install --no-cache-dir -r requirements-worker.txt
# No COPY . . — code is volume-mounted in dev (..:/app)
# This image only provides Python runtime + FFmpeg + dependencies
CMD ["celery", "-A", "admin.mpr", "worker", "--loglevel=info"]

View File

@@ -17,6 +17,20 @@ x-healthcheck-defaults: &healthcheck-defaults
timeout: 5s
retries: 5
x-python-service: &python-service
build:
context: ..
dockerfile: ctrl/Dockerfile
volumes:
- ..:/app
environment:
<<: *common-env
depends_on:
postgres:
condition: service_healthy
redis:
condition: service_healthy
services:
# =============================================================================
# Infrastructure
@@ -75,64 +89,55 @@ services:
mc anonymous set download local/mpr-media-in
mc anonymous set download local/mpr-media-out
envoy:
image: envoyproxy/envoy:v1.28-latest
ports:
- "8090:8090"
volumes:
- ./envoy.yaml:/etc/envoy/envoy.yaml:ro
depends_on:
- grpc
nginx:
image: nginx:alpine
ports:
- "80:80"
volumes:
- ./nginx.conf:/etc/nginx/nginx.conf:ro
- ./landing.html:/etc/nginx/landing.html:ro
- ../media/out:/app/media/out:ro
depends_on:
- django
- fastapi
- timeline
- chunker
- minio
- envoy
# =============================================================================
# Application Services
# =============================================================================
django:
build:
context: ..
dockerfile: ctrl/Dockerfile
<<: *python-service
command: >
bash -c "python admin/manage.py migrate &&
python admin/manage.py loadbuiltins || true &&
python admin/manage.py runserver 0.0.0.0:8701"
ports:
- "8701:8701"
environment:
<<: *common-env
volumes:
- ..:/app
depends_on:
postgres:
condition: service_healthy
redis:
condition: service_healthy
fastapi:
build:
context: ..
dockerfile: ctrl/Dockerfile
<<: *python-service
command: uvicorn core.api.main:app --host 0.0.0.0 --port 8702 --reload
ports:
- "8702:8702"
environment:
<<: *common-env
DJANGO_ALLOW_ASYNC_UNSAFE: "true"
volumes:
- ..:/app
depends_on:
postgres:
condition: service_healthy
redis:
condition: service_healthy
grpc:
build:
context: ..
dockerfile: ctrl/Dockerfile
<<: *python-service
command: python -m core.rpc.server
ports:
- "50052:50051"
@@ -140,19 +145,12 @@ services:
<<: *common-env
GRPC_PORT: 50051
GRPC_MAX_WORKERS: 10
volumes:
- ..:/app
depends_on:
postgres:
condition: service_healthy
redis:
condition: service_healthy
celery:
build:
context: ..
dockerfile: ctrl/Dockerfile.worker
command: celery -A admin.mpr worker -l info -Q transcode -c 2
command: celery -A admin.mpr worker -l info -Q celery,transcode -c 2
environment:
<<: *common-env
MPR_EXECUTOR: local
@@ -176,6 +174,21 @@ services:
VITE_ALLOWED_HOSTS: ${VITE_ALLOWED_HOSTS:-}
volumes:
- ../ui/timeline/src:/app/src
- ../ui/timeline/vite.config.ts:/app/vite.config.ts
- ../ui/common:/common
chunker:
build:
context: ../ui/chunker
dockerfile: Dockerfile
ports:
- "5174:5174"
environment:
VITE_ALLOWED_HOSTS: ${VITE_ALLOWED_HOSTS:-}
volumes:
- ../ui/chunker/src:/app/src
- ../ui/chunker/vite.config.ts:/app/vite.config.ts
- ../ui/common:/common
volumes:
postgres-data:

64
ctrl/envoy.yaml Normal file
View File

@@ -0,0 +1,64 @@
admin:
address:
socket_address: { address: 0.0.0.0, port_value: 9901 }
static_resources:
listeners:
- name: listener_0
address:
socket_address: { address: 0.0.0.0, port_value: 8090 }
filter_chains:
- filters:
- name: envoy.filters.network.http_connection_manager
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
codec_type: auto
stat_prefix: ingress_http
route_config:
name: local_route
virtual_hosts:
- name: local_service
domains: ["*"]
routes:
- match: { prefix: "/" }
route:
cluster: grpc_service
timeout: 600s
max_stream_duration:
grpc_timeout_header_max: 600s
cors:
allow_origin_string_match:
- prefix: "*"
allow_methods: GET, PUT, DELETE, POST, OPTIONS
allow_headers: keep-alive,user-agent,cache-control,content-type,content-transfer-encoding,x-accept-content-transfer-encoding,x-accept-response-streaming,x-user-agent,x-grpc-web,grpc-timeout
expose_headers: grpc-status,grpc-message
max_age: "1728000"
http_filters:
- name: envoy.filters.http.grpc_web
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.grpc_web.v3.GrpcWeb
- name: envoy.filters.http.cors
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.cors.v3.Cors
- name: envoy.filters.http.router
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
clusters:
- name: grpc_service
connect_timeout: 5s
type: logical_dns
lb_policy: round_robin
typed_extension_protocol_options:
envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
"@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
explicit_http_config:
http2_protocol_options: {}
load_assignment:
cluster_name: grpc_service
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: grpc
port_value: 50051

View File

@@ -19,4 +19,13 @@ python -m grpc_tools.protoc \
# Fix relative import in generated grpc stub
sed -i 's/^import worker_pb2/from . import worker_pb2/' core/rpc/worker_pb2_grpc.py
# Generate TypeScript gRPC-Web client from proto
echo "Generating TypeScript gRPC-Web client..."
cd ui/chunker
npx protoc \
--ts_out ../common/api/grpc \
--proto_path ../../core/rpc/protos \
worker.proto
cd ../..
echo "Done!"

View File

@@ -14,8 +14,8 @@ COPY ctrl/lambda/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code
COPY core/task/lambda_handler.py ${LAMBDA_TASK_ROOT}/core/task/lambda_handler.py
COPY core/task/__init__.py ${LAMBDA_TASK_ROOT}/core/task/__init__.py
COPY core/jobs/lambda_handler.py ${LAMBDA_TASK_ROOT}/core/jobs/lambda_handler.py
COPY core/jobs/__init__.py ${LAMBDA_TASK_ROOT}/core/jobs/__init__.py
COPY core/ ${LAMBDA_TASK_ROOT}/core/
CMD ["core.task.lambda_handler.handler"]
CMD ["core.jobs.lambda_handler.handler"]

107
ctrl/landing.html Normal file
View File

@@ -0,0 +1,107 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>MPR</title>
<style>
* { box-sizing: border-box; margin: 0; padding: 0; }
body {
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, monospace;
background: #0f0f0f;
color: #e0e0e0;
display: flex;
align-items: center;
justify-content: center;
min-height: 100vh;
}
.container {
text-align: center;
max-width: 600px;
padding: 2rem;
}
h1 {
font-size: 2rem;
font-weight: 700;
letter-spacing: -0.02em;
margin-bottom: 0.5rem;
}
.subtitle {
color: #666;
font-size: 0.9rem;
margin-bottom: 2.5rem;
}
.cards {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 1rem;
}
a.card {
display: flex;
flex-direction: column;
align-items: center;
gap: 0.75rem;
padding: 2rem 1.5rem;
background: #141414;
border: 1px solid #2a2a2a;
border-radius: 12px;
text-decoration: none;
color: #e0e0e0;
transition: all 0.2s;
}
a.card:hover {
border-color: #3b82f6;
background: #1a1a2e;
transform: translateY(-2px);
}
.card-icon {
font-size: 2.5rem;
line-height: 1;
}
.card-title {
font-size: 1.1rem;
font-weight: 600;
}
.card-desc {
font-size: 0.75rem;
color: #666;
line-height: 1.4;
}
.links {
margin-top: 2rem;
display: flex;
gap: 1.5rem;
justify-content: center;
}
.links a {
color: #555;
font-size: 0.75rem;
text-decoration: none;
transition: color 0.2s;
}
.links a:hover { color: #94a3b8; }
</style>
</head>
<body>
<div class="container">
<h1>MPR</h1>
<p class="subtitle">Media Processing & Review</p>
<div class="cards">
<a class="card" href="/timeline/">
<div class="card-icon">&#9654;</div>
<div class="card-title">Timeline</div>
<div class="card-desc">Browse assets, trim, transcode</div>
</a>
<a class="card" href="/chunker/">
<div class="card-icon">&#9638;</div>
<div class="card-title">Chunker</div>
<div class="card-desc">Split media into segments, pipeline visualization</div>
</a>
</div>
<div class="links">
<a href="/admin/">Admin</a>
<a href="/api/graphql">GraphQL</a>
</div>
</div>
</body>
</html>

View File

@@ -21,14 +21,28 @@ http {
server timeline:5173;
}
upstream chunker {
server chunker:5174;
}
upstream minio {
server minio:9000;
}
upstream envoy {
server envoy:8090;
}
server {
listen 80;
server_name mpr.local.ar;
# Landing page
location = / {
root /etc/nginx;
try_files /landing.html =404;
}
# Django Admin
location /admin {
proxy_pass http://django;
@@ -54,7 +68,7 @@ http {
}
# Timeline UI
location /ui {
location /timeline/ {
proxy_pass http://timeline;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
@@ -62,8 +76,17 @@ http {
proxy_set_header Connection "upgrade";
}
# Vite HMR websocket
location /@vite {
# Chunker UI
location /chunker/ {
proxy_pass http://chunker;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
}
# Vite HMR websocket (timeline)
location /timeline/@vite {
proxy_pass http://timeline;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
@@ -71,6 +94,15 @@ http {
proxy_set_header Host $host;
}
# Vite HMR websocket (chunker)
location /chunker/@vite {
proxy_pass http://chunker;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
proxy_set_header Host $host;
}
# Media files - proxied from MinIO (local) or S3 (AWS)
location /media/in/ {
proxy_pass http://minio/mpr-media-in/;
@@ -78,16 +110,24 @@ http {
}
location /media/out/ {
proxy_pass http://minio/mpr-media-out/;
proxy_set_header Host $http_host;
alias /app/media/out/;
autoindex on;
}
# Default to Timeline UI
location / {
proxy_pass http://timeline;
# gRPC-Web proxy via Envoy
location /grpc-web/ {
proxy_pass http://envoy/;
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_read_timeout 600s;
# Critical for streaming: disable nginx response buffering
proxy_buffering off;
proxy_cache off;
chunked_transfer_encoding on;
}
}
}

View File

@@ -0,0 +1,290 @@
# Chunker Pipeline — Execution Path
## Overview
The chunker pipeline splits a media file into time-based segments using FFmpeg stream-copy. Events flow from worker threads through Redis and gRPC-Web streaming to the browser UI in real time.
**7 hops from worker thread to pixel:**
```
Worker thread → Pipeline._emit() → event_bridge() → Redis RPUSH
→ [50ms poll] gRPC server LRANGE → yield protobuf
→ HTTP/2 frame → Envoy (grpc-web filter)
→ HTTP/1.1 chunk → nginx (proxy_buffering off)
→ fetch ReadableStream → protobuf-ts decode
→ setEvents([...prev, evt]) → React re-render
```
---
## Step 1: Job Creation (Browser → GraphQL → Celery)
```
User clicks "Start"
→ App.tsx: handleStart(config)
→ api.ts: createChunkJob(config)
→ POST /graphql (nginx :80 → fastapi:8702)
→ graphql.py: Mutation.create_chunk_job()
→ core.db: creates ChunkJob row in Postgres
→ Celery: run_job.delay(job_type="chunk", job_id=..., payload=...)
→ Returns { id, celery_task_id } to browser
→ App.tsx: setJobId(id) — triggers gRPC stream subscription
```
**Files:** `ui/chunker/src/api.ts`, `core/api/graphql.py`, `core/jobs/task.py`
---
## Step 2: gRPC-Web Stream (Browser → nginx → Envoy → gRPC Server)
Once `jobId` is set, `useGrpcStream(jobId)` opens a server-streaming RPC:
```
useGrpcStream(jobId) fires useEffect
→ GrpcWebFetchTransport({ baseUrl: "/grpc-web" })
→ WorkerServiceClient.streamChunkPipeline({ jobId })
→ fetch() POST to /grpc-web/mpr.worker.WorkerService/StreamChunkPipeline
→ nginx :80 /grpc-web/ (proxy_pass → envoy:8090, proxy_buffering off)
→ Envoy :8090 (grpc_web filter: HTTP/1.1 grpc-web → HTTP/2 native gRPC)
→ gRPC server :50051 WorkerServicer.StreamChunkPipeline()
→ Enters Redis polling loop (Step 5)
```
**Files:** `ui/chunker/src/hooks/useGrpcStream.ts`, `ctrl/nginx.conf`, `ctrl/envoy.yaml`, `core/rpc/server.py`
**Key nginx config:** `proxy_buffering off` is critical — without it, nginx buffers the upstream response before forwarding it to the client, so events arrive late and in batches instead of as they are emitted, defeating real-time streaming.
---
## Step 3: Celery Worker → ChunkHandler
```
Celery picks up run_job task
→ task.py: run_job(job_type="chunk", job_id, payload)
→ registry.get_handler("chunk") → ChunkHandler
→ chunk.py: ChunkHandler.process(job_id, payload)
→ download_to_temp(BUCKET_IN, source_key) — pulls source from MinIO/S3
→ Creates output_dir: /app/media/out/chunks/{job_id}/
→ Constructs event_bridge callback (bridges Pipeline events → Redis)
→ pipeline = Pipeline(source, ..., event_callback=event_bridge, output_dir=...)
→ pipeline.run()
```
**Files:** `core/jobs/task.py`, `core/jobs/handlers/chunk.py`
The `event_bridge` closure wraps every `Pipeline._emit()` call, forwarding to `push_event(job_id, event_type, data)` which writes to Redis.
---
## Step 4: Pipeline Orchestration (inside Celery worker process)
`Pipeline.run()` spawns multiple threads:
```
pipeline.run():
├─ Chunker(source, chunk_duration)
│ → ffprobe source file → gets duration, file_size
│ → calculates total_chunks = ceil(duration / chunk_duration)
├─ _emit("pipeline_start", {...}) → event_bridge → Redis
├─ _emit("pipeline_info", {file_size, duration, total_chunks}) → Redis
├─ Creates ChunkQueue(maxsize=10)
├─ Creates WorkerPool(num_workers=N, chunk_queue, processor, event_callback)
├─ pool.start() — spawns N worker threads
├─ MONITOR THREAD starts (_monitor_progress)
│ → Every 500ms: _emit("pipeline_progress", {elapsed, throughput_mbps}) → Redis
├─ PRODUCER THREAD starts (_produce_chunks)
│ → Iterates chunker.chunks() → yields Chunk(sequence, start_time, end_time)
│ → For each: chunk_queue.put(chunk)
│ → _emit("chunk_queued", {sequence, start_time, end_time, queue_size}) → Redis
│ → chunk_queue.close() when done (sends N sentinel Nones)
├─ WORKER THREADS (N concurrent, each runs worker.py:Worker.run())
│ │ Each worker loops:
│ │
│ ├─ chunk = chunk_queue.get(timeout=1.0)
│ ├─ _emit("chunk_processing", {sequence, state:"processing", queue_size}) → Redis
│ │
│ ├─ processor.process(chunk)
│ │ ├─ ffmpeg: runs `ffmpeg -ss start -to end -c copy chunk_NNNN.mp4`
│ │ ├─ simulated_decode: sleep(random) + checksum
│ │ └─ checksum: reads bytes, computes hash
│ │
│ ├─ On success: _emit("chunk_done", {sequence, processing_time, retries, queue_size}) → Redis
│ ├─ On failure: retries with exponential backoff (0.1s, 0.2s, 0.4s...)
│ │ └─ _emit("chunk_retry", {sequence, attempt, backoff}) → Redis
│ │ └─ _emit("chunk_error", {sequence, error, retries}) → Redis (after exhaustion)
│ │
│ └─ On sentinel (None): _emit("worker_status", {state:"stopped"}) → Redis
├─ pool.wait() — joins all worker threads, collects results
├─ monitor_stop.set() — stops progress monitor
├─ ResultCollector — reassembles results in sequence order
│ └─ _emit("chunk_collected", {sequence, buffered, emitted}) → Redis
├─ Writes manifest.json to output_dir
└─ _emit("pipeline_complete", {total_chunks, processed, failed, elapsed, throughput}) → Redis
```
**Files:** `core/chunker/pipeline.py`, `core/chunker/worker.py`, `core/chunker/pool.py`, `core/chunker/chunker.py`, `core/chunker/collector.py`
---
## Step 5: Redis — the Event Bus
```
WRITE side (Celery worker, all threads):
push_event(job_id, event_type, data)
→ json.dumps({"event": event_type, ...data})
→ Redis RPUSH to key "chunk_events:{job_id}"
→ Redis EXPIRE 3600 (1 hour TTL)
READ side (gRPC server, StreamChunkPipeline):
poll_events(job_id, cursor)
→ Redis LRANGE "chunk_events:{job_id}" cursor -1
→ Returns (parsed_events, new_cursor)
→ Called every 50ms (time.sleep(0.05) in server loop)
```
Redis acts as a decoupling layer between the Celery worker process (which runs the pipeline) and the gRPC server process (which streams to browsers). Events are appended with RPUSH and read with cursor-based LRANGE polling.
**Files:** `core/events.py`
---
## Step 6: gRPC Server → Envoy → nginx → Browser
```
server.py: StreamChunkPipeline polling loop:
while context.is_active() and time.monotonic() < timeout: ← timeout = start + 600s (10 min cap)
events, cursor = poll_events(job_id, cursor) ← Redis LRANGE
for data in events:
yield worker_pb2.ChunkPipelineEvent( ← serialized protobuf message
job_id, event_type, sequence, worker_id,
state, queue_size, elapsed, throughput_mbps,
total_chunks, processed_chunks, failed_chunks,
error, processing_time, retries
)
if event_type in ("pipeline_complete", "pipeline_error"):
return ← ends the stream
time.sleep(0.05) ← 50ms poll interval
Each yield sends:
→ gRPC HTTP/2 DATA frame to Envoy
→ Envoy grpc_web filter: HTTP/2 → base64-encoded grpc-web-text
→ nginx proxy_pass (proxy_buffering off) → chunked HTTP/1.1 to browser
→ fetch() ReadableStream in GrpcWebFetchTransport
→ @protobuf-ts decodes protobuf → ChunkPipelineEvent TypeScript object
```
**Files:** `core/rpc/server.py`, `ctrl/envoy.yaml`, `ctrl/nginx.conf`, `ui/common/api/grpc/worker.ts`, `ui/common/api/grpc/worker.client.ts`
---
## Step 7: React State Derivation and Rendering
```
useGrpcStream.ts:
for await (const msg of stream.responses):
const evt = toEvent(msg) ← maps protobuf camelCase → snake_case PipelineEvent
setEvents(prev => [...prev, evt]) ← appends to events array
if pipeline_complete/error → setDone(true), break
App.tsx useMemo(events):
Iterates ALL events on every update, derives:
├─ chunkMap: Map<sequence, ChunkInfo> — state machine per chunk
│ pending → queued → processing → done/error/retry
├─ workerMap: Map<worker_id, WorkerInfo> — state per worker
│ idle → processing → idle → ... → stopped
├─ stats: PipelineStats
│ total_chunks, processed, failed, retries, elapsed, throughput_mbps, queue_size
├─ errors: ErrorEntry[] — every event containing an error field
└─ queueSize: number — last seen queue_size value
Renders:
├─ ChunkGrid — colored cells per chunk (pending/queued/processing/done/error)
├─ QueueGauge — current queue depth / max
├─ WorkerPanel — per-worker state + current chunk assignment
├─ StatsPanel — elapsed time, throughput, processed/failed counts
├─ ErrorLog — scrollable error list
└─ OutputFiles — download links (when done)
```
**Files:** `ui/chunker/src/hooks/useGrpcStream.ts`, `ui/chunker/src/App.tsx`
---
## Step 8: Output File Access (after pipeline completes)
```
App.tsx useEffect([done, jobId]):
→ api.ts: getChunkOutputFiles(jobId)
→ POST /graphql → graphql.py: chunk_output_files(job_id)
→ Reads /app/media/out/chunks/{job_id}/ directory listing from disk
→ Returns [{key, size, url: "/media/out/chunks/{job_id}/chunk_0001.mp4"}]
→ Browser renders download links
→ Click link → nginx /media/out/ → alias /app/media/out/ → serves file from disk
```
Chunks are written directly to `media/out/chunks/{job_id}/` by the ffmpeg processor — no MinIO upload needed for output. Nginx serves them with `autoindex on`.
**Files:** `core/api/graphql.py`, `core/jobs/handlers/chunk.py`, `ctrl/nginx.conf`
---
## Event Types Reference
| Event | Source | Key Fields |
|-------|--------|------------|
| `pipeline_start` | Pipeline.run() | source, chunk_duration, num_workers, processor_type |
| `pipeline_info` | Pipeline.run() | file_size, source_duration, total_chunks |
| `pipeline_progress` | Monitor thread (500ms) | elapsed, throughput_mbps |
| `chunk_queued` | Producer thread | sequence, start_time, end_time, duration, queue_size |
| `chunk_processing` | Worker thread | sequence, worker_id, state, queue_size |
| `chunk_done` | Worker thread | sequence, processing_time, retries, queue_size |
| `chunk_retry` | Worker thread | sequence, attempt, backoff |
| `chunk_error` | Worker thread | sequence, error, retries |
| `chunk_collected` | ResultCollector | sequence, buffered, emitted |
| `worker_status` | Worker thread | worker_id, state (idle/processing/stopped) |
| `pipeline_complete` | Pipeline.run() | total_chunks, processed, failed, elapsed, throughput_mbps |
| `pipeline_error` | Pipeline.run() | error |
---
## Thread Model (inside Celery worker)
```
Celery worker process
└─ run_job task thread
└─ Pipeline.run()
├─ Producer thread — enqueues chunks
├─ Monitor thread — emits progress every 500ms
├─ Worker thread 0 — pulls from queue, processes
├─ Worker thread 1 — pulls from queue, processes
├─ Worker thread 2 — pulls from queue, processes
└─ Worker thread 3 — pulls from queue, processes
```
All threads share the same `event_callback` → `event_bridge` → `push_event()` path; `push_event()` creates a new Redis connection per call. Thread-safe via Redis atomic RPUSH.
---
## Infrastructure
| Service | Port | Role |
|---------|------|------|
| nginx | 80 | Reverse proxy, static file serving |
| fastapi | 8702 | GraphQL API (Strawberry) |
| celery | — | Task worker (runs pipeline) |
| redis | 6379 | Event bus + Celery broker |
| grpc | 50051 | gRPC server (StreamChunkPipeline) |
| envoy | 8090 | gRPC-Web ↔ native gRPC translation |
| minio | 9000 | S3-compatible source media storage |
| postgres | 5432 | Job/asset metadata |

View File

@@ -1,212 +0,0 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>MPR - Architecture</title>
<link rel="stylesheet" href="styles.css" />
</head>
<body>
<h1>MPR - Media Processor</h1>
<p>
Media transcoding platform with dual execution modes: local (Celery
+ MinIO) and cloud (AWS Step Functions + Lambda + S3).
</p>
<nav>
<a href="#overview">System Overview</a>
<a href="#data-model">Data Model</a>
<a href="#job-flow">Job Flow</a>
<a href="#media-storage">Media Storage</a>
</nav>
<h2 id="overview">System Overview</h2>
<div class="diagram-container">
<div class="diagram">
<h3>Local Architecture (Development)</h3>
<object type="image/svg+xml" data="01a-local-architecture.svg">
<img
src="01a-local-architecture.svg"
alt="Local Architecture"
/>
</object>
<a href="01a-local-architecture.svg" target="_blank"
>Open full size</a
>
</div>
<div class="diagram">
<h3>AWS Architecture (Production)</h3>
<object type="image/svg+xml" data="01b-aws-architecture.svg">
<img
src="01b-aws-architecture.svg"
alt="AWS Architecture"
/>
</object>
<a href="01b-aws-architecture.svg" target="_blank"
>Open full size</a
>
</div>
</div>
<div class="legend">
<h3>Components</h3>
<ul>
<li>
<span class="color-box" style="background: #e8f4f8"></span>
Reverse Proxy (nginx)
</li>
<li>
<span class="color-box" style="background: #f0f8e8"></span>
Application Layer (Django Admin, GraphQL API, Timeline UI)
</li>
<li>
<span class="color-box" style="background: #fff8e8"></span>
Worker Layer (Celery local mode)
</li>
<li>
<span class="color-box" style="background: #fde8d0"></span>
AWS (Step Functions, Lambda - cloud mode)
</li>
<li>
<span class="color-box" style="background: #f8e8f0"></span>
Data Layer (PostgreSQL, Redis)
</li>
<li>
<span class="color-box" style="background: #f0f0f0"></span>
S3 Storage (MinIO local / AWS S3 cloud)
</li>
</ul>
</div>
<h2 id="data-model">Data Model</h2>
<div class="diagram-container">
<div class="diagram">
<h3>Entity Relationships</h3>
<object type="image/svg+xml" data="02-data-model.svg">
<img src="02-data-model.svg" alt="Data Model" />
</object>
<a href="02-data-model.svg" target="_blank">Open full size</a>
</div>
</div>
<div class="legend">
<h3>Entities</h3>
<ul>
<li>
<span class="color-box" style="background: #4a90d9"></span>
MediaAsset - Video/audio files (S3 keys as paths)
</li>
<li>
<span class="color-box" style="background: #50b050"></span>
TranscodePreset - Encoding configurations
</li>
<li>
<span class="color-box" style="background: #d9534f"></span>
TranscodeJob - Processing queue (celery_task_id or
execution_arn)
</li>
</ul>
</div>
<h2 id="job-flow">Job Flow</h2>
<div class="diagram-container">
<div class="diagram">
<h3>Job Lifecycle</h3>
<object type="image/svg+xml" data="03-job-flow.svg">
<img src="03-job-flow.svg" alt="Job Flow" />
</object>
<a href="03-job-flow.svg" target="_blank">Open full size</a>
</div>
</div>
<div class="legend">
<h3>Job States</h3>
<ul>
<li>
<span class="color-box" style="background: #ffc107"></span>
PENDING - Waiting in queue
</li>
<li>
<span class="color-box" style="background: #17a2b8"></span>
PROCESSING - Worker executing
</li>
<li>
<span class="color-box" style="background: #28a745"></span>
COMPLETED - Success
</li>
<li>
<span class="color-box" style="background: #dc3545"></span>
FAILED - Error occurred
</li>
<li>
<span class="color-box" style="background: #6c757d"></span>
CANCELLED - User cancelled
</li>
</ul>
<h3>Execution Modes</h3>
<ul>
<li>
<span class="color-box" style="background: #e8f4e8"></span>
Local: Celery + MinIO (S3 API) + FFmpeg
</li>
<li>
<span class="color-box" style="background: #fde8d0"></span>
Lambda: Step Functions + Lambda + AWS S3
</li>
</ul>
</div>
<h2 id="media-storage">Media Storage</h2>
<div class="diagram-container">
<p>
MPR separates media into input and output paths for flexible
storage configuration.
</p>
<p>
<a href="04-media-storage.md" target="_blank"
>View Media Storage Documentation →</a
>
</p>
</div>
<h2>API (GraphQL)</h2>
<pre><code># GraphiQL IDE
http://mpr.local.ar/graphql
# Queries
query { assets(status: "ready") { id filename duration } }
query { jobs(status: "processing") { id status progress } }
query { presets { id name container videoCodec } }
query { systemStatus { status version } }
# Mutations
mutation { scanMediaFolder { found registered skipped } }
mutation { createJob(input: { sourceAssetId: "...", presetId: "..." }) { id status } }
mutation { cancelJob(id: "...") { id status } }
mutation { retryJob(id: "...") { id status } }
mutation { updateAsset(id: "...", input: { comments: "..." }) { id comments } }
mutation { deleteAsset(id: "...") { ok } }
# Lambda callback (REST)
POST /api/jobs/{id}/callback - Lambda completion webhook</code></pre>
<h2>Access Points</h2>
<pre><code># Local development
127.0.0.1 mpr.local.ar
http://mpr.local.ar/admin - Django Admin
http://mpr.local.ar/graphql - GraphiQL
http://mpr.local.ar/ - Timeline UI
http://localhost:9001 - MinIO Console
# AWS deployment
https://mpr.mcrn.ar/ - Production</code></pre>
<h2>Quick Reference</h2>
<pre><code># Render SVGs from DOT files
for f in *.dot; do dot -Tsvg "$f" -o "${f%.dot}.svg"; done
# Switch executor mode
MPR_EXECUTOR=local # Celery + MinIO
MPR_EXECUTOR=lambda # Step Functions + Lambda + S3</code></pre>
</body>
</html>

View File

@@ -3,6 +3,8 @@
--text-color: #e8e8e8;
--accent-color: #4a90d9;
--border-color: #333;
--sidebar-width: 220px;
--sidebar-bg: #151528;
}
* {
@@ -16,6 +18,59 @@ body {
background-color: var(--bg-color);
color: var(--text-color);
line-height: 1.6;
}
/* Sidebar navigation: pinned to the top-left corner at full viewport height.
   It is taken out of normal flow (position: fixed), so it stays in place while
   the page scrolls and scrolls on its own when its content overflows. */
.sidebar {
position: fixed;
top: 0;
left: 0;
width: var(--sidebar-width);
height: 100vh;
background: var(--sidebar-bg);
border-right: 1px solid var(--border-color);
padding: 1.5rem 1rem;
overflow-y: auto;
z-index: 10;
}
/* Sidebar heading: accent-colored, separated from the links by a bottom rule */
.sidebar h2 {
font-size: 1.2rem;
color: var(--accent-color);
margin-bottom: 1.5rem;
padding-bottom: 0.5rem;
border-bottom: 1px solid var(--border-color);
}
/* Link list: unstyled bullets, stacked vertically with a small gap */
.sidebar ul {
list-style: none;
display: flex;
flex-direction: column;
gap: 0.25rem;
}
.sidebar li {
display: block;
}
/* Links are block-level so the whole padded row is the click/hover target */
.sidebar a {
display: block;
padding: 0.4rem 0.6rem;
color: var(--text-color);
text-decoration: none;
font-size: 0.85rem;
border-radius: 4px;
transition: background 0.15s, color 0.15s;
}
/* Hover: 15%-opacity tint of the accent color (#4a90d9 = rgb(74, 144, 217)) */
.sidebar a:hover {
background: rgba(74, 144, 217, 0.15);
color: var(--accent-color);
}
/* Main content: shifted right by the sidebar width so the fixed-position
   sidebar does not cover it */
.content {
margin-left: var(--sidebar-width);
padding: 2rem;
}
@@ -25,12 +80,13 @@ h1 {
color: var(--accent-color);
}
h2 {
.content > h2 {
font-size: 1.5rem;
margin: 2rem 0 1rem;
color: var(--text-color);
border-bottom: 1px solid var(--border-color);
padding-bottom: 0.5rem;
scroll-margin-top: 1rem;
}
.diagram-container {
@@ -76,20 +132,6 @@ h2 {
text-decoration: underline;
}
nav {
margin-bottom: 2rem;
}
nav a {
color: var(--accent-color);
text-decoration: none;
margin-right: 1.5rem;
}
nav a:hover {
text-decoration: underline;
}
.legend {
margin-top: 2rem;
padding: 1rem;
@@ -141,3 +183,27 @@ pre code {
background: none;
padding: 0;
}
/* Responsive: collapse sidebar on small screens.
   At 768px and below the sidebar returns to normal flow as a full-width block,
   and the content drops its left offset. */
@media (max-width: 768px) {
.sidebar {
position: static;
width: 100%;
height: auto;
border-right: none;
border-bottom: 1px solid var(--border-color);
}
/* Lay the links out in wrapping rows instead of a single column */
.sidebar ul {
flex-direction: row;
flex-wrap: wrap;
}
/* No sidebar beside the content any more, so remove the offset */
.content {
margin-left: 0;
}
/* NOTE(review): presumably forces one diagram per row; the base .diagram rule
   is not visible here, so confirm against it */
.diagram {
min-width: 100%;
}
}

View File

@@ -7,219 +7,241 @@
<link rel="stylesheet" href="architecture/styles.css" />
</head>
<body>
<h1>MPR - Media Processor</h1>
<p>
Media transcoding platform with three execution modes: local (Celery
+ MinIO), AWS (Step Functions + Lambda + S3), and GCP (Cloud Run
Jobs + GCS). Storage is S3-compatible across all environments.
</p>
<nav>
<a href="#overview">System Overview</a>
<a href="#data-model">Data Model</a>
<a href="#job-flow">Job Flow</a>
<a href="#media-storage">Media Storage</a>
<nav class="sidebar">
<h2>MPR</h2>
<ul>
<li><a href="#overview">System Overview</a></li>
<li><a href="#data-model">Data Model</a></li>
<li><a href="#job-flow">Job Flow</a></li>
<li><a href="#media-storage">Media Storage</a></li>
<li><a href="#chunker-pipeline">Chunker Pipeline</a></li>
<li><a href="#api">API (GraphQL)</a></li>
<li><a href="#access-points">Access Points</a></li>
<li><a href="#quick-reference">Quick Reference</a></li>
</ul>
</nav>
<h2 id="overview">System Overview</h2>
<div class="diagram-container">
<div class="diagram">
<h3>Local Architecture (Development)</h3>
<object
type="image/svg+xml"
data="architecture/01a-local-architecture.svg"
>
<img
src="architecture/01a-local-architecture.svg"
alt="Local Architecture"
/>
</object>
<a
href="architecture/01a-local-architecture.svg"
target="_blank"
>Open full size</a
>
</div>
<div class="diagram">
<h3>AWS Architecture (Production)</h3>
<object
type="image/svg+xml"
data="architecture/01b-aws-architecture.svg"
>
<img
src="architecture/01b-aws-architecture.svg"
alt="AWS Architecture"
/>
</object>
<a href="architecture/01b-aws-architecture.svg" target="_blank"
>Open full size</a
>
</div>
<div class="diagram">
<h3>GCP Architecture (Production)</h3>
<object
type="image/svg+xml"
data="architecture/01c-gcp-architecture.svg"
>
<img
src="architecture/01c-gcp-architecture.svg"
alt="GCP Architecture"
/>
</object>
<a href="architecture/01c-gcp-architecture.svg" target="_blank"
>Open full size</a
>
</div>
</div>
<div class="legend">
<h3>Components</h3>
<ul>
<li>
<span class="color-box" style="background: #e8f4f8"></span>
Reverse Proxy (nginx)
</li>
<li>
<span class="color-box" style="background: #f0f8e8"></span>
Application Layer (Django Admin, GraphQL API, Timeline UI)
</li>
<li>
<span class="color-box" style="background: #fff8e8"></span>
Worker Layer (Celery local mode)
</li>
<li>
<span class="color-box" style="background: #fde8d0"></span>
AWS (Step Functions, Lambda)
</li>
<li>
<span class="color-box" style="background: #e8f0fd"></span>
GCP (Cloud Run Jobs + GCS)
</li>
<li>
<span class="color-box" style="background: #f8e8f0"></span>
Data Layer (PostgreSQL, Redis)
</li>
<li>
<span class="color-box" style="background: #f0f0f0"></span>
S3-compatible Storage (MinIO / AWS S3 / GCS)
</li>
</ul>
</div>
<h2 id="data-model">Data Model</h2>
<div class="diagram-container">
<div class="diagram">
<h3>Entity Relationships</h3>
<object
type="image/svg+xml"
data="architecture/02-data-model.svg"
>
<img
src="architecture/02-data-model.svg"
alt="Data Model"
/>
</object>
<a href="architecture/02-data-model.svg" target="_blank"
>Open full size</a
>
</div>
</div>
<div class="legend">
<h3>Entities</h3>
<ul>
<li>
<span class="color-box" style="background: #4a90d9"></span>
MediaAsset - Video/audio files with metadata
</li>
<li>
<span class="color-box" style="background: #50b050"></span>
TranscodePreset - Encoding configurations
</li>
<li>
<span class="color-box" style="background: #d9534f"></span>
TranscodeJob - Processing queue items
</li>
</ul>
</div>
<h2 id="job-flow">Job Flow</h2>
<div class="diagram-container">
<div class="diagram">
<h3>Job Lifecycle</h3>
<object
type="image/svg+xml"
data="architecture/03-job-flow.svg"
>
<img src="architecture/03-job-flow.svg" alt="Job Flow" />
</object>
<a href="architecture/03-job-flow.svg" target="_blank"
>Open full size</a
>
</div>
</div>
<div class="legend">
<h3>Job States</h3>
<ul>
<li>
<span class="color-box" style="background: #ffc107"></span>
PENDING - Waiting in queue
</li>
<li>
<span class="color-box" style="background: #17a2b8"></span>
PROCESSING - Worker executing
</li>
<li>
<span class="color-box" style="background: #28a745"></span>
COMPLETED - Success
</li>
<li>
<span class="color-box" style="background: #dc3545"></span>
FAILED - Error occurred
</li>
<li>
<span class="color-box" style="background: #6c757d"></span>
CANCELLED - User cancelled
</li>
</ul>
</div>
<h2 id="media-storage">Media Storage</h2>
<div class="diagram-container">
<main class="content">
<h1>MPR - Media Processor</h1>
<p>
MPR separates media into <strong>input</strong> and
<strong>output</strong> paths, each independently configurable.
File paths are stored
<strong>relative to their respective root</strong> to ensure
portability between local development and cloud deployments (AWS
S3, etc.).
Media transcoding platform with three execution modes: local (Celery
+ MinIO), AWS (Step Functions + Lambda + S3), and GCP (Cloud Run
Jobs + GCS). Storage is S3-compatible across all environments.
</p>
</div>
<div class="legend">
<h3>Input / Output Separation</h3>
<ul>
<li>
<span class="color-box" style="background: #4a90d9"></span>
<code>MEDIA_IN</code> - Source media files to process
</li>
<li>
<span class="color-box" style="background: #50b050"></span>
<code>MEDIA_OUT</code> - Transcoded/trimmed output files
</li>
</ul>
<p><strong>Why Relative Paths?</strong></p>
<ul>
<li>Portability: Same database works locally and in cloud</li>
<li>Flexibility: Easy to switch between storage backends</li>
<li>Simplicity: No need to update paths when migrating</li>
</ul>
</div>
<h2 id="overview">System Overview</h2>
<div class="diagram-container">
<div class="diagram">
<h3>Local Architecture (Development)</h3>
<object
type="image/svg+xml"
data="architecture/01a-local-architecture.svg"
>
<img
src="architecture/01a-local-architecture.svg"
alt="Local Architecture"
/>
</object>
<a
href="architecture/01a-local-architecture.svg"
target="_blank"
>Open full size</a
>
</div>
<div class="diagram">
<h3>AWS Architecture (Production)</h3>
<object
type="image/svg+xml"
data="architecture/01b-aws-architecture.svg"
>
<img
src="architecture/01b-aws-architecture.svg"
alt="AWS Architecture"
/>
</object>
<a href="architecture/01b-aws-architecture.svg" target="_blank"
>Open full size</a
>
</div>
<div class="diagram">
<h3>GCP Architecture (Production)</h3>
<object
type="image/svg+xml"
data="architecture/01c-gcp-architecture.svg"
>
<img
src="architecture/01c-gcp-architecture.svg"
alt="GCP Architecture"
/>
</object>
<a href="architecture/01c-gcp-architecture.svg" target="_blank"
>Open full size</a
>
</div>
</div>
<div class="legend">
<h3>Local Development</h3>
<pre><code>MEDIA_IN=/app/media/in
<div class="legend">
<h3>Components</h3>
<ul>
<li>
<span class="color-box" style="background: #e8f4f8"></span>
Reverse Proxy (nginx)
</li>
<li>
<span class="color-box" style="background: #f0f8e8"></span>
Application Layer (Django Admin, GraphQL API, Timeline UI)
</li>
<li>
<span class="color-box" style="background: #fff8e8"></span>
Worker Layer (Celery local mode)
</li>
<li>
<span class="color-box" style="background: #fde8d0"></span>
AWS (Step Functions, Lambda)
</li>
<li>
<span class="color-box" style="background: #e8f0fd"></span>
GCP (Cloud Run Jobs + GCS)
</li>
<li>
<span class="color-box" style="background: #f8e8f0"></span>
Data Layer (PostgreSQL, Redis)
</li>
<li>
<span class="color-box" style="background: #f0f0f0"></span>
S3-compatible Storage (MinIO / AWS S3 / GCS)
</li>
</ul>
</div>
<h2 id="data-model">Data Model</h2>
<div class="diagram-container">
<div class="diagram">
<h3>Entity Relationships</h3>
<object
type="image/svg+xml"
data="architecture/02-data-model.svg"
>
<img
src="architecture/02-data-model.svg"
alt="Data Model"
/>
</object>
<a href="architecture/02-data-model.svg" target="_blank"
>Open full size</a
>
</div>
</div>
<div class="legend">
<h3>Entities</h3>
<ul>
<li>
<span class="color-box" style="background: #4a90d9"></span>
MediaAsset - Video/audio files with metadata
</li>
<li>
<span class="color-box" style="background: #50b050"></span>
TranscodePreset - Encoding configurations
</li>
<li>
<span class="color-box" style="background: #d9534f"></span>
TranscodeJob - Processing queue items
</li>
</ul>
</div>
<h2 id="job-flow">Job Flow</h2>
<div class="diagram-container">
<div class="diagram">
<h3>Job Lifecycle</h3>
<object
type="image/svg+xml"
data="architecture/03-job-flow.svg"
>
<img src="architecture/03-job-flow.svg" alt="Job Flow" />
</object>
<a href="architecture/03-job-flow.svg" target="_blank"
>Open full size</a
>
</div>
</div>
<div class="legend">
<h3>Job States</h3>
<ul>
<li>
<span class="color-box" style="background: #ffc107"></span>
PENDING - Waiting in queue
</li>
<li>
<span class="color-box" style="background: #17a2b8"></span>
PROCESSING - Worker executing
</li>
<li>
<span class="color-box" style="background: #28a745"></span>
COMPLETED - Success
</li>
<li>
<span class="color-box" style="background: #dc3545"></span>
FAILED - Error occurred
</li>
<li>
<span class="color-box" style="background: #6c757d"></span>
CANCELLED - User cancelled
</li>
</ul>
<h3>Execution Modes</h3>
<ul>
<li>
<span class="color-box" style="background: #e8f4e8"></span>
Local: Celery + MinIO (S3 API) + FFmpeg
</li>
<li>
<span class="color-box" style="background: #fde8d0"></span>
Lambda: Step Functions + Lambda + AWS S3
</li>
<li>
<span class="color-box" style="background: #e8f0fd"></span>
GCP: Cloud Run Jobs + GCS (S3 compat)
</li>
</ul>
</div>
<h2 id="media-storage">Media Storage</h2>
<div class="diagram-container">
<p>
MPR separates media into <strong>input</strong> and
<strong>output</strong> paths, each independently configurable.
File paths are stored
<strong>relative to their respective root</strong> to ensure
portability between local development and cloud deployments.
</p>
</div>
<div class="legend">
<h3>Input / Output Separation</h3>
<ul>
<li>
<span class="color-box" style="background: #4a90d9"></span>
<code>MEDIA_IN</code> - Source media files to process
</li>
<li>
<span class="color-box" style="background: #50b050"></span>
<code>MEDIA_OUT</code> - Transcoded/trimmed output files
</li>
</ul>
<p><strong>Why Relative Paths?</strong></p>
<ul>
<li>Portability: Same database works locally and in cloud</li>
<li>Flexibility: Easy to switch between storage backends</li>
<li>Simplicity: No need to update paths when migrating</li>
</ul>
</div>
<div class="legend">
<h3>Local Development</h3>
<pre><code>MEDIA_IN=/app/media/in
MEDIA_OUT=/app/media/out
/app/media/
@@ -228,52 +250,131 @@ MEDIA_OUT=/app/media/out
│ └── subfolder/video3.mp4
└── out/ # Transcoded output
└── video1_h264.mp4</code></pre>
</div>
</div>
<div class="legend">
<h3>AWS/Cloud Deployment</h3>
<pre><code>MEDIA_IN=s3://source-bucket/media/
<div class="legend">
<h3>AWS/Cloud Deployment</h3>
<pre><code>MEDIA_IN=s3://source-bucket/media/
MEDIA_OUT=s3://output-bucket/transcoded/
MEDIA_BASE_URL=https://source-bucket.s3.amazonaws.com/media/</code></pre>
<p>
Database paths remain unchanged (already relative). Just upload
files to S3 and update environment variables.
</p>
</div>
<p>
Database paths remain unchanged (already relative). Just upload
files to S3 and update environment variables.
</p>
</div>
<div class="legend">
<h3>API (GraphQL)</h3>
<p>
All client interactions go through GraphQL at
<code>/graphql</code>.
<a href="architecture/04-media-storage.md" target="_blank"
>Full Media Storage Documentation &rarr;</a
>
</p>
<ul>
<li>
<code>scanMediaFolder</code> - Scan S3 bucket for media
files
</li>
<li><code>createJob</code> - Create transcode/trim job</li>
<li>
<code>cancelJob / retryJob</code> - Job lifecycle management
</li>
<li>
<code>updateAsset / deleteAsset</code> - Asset management
</li>
</ul>
<p><strong>Supported File Types:</strong></p>
<p>
Video: mp4, mkv, avi, mov, webm, flv, wmv, m4v<br />
Audio: mp3, wav, flac, aac, ogg, m4a
</p>
</div>
<h2>Access Points</h2>
<pre><code># Add to /etc/hosts
<h2 id="chunker-pipeline">Chunker Pipeline</h2>
<div class="diagram-container">
<p>
The chunker pipeline splits media into time-based segments,
streaming real-time events from worker threads through Redis
and gRPC-Web to the browser UI. Each event makes 7 hops from worker thread to pixel.
</p>
</div>
<div class="legend">
<h3>Event Path</h3>
<pre><code>Worker thread → Pipeline._emit() → event_bridge() → Redis RPUSH
→ [50ms poll] gRPC server LRANGE → yield protobuf
→ HTTP/2 frame → Envoy (grpc-web filter)
→ HTTP/1.1 chunk → nginx (proxy_buffering off)
→ fetch ReadableStream → protobuf-ts decode
→ setEvents([...prev, evt]) → React re-render</code></pre>
</div>
<div class="legend">
<h3>Thread Model (inside Celery worker)</h3>
<pre><code>Celery worker process
└─ run_job task thread
└─ Pipeline.run()
├─ Producer thread — enqueues chunks
├─ Monitor thread — emits progress every 500ms
├─ Worker thread 0 — pulls from queue, processes
├─ Worker thread 1 — pulls from queue, processes
├─ Worker thread 2 — pulls from queue, processes
└─ Worker thread 3 — pulls from queue, processes</code></pre>
</div>
<div class="legend">
<h3>Infrastructure</h3>
<ul>
<li><code>nginx :80</code> - Reverse proxy, static file serving</li>
<li><code>fastapi :8702</code> - GraphQL API (Strawberry)</li>
<li><code>celery</code> - Task worker (runs pipeline)</li>
<li><code>redis :6379</code> - Event bus + Celery broker</li>
<li><code>grpc :50051</code> - gRPC server (StreamChunkPipeline)</li>
<li><code>envoy :8090</code> - gRPC-Web &harr; native gRPC translation</li>
<li><code>minio :9000</code> - S3-compatible source media storage</li>
<li><code>postgres :5432</code> - Job/asset metadata</li>
</ul>
</div>
<p>
<a href="architecture/05-chunker-pipeline.md" target="_blank"
>Full Chunker Pipeline Documentation &rarr;</a
>
</p>
<h2 id="api">API (GraphQL)</h2>
<div class="legend">
<p>
All client interactions go through GraphQL at
<code>/graphql</code>.
</p>
<pre><code># GraphiQL IDE
http://mpr.local.ar/graphql
# Queries
query { assets(status: "ready") { id filename duration } }
query { jobs(status: "processing") { id status progress } }
query { presets { id name container videoCodec } }
query { systemStatus { status version } }
# Mutations
mutation { scanMediaFolder { found registered skipped } }
mutation { createJob(input: { sourceAssetId: "...", presetId: "..." }) { id status } }
mutation { cancelJob(id: "...") { id status } }
mutation { retryJob(id: "...") { id status } }
mutation { updateAsset(id: "...", input: { comments: "..." }) { id comments } }
mutation { deleteAsset(id: "...") { ok } }
# Lambda callback (REST)
POST /api/jobs/{id}/callback - Lambda completion webhook</code></pre>
<p><strong>Supported File Types:</strong></p>
<p>
Video: mp4, mkv, avi, mov, webm, flv, wmv, m4v<br />
Audio: mp3, wav, flac, aac, ogg, m4a
</p>
</div>
<h2 id="access-points">Access Points</h2>
<pre><code># Add to /etc/hosts
127.0.0.1 mpr.local.ar
# URLs
http://mpr.local.ar/admin - Django Admin
http://mpr.local.ar/graphql - GraphiQL IDE
http://mpr.local.ar/ - Timeline UI</code></pre>
http://mpr.local.ar/admin - Django Admin
http://mpr.local.ar/graphql - GraphiQL IDE
http://mpr.local.ar/ - Timeline UI
http://mpr.local.ar/chunker/ - Chunker UI
http://localhost:9001 - MinIO Console
# AWS deployment
https://mpr.mcrn.ar/ - Production</code></pre>
<h2 id="quick-reference">Quick Reference</h2>
<pre><code># Render SVGs from DOT files
for f in docs/architecture/*.dot; do dot -Tsvg "$f" -o "${f%.dot}.svg"; done
# Switch executor mode
MPR_EXECUTOR=local # Celery + MinIO
MPR_EXECUTOR=lambda # Step Functions + Lambda + S3
MPR_EXECUTOR=gcp # Cloud Run Jobs + GCS</code></pre>
</main>
</body>
</html>

View File

@@ -101,6 +101,12 @@ class SchemaLoader:
for enum_cls in enums:
self.enums.append(self._parse_enum(enum_cls))
# Extract VIEWS (view/event projections)
if load_all or "views" in include:
views = getattr(module, "VIEWS", [])
for cls in views:
self.api_models.append(self._parse_dataclass(cls))
# Extract GRPC_MESSAGES (optional)
if load_all or "grpc" in include:
grpc_messages = getattr(module, "GRPC_MESSAGES", [])

0
tests/__init__.py Normal file
View File

View File

76
tests/chunker/conftest.py Normal file
View File

@@ -0,0 +1,76 @@
"""
Shared fixtures for chunker tests.
Demonstrates: TDD and unit testing best practices (Interview Topic 8) — fixtures, temp files.
"""
import os
import tempfile
import pytest
from core.chunker.models import Chunk, ChunkResult
@pytest.fixture
def temp_file():
    """Yield a factory that creates temporary ``.mp4`` files with known content.

    The factory accepts the bytes to write (4096 ``b"x"`` bytes by default)
    and returns the path of the file it created. Every file created through
    the factory is removed when the requesting test finishes.
    """
    created = []

    def _create(content: bytes = b"x" * 4096) -> str:
        # delete=False keeps the file on disk after the handle is closed, so
        # the caller can reopen it by path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as handle:
            # Register the path before writing: if the write fails, teardown
            # still removes the file instead of leaking it.
            created.append(handle.name)
            handle.write(content)
        return handle.name

    yield _create

    for path in created:
        # The test may have deleted the file itself; that is not an error.
        try:
            os.unlink(path)
        except FileNotFoundError:
            pass
@pytest.fixture
def sample_chunk(temp_file):
    """Provide one Chunk (sequence 0, 0.0s-10.0s) backed by a 1024-byte temp file."""
    source = temp_file(b"x" * 1024)
    fields = {
        "sequence": 0,
        "start_time": 0.0,
        "end_time": 10.0,
        "source_path": source,
        "duration": 10.0,
    }
    return Chunk(**fields)
@pytest.fixture
def make_chunk(temp_file):
    """Provide a factory that builds a Chunk for a given sequence number.

    All chunks produced by one factory share a single 1024-byte temp file as
    their source. Chunk ``n`` covers ``[n * duration, n * duration + duration]``.
    """
    shared_source = temp_file(b"x" * 1024)

    def _make(sequence: int, duration: float = 10.0) -> Chunk:
        begin = sequence * duration
        finish = begin + duration
        return Chunk(
            sequence=sequence,
            start_time=begin,
            end_time=finish,
            source_path=shared_source,
            duration=duration,
        )

    return _make
@pytest.fixture
def make_result():
    """Provide a factory that builds a ChunkResult for a given sequence number.

    Defaults describe a successful result with a 0.01 processing time.
    """

    def _make(sequence: int, success: bool = True, processing_time: float = 0.01) -> ChunkResult:
        fields = {
            "sequence": sequence,
            "success": success,
            "processing_time": processing_time,
        }
        return ChunkResult(**fields)

    return _make

View File

@@ -0,0 +1,149 @@
"""
Tests for Chunker — time-based segmentation, chunk counts, sequence numbers, generator behavior.
Demonstrates: TDD (Interview Topic 8) — parametrized tests, edge cases, mocking.
"""
from unittest.mock import patch, MagicMock
import pytest
from core.chunker import Chunker
from core.chunker.exceptions import ChunkReadError
def mock_probe(duration):
    """Build a stand-in probe result whose ``duration`` attribute is *duration*.

    Intended as the ``return_value`` of a patched ``probe_file``.
    """
    # Keyword arguments to MagicMock are set as attributes on the new mock.
    return MagicMock(duration=duration)
class TestChunker:
@patch("core.chunker.chunker.probe_file")
def test_basic_chunking(self, mock_pf, temp_file):
"""File splits into expected number of time-based chunks."""
path = temp_file(b"x" * 1000)
mock_pf.return_value = mock_probe(30.0)
chunker = Chunker(path, chunk_duration=10.0)
chunks = list(chunker.chunks())
assert len(chunks) == 3
assert chunks[0].start_time == 0.0
assert chunks[0].end_time == 10.0
assert chunks[0].duration == 10.0
assert chunks[1].start_time == 10.0
assert chunks[2].start_time == 20.0
@patch("core.chunker.chunker.probe_file")
def test_sequence_numbers(self, mock_pf, temp_file):
"""Chunks have sequential sequence numbers starting at 0."""
path = temp_file(b"x" * 100)
mock_pf.return_value = mock_probe(40.0)
chunker = Chunker(path, chunk_duration=10.0)
chunks = list(chunker.chunks())
sequences = [c.sequence for c in chunks]
assert sequences == [0, 1, 2, 3]
@patch("core.chunker.chunker.probe_file")
def test_time_ranges(self, mock_pf, temp_file):
"""Each chunk has correct start_time and end_time."""
path = temp_file(b"x" * 100)
mock_pf.return_value = mock_probe(25.0)
chunker = Chunker(path, chunk_duration=10.0)
chunks = list(chunker.chunks())
assert chunks[0].start_time == 0.0
assert chunks[0].end_time == 10.0
assert chunks[1].start_time == 10.0
assert chunks[1].end_time == 20.0
assert chunks[2].start_time == 20.0
assert chunks[2].end_time == 25.0 # last chunk shorter
assert chunks[2].duration == 5.0
@patch("core.chunker.chunker.probe_file")
def test_expected_chunks_property(self, mock_pf, temp_file):
"""expected_chunks calculates correctly before iteration."""
path = temp_file(b"x" * 100)
mock_pf.return_value = mock_probe(25.0)
chunker = Chunker(path, chunk_duration=10.0)
assert chunker.expected_chunks == 3 # ceil(25/10)
@patch("core.chunker.chunker.probe_file")
def test_source_path_on_chunks(self, mock_pf, temp_file):
"""Each chunk carries the source file path."""
path = temp_file(b"x" * 100)
mock_pf.return_value = mock_probe(10.0)
chunker = Chunker(path, chunk_duration=10.0)
chunks = list(chunker.chunks())
assert all(c.source_path == path for c in chunks)
def test_file_not_found(self):
"""Non-existent file raises ChunkReadError."""
with pytest.raises(ChunkReadError, match="File not found"):
Chunker("/nonexistent/file.mp4")
@patch("core.chunker.chunker.probe_file")
def test_invalid_chunk_duration(self, mock_pf, temp_file):
"""Zero or negative chunk_duration raises ValueError."""
path = temp_file(b"x" * 100)
with pytest.raises(ValueError, match="chunk_duration must be positive"):
Chunker(path, chunk_duration=0)
with pytest.raises(ValueError, match="chunk_duration must be positive"):
Chunker(path, chunk_duration=-1)
@patch("core.chunker.chunker.probe_file")
def test_generator_laziness(self, mock_pf, temp_file):
    """chunks() can be consumed one item at a time, leaving the rest pending.

    Pulling a single chunk with next() must not exhaust the iterator: the
    remaining chunks of a 30 s source (10 s each) are still delivered
    afterwards, in order.
    """
    path = temp_file(b"x" * 100)
    mock_pf.return_value = mock_probe(30.0)
    chunker = Chunker(path, chunk_duration=10.0)
    gen = chunker.chunks()
    first = next(gen)
    assert first.sequence == 0
    # Generator is not exhausted — remaining chunks still pending.
    # Assert it instead of only stating it: draining now yields the rest.
    remaining = list(gen)
    assert [c.sequence for c in remaining] == [1, 2]
@pytest.mark.parametrize(
    "duration,chunk_dur,expected",
    [
        (10.0, 10.0, 1),
        (10.1, 10.0, 2),
        (1.0, 1.0, 1),
        (100.0, 1.0, 100),
        (5.0, 100.0, 1),
    ],
)
@patch("core.chunker.chunker.probe_file")
def test_expected_chunks_parametrized(self, mock_pf, temp_file, duration, chunk_dur, expected):
    """expected_chunks for several probed-duration / chunk_duration pairs."""
    mock_pf.return_value = mock_probe(duration)
    source = temp_file(b"x" * 100)
    count = Chunker(source, chunk_duration=chunk_dur).expected_chunks
    assert count == expected
@patch("core.chunker.chunker.probe_file")
def test_exact_multiple(self, mock_pf, temp_file):
    """A duration that divides evenly yields only full-length chunks."""
    mock_pf.return_value = mock_probe(30.0)
    source = temp_file(b"x" * 100)
    produced = list(Chunker(source, chunk_duration=10.0).chunks())
    # Three chunks, each exactly chunk_duration long.
    assert len(produced) == 3
    for chunk in produced:
        assert chunk.duration == 10.0
@patch("core.chunker.chunker.probe_file")
def test_probe_failure(self, mock_pf, temp_file):
    """An exception from probe_file surfaces as ChunkReadError."""
    mock_pf.side_effect = Exception("ffprobe failed")
    source = temp_file(b"x" * 100)
    with pytest.raises(ChunkReadError, match="Failed to probe"):
        Chunker(source, chunk_duration=10.0)

View File

@@ -0,0 +1,103 @@
"""
Tests for ResultCollector — ordered reassembly, out-of-order buffering, duplicates.
Demonstrates: TDD (Interview Topic 8) — testing algorithms (heapq reassembly).
"""
import pytest
from core.chunker.collector import ResultCollector
from core.chunker.exceptions import ReassemblyError
class TestResultCollector:
    """Tests for ResultCollector: ordered emission, buffering, duplicates, counters."""

    def test_in_order_emission(self, make_result):
        """Each result that arrives in sequence is emitted by its own add()."""
        collector = ResultCollector(total_chunks=3)
        first = collector.add(make_result(0))
        assert len(first) == 1
        assert first[0].sequence == 0
        for seq in (1, 2):
            released = collector.add(make_result(seq))
            assert len(released) == 1
        assert collector.is_complete

    def test_out_of_order_buffering(self, make_result):
        """A result ahead of the next expected sequence is held back until the gap closes."""
        collector = ResultCollector(total_chunks=3)

        # Arrival order: 2, 0, 1
        released = collector.add(make_result(2))
        assert len(released) == 0
        assert collector.buffered_count == 1

        released = collector.add(make_result(0))
        assert len(released) == 1  # only 0; sequence 1 is still missing

        released = collector.add(make_result(1))
        assert len(released) == 2  # 1 arrives and unblocks the buffered 2
        assert collector.is_complete

    def test_reverse_order(self, make_result):
        """With fully reversed arrival, nothing is emitted until sequence 0 shows up."""
        collector = ResultCollector(total_chunks=4)
        for seq in (3, 2, 1):
            released = collector.add(make_result(seq))
            assert len(released) == 0
        released = collector.add(make_result(0))
        assert len(released) == 4
        assert collector.is_complete

    def test_duplicate_raises(self, make_result):
        """Adding the same sequence number twice raises ReassemblyError."""
        collector = ResultCollector(total_chunks=3)
        collector.add(make_result(0))
        with pytest.raises(ReassemblyError, match="Duplicate"):
            collector.add(make_result(0))

    def test_emitted_count(self, make_result):
        """emitted_count grows only when results are actually released."""
        collector = ResultCollector(total_chunks=3)
        assert collector.emitted_count == 0

        collector.add(make_result(0))
        assert collector.emitted_count == 1

        collector.add(make_result(2))  # held in the buffer
        assert collector.emitted_count == 1

        collector.add(make_result(1))  # frees both 1 and 2
        assert collector.emitted_count == 3

    def test_get_ordered_results(self, make_result):
        """get_ordered_results returns results sorted by sequence regardless of arrival order."""
        collector = ResultCollector(total_chunks=3)
        for seq in (2, 0, 1):
            collector.add(make_result(seq))
        sequences = [item.sequence for item in collector.get_ordered_results()]
        assert sequences == [0, 1, 2]

    def test_avg_processing_time(self, make_result):
        """avg_processing_time is the mean of the recorded processing times."""
        collector = ResultCollector(total_chunks=2)
        collector.add(make_result(0, processing_time=0.1))
        collector.add(make_result(1, processing_time=0.3))
        deviation = abs(collector.avg_processing_time - 0.2)
        assert deviation < 0.001

    def test_not_complete_when_partial(self, make_result):
        """is_complete stays False while a chunk is still outstanding."""
        collector = ResultCollector(total_chunks=3)
        for seq in (0, 1):
            collector.add(make_result(seq))
        assert not collector.is_complete

View File

@@ -0,0 +1,69 @@
"""
Tests for exception hierarchy — catch patterns, attributes.
Demonstrates: TDD (Interview Topic 8) — testing exception design.
"""
import pytest
from core.chunker.exceptions import (
ChunkChecksumError,
ChunkError,
ChunkReadError,
PipelineError,
ProcessingError,
ProcessorFailureError,
ProcessorTimeoutError,
ReassemblyError,
)
class TestExceptionHierarchy:
    """Checks the inheritance tree and constructor attributes of the chunker exceptions."""

    def test_pipeline_error_is_base(self):
        """ChunkError, ProcessingError and ReassemblyError derive from PipelineError."""
        for category in (ChunkError, ProcessingError, ReassemblyError):
            assert issubclass(category, PipelineError)

    def test_chunk_error_subtypes(self):
        """The read and checksum errors are both ChunkErrors."""
        for subtype in (ChunkReadError, ChunkChecksumError):
            assert issubclass(subtype, ChunkError)

    def test_processing_error_subtypes(self):
        """The timeout and failure errors are both ProcessingErrors."""
        for subtype in (ProcessorTimeoutError, ProcessorFailureError):
            assert issubclass(subtype, ProcessingError)

    def test_catch_pipeline_error_catches_all(self):
        """An `except PipelineError` style handler also catches concrete subtypes."""
        for concrete in (ChunkReadError, ReassemblyError):
            with pytest.raises(PipelineError):
                raise concrete("test")

    def test_checksum_error_attributes(self):
        """ChunkChecksumError exposes sequence/expected/actual and mentions the sequence in str()."""
        err = ChunkChecksumError(sequence=5, expected="aaa", actual="bbb")
        assert err.sequence == 5
        assert (err.expected, err.actual) == ("aaa", "bbb")
        rendered = str(err)
        assert "5" in rendered

    def test_timeout_error_attributes(self):
        """ProcessorTimeoutError exposes the sequence and timeout it was built with."""
        err = ProcessorTimeoutError(sequence=3, timeout=30.0)
        assert err.sequence == 3
        assert err.timeout == 30.0

    def test_failure_error_attributes(self):
        """ProcessorFailureError keeps the original exception object and its message."""
        cause = RuntimeError("boom")
        err = ProcessorFailureError(sequence=1, retries=3, original_error=cause)
        assert err.sequence == 1
        assert err.retries == 3
        # Identity, not equality: the very same exception instance is retained.
        assert err.original_error is cause
        assert "boom" in str(err)

View File

@@ -0,0 +1,144 @@
"""
Tests for Pipeline — end-to-end orchestration, stats, error handling.
Demonstrates: TDD (Interview Topic 8) — integration testing with mocked FFmpeg probe.
"""
from unittest.mock import MagicMock, patch
import pytest
from core.chunker import Pipeline
from core.chunker.exceptions import PipelineError
def mock_probe(duration):
    """Build a stand-in probe result whose ``duration`` attribute is *duration*."""
    # MagicMock accepts attribute values as keyword arguments at construction.
    return MagicMock(duration=duration)
class TestPipeline:
    """End-to-end tests for Pipeline with probe_file patched out."""

    @patch("core.chunker.chunker.probe_file")
    def test_end_to_end(self, mock_pf, temp_file):
        """A 40 s source in 10 s chunks is processed completely and in order."""
        mock_pf.return_value = mock_probe(40.0)
        path = temp_file(b"x" * 4096)
        pipeline = Pipeline(
            source=path,
            chunk_duration=10.0,
            num_workers=2,
            processor_type="checksum",
        )
        result = pipeline.run()

        assert result.total_chunks == 4
        assert result.processed == 4
        assert result.failed == 0
        assert result.elapsed_time > 0
        assert result.chunks_in_order is True

    @patch("core.chunker.chunker.probe_file")
    def test_throughput_calculated(self, mock_pf, temp_file):
        """The result reports a positive throughput figure."""
        mock_pf.return_value = mock_probe(30.0)
        path = temp_file(b"x" * 10000)
        pipeline = Pipeline(source=path, chunk_duration=10.0, num_workers=2)
        result = pipeline.run()
        assert result.throughput_mbps > 0

    @patch("core.chunker.chunker.probe_file")
    def test_worker_stats(self, mock_pf, temp_file):
        """worker_stats has one entry per worker, each with processed/errors keys."""
        mock_pf.return_value = mock_probe(40.0)
        path = temp_file(b"x" * 4000)
        result = Pipeline(source=path, chunk_duration=10.0, num_workers=2).run()

        assert len(result.worker_stats) == 2
        # Only the per-worker payloads matter here, not the worker ids.
        for stats in result.worker_stats.values():
            assert "processed" in stats
            assert "errors" in stats

    def test_nonexistent_file(self):
        """Running against a missing source raises PipelineError."""
        missing = "/nonexistent/file.mp4"
        with pytest.raises(PipelineError):
            Pipeline(source=missing).run()

    @patch("core.chunker.chunker.probe_file")
    def test_event_callback(self, mock_pf, temp_file):
        """The event callback sees start, completion and chunk-queued events."""
        mock_pf.return_value = mock_probe(20.0)
        path = temp_file(b"x" * 2048)
        seen = []

        def capture(event_type, data):
            # Only the event name is recorded; the payload is ignored.
            seen.append(event_type)

        pipeline = Pipeline(
            source=path,
            chunk_duration=10.0,
            num_workers=1,
            event_callback=capture,
        )
        pipeline.run()

        assert "pipeline_start" in seen
        assert "pipeline_complete" in seen
        assert "chunk_queued" in seen

    @patch("core.chunker.chunker.probe_file")
    def test_simulated_decode_processor(self, mock_pf, temp_file):
        """The simulated_decode processor type runs through without failures."""
        mock_pf.return_value = mock_probe(20.0)
        path = temp_file(b"x" * 2048)
        pipeline = Pipeline(
            source=path,
            chunk_duration=10.0,
            num_workers=2,
            processor_type="simulated_decode",
        )
        result = pipeline.run()
        assert result.total_chunks == 2
        assert result.failed == 0

    @patch("core.chunker.chunker.probe_file")
    def test_single_chunk_file(self, mock_pf, temp_file):
        """A source shorter than chunk_duration is handled as a single chunk."""
        mock_pf.return_value = mock_probe(5.0)
        path = temp_file(b"x" * 100)
        result = Pipeline(source=path, chunk_duration=10.0).run()
        assert result.total_chunks == 1
        assert result.processed == 1

    @patch("core.chunker.chunker.probe_file")
    def test_retries_tracked(self, mock_pf, temp_file):
        """The result exposes a non-negative retries counter."""
        mock_pf.return_value = mock_probe(20.0)
        path = temp_file(b"x" * 2048)
        result = Pipeline(source=path, chunk_duration=10.0).run()
        # No failure is injected here, so zero is a legitimate value.
        assert result.retries >= 0

    @patch("core.chunker.chunker.probe_file")
    def test_output_dir_and_chunk_files(self, mock_pf, temp_file):
        """Without an output_dir the result has no directory and no chunk files."""
        mock_pf.return_value = mock_probe(10.0)
        path = temp_file(b"x" * 1024)
        pipeline = Pipeline(
            source=path,
            chunk_duration=10.0,
            processor_type="checksum",
        )
        result = pipeline.run()

        # output_dir was never passed, so both fields keep their empty values.
        assert result.output_dir is None
        assert result.chunk_files == []

View File

@@ -0,0 +1,98 @@
"""
Tests for Processor implementations — ChecksumProcessor, SimulatedDecodeProcessor, CompositeProcessor.
Demonstrates: TDD (Interview Topic 8) — ABC contract, parametrized tests.
"""
import pytest
from core.chunker.exceptions import ChunkChecksumError
from core.chunker.models import Chunk
from core.chunker.processor import (
ChecksumProcessor,
CompositeProcessor,
Processor,
SimulatedDecodeProcessor,
)
class TestChecksumProcessor:
    """Tests for ChecksumProcessor on valid and invalid chunk time ranges."""

    def test_valid_time_range(self, sample_chunk):
        """A chunk from the sample_chunk fixture is processed successfully."""
        outcome = ChecksumProcessor().process(sample_chunk)
        assert outcome.success is True
        assert outcome.checksum_valid is True
        assert outcome.processing_time > 0

    def test_invalid_time_range(self):
        """A zero-length time range raises ChunkChecksumError carrying the sequence."""
        # start_time == end_time, i.e. a zero-duration chunk.
        degenerate = Chunk(
            sequence=0,
            start_time=10.0,
            end_time=10.0,
            source_path="/fake.mp4",
            duration=0.0,
        )
        processor = ChecksumProcessor()
        with pytest.raises(ChunkChecksumError) as exc_info:
            processor.process(degenerate)
        raised = exc_info.value
        assert raised.sequence == 0

    def test_sequence_preserved(self, make_chunk):
        """The result reports the same sequence number as the input chunk."""
        outcome = ChecksumProcessor().process(make_chunk(42))
        assert outcome.sequence == 42
class TestSimulatedDecodeProcessor:
    """Tests for SimulatedDecodeProcessor success and relative timing."""

    def test_processes_successfully(self, sample_chunk):
        """Processing the sample chunk succeeds and reports a positive time."""
        processor = SimulatedDecodeProcessor(ms_per_second=1.0)
        outcome = processor.process(sample_chunk)
        assert outcome.success is True
        assert outcome.processing_time > 0

    def test_time_proportional_to_duration(self):
        """A 10 s chunk reports a longer processing time than a 1 s chunk."""
        # NOTE(review): presumably the simulated cost scales with chunk
        # duration and ms_per_second — confirm against the processor.
        processor = SimulatedDecodeProcessor(ms_per_second=50.0)
        short_chunk = Chunk(0, 0.0, 1.0, "/fake.mp4", 1.0)
        long_chunk = Chunk(1, 0.0, 10.0, "/fake.mp4", 10.0)
        short_time = processor.process(short_chunk).processing_time
        long_time = processor.process(long_chunk).processing_time
        assert long_time > short_time
class TestCompositeProcessor:
    """Tests for CompositeProcessor chaining, error propagation and validation."""

    def test_chains_processors(self, sample_chunk):
        """A composite of two processors reports success for the sample chunk."""
        stages = [
            ChecksumProcessor(),
            SimulatedDecodeProcessor(ms_per_second=1.0),
        ]
        outcome = CompositeProcessor(stages).process(sample_chunk)
        assert outcome.success is True

    def test_stops_on_failure(self):
        """An error raised by the first stage propagates out of the composite."""
        stages = [
            ChecksumProcessor(),
            SimulatedDecodeProcessor(ms_per_second=1.0),
        ]
        composite = CompositeProcessor(stages)
        # Same start and end time: the invalid range the checksum stage rejects.
        bad_chunk = Chunk(0, 10.0, 10.0, "/fake.mp4", 0.0)
        with pytest.raises(ChunkChecksumError):
            composite.process(bad_chunk)

    def test_requires_at_least_one(self):
        """Constructing a composite from an empty list raises ValueError."""
        no_stages = []
        with pytest.raises(ValueError, match="at least one"):
            CompositeProcessor(no_stages)

    def test_is_processor(self):
        """A CompositeProcessor instance is itself a Processor."""
        composite = CompositeProcessor([ChecksumProcessor()])
        assert isinstance(composite, Processor)

115
tests/chunker/test_queue.py Normal file
View File

@@ -0,0 +1,115 @@
"""
Tests for ChunkQueue — backpressure, sentinel shutdown, timeout behavior.
Demonstrates: TDD (Interview Topic 8) — concurrency testing.
"""
import queue
import threading
import pytest
from core.chunker.queue import ChunkQueue
class TestChunkQueue:
    """Tests for ChunkQueue: FIFO ordering, close sentinel, bounded capacity, threads."""

    def test_put_and_get(self, make_chunk):
        """A chunk that was put can be fetched back."""
        q = ChunkQueue(maxsize=5)
        chunk = make_chunk(0)
        q.put(chunk)
        result = q.get(timeout=1.0)
        assert result.sequence == 0

    def test_fifo_order(self, make_chunk):
        """Items come out in the order they were put."""
        q = ChunkQueue(maxsize=5)
        for i in range(3):
            q.put(make_chunk(i))
        for i in range(3):
            assert q.get(timeout=1.0).sequence == i

    def test_close_returns_none(self, make_chunk):
        """After close(), queued items are still delivered, then get() returns None."""
        q = ChunkQueue(maxsize=5)
        q.put(make_chunk(0))
        q.close()
        result = q.get(timeout=1.0)
        assert result.sequence == 0
        # Next get should hit the sentinel.
        result = q.get(timeout=1.0)
        assert result is None

    def test_close_propagates_to_multiple_consumers(self, make_chunk):
        """Repeated get() calls on a closed, empty queue all return None."""
        q = ChunkQueue(maxsize=5)
        q.close()
        # Every consumer that asks after close must see the sentinel.
        assert q.get(timeout=1.0) is None
        assert q.get(timeout=1.0) is None

    def test_is_closed(self):
        """is_closed flips from False to True on close()."""
        q = ChunkQueue()
        assert not q.is_closed
        q.close()
        assert q.is_closed

    def test_qsize(self, make_chunk):
        """qsize follows puts and gets."""
        q = ChunkQueue(maxsize=10)
        assert q.qsize() == 0
        q.put(make_chunk(0))
        q.put(make_chunk(1))
        assert q.qsize() == 2
        q.get(timeout=1.0)
        assert q.qsize() == 1

    def test_backpressure_blocks(self, make_chunk):
        """put() on a full queue raises queue.Full once its timeout expires."""
        q = ChunkQueue(maxsize=2)
        q.put(make_chunk(0))
        q.put(make_chunk(1))
        # Queue is full — put with a short timeout should raise.
        with pytest.raises(queue.Full):
            q.put(make_chunk(2), timeout=0.05)

    def test_get_timeout(self):
        """get() on an empty, open queue raises queue.Empty once its timeout expires."""
        q = ChunkQueue(maxsize=5)
        with pytest.raises(queue.Empty):
            q.get(timeout=0.05)

    def test_concurrent_put_get(self, make_chunk):
        """One producer and one consumer thread move ten chunks through a queue of size 3.

        Both threads must have finished by the time the results are checked;
        otherwise the failure is reported as a stuck thread rather than as a
        confusing mismatch in the collected sequences.
        """
        q = ChunkQueue(maxsize=3)
        results = []

        def producer():
            for i in range(10):
                q.put(make_chunk(i))
            q.close()

        def consumer():
            while True:
                item = q.get(timeout=2.0)
                if item is None:
                    break
                results.append(item.sequence)

        # Daemon threads: a producer blocked on a full queue (or a stuck
        # consumer) must not keep the test process alive after a failure.
        t1 = threading.Thread(target=producer, daemon=True)
        t2 = threading.Thread(target=consumer, daemon=True)
        t1.start()
        t2.start()
        t1.join(timeout=5.0)
        t2.join(timeout=5.0)
        # join(timeout=...) returns silently on timeout, so check explicitly.
        assert not t1.is_alive(), "producer thread did not finish"
        assert not t2.is_alive(), "consumer thread did not finish"
        assert sorted(results) == list(range(10))

View File

@@ -0,0 +1,127 @@
"""
Tests for Worker — processing, retry with backoff, error handling.
Demonstrates: TDD (Interview Topic 8) — mocking processors, testing retry logic.
"""
from unittest.mock import MagicMock
import pytest
from core.chunker.models import Chunk, ChunkResult
from core.chunker.processor import Processor
from core.chunker.queue import ChunkQueue
from core.chunker.worker import Worker
class FailNTimesProcessor(Processor):
    """Processor double: the first ``fail_count`` calls raise, later calls succeed."""

    def __init__(self, fail_count: int):
        # call_count is incremented on every process() call, failed or not.
        self.call_count = 0
        self.fail_count = fail_count

    def process(self, chunk: Chunk) -> ChunkResult:
        """Raise RuntimeError while the failure budget lasts, then return a success result."""
        self.call_count += 1
        attempt = self.call_count
        if attempt > self.fail_count:
            return ChunkResult(
                sequence=chunk.sequence,
                success=True,
                processing_time=0.001,
            )
        raise RuntimeError(f"Simulated failure #{attempt}")
class AlwaysFailProcessor(Processor):
    """Processor double whose process() never succeeds."""

    def process(self, chunk: Chunk) -> ChunkResult:
        """Raise RuntimeError unconditionally; the chunk is not inspected."""
        message = "Always fails"
        raise RuntimeError(message)
class TestWorker:
    """Tests for Worker: draining a queue, retries, failure reporting, events."""

    @staticmethod
    def _closed_queue(chunks, maxsize=5):
        """Return a ChunkQueue loaded with *chunks* and then closed."""
        q = ChunkQueue(maxsize=maxsize)
        for chunk in chunks:
            q.put(chunk)
        q.close()
        return q

    def test_processes_chunks(self, make_chunk):
        """run() returns one successful result per queued chunk."""
        from core.chunker.processor import ChecksumProcessor

        q = self._closed_queue(make_chunk(i) for i in range(3))
        worker = Worker("w-0", q, ChecksumProcessor(), max_retries=0)
        results = worker.run()
        assert len(results) == 3
        for outcome in results:
            assert outcome.success

    def test_retry_on_failure(self, make_chunk):
        """A processor that fails twice succeeds on the third attempt within max_retries=3."""
        q = self._closed_queue([make_chunk(0)])
        proc = FailNTimesProcessor(fail_count=2)
        results = Worker("w-0", q, proc, max_retries=3).run()

        assert len(results) == 1
        only = results[0]
        assert only.success is True
        assert only.retries == 2
        assert proc.call_count == 3  # two failed attempts, then the successful one

    def test_max_retries_exceeded(self, make_chunk):
        """A processor that always fails yields one failed result and one counted error."""
        q = self._closed_queue([make_chunk(0)])
        worker = Worker("w-0", q, AlwaysFailProcessor(), max_retries=2)
        results = worker.run()

        assert len(results) == 1
        only = results[0]
        assert only.success is False
        assert only.error is not None
        assert worker.error_count == 1

    def test_worker_id_on_results(self, make_chunk):
        """Results carry the id of the worker that produced them."""
        from core.chunker.processor import ChecksumProcessor

        q = self._closed_queue([make_chunk(0)])
        results = Worker("worker-7", q, ChecksumProcessor()).run()
        assert results[0].worker_id == "worker-7"

    def test_event_callback(self, make_chunk):
        """The callback receives worker_status, chunk_processing and chunk_done events."""
        from core.chunker.processor import ChecksumProcessor

        q = self._closed_queue([make_chunk(0)])
        events = []
        # Record every (type, data) pair the worker reports.
        callback = MagicMock(side_effect=lambda t, d: events.append((t, d)))
        Worker("w-0", q, ChecksumProcessor(), event_callback=callback).run()

        event_types = [kind for kind, _payload in events]
        for expected in ("worker_status", "chunk_processing", "chunk_done"):
            assert expected in event_types

    def test_processed_count(self, make_chunk):
        """processed_count equals the number of chunks the worker handled."""
        from core.chunker.processor import ChecksumProcessor

        q = self._closed_queue((make_chunk(i) for i in range(5)), maxsize=10)
        worker = Worker("w-0", q, ChecksumProcessor())
        worker.run()
        assert worker.processed_count == 5

2
ui/chunker/.dockerignore Normal file
View File

@@ -0,0 +1,2 @@
node_modules/
dist/

12
ui/chunker/Dockerfile Normal file
View File

@@ -0,0 +1,12 @@
# Image for the chunker UI: installs dependencies and starts the "dev" npm script.
FROM node:20-alpine
WORKDIR /app
# Copy the manifest AND the lockfile before the sources, so the dependency
# layer is reused until either of them changes.
COPY package.json package-lock.json ./
# npm ci installs exactly the versions pinned in package-lock.json and fails
# if the lockfile and package.json disagree, giving reproducible builds.
RUN npm ci
COPY . .
# NOTE(review): assumes the dev server is configured to listen on 5174 — confirm in the Vite config.
EXPOSE 5174
CMD ["npm", "run", "dev"]

12
ui/chunker/index.html Normal file
View File

@@ -0,0 +1,12 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>MPR Chunker Pipeline</title>
</head>
<body>
<!-- Empty container; presumably the element /src/main.tsx renders into (confirm there). -->
<div id="app"></div>
<!-- Application entry point, loaded as an ES module. -->
<script type="module" src="/src/main.tsx"></script>
</body>
</html>

1849
ui/chunker/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

27
ui/chunker/package.json Normal file
View File

@@ -0,0 +1,27 @@
{
"name": "mpr-chunker",
"version": "0.1.0",
"private": true,
"type": "module",
"scripts": {
"dev": "vite",
"build": "tsc && vite build",
"preview": "vite preview"
},
"dependencies": {
"@protobuf-ts/grpcweb-transport": "^2.11.1",
"@protobuf-ts/runtime": "^2.11.1",
"@protobuf-ts/runtime-rpc": "^2.11.1",
"react": "^18.2.0",
"react-dom": "^18.2.0"
},
"devDependencies": {
"@protobuf-ts/plugin": "^2.11.1",
"@protobuf-ts/protoc": "^2.11.1",
"@types/react": "^18.2.0",
"@types/react-dom": "^18.2.0",
"@vitejs/plugin-react": "^4.2.0",
"typescript": "^5.3.0",
"vite": "^5.0.0"
}
}

504
ui/chunker/src/App.css Normal file
View File

@@ -0,0 +1,504 @@
@import "../../common/styles/theme.css";
/* ---- Layout ---- */
/* Page skeleton: .app is a full-viewport-height column. */
.app {
display: flex;
flex-direction: column;
height: 100vh;
}
.header {
display: flex;
justify-content: space-between;
align-items: center;
padding: 0.75rem 1.25rem;
background: var(--bg-panel);
border-bottom: 1px solid var(--border);
}
.header h1 {
font-size: 1.1rem;
font-weight: 600;
letter-spacing: -0.01em;
}
.connection-status {
display: flex;
align-items: center;
gap: 0.5rem;
font-size: 0.8rem;
color: var(--text-muted);
}
/* Status dot: muted by default, success-coloured with a glow when .connected is added. */
.dot {
width: 8px;
height: 8px;
border-radius: 50%;
background: var(--text-muted);
}
.dot.connected {
background: var(--success);
box-shadow: 0 0 6px var(--success);
}
/* NOTE(review): literal hex colours here (same pair as .error-count) instead of theme variables. */
.error-banner {
padding: 0.5rem 1.25rem;
background: #7f1d1d;
color: #fca5a5;
font-size: 0.85rem;
}
/* .layout takes the remaining height (flex: 1) and clips overflow; */
/* .sidebar and .main each scroll on their own (overflow-y: auto). */
.layout {
display: flex;
flex: 1;
overflow: hidden;
}
.sidebar {
width: 300px;
background: var(--bg-surface);
border-right: 1px solid var(--border);
overflow-y: auto;
}
.main {
flex: 1;
overflow-y: auto;
padding: 1rem;
display: flex;
flex-direction: column;
gap: 1rem;
}
/* Two equal-width columns. */
.main-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 1rem;
}
.main-left,
.main-right {
display: flex;
flex-direction: column;
gap: 1rem;
}
/* ---- Selected Asset Info ---- */
/* NOTE(review): background, border and meta text use literal hex colours rather than theme variables. */
.selected-asset-info {
padding: 0.5rem;
background: #1e293b;
border: 1px solid #334155;
border-radius: var(--radius);
margin-bottom: 0.75rem;
}
/* Primary line and smaller secondary line, each on its own row (display: block). */
.asset-detail {
display: block;
font-size: 0.8rem;
color: var(--text-primary);
font-weight: 500;
}
.asset-detail-meta {
display: block;
font-size: 0.65rem;
color: #64748b;
margin-top: 0.15rem;
}
/* ---- Config Panel ---- */
.config-panel {
padding: 1rem;
}
.config-field {
margin-bottom: 0.75rem;
}
.config-field label {
display: block;
font-size: 0.75rem;
color: var(--text-secondary);
margin-bottom: 0.25rem;
}
.config-field .default {
color: var(--text-muted);
font-style: italic;
}
/* Form controls span the full field width. */
.config-field input,
.config-field select {
width: 100%;
padding: 0.4rem 0.5rem;
font-size: 0.8rem;
background: var(--bg-input);
color: var(--text-primary);
border: 1px solid var(--border);
border-radius: var(--radius);
}
/* The default focus outline is removed; focus is shown by the accent border colour instead. */
.config-field input:focus,
.config-field select:focus {
outline: none;
border-color: var(--accent);
}
/* The three action buttons below share size, spacing and weight; they differ in colours, border and transition. */
.start-button {
width: 100%;
padding: 0.5rem;
font-size: 0.85rem;
background: var(--success);
color: #000;
border: none;
border-radius: var(--radius);
cursor: pointer;
font-weight: 600;
margin-top: 0.5rem;
transition: background 0.2s;
}
/* Hover colour is suppressed while the button is disabled. */
.start-button:hover:not(:disabled) {
background: #059669;
}
.start-button:disabled {
background: var(--bg-input);
color: var(--text-muted);
cursor: not-allowed;
}
.stop-button {
width: 100%;
padding: 0.5rem;
font-size: 0.85rem;
background: var(--error);
color: #fff;
border: none;
border-radius: var(--radius);
cursor: pointer;
font-weight: 600;
margin-top: 0.5rem;
transition: background 0.2s;
}
.stop-button:hover {
background: #dc2626;
}
.reset-button {
width: 100%;
padding: 0.5rem;
font-size: 0.85rem;
background: #1e293b;
color: #94a3b8;
border: 1px solid #334155;
border-radius: var(--radius);
cursor: pointer;
font-weight: 600;
margin-top: 0.5rem;
transition: all 0.2s;
}
.reset-button:hover {
background: #334155;
color: var(--text-primary);
}
/* Inputs laid out side by side with .range-sep between them. */
.range-row {
display: flex;
align-items: center;
gap: 0.5rem;
}
/* Same look as .config-field input, but sized by flex: 1 instead of width: 100%. */
.range-row input {
flex: 1;
padding: 0.4rem 0.5rem;
font-size: 0.8rem;
background: var(--bg-input);
color: var(--text-primary);
border: 1px solid var(--border);
border-radius: var(--radius);
}
.range-row input:focus {
outline: none;
border-color: var(--accent);
}
.range-sep {
font-size: 0.75rem;
color: var(--text-muted);
}
/* ---- Chunk Grid ---- */
.chunk-grid-panel {
background: var(--bg-surface);
border: 1px solid var(--border);
border-radius: 8px;
padding: 1rem;
}
.chunk-count {
font-size: 0.7rem;
color: var(--text-muted);
font-weight: 400;
}
/* As many columns as fit at a 32px minimum width; scrolls vertically beyond 200px. */
.chunk-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(32px, 1fr));
gap: 3px;
max-height: 200px;
overflow-y: auto;
}
/* Square cell (aspect-ratio: 1). No background is set here; changes to it are animated over 0.3s. */
.chunk-cell {
aspect-ratio: 1;
display: flex;
align-items: center;
justify-content: center;
font-size: 0.55rem;
color: rgba(255, 255, 255, 0.6);
border-radius: 3px;
transition: background 0.3s;
}
.chunk-legend {
display: flex;
gap: 0.75rem;
margin-top: 0.5rem;
flex-wrap: wrap;
}
.legend-item {
display: flex;
align-items: center;
gap: 0.25rem;
font-size: 0.65rem;
color: var(--text-secondary);
}
/* Colour swatch; no colour is defined in this rule. */
.legend-dot {
width: 8px;
height: 8px;
border-radius: 2px;
}
/* ---- Worker Panel ---- */
.worker-panel {
background: var(--bg-surface);
border: 1px solid var(--border);
border-radius: 8px;
padding: 1rem;
}
/* Cards are stacked vertically. */
.worker-cards {
display: flex;
flex-direction: column;
gap: 0.5rem;
}
.worker-card {
padding: 0.5rem 0.75rem;
background: var(--bg-panel);
border: 1px solid var(--border);
border-radius: 6px;
}
/* Name on the left, state on the right. */
.worker-header {
display: flex;
justify-content: space-between;
align-items: center;
}
.worker-name {
font-size: 0.8rem;
font-weight: 500;
}
/* No colour is set here for the state label. */
.worker-state {
font-size: 0.7rem;
text-transform: uppercase;
font-weight: 600;
}
.worker-chunk {
font-size: 0.7rem;
color: var(--text-muted);
margin-top: 0.15rem;
}
.worker-stats {
display: flex;
gap: 0.75rem;
font-size: 0.65rem;
color: var(--text-muted);
margin-top: 0.25rem;
}
/* Centred, muted placeholder text. */
.worker-empty {
font-size: 0.8rem;
color: var(--text-muted);
text-align: center;
padding: 1rem;
}
/* ---- Queue Gauge ---- */
.queue-gauge {
background: var(--bg-surface);
border: 1px solid var(--border);
border-radius: 8px;
padding: 1rem;
}
.gauge-row {
margin-bottom: 0.5rem;
}
.gauge-label {
font-size: 0.75rem;
color: var(--text-secondary);
margin-bottom: 0.25rem;
}
.gauge-value {
color: var(--text-primary);
font-weight: 600;
}
/* Track of the bar; overflow is hidden so the fill is clipped to the rounded corners. */
.gauge-bar {
height: 8px;
background: var(--bg-input);
border-radius: var(--radius);
overflow: hidden;
}
/* Width and background are not set here; changes to both are animated over 0.3s. */
.gauge-fill {
height: 100%;
border-radius: var(--radius);
transition: width 0.3s, background 0.3s;
}
.gauge-note {
font-size: 0.65rem;
color: var(--text-muted);
}
/* ---- Stats Panel ---- */
.stats-panel {
background: var(--bg-surface);
border: 1px solid var(--border);
border-radius: 8px;
padding: 1rem;
}
/* Three equal-width columns of stat tiles. */
.stats-grid {
display: grid;
grid-template-columns: repeat(3, 1fr);
gap: 0.5rem;
}
.stat {
text-align: center;
padding: 0.5rem;
background: var(--bg-panel);
border-radius: 6px;
}
/* Large value with a small uppercase label underneath. */
.stat-value {
font-size: 1.1rem;
font-weight: 700;
color: var(--text-primary);
}
.stat-label {
font-size: 0.6rem;
color: var(--text-muted);
text-transform: uppercase;
letter-spacing: 0.05em;
margin-top: 0.15rem;
}
/* ---- Error Log ---- */
.error-log {
background: var(--bg-surface);
border: 1px solid var(--border);
border-radius: 8px;
padding: 1rem;
}
/* Pill-shaped counter; NOTE(review): literal hex colours, same pair as .error-banner. */
.error-count {
font-size: 0.7rem;
background: #7f1d1d;
color: #fca5a5;
padding: 0.1rem 0.4rem;
border-radius: 8px;
font-weight: 400;
}
/* List scrolls vertically beyond 150px. */
.error-entries {
max-height: 150px;
overflow-y: auto;
}
.error-empty {
font-size: 0.8rem;
color: var(--text-muted);
text-align: center;
padding: 0.5rem;
}
/* One row per entry; fields wrap onto further lines when they do not fit. */
.error-entry {
display: flex;
gap: 0.5rem;
align-items: center;
padding: 0.35rem 0;
border-bottom: 1px solid var(--bg-panel);
font-size: 0.7rem;
flex-wrap: wrap;
}
/* Each field of an entry gets its own colour. */
.error-type {
color: var(--error);
font-weight: 500;
}
.error-seq {
color: var(--warning);
}
.error-worker {
color: var(--accent);
}
/* The message takes the remaining row width (flex: 1). */
.error-msg {
color: var(--text-secondary);
flex: 1;
}
.error-retries {
color: #f97316;
font-size: 0.65rem;
}
/* ---- Output download link ---- */
/* Accent-coloured link with no underline until hovered. */
.fm-download-link {
font-size: 0.7rem;
color: var(--accent);
text-decoration: none;
}
.fm-download-link:hover {
text-decoration: underline;
}

346
ui/chunker/src/App.tsx Normal file
View File

@@ -0,0 +1,346 @@
import { useCallback, useEffect, useMemo, useState } from "react";
import "./App.css";
import {
cancelChunkJob,
createChunkJob,
getAssets,
getChunkOutputFiles,
scanMediaFolder,
} from "./api";
import { ChunkGrid } from "./components/ChunkGrid";
import { ConfigPanel } from "./components/ConfigPanel";
import { ErrorLog } from "./components/ErrorLog";
import { OutputFiles } from "./components/OutputFiles";
import { QueueGauge } from "./components/QueueGauge";
import { StatsPanel } from "./components/StatsPanel";
import { WorkerPanel } from "./components/WorkerPanel";
import { useGrpcStream } from "./hooks/useGrpcStream";
import type {
ChunkInfo,
ChunkOutputFile,
ErrorEntry,
MediaAsset,
PipelineConfig,
PipelineStats,
WorkerInfo,
} from "./types";
export default function App() {
const [jobId, setJobId] = useState<string | null>(null);
const [celeryTaskId, setCeleryTaskId] = useState<string | null>(null);
const [running, setRunning] = useState(false);
const [error, setError] = useState<string | null>(null);
// Asset state
const [assets, setAssets] = useState<MediaAsset[]>([]);
const [selectedAsset, setSelectedAsset] = useState<MediaAsset | null>(null);
const [scanning, setScanning] = useState(false);
// Output files
const [outputFiles, setOutputFiles] = useState<ChunkOutputFile[]>([]);
const {
events,
connected,
done,
reset: resetStream,
} = useGrpcStream(jobId);
// Load assets on mount
useEffect(() => {
getAssets()
.then((data) =>
setAssets(data.sort((a, b) => a.filename.localeCompare(b.filename))),
)
.catch((e) =>
setError(e instanceof Error ? e.message : "Failed to load assets"),
);
}, []);
// Fetch output files when job completes
useEffect(() => {
if (done && jobId) {
getChunkOutputFiles(jobId)
.then(setOutputFiles)
.catch(() => setOutputFiles([]));
}
}, [done, jobId]);
const handleScan = useCallback(async () => {
setScanning(true);
setError(null);
try {
await scanMediaFolder();
const data = await getAssets();
setAssets(data.sort((a, b) => a.filename.localeCompare(b.filename)));
} catch (e) {
setError(e instanceof Error ? e.message : "Scan failed");
} finally {
setScanning(false);
}
}, []);
// Derive state from raw events.
// The full `events` array is replayed from scratch every time it changes; the
// result is the chunk grid, the worker cards, the aggregate stats, the error
// log and the current queue depth.
const { chunks, workers, stats, errors, queueSize } =
useMemo(() => {
// Keyed accumulators: chunks by sequence number, workers by worker_id.
const chunkMap = new Map<number, ChunkInfo>();
const workerMap = new Map<string, WorkerInfo>();
const errorList: ErrorEntry[] = [];
let totalChunks = 0;
let processed = 0;
let failed = 0;
let retries = 0;
let elapsed = 0;
let throughput = 0;
let queueSize = 0;
let pipelineDone = false;
for (const evt of events) {
const evtType = evt.event_type || "";
// Aggregate counters: the most recent truthy value wins. Because these are
// truthiness checks, an event carrying 0 never overwrites an earlier value.
if (evt.total_chunks) totalChunks = evt.total_chunks;
if (evt.processed_chunks) processed = evt.processed_chunks;
if (evt.failed_chunks) failed = evt.failed_chunks;
if (evt.elapsed) elapsed = evt.elapsed;
if (evt.throughput_mbps) throughput = evt.throughput_mbps;
// Queue depth is the exception: 0 is meaningful, so only `undefined` is skipped.
if (evt.queue_size !== undefined) queueSize = evt.queue_size;
// Terminal events force the queue to read as empty and flag the final
// worker fix-up performed after the loop.
if (evtType === "pipeline_complete" || evtType === "pipeline_error") {
pipelineDone = true;
queueSize = 0;
}
// Track chunks by raw event type
if (evt.sequence !== undefined) {
// First sighting of a sequence creates a "pending" entry; the event type
// then moves it to its next state. Unrecognised event types leave the
// state as it was.
const existing = chunkMap.get(evt.sequence) || {
sequence: evt.sequence,
state: "pending" as const,
};
if (evtType === "chunk_queued") {
existing.state = "queued";
} else if (evtType === "chunk_processing") {
existing.state = "processing";
if (evt.worker_id) existing.worker_id = evt.worker_id;
} else if (evtType === "chunk_done") {
existing.state = "done";
if (evt.processing_time)
existing.processing_time = evt.processing_time;
if (evt.retries) existing.retries = evt.retries;
} else if (evtType === "chunk_error") {
existing.state = "error";
if (evt.error) existing.error = evt.error;
} else if (evtType === "chunk_retry") {
existing.state = "retry";
if (evt.retries) existing.retries = evt.retries;
}
// Size is recorded from any event that carries it, whatever its type.
if (evt.size) existing.size = evt.size;
chunkMap.set(evt.sequence, existing);
}
// Track workers from worker_status events
if (evt.worker_id && evtType === "worker_status") {
const w = workerMap.get(evt.worker_id) || {
worker_id: evt.worker_id,
state: "idle" as const,
processed: 0,
errors: 0,
retries: 0,
};
// Only "processing", "idle" and "stopped" are honoured; any other reported
// state leaves the worker entry unchanged.
if (evt.state === "processing") {
w.state = "processing";
w.current_chunk = evt.sequence;
} else if (evt.state === "idle") {
w.state = "idle";
w.current_chunk = undefined;
} else if (evt.state === "stopped") {
w.state = "stopped";
w.current_chunk = undefined;
}
workerMap.set(evt.worker_id, w);
}
// Also update workers from chunk lifecycle events
if (
evt.worker_id &&
(evtType === "chunk_processing" ||
evtType === "chunk_done" ||
evtType === "chunk_error")
) {
const w = workerMap.get(evt.worker_id) || {
worker_id: evt.worker_id,
state: "idle" as const,
processed: 0,
errors: 0,
retries: 0,
};
if (evtType === "chunk_processing") {
w.state = "processing";
w.current_chunk = evt.sequence;
} else if (evtType === "chunk_done") {
w.processed++;
w.state = "idle";
w.current_chunk = undefined;
} else if (evtType === "chunk_error") {
// An error bumps the counter but does not change the worker's state or
// its current chunk.
w.errors++;
}
// Retry totals (global and per worker) are summed only from these three
// lifecycle event types, and only when the event names a worker.
if (evt.retries) {
retries += evt.retries;
w.retries += evt.retries;
}
workerMap.set(evt.worker_id, w);
}
// Track errors
if (evt.error) {
errorList.push({
// NOTE: this is the time of this derivation, not the time the event
// arrived — every entry is re-stamped whenever the memo recomputes.
timestamp: Date.now(),
sequence: evt.sequence,
worker_id: evt.worker_id,
error: evt.error,
retries: evt.retries,
event_type: evtType,
});
}
}
// When pipeline is done, mark all workers as stopped
if (pipelineDone) {
for (const w of workerMap.values()) {
w.state = "stopped";
w.current_chunk = undefined;
}
}
const statsObj: PipelineStats = {
total_chunks: totalChunks,
processed,
failed,
retries,
elapsed,
throughput_mbps: throughput,
queue_size: queueSize,
};
return {
// Chunks are returned in ascending sequence order; workers keep the order
// in which they were first seen.
chunks: Array.from(chunkMap.values()).sort(
(a, b) => a.sequence - b.sequence,
),
workers: Array.from(workerMap.values()),
stats: statsObj,
errors: errorList,
queueSize,
};
}, [events]);
// Submit a new chunk job. UI state from any previous run is cleared up front;
// the job and Celery task ids are stored once the backend has accepted the job.
const handleStart = useCallback(async (config: PipelineConfig) => {
  setError(null);
  setRunning(true);
  setOutputFiles([]);
  try {
    const { id, celery_task_id } = await createChunkJob(config);
    setJobId(id);
    setCeleryTaskId(celery_task_id);
  } catch (cause) {
    // Submission failed: surface the message and leave the "running" state.
    const message = cause instanceof Error ? cause.message : "Failed to start";
    setError(message);
    setRunning(false);
  }
}, []);
// Ask the backend to cancel the running job. The local stream is only torn
// down once the backend confirms the cancellation.
const handleStop = useCallback(async () => {
  if (!celeryTaskId) {
    setError("No task ID to cancel");
    return;
  }
  try {
    const { ok, message } = await cancelChunkJob(celeryTaskId);
    if (!ok) {
      // Backend refused: keep streaming and show its reason (or a fallback).
      setError(message || "Failed to cancel");
      return;
    }
    resetStream();
    setRunning(false);
    setError(null);
  } catch (cause) {
    const message = cause instanceof Error ? cause.message : "Failed to cancel";
    setError(message);
  }
}, [celeryTaskId, resetStream]);
// Return the UI to its initial "configure and launch" state: drop the stream
// and forget everything about the previous job.
const handleReset = useCallback(() => {
  resetStream();
  setOutputFiles([]);
  setError(null);
  setRunning(false);
  setCeleryTaskId(null);
  setJobId(null);
}, [resetStream]);
// Reset running state when done
if (done && running) {
setRunning(false);
}
return (
<div className="app">
<header className="header">
<h1>MPR Chunker Pipeline</h1>
<div className="connection-status">
{jobId && (
<span className={`dot ${connected ? "connected" : ""}`} />
)}
<span className="status-text">
{!jobId
? "Configure and launch"
: connected
? "Streaming"
: done
? "Complete"
: "Connecting..."}
</span>
</div>
</header>
{error && <div className="error-banner">{error}</div>}
<div className="layout">
<aside className="sidebar">
<ConfigPanel
onStart={handleStart}
onStop={handleStop}
onReset={handleReset}
running={running}
done={done}
assets={assets}
selectedAsset={selectedAsset}
onSelectAsset={setSelectedAsset}
onScan={handleScan}
scanning={scanning}
/>
</aside>
<main className="main">
<div className="main-grid">
<div className="main-left">
<ChunkGrid chunks={chunks} totalChunks={stats.total_chunks} />
<QueueGauge current={queueSize} max={10} buffered={0} />
{done && outputFiles.length > 0 && (
<OutputFiles files={outputFiles} />
)}
</div>
<div className="main-right">
<WorkerPanel workers={workers} />
<StatsPanel stats={stats} />
<ErrorLog errors={errors} />
</div>
</div>
</main>
</div>
</div>
);
}

85
ui/chunker/src/api.ts Normal file
View File

@@ -0,0 +1,85 @@
/**
* Chunker-specific API functions.
* Shared functions (getAssets, scanMediaFolder) come from common.
*/
import { gql } from "../../common/api/graphql";
import type { ChunkOutputFile } from "../../common/types/generated";
// Re-export shared functions
export { getAssets, scanMediaFolder } from "../../common/api/media";
/**
 * Create a chunk job via the `create_chunk_job` GraphQL mutation.
 *
 * @param config Job settings, forwarded unchanged as the mutation's `input`.
 * @returns The mutation payload (job id plus Celery task id, which may be null).
 */
export async function createChunkJob(config: {
  source_asset_id: string;
  chunk_duration: number;
  num_workers: number;
  max_retries: number;
  processor_type: string;
  start_time?: number | null;
  end_time?: number | null;
}): Promise<{ id: string; celery_task_id: string | null }> {
  type Payload = {
    create_chunk_job: {
      id: string;
      status: string;
      celery_task_id: string | null;
    };
  };
  const mutation = `
mutation CreateChunkJob($input: CreateChunkJobInput!) {
create_chunk_job(input: $input) {
id
status
celery_task_id
}
}
`;
  const { create_chunk_job: job } = await gql<Payload>(mutation, {
    input: config,
  });
  return job;
}
/**
 * Cancel a running chunk job via the `cancel_chunk_job` GraphQL mutation.
 *
 * @param celeryTaskId Celery task id returned when the job was created.
 * @returns The backend's `{ ok, message }` verdict.
 */
export async function cancelChunkJob(
  celeryTaskId: string,
): Promise<{ ok: boolean; message: string | null }> {
  type Payload = {
    cancel_chunk_job: { ok: boolean; message: string | null };
  };
  const mutation = `
mutation CancelChunkJob($celery_task_id: String!) {
cancel_chunk_job(celery_task_id: $celery_task_id) {
ok
message
}
}
`;
  const { cancel_chunk_job: verdict } = await gql<Payload>(mutation, {
    celery_task_id: celeryTaskId,
  });
  return verdict;
}
/**
 * Fetch the output chunk files of a job via the `chunk_output_files` query.
 *
 * @param jobId Id of the chunk job whose outputs are listed.
 * @returns One entry (key, size, url) per output file.
 */
export async function getChunkOutputFiles(
  jobId: string,
): Promise<ChunkOutputFile[]> {
  type Payload = { chunk_output_files: ChunkOutputFile[] };
  const query = `
query ChunkOutputFiles($job_id: String!) {
chunk_output_files(job_id: $job_id) {
key
size
url
}
}
`;
  const { chunk_output_files: outputs } = await gql<Payload>(query, {
    job_id: jobId,
  });
  return outputs;
}

View File

@@ -0,0 +1,52 @@
import type { ChunkInfo } from "../types";
interface Props {
  chunks: ChunkInfo[];
  totalChunks: number;
}

// Background colour per chunk state; also drives the legend below the grid.
const STATE_COLORS: Record<string, string> = {
  pending: "var(--bg-input)",
  queued: "var(--warning)",
  processing: "var(--processing)",
  done: "var(--success)",
  error: "var(--error)",
  retry: "#f97316",
};

/** Tooltip for one cell, e.g. "#3 done (worker-1) retries: 2". */
function chunkTitle(chunk: ChunkInfo): string {
  const worker = chunk.worker_id ? ` (${chunk.worker_id})` : "";
  const retries = chunk.retries ? ` retries: ${chunk.retries}` : "";
  // The space after the sequence number keeps it from running into the state
  // name (previously rendered as "#3done").
  return `#${chunk.sequence} ${chunk.state}${worker}${retries}`;
}

/**
 * Grid of chunk cells coloured by state, with a "seen / total" counter in the
 * header and a colour legend underneath.
 *
 * @param chunks      Chunks seen so far, rendered in the order given.
 * @param totalChunks Expected total; shown as "?" while it is still 0.
 */
export function ChunkGrid({ chunks, totalChunks }: Props) {
  return (
    <div className="chunk-grid-panel">
      <div className="panel-header">
        <h2>
          Chunks{" "}
          <span className="chunk-count">
            {chunks.length} / {totalChunks || "?"}
          </span>
        </h2>
      </div>
      <div className="chunk-grid">
        {chunks.map((chunk) => (
          <div
            key={chunk.sequence}
            className="chunk-cell"
            style={{ background: STATE_COLORS[chunk.state] || "var(--bg-input)" }}
            title={chunkTitle(chunk)}
          >
            {chunk.sequence}
          </div>
        ))}
      </div>
      <div className="chunk-legend">
        {Object.entries(STATE_COLORS).map(([state, color]) => (
          <span key={state} className="legend-item">
            <span className="legend-dot" style={{ background: color }} />
            {state}
          </span>
        ))}
      </div>
    </div>
  );
}

View File

@@ -0,0 +1,218 @@
import { useMemo, useState } from "react";
import { FileManager } from "../../../common/components/FileManager";
import type { FileEntry } from "../../../common/components/FileManager";
import { formatDuration, formatSize } from "../../../common/utils/format";
import type { MediaAsset, PipelineConfig } from "../types";
interface Props {
  onStart: (config: PipelineConfig) => void;
  onStop: () => void;
  onReset: () => void;
  running: boolean;
  done: boolean;
  assets: MediaAsset[];
  selectedAsset: MediaAsset | null;
  onSelectAsset: (asset: MediaAsset) => void;
  onScan: () => void;
  scanning: boolean;
}

// Single source of truth for the processor union (declared on PipelineConfig).
type ProcessorType = PipelineConfig["processor_type"];

// Chunk lengths offered in the dropdown, in seconds.
const CHUNK_DURATION_CHOICES = [5, 10, 15, 30, 60];

// Processor dropdown entries: value sent to the backend, label shown to the user.
const PROCESSOR_CHOICES: { value: ProcessorType; label: string }[] = [
  { value: "ffmpeg", label: "FFmpegExtractProcessor" },
  { value: "checksum", label: "ChecksumProcessor" },
  { value: "simulated_decode", label: "SimulatedDecodeProcessor" },
  { value: "composite", label: "CompositeProcessor" },
];

/** Parse an optional seconds field; a blank input means "no limit" (null). */
function secondsOrNull(raw: string): number | null {
  return raw ? parseFloat(raw) : null;
}

/**
 * Sidebar form: asset picker plus pipeline settings and the
 * launch / stop / reset buttons. All inputs are disabled while a job runs.
 */
export function ConfigPanel({
  onStart,
  onStop,
  onReset,
  running,
  done,
  assets,
  selectedAsset,
  onSelectAsset,
  onScan,
  scanning,
}: Props) {
  const [chunkDuration, setChunkDuration] = useState(10.0);
  const [numWorkers, setNumWorkers] = useState(4);
  const [maxRetries, setMaxRetries] = useState(3);
  const [processorType, setProcessorType] = useState<ProcessorType>("ffmpeg");
  const [startTime, setStartTime] = useState<string>("");
  const [endTime, setEndTime] = useState<string>("");

  // Map assets to FileEntry for FileManager
  const fileEntries: FileEntry[] = useMemo(() => {
    return assets.map((asset) => ({
      key: asset.id,
      name: asset.filename,
      size: asset.file_size ?? undefined,
      meta: formatDuration(asset.duration),
    }));
  }, [assets]);

  // FileManager reports the entry key; translate it back to the full asset.
  const handleFileSelect = (file: FileEntry) => {
    const match = assets.find((candidate) => candidate.id === file.key);
    if (match) onSelectAsset(match);
  };

  const handleSubmit = (e: React.FormEvent) => {
    e.preventDefault();
    if (!selectedAsset) return;
    const config: PipelineConfig = {
      source_asset_id: selectedAsset.id,
      chunk_duration: chunkDuration,
      num_workers: numWorkers,
      max_retries: maxRetries,
      processor_type: processorType,
      start_time: secondsOrNull(startTime),
      end_time: secondsOrNull(endTime),
    };
    onStart(config);
  };

  const canLaunch = !running && !done;

  return (
    <div className="config-panel">
      <FileManager
        title="Assets"
        files={fileEntries}
        selectedKey={selectedAsset?.id ?? null}
        onSelect={handleFileSelect}
        onScan={onScan}
        scanning={scanning}
        emptyMessage="No assets — click Scan Folder"
        disabled={running}
      />
      {selectedAsset && (
        <div className="selected-asset-info">
          <span className="asset-detail">{selectedAsset.filename}</span>
          <span className="asset-detail-meta">
            {selectedAsset.video_codec} · {selectedAsset.width}x
            {selectedAsset.height} · {formatDuration(selectedAsset.duration)} ·{" "}
            {formatSize(selectedAsset.file_size)}
          </span>
        </div>
      )}
      {/* Pipeline Config */}
      <div className="panel-header" style={{ marginTop: "1rem" }}>
        <h2>Pipeline Config</h2>
      </div>
      <form onSubmit={handleSubmit}>
        <div className="config-field">
          <label>
            Time Range (seconds){" "}
            <span className="default">optional limits what gets chunked</span>
          </label>
          <div className="range-row">
            <input
              type="number"
              min={0}
              step={1}
              placeholder="start"
              value={startTime}
              onChange={(e) => setStartTime(e.target.value)}
              disabled={running}
            />
            <span className="range-sep">to</span>
            <input
              type="number"
              min={0}
              step={1}
              placeholder="end"
              value={endTime}
              onChange={(e) => setEndTime(e.target.value)}
              disabled={running}
            />
          </div>
        </div>
        <div className="config-field">
          <label>
            Chunk Duration <span className="default">default: 10s</span>
          </label>
          <select
            value={chunkDuration}
            onChange={(e) => setChunkDuration(Number(e.target.value))}
            disabled={running}
          >
            {CHUNK_DURATION_CHOICES.map((seconds) => (
              <option key={seconds} value={seconds}>
                {`${seconds} seconds`}
              </option>
            ))}
          </select>
        </div>
        <div className="config-field">
          <label>
            Workers <span className="default">default: 4</span>
          </label>
          <input
            type="number"
            min={1}
            max={16}
            value={numWorkers}
            onChange={(e) => setNumWorkers(Number(e.target.value))}
            disabled={running}
          />
        </div>
        <div className="config-field">
          <label>
            Max Retries <span className="default">default: 3</span>
          </label>
          <input
            type="number"
            min={0}
            max={10}
            value={maxRetries}
            onChange={(e) => setMaxRetries(Number(e.target.value))}
            disabled={running}
          />
        </div>
        <div className="config-field">
          <label>
            Processor <span className="default">default: ffmpeg</span>
          </label>
          <select
            value={processorType}
            onChange={(e) => setProcessorType(e.target.value as ProcessorType)}
            disabled={running}
          >
            {PROCESSOR_CHOICES.map(({ value, label }) => (
              <option key={value} value={value}>
                {label}
              </option>
            ))}
          </select>
        </div>
        {canLaunch && (
          <button
            type="submit"
            className="start-button"
            disabled={!selectedAsset}
          >
            Launch Pipeline
          </button>
        )}
      </form>
      {running && (
        <button type="button" className="stop-button" onClick={onStop}>
          Stop Pipeline
        </button>
      )}
      {done && (
        <button type="button" className="reset-button" onClick={onReset}>
          Reset
        </button>
      )}
    </div>
  );
}

View File

@@ -0,0 +1,40 @@
import type { ErrorEntry } from "../types";
interface Props {
  errors: ErrorEntry[];
}

/**
 * Panel listing every error entry in the order given, with a running count in
 * the header and a placeholder row when there is nothing to show.
 */
export function ErrorLog({ errors }: Props) {
  const isEmpty = errors.length === 0;

  // Entries carry no stable id, so the list position is used as the key.
  const rows = errors.map((entry, index) => {
    const { event_type, sequence, worker_id, error, retries } = entry;
    const hasRetries = retries !== undefined && retries > 0;
    return (
      <div key={index} className="error-entry">
        <span className="error-type">{event_type}</span>
        {sequence !== undefined && (
          <span className="error-seq">chunk #{sequence}</span>
        )}
        {worker_id && <span className="error-worker">{worker_id}</span>}
        <span className="error-msg">{error}</span>
        {hasRetries && (
          <span className="error-retries">
            {retries} retries
          </span>
        )}
      </div>
    );
  });

  return (
    <div className="error-log">
      <div className="panel-header">
        <h2>
          Errors & Retries{" "}
          <span className="error-count">{errors.length}</span>
        </h2>
      </div>
      <div className="error-entries">
        {isEmpty && <div className="error-empty">No errors recorded</div>}
        {rows}
      </div>
    </div>
  );
}

View File

@@ -0,0 +1,51 @@
import { useMemo } from "react";
import { FileManager } from "../../../common/components/FileManager";
import type { FileEntry } from "../../../common/components/FileManager";
import { formatSize } from "../../../common/utils/format";
import type { ChunkOutputFile } from "../types";
interface Props {
  files: ChunkOutputFile[];
}

/**
 * Lists a job's output files in a FileManager; each row's action is a
 * download link labelled with the file size.
 */
export function OutputFiles({ files }: Props) {
  // FileManager rows: show only the last path segment of the storage key.
  const fileEntries: FileEntry[] = useMemo(() => {
    return files.map(({ key, size }) => {
      const basename = key.slice(key.lastIndexOf("/") + 1);
      return { key, name: basename || key, size };
    });
  }, [files]);

  // key -> download URL, looked up when FileManager asks for a row's actions.
  const urlMap = useMemo(
    () => new Map<string, string>(files.map(({ key, url }) => [key, url])),
    [files],
  );

  const renderDownload = (file: FileEntry) => {
    const href = urlMap.get(file.key);
    if (!href) return null;
    return (
      <a
        href={href}
        download
        className="fm-download-link"
        // Keep the click from also reaching the surrounding row.
        onClick={(e) => e.stopPropagation()}
      >
        {formatSize(file.size)}
      </a>
    );
  };

  return (
    <FileManager
      title="Output Files"
      files={fileEntries}
      emptyMessage="No output files"
      renderActions={renderDownload}
    />
  );
}

View File

@@ -0,0 +1,39 @@
interface Props {
  current: number;
  max: number;
  buffered: number;
}

/**
 * Queue fill bar plus a plain counter for the heap buffer.
 *
 * @param current  Items currently queued.
 * @param max      Queue capacity; a non-positive value renders an empty bar.
 * @param buffered Value shown next to "Heap Buffer".
 */
export function QueueGauge({ current, max, buffered }: Props) {
  // Percentage of capacity in use, capped at 100.
  let fillPct = 0;
  if (max > 0) {
    fillPct = Math.min((current / max) * 100, 100);
  }
  // The bar switches to the error colour above 80% full.
  const barStyle = {
    width: `${fillPct}%`,
    background: fillPct > 80 ? "var(--error)" : "var(--processing)",
  };

  return (
    <div className="queue-gauge">
      <div className="panel-header">
        <h2>Queue & Buffer</h2>
      </div>
      <div className="gauge-row">
        <div className="gauge-label">
          Queue <span className="gauge-value">{current}/{max}</span>
        </div>
        <div className="gauge-bar">
          <div className="gauge-fill" style={barStyle} />
        </div>
      </div>
      <div className="gauge-row">
        <div className="gauge-label">
          Heap Buffer <span className="gauge-value">{buffered}</span>
        </div>
        <div className="gauge-note">
          Out-of-order results waiting for gaps to fill
        </div>
      </div>
    </div>
  );
}

View File

@@ -0,0 +1,43 @@
import type { PipelineStats } from "../types";
interface Props {
  stats: PipelineStats;
}

/**
 * Six-tile summary of the pipeline: chunk counters, retries, throughput and
 * elapsed time (the last two formatted to two decimals).
 */
export function StatsPanel({ stats }: Props) {
  // Tiles in display order.
  const tiles = [
    { label: "Total Chunks", value: stats.total_chunks },
    { label: "Processed", value: stats.processed },
    { label: "Failed", value: stats.failed },
    { label: "Retries", value: stats.retries },
    { label: "MB/s", value: stats.throughput_mbps.toFixed(2) },
    { label: "Elapsed", value: <>{stats.elapsed.toFixed(2)}s</> },
  ];

  return (
    <div className="stats-panel">
      <div className="panel-header">
        <h2>Stats</h2>
      </div>
      <div className="stats-grid">
        {tiles.map(({ label, value }) => (
          <div key={label} className="stat">
            <div className="stat-value">{value}</div>
            <div className="stat-label">{label}</div>
          </div>
        ))}
      </div>
    </div>
  );
}

View File

@@ -0,0 +1,48 @@
import type { WorkerInfo } from "../types";
interface Props {
  workers: WorkerInfo[];
}

// Text colour of the state label on each worker card.
const STATE_COLORS: Record<string, string> = {
  idle: "var(--text-muted)",
  processing: "var(--processing)",
  retry: "#f97316",
  stopped: "var(--error)",
};

/**
 * One card per worker showing its state, the chunk it is working on (if any)
 * and its done / error / retry counters.
 */
export function WorkerPanel({ workers }: Props) {
  const cards = workers.map((worker) => {
    const { worker_id, state, current_chunk, processed, errors, retries } =
      worker;
    const stateColor = STATE_COLORS[state] || "var(--text-secondary)";
    return (
      <div key={worker_id} className="worker-card">
        <div className="worker-header">
          <span className="worker-name">{worker_id}</span>
          <span className="worker-state" style={{ color: stateColor }}>
            {state}
          </span>
        </div>
        {/* Chunk 0 is a valid value, so compare against undefined explicitly. */}
        {current_chunk !== undefined && (
          <div className="worker-chunk">chunk #{current_chunk}</div>
        )}
        <div className="worker-stats">
          <span>done: {processed}</span>
          <span>err: {errors}</span>
          <span>retry: {retries}</span>
        </div>
      </div>
    );
  });

  return (
    <div className="worker-panel">
      <div className="panel-header">
        <h2>Workers</h2>
      </div>
      <div className="worker-cards">
        {cards}
        {workers.length === 0 && (
          <div className="worker-empty">No workers started</div>
        )}
      </div>
    </div>
  );
}

View File

@@ -0,0 +1,92 @@
import { useCallback, useEffect, useRef, useState } from "react";
import type { PipelineEvent } from "../types";
/**
 * SSE hook — opens a native EventSource on /api/chunker/stream/{jobId} and
 * accumulates every named pipeline event it receives.
 *
 * @param jobId Job to follow; `null` means "no stream".
 * @returns The event log, connection / completion flags, and `close` / `reset`
 *          controls.
 */
export function useEventStream(jobId: string | null) {
  const [events, setEvents] = useState<PipelineEvent[]>([]);
  const [connected, setConnected] = useState(false);
  const [done, setDone] = useState(false);
  const esRef = useRef<EventSource | null>(null);

  // Close the live connection, if there is one.
  const close = useCallback(() => {
    const source = esRef.current;
    if (!source) return;
    source.close();
    esRef.current = null;
    setConnected(false);
  }, []);

  // Close and also forget everything received so far.
  const reset = useCallback(() => {
    close();
    setEvents([]);
    setDone(false);
  }, [close]);

  useEffect(() => {
    if (!jobId) return;

    // A new job starts from an empty log.
    setEvents([]);
    setDone(false);

    const source = new EventSource(`/api/chunker/stream/${jobId}`);
    esRef.current = source;
    source.onopen = () => setConnected(true);
    source.onerror = () => setConnected(false);

    // Builds a listener that stamps the SSE event name onto the parsed payload.
    const record = (eventType: string) => (message: MessageEvent) => {
      let payload: PipelineEvent;
      try {
        payload = JSON.parse(message.data) as PipelineEvent;
      } catch {
        // ignore parse errors
        return;
      }
      setEvents((prev) => [...prev, { ...payload, event_type: eventType }]);
    };

    // Listen to all raw pipeline event types
    const streamEventNames = [
      "waiting",
      "pipeline_start",
      "pipeline_info",
      "chunk_queued",
      "chunk_processing",
      "chunk_done",
      "chunk_retry",
      "chunk_error",
      "chunk_collected",
      "worker_status",
      "pipeline_progress",
      "pipeline_complete",
      "pipeline_error",
      "producer_error",
      "cancelled",
      "done",
      "timeout",
    ];
    streamEventNames.forEach((name) => {
      source.addEventListener(name, record(name));
    });

    // "done" and "timeout" both end the stream; registered after the recorders
    // so the terminal event itself is still logged first.
    const finish = () => {
      setDone(true);
      source.close();
      setConnected(false);
    };
    source.addEventListener("done", finish);
    source.addEventListener("timeout", finish);

    return () => {
      source.close();
      esRef.current = null;
    };
  }, [jobId]);

  return { events, connected, done, close, reset };
}

View File

@@ -0,0 +1,103 @@
import { useCallback, useEffect, useRef, useState } from "react";
import { GrpcWebFetchTransport } from "@protobuf-ts/grpcweb-transport";
import { WorkerServiceClient } from "../../../common/api/grpc/worker.client";
import type { ChunkPipelineEvent } from "../../../common/api/grpc/worker";
import type { PipelineEvent } from "../types";
const GRPC_WEB_URL = "/grpc-web";
/**
 * Convert a gRPC ChunkPipelineEvent into the UI's PipelineEvent shape
 * (camelCase to snake_case). Falsy field values are mapped to `undefined`.
 *
 * NOTE(review): because of `|| undefined`, a legitimate 0 (e.g. sequence 0 or
 * an empty queue) is reported as "absent" — confirm against worker.proto
 * whether these fields can be told apart from unset.
 */
function toEvent(msg: ChunkPipelineEvent): PipelineEvent {
  const {
    eventType,
    jobId,
    sequence,
    workerId,
    state,
    queueSize,
    elapsed,
    throughputMbps,
    totalChunks,
    processedChunks,
    failedChunks,
    error,
    processingTime,
    retries,
  } = msg;

  const converted: PipelineEvent = {
    event_type: eventType,
    job_id: jobId,
    sequence: sequence || undefined,
    worker_id: workerId || undefined,
    state: state || undefined,
    queue_size: queueSize || undefined,
    elapsed: elapsed || undefined,
    throughput_mbps: throughputMbps || undefined,
    total_chunks: totalChunks || undefined,
    processed_chunks: processedChunks || undefined,
    failed_chunks: failedChunks || undefined,
    error: error || undefined,
    processing_time: processingTime || undefined,
    retries: retries || undefined,
  };
  return converted;
}
/**
 * gRPC-Web streaming hook — connects to WorkerService.StreamChunkPipeline
 * via Envoy proxy. Replaces useEventStream (SSE+Redis).
 *
 * @param jobId Job to follow; `null` means "no stream".
 * @returns The event log, connection / completion flags, and `close` / `reset`
 *          controls. `done` becomes true on `pipeline_complete` or
 *          `pipeline_error`.
 */
export function useGrpcStream(jobId: string | null) {
  const [events, setEvents] = useState<PipelineEvent[]>([]);
  const [connected, setConnected] = useState(false);
  const [done, setDone] = useState(false);
  const abortRef = useRef<AbortController | null>(null);

  // Abort the live stream, if there is one.
  const close = useCallback(() => {
    if (abortRef.current) {
      abortRef.current.abort();
      abortRef.current = null;
      setConnected(false);
    }
  }, []);

  // Abort and also forget everything received so far.
  const reset = useCallback(() => {
    close();
    setEvents([]);
    setDone(false);
  }, [close]);

  useEffect(() => {
    if (!jobId) return;

    // A new job starts from an empty log.
    setEvents([]);
    setDone(false);

    const abort = new AbortController();
    abortRef.current = abort;

    const transport = new GrpcWebFetchTransport({
      baseUrl: GRPC_WEB_URL,
      abort: abort.signal,
    });
    const client = new WorkerServiceClient(transport);
    const stream = client.streamChunkPipeline({ jobId });
    setConnected(true);

    (async () => {
      try {
        for await (const msg of stream.responses) {
          // This stream was closed or superseded: drop anything still in flight.
          if (abort.signal.aborted) return;

          const evt = toEvent(msg);
          setEvents((prev) => [...prev, evt]);
          if (
            evt.event_type === "pipeline_complete" ||
            evt.event_type === "pipeline_error"
          ) {
            setDone(true);
            break;
          }
        }
      } catch (err) {
        // An abort rejects the iterator by design; anything else is a real
        // transport failure and is logged instead of vanishing silently.
        if (!abort.signal.aborted) {
          console.error("Chunk pipeline stream failed", err);
        }
      } finally {
        // Only the stream that is still current may flip `connected` off.
        // An aborted stream settles asynchronously, after its replacement has
        // already set `connected` to true (e.g. StrictMode's double mount or a
        // jobId change), and must not clobber that.
        if (!abort.signal.aborted) {
          setConnected(false);
        }
      }
    })();

    return () => {
      abort.abort();
      // Leave the ref alone if a newer stream has already replaced this one.
      if (abortRef.current === abort) {
        abortRef.current = null;
      }
    };
  }, [jobId]);

  return { events, connected, done, close, reset };
}

9
ui/chunker/src/main.tsx Normal file
View File

@@ -0,0 +1,9 @@
import React from "react";
import ReactDOM from "react-dom/client";
import App from "./App";
// Entry point: mount <App /> into the element with id "app".
// The `!` only silences the type checker — nothing here guards against the
// element being absent from the page.
// StrictMode wraps the whole tree.
ReactDOM.createRoot(document.getElementById("app")!).render(
<React.StrictMode>
<App />
</React.StrictMode>
);

96
ui/chunker/src/types.ts Normal file
View File

@@ -0,0 +1,96 @@
/**
* Chunker UI types.
*
* Domain types (MediaAsset, ChunkEvent, etc.) come from generated schema.
* This file holds UI-only types: state enums, SSE envelope, derived views.
*/
// Re-export generated types used by this app
export type {
MediaAsset,
ChunkEvent,
WorkerEvent,
PipelineStats,
ChunkOutputFile,
} from "../../common/types/generated";
/**
 * Pipeline configuration sent to the backend.
 * Built by ConfigPanel on submit and forwarded as the `input` of the
 * CreateChunkJob mutation.
 */
export interface PipelineConfig {
// Id of the asset selected in the file list.
source_asset_id: string;
// Chunk length in seconds (the form offers 5, 10, 15, 30 and 60).
chunk_duration: number;
num_workers: number;
max_retries: number;
processor_type: "ffmpeg" | "checksum" | "simulated_decode" | "composite";
// Optional time range in seconds; the form sends null for a blank field.
start_time?: number | null;
end_time?: number | null;
}
/**
 * UI state of an individual chunk in the grid.
 * "pending" is the initial state of a chunk on first sighting; the others are
 * entered on the matching chunk_queued / chunk_processing / chunk_done /
 * chunk_error / chunk_retry event.
 */
export type ChunkState =
| "pending"
| "queued"
| "processing"
| "done"
| "error"
| "retry";
/** Tracked chunk in the UI grid (derived from events), keyed by `sequence`. */
export interface ChunkInfo {
sequence: number;
state: ChunkState;
// Copied from any event for this chunk that carries a size.
size?: number;
// Worker named by the chunk's chunk_processing event.
worker_id?: string;
// Taken from chunk_done / chunk_retry events.
retries?: number;
// Taken from the chunk_done event.
processing_time?: number;
// Message from the chunk_error event.
error?: string;
}
/** Worker thread status (derived from events), keyed by `worker_id`. */
export interface WorkerInfo {
worker_id: string;
// NOTE(review): the derivation visible in App only ever assigns "idle",
// "processing" and "stopped" — confirm whether "retry" is still produced.
state: "idle" | "processing" | "retry" | "stopped";
// Sequence number of the chunk being processed; undefined when not processing.
current_chunk?: number;
// Running counters accumulated from chunk lifecycle events.
processed: number;
errors: number;
retries: number;
}
/**
 * Raw event envelope from the backend, shared by both transports.
 * The event_type field is set by useEventStream from the SSE event name, or
 * by useGrpcStream from the gRPC message's eventType.
 * All other fields are optional — presence depends on event_type.
 */
export interface PipelineEvent {
job_id?: string;
event_type?: string;
status?: string;
progress?: number;
total_chunks?: number;
processed_chunks?: number;
failed_chunks?: number;
throughput_mbps?: number;
elapsed?: number;
error?: string;
// Chunk-level fields
sequence?: number;
size?: number;
worker_id?: string;
success?: boolean;
processing_time?: number;
retries?: number;
queue_size?: number;
// Worker-level fields
state?: string;
attempt?: number;
backoff?: number;
}
/** Error log entry (derived from events): one per event that carries `error`. */
export interface ErrorEntry {
// Date.now() taken when the entry is derived, not when the event occurred.
timestamp: number;
sequence?: number;
worker_id?: string;
error: string;
retries?: number;
// Event type of the originating event ("" when the event had none).
event_type: string;
}

1
ui/chunker/src/vite-env.d.ts vendored Normal file
View File

@@ -0,0 +1 @@
/// <reference types="vite/client" />

26
ui/chunker/tsconfig.json Normal file
View File

@@ -0,0 +1,26 @@
{
"compilerOptions": {
"target": "ES2020",
"useDefineForClassFields": true,
"module": "ESNext",
"lib": ["ES2020", "DOM", "DOM.Iterable"],
"skipLibCheck": true,
"moduleResolution": "bundler",
"allowImportingTsExtensions": true,
"resolveJsonModule": true,
"isolatedModules": true,
"noEmit": true,
"jsx": "react-jsx",
"strict": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"noFallthroughCasesInSwitch": true,
"rootDir": "..",
"typeRoots": ["./node_modules/@types"],
"paths": {
"@protobuf-ts/*": ["./node_modules/@protobuf-ts/*"]
}
},
"include": ["src/**/*.ts", "src/**/*.tsx", "../common/**/*.ts", "../common/**/*.tsx"],
"references": [{ "path": "./tsconfig.node.json" }]
}

View File

@@ -0,0 +1,10 @@
{
"compilerOptions": {
"composite": true,
"skipLibCheck": true,
"module": "ESNext",
"moduleResolution": "bundler",
"allowSyntheticDefaultImports": true
},
"include": ["vite.config.ts"]
}

50
ui/chunker/vite.config.ts Normal file
View File

@@ -0,0 +1,50 @@
import path from "path";
import { defineConfig } from "vite";
import react from "@vitejs/plugin-react";
// Vite configuration for the chunker UI, served under the /chunker/ base path.
export default defineConfig({
base: "/chunker/",
plugins: [react()],
resolve: {
// Pin the @protobuf-ts packages to this app's own node_modules.
// Presumably needed because the generated gRPC client lives in ../common,
// outside this package — confirm.
alias: {
"@protobuf-ts/runtime": path.resolve(
__dirname,
"node_modules/@protobuf-ts/runtime",
),
"@protobuf-ts/runtime-rpc": path.resolve(
__dirname,
"node_modules/@protobuf-ts/runtime-rpc",
),
"@protobuf-ts/grpcweb-transport": path.resolve(
__dirname,
"node_modules/@protobuf-ts/grpcweb-transport",
),
},
},
server: {
host: "0.0.0.0",
port: 5174,
// Comma-separated host list from VITE_ALLOWED_HOSTS; empty when unset.
allowedHosts: process.env.VITE_ALLOWED_HOSTS?.split(",") || [],
hmr: {
path: "/chunker/@vite/client",
},
fs: {
// Allow serving files from the parent directory (the app imports ../common).
allow: [".."],
},
proxy: {
// REST/SSE and GraphQL both go to the FastAPI service.
"/api": {
target: "http://fastapi:8702",
changeOrigin: true,
},
"/graphql": {
target: "http://fastapi:8702",
changeOrigin: true,
},
// gRPC-Web goes to Envoy with the /grpc-web prefix stripped.
"/grpc-web": {
target: "http://envoy:8090",
changeOrigin: true,
rewrite: (p) => p.replace(/^\/grpc-web/, ""),
},
},
},
});

24
ui/common/api/graphql.ts Normal file
View File

@@ -0,0 +1,24 @@
/**
* Shared GraphQL client for all MPR UI apps.
*/
const GRAPHQL_URL = "/api/graphql";
/**
 * Execute a GraphQL operation against the shared endpoint.
 *
 * @param query     GraphQL document (query or mutation).
 * @param variables Optional variables for the operation.
 * @returns The `data` member of the response, typed as `T`.
 * @throws Error with the first GraphQL error message when the response
 *         carries `errors`; otherwise Error with the HTTP status when the
 *         request failed or the body is not a JSON object.
 */
export async function gql<T>(
  query: string,
  variables?: Record<string, unknown>,
): Promise<T> {
  const response = await fetch(GRAPHQL_URL, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ query, variables }),
  });

  // Parse defensively: a proxy or a crashed backend may answer with HTML or
  // an empty body, which would otherwise surface as an opaque parse error.
  let payload: { data?: T; errors?: { message: string }[] } | null = null;
  try {
    payload = await response.json();
  } catch {
    payload = null;
  }

  // GraphQL-level errors take precedence: they carry the most useful message.
  if (payload?.errors?.length) {
    throw new Error(payload.errors[0].message);
  }
  // HTTP-level failure (or no usable body) without a GraphQL error list.
  if (!response.ok || payload === null || typeof payload !== "object") {
    throw new Error(
      `GraphQL request failed: ${response.status} ${response.statusText}`,
    );
  }
  return payload.data as T;
}

View File

@@ -0,0 +1,95 @@
// @generated by protobuf-ts 2.11.1
// @generated from protobuf file "worker.proto" (package "mpr.worker", syntax proto3)
// tslint:disable
//
// Protocol Buffer Definitions - GENERATED FILE
//
// Do not edit directly. Regenerate using modelgen.
//
import type { RpcTransport } from "@protobuf-ts/runtime-rpc";
import type { ServiceInfo } from "@protobuf-ts/runtime-rpc";
import { WorkerService } from "./worker";
import type { ChunkPipelineEvent } from "./worker";
import type { ChunkStreamRequest } from "./worker";
import type { WorkerStatus } from "./worker";
import type { Empty } from "./worker";
import type { CancelResponse } from "./worker";
import type { CancelRequest } from "./worker";
import type { ProgressUpdate } from "./worker";
import type { ProgressRequest } from "./worker";
import type { ServerStreamingCall } from "@protobuf-ts/runtime-rpc";
import { stackIntercept } from "@protobuf-ts/runtime-rpc";
import type { JobResponse } from "./worker";
import type { JobRequest } from "./worker";
import type { UnaryCall } from "@protobuf-ts/runtime-rpc";
import type { RpcOptions } from "@protobuf-ts/runtime-rpc";
/**
* @generated from protobuf service mpr.worker.WorkerService
*/
export interface IWorkerServiceClient {
/**
* @generated from protobuf rpc: SubmitJob
*/
submitJob(input: JobRequest, options?: RpcOptions): UnaryCall<JobRequest, JobResponse>;
/**
* @generated from protobuf rpc: StreamProgress
*/
streamProgress(input: ProgressRequest, options?: RpcOptions): ServerStreamingCall<ProgressRequest, ProgressUpdate>;
/**
* @generated from protobuf rpc: CancelJob
*/
cancelJob(input: CancelRequest, options?: RpcOptions): UnaryCall<CancelRequest, CancelResponse>;
/**
* @generated from protobuf rpc: GetWorkerStatus
*/
getWorkerStatus(input: Empty, options?: RpcOptions): UnaryCall<Empty, WorkerStatus>;
/**
* @generated from protobuf rpc: StreamChunkPipeline
*/
streamChunkPipeline(input: ChunkStreamRequest, options?: RpcOptions): ServerStreamingCall<ChunkStreamRequest, ChunkPipelineEvent>;
}
/**
* @generated from protobuf service mpr.worker.WorkerService
*/
export class WorkerServiceClient implements IWorkerServiceClient, ServiceInfo {
typeName = WorkerService.typeName;
methods = WorkerService.methods;
options = WorkerService.options;
constructor(private readonly _transport: RpcTransport) {
}
/**
* @generated from protobuf rpc: SubmitJob
*/
submitJob(input: JobRequest, options?: RpcOptions): UnaryCall<JobRequest, JobResponse> {
const method = this.methods[0], opt = this._transport.mergeOptions(options);
return stackIntercept<JobRequest, JobResponse>("unary", this._transport, method, opt, input);
}
/**
* @generated from protobuf rpc: StreamProgress
*/
streamProgress(input: ProgressRequest, options?: RpcOptions): ServerStreamingCall<ProgressRequest, ProgressUpdate> {
const method = this.methods[1], opt = this._transport.mergeOptions(options);
return stackIntercept<ProgressRequest, ProgressUpdate>("serverStreaming", this._transport, method, opt, input);
}
/**
* @generated from protobuf rpc: CancelJob
*/
cancelJob(input: CancelRequest, options?: RpcOptions): UnaryCall<CancelRequest, CancelResponse> {
const method = this.methods[2], opt = this._transport.mergeOptions(options);
return stackIntercept<CancelRequest, CancelResponse>("unary", this._transport, method, opt, input);
}
/**
* @generated from protobuf rpc: GetWorkerStatus
*/
getWorkerStatus(input: Empty, options?: RpcOptions): UnaryCall<Empty, WorkerStatus> {
const method = this.methods[3], opt = this._transport.mergeOptions(options);
return stackIntercept<Empty, WorkerStatus>("unary", this._transport, method, opt, input);
}
/**
* @generated from protobuf rpc: StreamChunkPipeline
*/
streamChunkPipeline(input: ChunkStreamRequest, options?: RpcOptions): ServerStreamingCall<ChunkStreamRequest, ChunkPipelineEvent> {
const method = this.methods[4], opt = this._transport.mergeOptions(options);
return stackIntercept<ChunkStreamRequest, ChunkPipelineEvent>("serverStreaming", this._transport, method, opt, input);
}
}

View File

@@ -0,0 +1,946 @@
// @generated by protobuf-ts 2.11.1
// @generated from protobuf file "worker.proto" (package "mpr.worker", syntax proto3)
// tslint:disable
//
// Protocol Buffer Definitions - GENERATED FILE
//
// Do not edit directly. Regenerate using modelgen.
//
import { ServiceType } from "@protobuf-ts/runtime-rpc";
import type { BinaryWriteOptions } from "@protobuf-ts/runtime";
import type { IBinaryWriter } from "@protobuf-ts/runtime";
import { WireType } from "@protobuf-ts/runtime";
import type { BinaryReadOptions } from "@protobuf-ts/runtime";
import type { IBinaryReader } from "@protobuf-ts/runtime";
import { UnknownFieldHandler } from "@protobuf-ts/runtime";
import type { PartialMessage } from "@protobuf-ts/runtime";
import { reflectionMergePartial } from "@protobuf-ts/runtime";
import { MessageType } from "@protobuf-ts/runtime";
/**
* @generated from protobuf message mpr.worker.JobRequest
*/
export interface JobRequest {
/**
* @generated from protobuf field: string job_id = 1
*/
jobId: string;
/**
* @generated from protobuf field: string source_path = 2
*/
sourcePath: string;
/**
* @generated from protobuf field: string output_path = 3
*/
outputPath: string;
/**
* @generated from protobuf field: string preset_json = 4
*/
presetJson: string;
/**
* @generated from protobuf field: optional float trim_start = 5
*/
trimStart?: number;
/**
* @generated from protobuf field: optional float trim_end = 6
*/
trimEnd?: number;
}
/**
* Plain-object shape of the proto3 message `mpr.worker.JobResponse`
* (response type of the SubmitJob rpc). All three properties are required.
*
* @generated from protobuf message mpr.worker.JobResponse
*/
export interface JobResponse {
/**
* @generated from protobuf field: string job_id = 1
*/
jobId: string;
/**
* @generated from protobuf field: bool accepted = 2
*/
accepted: boolean;
/**
* @generated from protobuf field: string message = 3
*/
message: string;
}
/**
* Plain-object shape of the proto3 message `mpr.worker.ProgressRequest`
* (request type of the StreamProgress rpc); it carries only a job id.
*
* @generated from protobuf message mpr.worker.ProgressRequest
*/
export interface ProgressRequest {
/**
* @generated from protobuf field: string job_id = 1
*/
jobId: string;
}
/**
* Plain-object shape of the proto3 message `mpr.worker.ProgressUpdate`
* (streamed response type of the StreamProgress rpc).
*
* `error` is the only optional property; the rest are always present.
*
* @generated from protobuf message mpr.worker.ProgressUpdate
*/
export interface ProgressUpdate {
/**
* @generated from protobuf field: string job_id = 1
*/
jobId: string;
/**
* NOTE(review): integer on the wire; the scale (e.g. percent) is not
* visible in this file — confirm against the worker.
*
* @generated from protobuf field: int32 progress = 2
*/
progress: number;
/**
* @generated from protobuf field: int32 current_frame = 3
*/
currentFrame: number;
/**
* @generated from protobuf field: float current_time = 4
*/
currentTime: number;
/**
* @generated from protobuf field: float speed = 5
*/
speed: number;
/**
* @generated from protobuf field: string status = 6
*/
status: string;
/**
* Left `undefined` when not set; an undefined value is not written to the wire.
*
* @generated from protobuf field: optional string error = 7
*/
error?: string;
}
/**
* Plain-object shape of the proto3 message `mpr.worker.CancelRequest`
* (request type of the CancelJob rpc); it carries only a job id.
*
* @generated from protobuf message mpr.worker.CancelRequest
*/
export interface CancelRequest {
/**
* @generated from protobuf field: string job_id = 1
*/
jobId: string;
}
/**
* Plain-object shape of the proto3 message `mpr.worker.CancelResponse`
* (response type of the CancelJob rpc). All three properties are required.
*
* @generated from protobuf message mpr.worker.CancelResponse
*/
export interface CancelResponse {
/**
* @generated from protobuf field: string job_id = 1
*/
jobId: string;
/**
* @generated from protobuf field: bool cancelled = 2
*/
cancelled: boolean;
/**
* @generated from protobuf field: string message = 3
*/
message: string;
}
/**
* Plain-object shape of the proto3 message `mpr.worker.WorkerStatus`
* (response type of the GetWorkerStatus rpc).
*
* @generated from protobuf message mpr.worker.WorkerStatus
*/
export interface WorkerStatus {
/**
* @generated from protobuf field: bool available = 1
*/
available: boolean;
/**
* @generated from protobuf field: int32 active_jobs = 2
*/
activeJobs: number;
/**
* Repeated proto field, surfaced as an array.
*
* @generated from protobuf field: repeated string supported_codecs = 3
*/
supportedCodecs: string[];
/**
* @generated from protobuf field: bool gpu_available = 4
*/
gpuAvailable: boolean;
}
/**
* Empty
*
* Message with no declared fields; used as the request type of the
* GetWorkerStatus rpc.
*
* @generated from protobuf message mpr.worker.Empty
*/
export interface Empty {
}
/**
* Plain-object shape of the proto3 message `mpr.worker.ChunkStreamRequest`
* (request type of the StreamChunkPipeline rpc); it carries only a job id.
*
* @generated from protobuf message mpr.worker.ChunkStreamRequest
*/
export interface ChunkStreamRequest {
/**
* @generated from protobuf field: string job_id = 1
*/
jobId: string;
}
/**
* Plain-object shape of the proto3 message `mpr.worker.ChunkPipelineEvent`
* (streamed response type of the StreamChunkPipeline rpc).
*
* No field is declared `optional`, so every property is always present;
* a field missing from the wire keeps its default ("" for strings, 0 for
* numbers). NOTE(review): which fields are meaningful for which
* `eventType` is not visible in this file — confirm against the producer.
*
* @generated from protobuf message mpr.worker.ChunkPipelineEvent
*/
export interface ChunkPipelineEvent {
/**
* @generated from protobuf field: string job_id = 1
*/
jobId: string;
/**
* @generated from protobuf field: string event_type = 2
*/
eventType: string;
/**
* @generated from protobuf field: int32 sequence = 3
*/
sequence: number;
/**
* @generated from protobuf field: string worker_id = 4
*/
workerId: string;
/**
* @generated from protobuf field: string state = 5
*/
state: string;
/**
* @generated from protobuf field: int32 queue_size = 6
*/
queueSize: number;
/**
* @generated from protobuf field: float elapsed = 7
*/
elapsed: number;
/**
* @generated from protobuf field: float throughput_mbps = 8
*/
throughputMbps: number;
/**
* @generated from protobuf field: int32 total_chunks = 9
*/
totalChunks: number;
/**
* @generated from protobuf field: int32 processed_chunks = 10
*/
processedChunks: number;
/**
* @generated from protobuf field: int32 failed_chunks = 11
*/
failedChunks: number;
/**
* Plain (non-optional) string, unlike `ProgressUpdate.error`; an empty
* string is indistinguishable from "not set".
*
* @generated from protobuf field: string error = 12
*/
error: string;
/**
* @generated from protobuf field: float processing_time = 13
*/
processingTime: number;
/**
* @generated from protobuf field: int32 retries = 14
*/
retries: number;
}
// @generated message type with reflection information, may provide speed optimized methods
/**
 * Runtime descriptor for `mpr.worker.JobRequest`: the reflection field table
 * plus explicit create / binary read / binary write methods.
 */
class JobRequest$Type extends MessageType<JobRequest> {
    constructor() {
        super("mpr.worker.JobRequest", [
            { no: 1, name: "job_id", kind: "scalar", T: 9 /*ScalarType.STRING*/ },
            { no: 2, name: "source_path", kind: "scalar", T: 9 /*ScalarType.STRING*/ },
            { no: 3, name: "output_path", kind: "scalar", T: 9 /*ScalarType.STRING*/ },
            { no: 4, name: "preset_json", kind: "scalar", T: 9 /*ScalarType.STRING*/ },
            { no: 5, name: "trim_start", kind: "scalar", opt: true, T: 2 /*ScalarType.FLOAT*/ },
            { no: 6, name: "trim_end", kind: "scalar", opt: true, T: 2 /*ScalarType.FLOAT*/ }
        ]);
    }

    /** Builds a message with default values, then overlays `value` when one is given. */
    create(value?: PartialMessage<JobRequest>): JobRequest {
        const fresh = globalThis.Object.create((this.messagePrototype!));
        fresh.jobId = "";
        fresh.sourcePath = "";
        fresh.outputPath = "";
        fresh.presetJson = "";
        // trimStart / trimEnd are optional and intentionally left unset.
        if (value !== undefined) {
            reflectionMergePartial<JobRequest>(this, fresh, value);
        }
        return fresh;
    }

    /** Decodes `length` bytes from `reader` into `target`, or into a new message. */
    internalBinaryRead(reader: IBinaryReader, length: number, options: BinaryReadOptions, target?: JobRequest): JobRequest {
        const message = target ?? this.create();
        const end = reader.pos + length;
        while (reader.pos < end) {
            const [fieldNo, wireType] = reader.tag();
            switch (fieldNo) {
                case 1: message.jobId = reader.string(); break;
                case 2: message.sourcePath = reader.string(); break;
                case 3: message.outputPath = reader.string(); break;
                case 4: message.presetJson = reader.string(); break;
                case 5: message.trimStart = reader.float(); break;
                case 6: message.trimEnd = reader.float(); break;
                default: {
                    // Unknown field: throw, drop, or hand to a handler, per options.
                    const onUnknown = options.readUnknownField;
                    if (onUnknown === "throw")
                        throw new globalThis.Error(`Unknown field ${fieldNo} (wire type ${wireType}) for ${this.typeName}`);
                    const raw = reader.skip(wireType);
                    if (onUnknown !== false)
                        (onUnknown === true ? UnknownFieldHandler.onRead : onUnknown)(this.typeName, message, fieldNo, wireType, raw);
                }
            }
        }
        return message;
    }

    /**
     * Encodes `message` in ascending field-number order. Fields equal to their
     * default, and optionals that are undefined, are not written.
     */
    internalBinaryWrite(message: JobRequest, writer: IBinaryWriter, options: BinaryWriteOptions): IBinaryWriter {
        if (message.jobId !== "") writer.tag(1, WireType.LengthDelimited).string(message.jobId);
        if (message.sourcePath !== "") writer.tag(2, WireType.LengthDelimited).string(message.sourcePath);
        if (message.outputPath !== "") writer.tag(3, WireType.LengthDelimited).string(message.outputPath);
        if (message.presetJson !== "") writer.tag(4, WireType.LengthDelimited).string(message.presetJson);
        if (message.trimStart !== undefined) writer.tag(5, WireType.Bit32).float(message.trimStart);
        if (message.trimEnd !== undefined) writer.tag(6, WireType.Bit32).float(message.trimEnd);
        const onUnknown = options.writeUnknownFields;
        if (onUnknown !== false)
            (onUnknown == true ? UnknownFieldHandler.onWrite : onUnknown)(this.typeName, message, writer);
        return writer;
    }
}
/**
 * Shared descriptor instance; has the same name as the `JobRequest` interface.
 *
 * @generated MessageType for protobuf message mpr.worker.JobRequest
 */
export const JobRequest = new JobRequest$Type();
// @generated message type with reflection information, may provide speed optimized methods
/**
 * Runtime descriptor for `mpr.worker.JobResponse`: the reflection field table
 * plus explicit create / binary read / binary write methods.
 */
class JobResponse$Type extends MessageType<JobResponse> {
    constructor() {
        super("mpr.worker.JobResponse", [
            { no: 1, name: "job_id", kind: "scalar", T: 9 /*ScalarType.STRING*/ },
            { no: 2, name: "accepted", kind: "scalar", T: 8 /*ScalarType.BOOL*/ },
            { no: 3, name: "message", kind: "scalar", T: 9 /*ScalarType.STRING*/ }
        ]);
    }

    /** Builds a message with default values, then overlays `value` when one is given. */
    create(value?: PartialMessage<JobResponse>): JobResponse {
        const fresh = globalThis.Object.create((this.messagePrototype!));
        fresh.jobId = "";
        fresh.accepted = false;
        fresh.message = "";
        if (value !== undefined) {
            reflectionMergePartial<JobResponse>(this, fresh, value);
        }
        return fresh;
    }

    /** Decodes `length` bytes from `reader` into `target`, or into a new message. */
    internalBinaryRead(reader: IBinaryReader, length: number, options: BinaryReadOptions, target?: JobResponse): JobResponse {
        const message = target ?? this.create();
        const end = reader.pos + length;
        while (reader.pos < end) {
            const [fieldNo, wireType] = reader.tag();
            switch (fieldNo) {
                case 1: message.jobId = reader.string(); break;
                case 2: message.accepted = reader.bool(); break;
                case 3: message.message = reader.string(); break;
                default: {
                    // Unknown field: throw, drop, or hand to a handler, per options.
                    const onUnknown = options.readUnknownField;
                    if (onUnknown === "throw")
                        throw new globalThis.Error(`Unknown field ${fieldNo} (wire type ${wireType}) for ${this.typeName}`);
                    const raw = reader.skip(wireType);
                    if (onUnknown !== false)
                        (onUnknown === true ? UnknownFieldHandler.onRead : onUnknown)(this.typeName, message, fieldNo, wireType, raw);
                }
            }
        }
        return message;
    }

    /** Encodes `message` in ascending field-number order, skipping default-valued fields. */
    internalBinaryWrite(message: JobResponse, writer: IBinaryWriter, options: BinaryWriteOptions): IBinaryWriter {
        if (message.jobId !== "") writer.tag(1, WireType.LengthDelimited).string(message.jobId);
        if (message.accepted !== false) writer.tag(2, WireType.Varint).bool(message.accepted);
        if (message.message !== "") writer.tag(3, WireType.LengthDelimited).string(message.message);
        const onUnknown = options.writeUnknownFields;
        if (onUnknown !== false)
            (onUnknown == true ? UnknownFieldHandler.onWrite : onUnknown)(this.typeName, message, writer);
        return writer;
    }
}
/**
 * Shared descriptor instance; has the same name as the `JobResponse` interface.
 *
 * @generated MessageType for protobuf message mpr.worker.JobResponse
 */
export const JobResponse = new JobResponse$Type();
// @generated message type with reflection information, may provide speed optimized methods
/**
 * Runtime descriptor for `mpr.worker.ProgressRequest`: the reflection field
 * table plus explicit create / binary read / binary write methods.
 */
class ProgressRequest$Type extends MessageType<ProgressRequest> {
    constructor() {
        super("mpr.worker.ProgressRequest", [
            { no: 1, name: "job_id", kind: "scalar", T: 9 /*ScalarType.STRING*/ }
        ]);
    }

    /** Builds a message with default values, then overlays `value` when one is given. */
    create(value?: PartialMessage<ProgressRequest>): ProgressRequest {
        const fresh = globalThis.Object.create((this.messagePrototype!));
        fresh.jobId = "";
        if (value !== undefined) {
            reflectionMergePartial<ProgressRequest>(this, fresh, value);
        }
        return fresh;
    }

    /** Decodes `length` bytes from `reader` into `target`, or into a new message. */
    internalBinaryRead(reader: IBinaryReader, length: number, options: BinaryReadOptions, target?: ProgressRequest): ProgressRequest {
        const message = target ?? this.create();
        const end = reader.pos + length;
        while (reader.pos < end) {
            const [fieldNo, wireType] = reader.tag();
            switch (fieldNo) {
                case 1: message.jobId = reader.string(); break;
                default: {
                    // Unknown field: throw, drop, or hand to a handler, per options.
                    const onUnknown = options.readUnknownField;
                    if (onUnknown === "throw")
                        throw new globalThis.Error(`Unknown field ${fieldNo} (wire type ${wireType}) for ${this.typeName}`);
                    const raw = reader.skip(wireType);
                    if (onUnknown !== false)
                        (onUnknown === true ? UnknownFieldHandler.onRead : onUnknown)(this.typeName, message, fieldNo, wireType, raw);
                }
            }
        }
        return message;
    }

    /** Encodes `message`; the job id is skipped when it is the empty string. */
    internalBinaryWrite(message: ProgressRequest, writer: IBinaryWriter, options: BinaryWriteOptions): IBinaryWriter {
        if (message.jobId !== "") writer.tag(1, WireType.LengthDelimited).string(message.jobId);
        const onUnknown = options.writeUnknownFields;
        if (onUnknown !== false)
            (onUnknown == true ? UnknownFieldHandler.onWrite : onUnknown)(this.typeName, message, writer);
        return writer;
    }
}
/**
 * Shared descriptor instance; has the same name as the `ProgressRequest` interface.
 *
 * @generated MessageType for protobuf message mpr.worker.ProgressRequest
 */
export const ProgressRequest = new ProgressRequest$Type();
// @generated message type with reflection information, may provide speed optimized methods
/**
 * Runtime descriptor for `mpr.worker.ProgressUpdate`: the reflection field
 * table plus explicit create / binary read / binary write methods.
 */
class ProgressUpdate$Type extends MessageType<ProgressUpdate> {
    constructor() {
        super("mpr.worker.ProgressUpdate", [
            { no: 1, name: "job_id", kind: "scalar", T: 9 /*ScalarType.STRING*/ },
            { no: 2, name: "progress", kind: "scalar", T: 5 /*ScalarType.INT32*/ },
            { no: 3, name: "current_frame", kind: "scalar", T: 5 /*ScalarType.INT32*/ },
            { no: 4, name: "current_time", kind: "scalar", T: 2 /*ScalarType.FLOAT*/ },
            { no: 5, name: "speed", kind: "scalar", T: 2 /*ScalarType.FLOAT*/ },
            { no: 6, name: "status", kind: "scalar", T: 9 /*ScalarType.STRING*/ },
            { no: 7, name: "error", kind: "scalar", opt: true, T: 9 /*ScalarType.STRING*/ }
        ]);
    }

    /** Builds a message with default values, then overlays `value` when one is given. */
    create(value?: PartialMessage<ProgressUpdate>): ProgressUpdate {
        const fresh = globalThis.Object.create((this.messagePrototype!));
        fresh.jobId = "";
        fresh.progress = 0;
        fresh.currentFrame = 0;
        fresh.currentTime = 0;
        fresh.speed = 0;
        fresh.status = "";
        // `error` is optional and intentionally left unset.
        if (value !== undefined) {
            reflectionMergePartial<ProgressUpdate>(this, fresh, value);
        }
        return fresh;
    }

    /** Decodes `length` bytes from `reader` into `target`, or into a new message. */
    internalBinaryRead(reader: IBinaryReader, length: number, options: BinaryReadOptions, target?: ProgressUpdate): ProgressUpdate {
        const message = target ?? this.create();
        const end = reader.pos + length;
        while (reader.pos < end) {
            const [fieldNo, wireType] = reader.tag();
            switch (fieldNo) {
                case 1: message.jobId = reader.string(); break;
                case 2: message.progress = reader.int32(); break;
                case 3: message.currentFrame = reader.int32(); break;
                case 4: message.currentTime = reader.float(); break;
                case 5: message.speed = reader.float(); break;
                case 6: message.status = reader.string(); break;
                case 7: message.error = reader.string(); break;
                default: {
                    // Unknown field: throw, drop, or hand to a handler, per options.
                    const onUnknown = options.readUnknownField;
                    if (onUnknown === "throw")
                        throw new globalThis.Error(`Unknown field ${fieldNo} (wire type ${wireType}) for ${this.typeName}`);
                    const raw = reader.skip(wireType);
                    if (onUnknown !== false)
                        (onUnknown === true ? UnknownFieldHandler.onRead : onUnknown)(this.typeName, message, fieldNo, wireType, raw);
                }
            }
        }
        return message;
    }

    /**
     * Encodes `message` in ascending field-number order. Fields equal to their
     * default, and an undefined `error`, are not written.
     */
    internalBinaryWrite(message: ProgressUpdate, writer: IBinaryWriter, options: BinaryWriteOptions): IBinaryWriter {
        if (message.jobId !== "") writer.tag(1, WireType.LengthDelimited).string(message.jobId);
        if (message.progress !== 0) writer.tag(2, WireType.Varint).int32(message.progress);
        if (message.currentFrame !== 0) writer.tag(3, WireType.Varint).int32(message.currentFrame);
        if (message.currentTime !== 0) writer.tag(4, WireType.Bit32).float(message.currentTime);
        if (message.speed !== 0) writer.tag(5, WireType.Bit32).float(message.speed);
        if (message.status !== "") writer.tag(6, WireType.LengthDelimited).string(message.status);
        if (message.error !== undefined) writer.tag(7, WireType.LengthDelimited).string(message.error);
        const onUnknown = options.writeUnknownFields;
        if (onUnknown !== false)
            (onUnknown == true ? UnknownFieldHandler.onWrite : onUnknown)(this.typeName, message, writer);
        return writer;
    }
}
/**
 * Shared descriptor instance; has the same name as the `ProgressUpdate` interface.
 *
 * @generated MessageType for protobuf message mpr.worker.ProgressUpdate
 */
export const ProgressUpdate = new ProgressUpdate$Type();
// @generated message type with reflection information, may provide speed optimized methods
/**
 * Runtime descriptor for `mpr.worker.CancelRequest`: the reflection field
 * table plus explicit create / binary read / binary write methods.
 */
class CancelRequest$Type extends MessageType<CancelRequest> {
    constructor() {
        super("mpr.worker.CancelRequest", [
            { no: 1, name: "job_id", kind: "scalar", T: 9 /*ScalarType.STRING*/ }
        ]);
    }

    /** Builds a message with default values, then overlays `value` when one is given. */
    create(value?: PartialMessage<CancelRequest>): CancelRequest {
        const fresh = globalThis.Object.create((this.messagePrototype!));
        fresh.jobId = "";
        if (value !== undefined) {
            reflectionMergePartial<CancelRequest>(this, fresh, value);
        }
        return fresh;
    }

    /** Decodes `length` bytes from `reader` into `target`, or into a new message. */
    internalBinaryRead(reader: IBinaryReader, length: number, options: BinaryReadOptions, target?: CancelRequest): CancelRequest {
        const message = target ?? this.create();
        const end = reader.pos + length;
        while (reader.pos < end) {
            const [fieldNo, wireType] = reader.tag();
            switch (fieldNo) {
                case 1: message.jobId = reader.string(); break;
                default: {
                    // Unknown field: throw, drop, or hand to a handler, per options.
                    const onUnknown = options.readUnknownField;
                    if (onUnknown === "throw")
                        throw new globalThis.Error(`Unknown field ${fieldNo} (wire type ${wireType}) for ${this.typeName}`);
                    const raw = reader.skip(wireType);
                    if (onUnknown !== false)
                        (onUnknown === true ? UnknownFieldHandler.onRead : onUnknown)(this.typeName, message, fieldNo, wireType, raw);
                }
            }
        }
        return message;
    }

    /** Encodes `message`; the job id is skipped when it is the empty string. */
    internalBinaryWrite(message: CancelRequest, writer: IBinaryWriter, options: BinaryWriteOptions): IBinaryWriter {
        if (message.jobId !== "") writer.tag(1, WireType.LengthDelimited).string(message.jobId);
        const onUnknown = options.writeUnknownFields;
        if (onUnknown !== false)
            (onUnknown == true ? UnknownFieldHandler.onWrite : onUnknown)(this.typeName, message, writer);
        return writer;
    }
}
/**
 * Shared descriptor instance; has the same name as the `CancelRequest` interface.
 *
 * @generated MessageType for protobuf message mpr.worker.CancelRequest
 */
export const CancelRequest = new CancelRequest$Type();
// @generated message type with reflection information, may provide speed optimized methods
/**
 * Runtime descriptor for `mpr.worker.CancelResponse`: the reflection field
 * table plus explicit create / binary read / binary write methods.
 */
class CancelResponse$Type extends MessageType<CancelResponse> {
    constructor() {
        super("mpr.worker.CancelResponse", [
            { no: 1, name: "job_id", kind: "scalar", T: 9 /*ScalarType.STRING*/ },
            { no: 2, name: "cancelled", kind: "scalar", T: 8 /*ScalarType.BOOL*/ },
            { no: 3, name: "message", kind: "scalar", T: 9 /*ScalarType.STRING*/ }
        ]);
    }

    /** Builds a message with default values, then overlays `value` when one is given. */
    create(value?: PartialMessage<CancelResponse>): CancelResponse {
        const fresh = globalThis.Object.create((this.messagePrototype!));
        fresh.jobId = "";
        fresh.cancelled = false;
        fresh.message = "";
        if (value !== undefined) {
            reflectionMergePartial<CancelResponse>(this, fresh, value);
        }
        return fresh;
    }

    /** Decodes `length` bytes from `reader` into `target`, or into a new message. */
    internalBinaryRead(reader: IBinaryReader, length: number, options: BinaryReadOptions, target?: CancelResponse): CancelResponse {
        const message = target ?? this.create();
        const end = reader.pos + length;
        while (reader.pos < end) {
            const [fieldNo, wireType] = reader.tag();
            switch (fieldNo) {
                case 1: message.jobId = reader.string(); break;
                case 2: message.cancelled = reader.bool(); break;
                case 3: message.message = reader.string(); break;
                default: {
                    // Unknown field: throw, drop, or hand to a handler, per options.
                    const onUnknown = options.readUnknownField;
                    if (onUnknown === "throw")
                        throw new globalThis.Error(`Unknown field ${fieldNo} (wire type ${wireType}) for ${this.typeName}`);
                    const raw = reader.skip(wireType);
                    if (onUnknown !== false)
                        (onUnknown === true ? UnknownFieldHandler.onRead : onUnknown)(this.typeName, message, fieldNo, wireType, raw);
                }
            }
        }
        return message;
    }

    /** Encodes `message` in ascending field-number order, skipping default-valued fields. */
    internalBinaryWrite(message: CancelResponse, writer: IBinaryWriter, options: BinaryWriteOptions): IBinaryWriter {
        if (message.jobId !== "") writer.tag(1, WireType.LengthDelimited).string(message.jobId);
        if (message.cancelled !== false) writer.tag(2, WireType.Varint).bool(message.cancelled);
        if (message.message !== "") writer.tag(3, WireType.LengthDelimited).string(message.message);
        const onUnknown = options.writeUnknownFields;
        if (onUnknown !== false)
            (onUnknown == true ? UnknownFieldHandler.onWrite : onUnknown)(this.typeName, message, writer);
        return writer;
    }
}
/**
 * Shared descriptor instance; has the same name as the `CancelResponse` interface.
 *
 * @generated MessageType for protobuf message mpr.worker.CancelResponse
 */
export const CancelResponse = new CancelResponse$Type();
// @generated message type with reflection information, may provide speed optimized methods
/**
 * Runtime descriptor for `mpr.worker.WorkerStatus`: the reflection field
 * table plus explicit create / binary read / binary write methods.
 */
class WorkerStatus$Type extends MessageType<WorkerStatus> {
    constructor() {
        super("mpr.worker.WorkerStatus", [
            { no: 1, name: "available", kind: "scalar", T: 8 /*ScalarType.BOOL*/ },
            { no: 2, name: "active_jobs", kind: "scalar", T: 5 /*ScalarType.INT32*/ },
            { no: 3, name: "supported_codecs", kind: "scalar", repeat: 2 /*RepeatType.UNPACKED*/, T: 9 /*ScalarType.STRING*/ },
            { no: 4, name: "gpu_available", kind: "scalar", T: 8 /*ScalarType.BOOL*/ }
        ]);
    }

    /** Builds a message with default values, then overlays `value` when one is given. */
    create(value?: PartialMessage<WorkerStatus>): WorkerStatus {
        const fresh = globalThis.Object.create((this.messagePrototype!));
        fresh.available = false;
        fresh.activeJobs = 0;
        fresh.supportedCodecs = [];
        fresh.gpuAvailable = false;
        if (value !== undefined) {
            reflectionMergePartial<WorkerStatus>(this, fresh, value);
        }
        return fresh;
    }

    /**
     * Decodes `length` bytes from `reader` into `target`, or into a new message.
     * Each occurrence of field 3 appends one entry to `supportedCodecs`.
     */
    internalBinaryRead(reader: IBinaryReader, length: number, options: BinaryReadOptions, target?: WorkerStatus): WorkerStatus {
        const message = target ?? this.create();
        const end = reader.pos + length;
        while (reader.pos < end) {
            const [fieldNo, wireType] = reader.tag();
            switch (fieldNo) {
                case 1: message.available = reader.bool(); break;
                case 2: message.activeJobs = reader.int32(); break;
                case 3: message.supportedCodecs.push(reader.string()); break;
                case 4: message.gpuAvailable = reader.bool(); break;
                default: {
                    // Unknown field: throw, drop, or hand to a handler, per options.
                    const onUnknown = options.readUnknownField;
                    if (onUnknown === "throw")
                        throw new globalThis.Error(`Unknown field ${fieldNo} (wire type ${wireType}) for ${this.typeName}`);
                    const raw = reader.skip(wireType);
                    if (onUnknown !== false)
                        (onUnknown === true ? UnknownFieldHandler.onRead : onUnknown)(this.typeName, message, fieldNo, wireType, raw);
                }
            }
        }
        return message;
    }

    /**
     * Encodes `message` in ascending field-number order, skipping default-valued
     * scalars. Every codec entry is written as its own tagged string.
     */
    internalBinaryWrite(message: WorkerStatus, writer: IBinaryWriter, options: BinaryWriteOptions): IBinaryWriter {
        if (message.available !== false) writer.tag(1, WireType.Varint).bool(message.available);
        if (message.activeJobs !== 0) writer.tag(2, WireType.Varint).int32(message.activeJobs);
        for (const codec of message.supportedCodecs) {
            writer.tag(3, WireType.LengthDelimited).string(codec);
        }
        if (message.gpuAvailable !== false) writer.tag(4, WireType.Varint).bool(message.gpuAvailable);
        const onUnknown = options.writeUnknownFields;
        if (onUnknown !== false)
            (onUnknown == true ? UnknownFieldHandler.onWrite : onUnknown)(this.typeName, message, writer);
        return writer;
    }
}
/**
 * Shared descriptor instance; has the same name as the `WorkerStatus` interface.
 *
 * @generated MessageType for protobuf message mpr.worker.WorkerStatus
 */
export const WorkerStatus = new WorkerStatus$Type();
// @generated message type with reflection information, may provide speed optimized methods
/**
 * Runtime descriptor for `mpr.worker.Empty`, a message with no declared
 * fields. Reading and writing therefore only ever deal with unknown fields.
 */
class Empty$Type extends MessageType<Empty> {
    constructor() {
        super("mpr.worker.Empty", []);
    }

    /** Builds an empty message, then overlays `value` when one is given. */
    create(value?: PartialMessage<Empty>): Empty {
        const fresh = globalThis.Object.create((this.messagePrototype!));
        if (value !== undefined) {
            reflectionMergePartial<Empty>(this, fresh, value);
        }
        return fresh;
    }

    /** Consumes `length` bytes from `reader`, treating every tag as an unknown field. */
    internalBinaryRead(reader: IBinaryReader, length: number, options: BinaryReadOptions, target?: Empty): Empty {
        const message = target ?? this.create();
        const end = reader.pos + length;
        while (reader.pos < end) {
            const [fieldNo, wireType] = reader.tag();
            // No field numbers are declared, so there is nothing to dispatch on.
            const onUnknown = options.readUnknownField;
            if (onUnknown === "throw")
                throw new globalThis.Error(`Unknown field ${fieldNo} (wire type ${wireType}) for ${this.typeName}`);
            const raw = reader.skip(wireType);
            if (onUnknown !== false)
                (onUnknown === true ? UnknownFieldHandler.onRead : onUnknown)(this.typeName, message, fieldNo, wireType, raw);
        }
        return message;
    }

    /** Writes only the unknown fields, unless `writeUnknownFields` is `false`. */
    internalBinaryWrite(message: Empty, writer: IBinaryWriter, options: BinaryWriteOptions): IBinaryWriter {
        const onUnknown = options.writeUnknownFields;
        if (onUnknown !== false)
            (onUnknown == true ? UnknownFieldHandler.onWrite : onUnknown)(this.typeName, message, writer);
        return writer;
    }
}
/**
 * Shared descriptor instance; has the same name as the `Empty` interface.
 *
 * @generated MessageType for protobuf message mpr.worker.Empty
 */
export const Empty = new Empty$Type();
// @generated message type with reflection information, may provide speed optimized methods
/**
 * Runtime descriptor for `mpr.worker.ChunkStreamRequest`: the reflection
 * field table plus explicit create / binary read / binary write methods.
 */
class ChunkStreamRequest$Type extends MessageType<ChunkStreamRequest> {
    constructor() {
        super("mpr.worker.ChunkStreamRequest", [
            { no: 1, name: "job_id", kind: "scalar", T: 9 /*ScalarType.STRING*/ }
        ]);
    }

    /** Builds a message with default values, then overlays `value` when one is given. */
    create(value?: PartialMessage<ChunkStreamRequest>): ChunkStreamRequest {
        const fresh = globalThis.Object.create((this.messagePrototype!));
        fresh.jobId = "";
        if (value !== undefined) {
            reflectionMergePartial<ChunkStreamRequest>(this, fresh, value);
        }
        return fresh;
    }

    /** Decodes `length` bytes from `reader` into `target`, or into a new message. */
    internalBinaryRead(reader: IBinaryReader, length: number, options: BinaryReadOptions, target?: ChunkStreamRequest): ChunkStreamRequest {
        const message = target ?? this.create();
        const end = reader.pos + length;
        while (reader.pos < end) {
            const [fieldNo, wireType] = reader.tag();
            switch (fieldNo) {
                case 1: message.jobId = reader.string(); break;
                default: {
                    // Unknown field: throw, drop, or hand to a handler, per options.
                    const onUnknown = options.readUnknownField;
                    if (onUnknown === "throw")
                        throw new globalThis.Error(`Unknown field ${fieldNo} (wire type ${wireType}) for ${this.typeName}`);
                    const raw = reader.skip(wireType);
                    if (onUnknown !== false)
                        (onUnknown === true ? UnknownFieldHandler.onRead : onUnknown)(this.typeName, message, fieldNo, wireType, raw);
                }
            }
        }
        return message;
    }

    /** Encodes `message`; the job id is skipped when it is the empty string. */
    internalBinaryWrite(message: ChunkStreamRequest, writer: IBinaryWriter, options: BinaryWriteOptions): IBinaryWriter {
        if (message.jobId !== "") writer.tag(1, WireType.LengthDelimited).string(message.jobId);
        const onUnknown = options.writeUnknownFields;
        if (onUnknown !== false)
            (onUnknown == true ? UnknownFieldHandler.onWrite : onUnknown)(this.typeName, message, writer);
        return writer;
    }
}
/**
 * Shared descriptor instance; has the same name as the `ChunkStreamRequest` interface.
 *
 * @generated MessageType for protobuf message mpr.worker.ChunkStreamRequest
 */
export const ChunkStreamRequest = new ChunkStreamRequest$Type();
// @generated message type with reflection information, may provide speed optimized methods
/**
 * Runtime descriptor for `mpr.worker.ChunkPipelineEvent`: the reflection
 * field table plus explicit create / binary read / binary write methods.
 */
class ChunkPipelineEvent$Type extends MessageType<ChunkPipelineEvent> {
    constructor() {
        super("mpr.worker.ChunkPipelineEvent", [
            { no: 1, name: "job_id", kind: "scalar", T: 9 /*ScalarType.STRING*/ },
            { no: 2, name: "event_type", kind: "scalar", T: 9 /*ScalarType.STRING*/ },
            { no: 3, name: "sequence", kind: "scalar", T: 5 /*ScalarType.INT32*/ },
            { no: 4, name: "worker_id", kind: "scalar", T: 9 /*ScalarType.STRING*/ },
            { no: 5, name: "state", kind: "scalar", T: 9 /*ScalarType.STRING*/ },
            { no: 6, name: "queue_size", kind: "scalar", T: 5 /*ScalarType.INT32*/ },
            { no: 7, name: "elapsed", kind: "scalar", T: 2 /*ScalarType.FLOAT*/ },
            { no: 8, name: "throughput_mbps", kind: "scalar", T: 2 /*ScalarType.FLOAT*/ },
            { no: 9, name: "total_chunks", kind: "scalar", T: 5 /*ScalarType.INT32*/ },
            { no: 10, name: "processed_chunks", kind: "scalar", T: 5 /*ScalarType.INT32*/ },
            { no: 11, name: "failed_chunks", kind: "scalar", T: 5 /*ScalarType.INT32*/ },
            { no: 12, name: "error", kind: "scalar", T: 9 /*ScalarType.STRING*/ },
            { no: 13, name: "processing_time", kind: "scalar", T: 2 /*ScalarType.FLOAT*/ },
            { no: 14, name: "retries", kind: "scalar", T: 5 /*ScalarType.INT32*/ }
        ]);
    }

    /** Builds a message with default values, then overlays `value` when one is given. */
    create(value?: PartialMessage<ChunkPipelineEvent>): ChunkPipelineEvent {
        const fresh = globalThis.Object.create((this.messagePrototype!));
        fresh.jobId = "";
        fresh.eventType = "";
        fresh.sequence = 0;
        fresh.workerId = "";
        fresh.state = "";
        fresh.queueSize = 0;
        fresh.elapsed = 0;
        fresh.throughputMbps = 0;
        fresh.totalChunks = 0;
        fresh.processedChunks = 0;
        fresh.failedChunks = 0;
        fresh.error = "";
        fresh.processingTime = 0;
        fresh.retries = 0;
        if (value !== undefined) {
            reflectionMergePartial<ChunkPipelineEvent>(this, fresh, value);
        }
        return fresh;
    }

    /** Decodes `length` bytes from `reader` into `target`, or into a new message. */
    internalBinaryRead(reader: IBinaryReader, length: number, options: BinaryReadOptions, target?: ChunkPipelineEvent): ChunkPipelineEvent {
        const message = target ?? this.create();
        const end = reader.pos + length;
        while (reader.pos < end) {
            const [fieldNo, wireType] = reader.tag();
            switch (fieldNo) {
                case 1: message.jobId = reader.string(); break;
                case 2: message.eventType = reader.string(); break;
                case 3: message.sequence = reader.int32(); break;
                case 4: message.workerId = reader.string(); break;
                case 5: message.state = reader.string(); break;
                case 6: message.queueSize = reader.int32(); break;
                case 7: message.elapsed = reader.float(); break;
                case 8: message.throughputMbps = reader.float(); break;
                case 9: message.totalChunks = reader.int32(); break;
                case 10: message.processedChunks = reader.int32(); break;
                case 11: message.failedChunks = reader.int32(); break;
                case 12: message.error = reader.string(); break;
                case 13: message.processingTime = reader.float(); break;
                case 14: message.retries = reader.int32(); break;
                default: {
                    // Unknown field: throw, drop, or hand to a handler, per options.
                    const onUnknown = options.readUnknownField;
                    if (onUnknown === "throw")
                        throw new globalThis.Error(`Unknown field ${fieldNo} (wire type ${wireType}) for ${this.typeName}`);
                    const raw = reader.skip(wireType);
                    if (onUnknown !== false)
                        (onUnknown === true ? UnknownFieldHandler.onRead : onUnknown)(this.typeName, message, fieldNo, wireType, raw);
                }
            }
        }
        return message;
    }

    /**
     * Encodes `message` in ascending field-number order. A field equal to its
     * default ("" or 0) is not written.
     */
    internalBinaryWrite(message: ChunkPipelineEvent, writer: IBinaryWriter, options: BinaryWriteOptions): IBinaryWriter {
        if (message.jobId !== "") writer.tag(1, WireType.LengthDelimited).string(message.jobId);
        if (message.eventType !== "") writer.tag(2, WireType.LengthDelimited).string(message.eventType);
        if (message.sequence !== 0) writer.tag(3, WireType.Varint).int32(message.sequence);
        if (message.workerId !== "") writer.tag(4, WireType.LengthDelimited).string(message.workerId);
        if (message.state !== "") writer.tag(5, WireType.LengthDelimited).string(message.state);
        if (message.queueSize !== 0) writer.tag(6, WireType.Varint).int32(message.queueSize);
        if (message.elapsed !== 0) writer.tag(7, WireType.Bit32).float(message.elapsed);
        if (message.throughputMbps !== 0) writer.tag(8, WireType.Bit32).float(message.throughputMbps);
        if (message.totalChunks !== 0) writer.tag(9, WireType.Varint).int32(message.totalChunks);
        if (message.processedChunks !== 0) writer.tag(10, WireType.Varint).int32(message.processedChunks);
        if (message.failedChunks !== 0) writer.tag(11, WireType.Varint).int32(message.failedChunks);
        if (message.error !== "") writer.tag(12, WireType.LengthDelimited).string(message.error);
        if (message.processingTime !== 0) writer.tag(13, WireType.Bit32).float(message.processingTime);
        if (message.retries !== 0) writer.tag(14, WireType.Varint).int32(message.retries);
        const onUnknown = options.writeUnknownFields;
        if (onUnknown !== false)
            (onUnknown == true ? UnknownFieldHandler.onWrite : onUnknown)(this.typeName, message, writer);
        return writer;
    }
}
/**
 * Shared descriptor instance; has the same name as the `ChunkPipelineEvent` interface.
 *
 * @generated MessageType for protobuf message mpr.worker.ChunkPipelineEvent
 */
export const ChunkPipelineEvent = new ChunkPipelineEvent$Type();
/**
* Descriptor table for the `mpr.worker.WorkerService` service: one entry per
* rpc, giving its name, request type (`I`), response type (`O`) and whether
* the server streams its responses.
*
* NOTE(review): entry order appears to be significant — the generated client
* presumably resolves rpcs by position in this list (it reads `this.methods[4]`
* for StreamChunkPipeline, the fifth entry here). Regenerate rather than
* reordering by hand.
*
* @generated ServiceType for protobuf service mpr.worker.WorkerService
*/
export const WorkerService = new ServiceType("mpr.worker.WorkerService", [
{ name: "SubmitJob", options: {}, I: JobRequest, O: JobResponse },
{ name: "StreamProgress", serverStreaming: true, options: {}, I: ProgressRequest, O: ProgressUpdate },
{ name: "CancelJob", options: {}, I: CancelRequest, O: CancelResponse },
{ name: "GetWorkerStatus", options: {}, I: Empty, O: WorkerStatus },
{ name: "StreamChunkPipeline", serverStreaming: true, options: {}, I: ChunkStreamRequest, O: ChunkPipelineEvent }
]);

42
ui/common/api/media.ts Normal file
View File

@@ -0,0 +1,42 @@
/**
* Shared media API functions — identical across all MPR UI apps.
*/
import type { MediaAsset } from "../types/generated";
import { gql } from "./graphql";
/**
 * Fetch all media assets.
 *
 * Sends a single GraphQL `assets` query through `gql` and resolves to the
 * `assets` array of the response.
 */
export async function getAssets(): Promise<MediaAsset[]> {
    // The query text is kept flush-left so the string sent is unchanged.
    const { assets } = await gql<{ assets: MediaAsset[] }>(`
query {
assets {
id filename file_path status error_message file_size duration
video_codec audio_codec width height framerate bitrate
properties comments tags created_at updated_at
}
}
`);
    return assets;
}
/**
 * Scan media/in/ folder for new files.
 *
 * Runs the `scan_media_folder` GraphQL mutation through `gql` and resolves
 * to its payload: the `found`, `registered` and `skipped` counts plus the
 * `files` list.
 */
export async function scanMediaFolder(): Promise<{
    found: number;
    registered: number;
    skipped: number;
    files: string[];
}> {
    // The mutation text is kept flush-left so the string sent is unchanged.
    const response = await gql<{
        scan_media_folder: {
            found: number;
            registered: number;
            skipped: number;
            files: string[];
        };
    }>(`
mutation {
scan_media_folder { found registered skipped files }
}
`);
    const { scan_media_folder: summary } = response;
    return summary;
}

View File

@@ -0,0 +1,97 @@
/* Root wrapper: leaves a gap below the whole widget. */
.file-manager {
margin-bottom: 1rem;
}
/* Title row: heading pushed left, optional scan button pushed right. */
.fm-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 0.5rem;
}
/* Small uppercase section heading. */
.fm-header h2 {
font-size: 0.85rem;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.05em;
color: var(--text-secondary);
}
/* Compact, low-emphasis scan button. */
.fm-scan-btn {
padding: 0.25rem 0.6rem;
background: var(--bg-input);
color: var(--text-secondary);
font-size: var(--font-size-xs);
}
/* Hover feedback only while the button is enabled. */
.fm-scan-btn:hover:not(:disabled) {
color: var(--text-primary);
background: var(--border-light);
}
/* Bordered list box; grows to 200px, then scrolls vertically. */
.fm-list {
list-style: none;
max-height: 200px;
overflow-y: auto;
border: 1px solid var(--border);
border-radius: var(--radius-sm);
background: var(--bg-primary);
}
/* Placeholder row shown when there are no files. */
.fm-empty {
padding: 1rem;
text-align: center;
color: var(--text-muted);
font-size: var(--font-size-sm);
}
/* One file row: info on the left, actions on the right, separator below. */
.fm-item {
display: flex;
justify-content: space-between;
align-items: center;
padding: 0.4rem 0.6rem;
border-bottom: 1px solid var(--border);
transition: background 0.1s;
}
/* The list already has its own border, so the final row drops its separator. */
.fm-item:last-child {
border-bottom: none;
}
/* Applied to rows only when selection is enabled. */
.fm-clickable {
cursor: pointer;
}
.fm-clickable:hover {
background: var(--bg-input);
}
/*
 * Highlight for the selected row. `.fm-clickable:hover` (class + pseudo-class)
 * is more specific than `.fm-selected`, so `!important` is what keeps the
 * accent background when a selected row is hovered.
 */
.fm-selected {
background: var(--accent) !important;
color: #fff;
}
/* Secondary text stays readable on the accent background. */
.fm-selected .fm-meta {
color: rgba(255, 255, 255, 0.7);
}
/*
 * Name + meta column. `min-width: 0` lets this flex child shrink below its
 * content width so the filename can be truncated instead of overflowing the row.
 */
.fm-item-info {
display: flex;
flex-direction: column;
gap: 0.15rem;
overflow: hidden;
min-width: 0;
}
/* Single-line filename, truncated with an ellipsis. */
.fm-filename {
font-size: var(--font-size-sm);
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
/* Size / extra info line under the filename. */
.fm-meta {
font-size: var(--font-size-xs);
color: var(--text-muted);
}
/* Per-row actions keep their natural width; the info column yields space. */
.fm-actions {
flex-shrink: 0;
margin-left: 0.5rem;
}

View File

@@ -0,0 +1,84 @@
/**
* FileManager — pluggable file browser for S3/MinIO files.
*
* Handles both input file selection and output file listing.
* Used by timeline (assets + output), chunker (assets + chunk output),
* and future tools.
*/
import type { ReactNode } from "react";
import { formatSize } from "../utils/format";
import "./FileManager.css";
/** One row displayed by FileManager. */
export interface FileEntry {
/** Identifier used as the React list key and compared against `selectedKey`. */
key: string;
/** Text shown as the row label and as the row's hover title. */
name: string;
/** Optional size, formatted with `formatSize` — presumably bytes; confirm with callers. */
size?: number;
/** Optional extra text shown after the size (or on its own when no size is given). */
meta?: string;
}
/** Props accepted by FileManager. */
interface FileManagerProps {
/** Heading text rendered above the list. */
title: string;
/** Rows to display; an empty array shows `emptyMessage` instead. */
files: FileEntry[];
/** Key of the row to highlight as selected, if any. */
selectedKey?: string | null;
/** Called with the clicked entry; when omitted, rows are not clickable. */
onSelect?: (file: FileEntry) => void;
/** When provided, a "Scan Folder" button is rendered that calls it. */
onScan?: () => void;
/** Disables the scan button and switches its label to "Scanning..." (default false). */
scanning?: boolean;
/** Text shown when `files` is empty (default "No files"). */
emptyMessage?: string;
/** Renders extra content at the right-hand side of each row. */
renderActions?: (file: FileEntry) => ReactNode;
/** Disables both the scan button and row selection (default false). */
disabled?: boolean;
}
/**
 * FileManager — titled file list with optional selection, an optional
 * "Scan Folder" button and optional per-row actions.
 *
 * Rows are selectable only when `onSelect` is provided and `disabled` is
 * false. Selectable rows can be focused with Tab and activated with Enter or
 * Space, in addition to mouse clicks. The selected row is marked with
 * `aria-current` so the state is not conveyed by colour alone.
 *
 * @param title         Heading text.
 * @param files         Rows to render; an empty array shows `emptyMessage`.
 * @param selectedKey   Key of the currently selected row, if any.
 * @param onSelect      Called with the activated entry.
 * @param onScan        When set, renders the scan button and is called on click.
 * @param scanning      Disables the scan button and shows "Scanning...".
 * @param emptyMessage  Placeholder text for an empty list.
 * @param renderActions Renders extra content on the right of each row.
 * @param disabled      Disables the scan button and row selection.
 */
export function FileManager({
  title,
  files,
  selectedKey,
  onSelect,
  onScan,
  scanning = false,
  emptyMessage = "No files",
  renderActions,
  disabled = false,
}: FileManagerProps) {
  // Rows are interactive only when a handler exists and the list is enabled.
  const selectable = Boolean(onSelect) && !disabled;

  return (
    <div className="file-manager">
      <div className="fm-header">
        <h2>{title}</h2>
        {onScan && (
          <button
            // Explicit type so the button never submits an enclosing form.
            type="button"
            className="fm-scan-btn"
            onClick={onScan}
            disabled={scanning || disabled}
          >
            {scanning ? "Scanning..." : "Scan Folder"}
          </button>
        )}
      </div>
      <ul className="fm-list">
        {files.length === 0 ? (
          <li className="fm-empty">{emptyMessage}</li>
        ) : (
          files.map((file) => {
            const selected = selectedKey === file.key;
            // Join only the classes that apply, avoiding stray whitespace.
            const rowClass = [
              "fm-item",
              selected && "fm-selected",
              selectable && "fm-clickable",
            ]
              .filter(Boolean)
              .join(" ");
            const activate = () => {
              if (onSelect && !disabled) onSelect(file);
            };

            return (
              <li
                key={file.key}
                className={rowClass}
                title={file.name}
                tabIndex={selectable ? 0 : undefined}
                aria-current={selected ? "true" : undefined}
                onClick={activate}
                onKeyDown={(event) => {
                  // Ignore keys bubbling up from controls rendered inside the row.
                  if (event.target !== event.currentTarget) return;
                  if (event.key === "Enter" || event.key === " ") {
                    // Space would otherwise scroll the list.
                    event.preventDefault();
                    activate();
                  }
                }}
              >
                <div className="fm-item-info">
                  <span className="fm-filename">{file.name}</span>
                  <span className="fm-meta">
                    {file.size != null && formatSize(file.size)}
                    {file.meta &&
                      (file.size != null ? ` · ${file.meta}` : file.meta)}
                  </span>
                </div>
                {renderActions && (
                  <div className="fm-actions">{renderActions(file)}</div>
                )}
              </li>
            );
          })
        )}
      </ul>
    </div>
  );
}

View File

@@ -0,0 +1,33 @@
/**
* StatusDot — small colored indicator for connection/state.
*/
/**
 * Maps a state name to the CSS colour (a theme custom property) used for the
 * dot. States not listed here fall back to var(--text-muted) in StatusDot.
 */
const STATE_COLORS: Record<string, string> = {
connected: "var(--success)",
idle: "var(--text-muted)",
processing: "var(--processing)",
stopped: "var(--text-muted)",
error: "var(--error)",
done: "var(--success)",
};
/** Props accepted by StatusDot. */
interface StatusDotProps {
/** State name looked up in STATE_COLORS; unknown names use the muted colour. */
state: string;
/** When true, adds a soft box-shadow in the dot's colour (default false). */
glow?: boolean;
}
/**
 * Render an 8×8 circular indicator tinted according to `state`.
 *
 * The colour comes from STATE_COLORS, falling back to var(--text-muted) when
 * the state has no entry. With `glow` set, a 6px shadow of the same colour is
 * drawn around the dot.
 */
export function StatusDot({ state, glow = false }: StatusDotProps) {
  const tint = STATE_COLORS[state] || "var(--text-muted)";
  const halo = glow ? `0 0 6px ${tint}` : undefined;

  return (
    <span
      style={{
        display: "inline-block",
        width: 8,
        height: 8,
        borderRadius: "50%",
        background: tint,
        boxShadow: halo,
      }}
    />
  );
}

109
ui/common/styles/theme.css Normal file
View File

@@ -0,0 +1,109 @@
/**
* MPR Shared Theme — CSS custom properties + base styles.
* Import from any UI app: @import "../../common/styles/theme.css";
*/
:root {
/* Surfaces */
--bg-primary: #0f0f0f;
--bg-panel: #1a1a1a;
--bg-surface: #141414;
--bg-input: #2a2a2a;
/* Borders (--border currently has the same value as --bg-input) */
--border: #2a2a2a;
--border-light: #333;
/* Text */
--text-primary: #e0e0e0;
--text-secondary: #999;
--text-muted: #666;
/* Accent and status colours (--processing currently equals --accent) */
--accent: #3b82f6;
--success: #10b981;
--warning: #f59e0b;
--error: #ef4444;
--processing: #3b82f6;
/* Corner radii */
--radius: 8px;
--radius-sm: 4px;
/*
 * Typography. NOTE(review): despite its name, --font-mono starts with system
 * UI sans-serif fonts; "Fira Code" / monospace are only later fallbacks.
 * The rem-based sizes resolve against the root element's font size, which
 * this sheet does not set (--font-size is applied to body only).
 */
--font-mono: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto,
"Fira Code", monospace, sans-serif;
--font-size: 14px;
--font-size-sm: 0.8rem;
--font-size-xs: 0.75rem;
}
/* Global reset: border-box sizing and no default margin/padding on any element. */
* {
box-sizing: border-box;
margin: 0;
padding: 0;
}
/* Page defaults, all taken from the theme variables. */
body {
font-family: var(--font-mono);
background: var(--bg-primary);
color: var(--text-primary);
font-size: var(--font-size);
}
/* Scrollbar — thin, track-less styling via the ::-webkit-scrollbar pseudo-elements
   (vendor-prefixed; browsers without support keep their default scrollbars). */
::-webkit-scrollbar {
width: 6px;
}
::-webkit-scrollbar-track {
background: transparent;
}
::-webkit-scrollbar-thumb {
background: var(--border-light);
border-radius: 3px;
}
/* Shared button base — borderless, themed font, opacity transition.
   No background or text colour is set here. */
button {
cursor: pointer;
border: none;
border-radius: var(--radius-sm);
font-family: var(--font-mono);
font-size: var(--font-size-sm);
transition: opacity 0.15s;
}
/* Disabled buttons are dimmed and show the not-allowed cursor. */
button:disabled {
opacity: 0.5;
cursor: not-allowed;
}
/* Shared input base — applies to every input and select element. */
input,
select {
font-family: var(--font-mono);
font-size: var(--font-size-sm);
background: var(--bg-input);
color: var(--text-primary);
border: 1px solid var(--border);
border-radius: var(--radius-sm);
padding: 0.4rem 0.5rem;
}
/* The default focus outline is removed; focus is shown by the accent border instead. */
input:focus,
select:focus {
outline: none;
border-color: var(--accent);
}
/* Panel base — bordered, rounded card on the panel background. */
.panel {
background: var(--bg-panel);
border: 1px solid var(--border);
border-radius: var(--radius);
padding: 1rem;
}
/* Panel title row: heading on the left, controls on the right.
   position: relative makes it the containing block for absolutely positioned children. */
.panel-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 0.75rem;
position: relative;
}
/* Small uppercase panel heading. */
.panel-header h2 {
font-size: 0.85rem;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.05em;
color: var(--text-secondary);
}

View File

@@ -0,0 +1,170 @@
/**
* TypeScript Types - GENERATED FILE
*
* Do not edit directly. Regenerate using modelgen.
*/
/** Allowed values of MediaAsset.status. */
export type AssetStatus = "pending" | "ready" | "error";
/** Allowed values of TranscodeJob.status. */
export type JobStatus = "pending" | "processing" | "completed" | "failed" | "cancelled";
/** Allowed values of ChunkJob.status; same values as the ChunkJobStatus choices on the Django side. */
export type ChunkJobStatus = "pending" | "chunking" | "processing" | "collecting" | "completed" | "failed" | "cancelled";
/**
 * A video/audio file registered in the system.
 * Fields typed `| null` may be absent; units of numeric fields (file_size,
 * duration, bitrate) are not stated in this file — confirm against the backend.
 */
export interface MediaAsset {
id: string;
filename: string;
file_path: string;
status: AssetStatus;
error_message: string | null;
file_size: number | null;
duration: number | null;
video_codec: string | null;
audio_codec: string | null;
width: number | null;
height: number | null;
framerate: number | null;
bitrate: number | null;
properties: Record<string, unknown>;
comments: string;
tags: string[];
created_at: string | null;
updated_at: string | null;
}
/**
 * A named set of transcoding settings (container, video and audio options).
 * Nullable fields mean the option is not set on the preset.
 */
export interface TranscodePreset {
id: string;
name: string;
description: string;
is_builtin: boolean;
container: string;
video_codec: string;
video_bitrate: string | null;
video_crf: number | null;
video_preset: string | null;
resolution: string | null;
framerate: number | null;
audio_codec: string;
audio_bitrate: string | null;
audio_channels: number | null;
audio_samplerate: number | null;
extra_args: string[];
created_at: string | null;
updated_at: string | null;
}
/**
 * A transcode job for one source asset, including its progress and outcome.
 * `source_asset_id`, `preset_id` and `output_asset_id` are ids of other
 * records, carried as plain strings.
 */
export interface TranscodeJob {
id: string;
source_asset_id: string;
preset_id: string | null;
preset_snapshot: Record<string, unknown>;
trim_start: number | null;
trim_end: number | null;
output_filename: string;
output_path: string | null;
output_asset_id: string | null;
status: JobStatus;
progress: number;
current_frame: number | null;
current_time: number | null;
speed: string | null;
error_message: string | null;
celery_task_id: string | null;
execution_arn: string | null;
priority: number;
created_at: string | null;
started_at: string | null;
completed_at: string | null;
}
/**
 * A chunk pipeline job — splits a media file into chunks and processes them.
 *
 * Mirrors the Django ChunkJob model, whose defaults are chunk_duration 10.0,
 * num_workers 4, max_retries 3 and status "pending".
 * NOTE(review): the model declares error_message as a TextField defaulting
 * to '' rather than a nullable field — confirm that null can actually occur.
 */
export interface ChunkJob {
id: string;
source_asset_id: string;
chunk_duration: number;
num_workers: number;
max_retries: number;
processor_type: string;
status: ChunkJobStatus;
progress: number;
total_chunks: number;
processed_chunks: number;
failed_chunks: number;
retry_count: number;
error_message: string | null;
throughput_mbps: number | null;
elapsed_seconds: number | null;
celery_task_id: string | null;
priority: number;
created_at: string | null;
started_at: string | null;
completed_at: string | null;
}
/**
 * Payload for creating a transcode job. Every field is required by the type;
 * optional values are passed as null (and priority as a number).
 */
export interface CreateJobRequest {
source_asset_id: string;
preset_id: string | null;
trim_start: number | null;
trim_end: number | null;
output_filename: string | null;
priority: number;
}
/** Payload for updating an asset's comments and tags; presumably null leaves a field unchanged — confirm against the backend. */
export interface UpdateAssetRequest {
comments: string | null;
tags: string[] | null;
}
/** Backend status report: a status string and a version string. */
export interface SystemStatus {
status: string;
version: string;
}
/** Result of a media-folder scan; same fields as selected by the `scan_media_folder` mutation. */
export interface ScanResult {
found: number;
registered: number;
skipped: number;
files: string[];
}
/** Outcome of a delete operation, reported as a single boolean flag. */
export interface DeleteResult {
ok: boolean;
}
/** Worker status snapshot: availability, active job count, supported codecs and GPU flag. */
export interface WorkerStatus {
available: boolean;
active_jobs: number;
supported_codecs: string[];
gpu_available: boolean;
}
/**
 * Status update for a single chunk, identified by `sequence`.
 * `status` is a free-form string here; units of `size` and `processing_time`
 * are not stated in this file — confirm against the producer.
 */
export interface ChunkEvent {
sequence: number;
status: string;
size: number | null;
worker_id: string | null;
processing_time: number | null;
error: string | null;
retries: number;
}
/**
 * Status update for a single worker, identified by `worker_id`.
 * `current_chunk` is presumably a chunk sequence number (null when the worker
 * holds no chunk) — confirm against the producer.
 */
export interface WorkerEvent {
worker_id: string;
state: string;
current_chunk: number | null;
processed: number;
errors: number;
retries: number;
}
/**
 * Aggregate counters for a chunk pipeline run.
 * `elapsed` is presumably seconds and `throughput_mbps` megabits or megabytes
 * per second — units are not stated in this file; confirm against the producer.
 */
export interface PipelineStats {
total_chunks: number;
processed: number;
failed: number;
retries: number;
elapsed: number;
throughput_mbps: number;
queue_size: number;
}
/** One chunk output file: its key, its size and a URL for it (presumably an object-storage key and a download URL — confirm). */
export interface ChunkOutputFile {
key: string;
size: number;
url: string;
}

21
ui/common/utils/format.ts Normal file
View File

@@ -0,0 +1,21 @@
/**
* Shared formatting utilities.
*/
/**
 * Format a byte count as a short human-readable size.
 *
 * Uses binary multiples (1 KB = 1024 bytes): whole bytes below 1 KB, whole
 * kilobytes below 1 MB, one decimal for MB and two decimals for GB.
 *
 * @param bytes Size in bytes; null, undefined, 0 and NaN all count as "no size".
 * @returns The formatted size, or "—" when there is no size to show.
 */
export function formatSize(bytes: number | null | undefined): string {
  // Missing, zero and NaN sizes all render as a dash (unchanged behaviour).
  if (!bytes) return "—";

  const KB = 1024;
  const MB = KB * 1024;
  const GB = MB * 1024;

  // Sub-kilobyte sizes previously fell into the KB branch and were rounded
  // to "0 KB" / "1 KB"; show the exact byte count instead.
  if (bytes < KB) return `${bytes} B`;
  if (bytes < MB) return `${(bytes / KB).toFixed(0)} KB`;
  if (bytes < GB) return `${(bytes / MB).toFixed(1)} MB`;
  return `${(bytes / GB).toFixed(2)} GB`;
}
/**
 * Format a duration in seconds as a clock string.
 *
 * Produces "H:MM:SS" when the duration is at least one hour and "M:SS"
 * otherwise; fractional seconds are truncated.
 *
 * @param seconds Duration in seconds; null, undefined, 0 and NaN yield "—".
 * @returns The formatted duration, or "—" when there is nothing to show.
 */
export function formatDuration(seconds: number | null | undefined): string {
  if (!seconds) return "—";

  // Zero-pad a clock component to two digits.
  const pad = (value: number): string => value.toString().padStart(2, "0");

  const hours = Math.floor(seconds / 3600);
  const minutes = Math.floor((seconds % 3600) / 60);
  const secs = Math.floor(seconds % 60);

  return hours > 0
    ? `${hours}:${pad(minutes)}:${pad(secs)}`
    : `${minutes}:${pad(secs)}`;
}

1736
ui/timeline/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,15 +1,4 @@
* {
box-sizing: border-box;
margin: 0;
padding: 0;
}
body {
font-family:
-apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
background: #1a1a1a;
color: #e0e0e0;
}
@import "../../common/styles/theme.css";
.app {
display: flex;

View File

@@ -115,7 +115,6 @@ function App() {
setJobs(data);
};
const assetJobs = jobs.filter((j) => j.source_asset_id === selectedAsset?.id);
const completedJobs = jobs.filter((j) => j.status === "completed");
if (loading) return <div className="loading">Loading...</div>;

View File

@@ -42,6 +42,8 @@ export default function JobPanel({
preset_id: selectedPresetId || null,
trim_start: hasTrim ? trimStart : null,
trim_end: hasTrim ? trimEnd : null,
output_filename: null,
priority: 0,
});
onJobCreated();
} catch (e) {

View File

@@ -2,45 +2,17 @@
* GraphQL API client
*/
import { gql } from "../../common/api/graphql";
import { getAssets, scanMediaFolder } from "../../common/api/media";
import type {
MediaAsset,
TranscodePreset,
TranscodeJob,
CreateJobRequest,
SystemStatus,
MediaAsset,
} from "./types";
const GRAPHQL_URL = "/api/graphql";
async function gql<T>(query: string, variables?: Record<string, unknown>): Promise<T> {
const response = await fetch(GRAPHQL_URL, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ query, variables }),
});
const json = await response.json();
if (json.errors?.length) {
throw new Error(json.errors[0].message);
}
return json.data as T;
}
// Assets
export async function getAssets(): Promise<MediaAsset[]> {
const data = await gql<{ assets: MediaAsset[] }>(`
query {
assets {
id filename file_path status error_message file_size duration
video_codec audio_codec width height framerate bitrate
properties comments tags created_at updated_at
}
}
`);
return data.assets;
}
export { getAssets, scanMediaFolder };
export async function getAsset(id: string): Promise<MediaAsset> {
const data = await gql<{ asset: MediaAsset }>(`
@@ -55,20 +27,6 @@ export async function getAsset(id: string): Promise<MediaAsset> {
return data.asset;
}
export async function scanMediaFolder(): Promise<{
found: number;
registered: number;
skipped: number;
files: string[];
}> {
const data = await gql<{ scan_media_folder: { found: number; registered: number; skipped: number; files: string[] } }>(`
mutation {
scan_media_folder { found registered skipped files }
}
`);
return data.scan_media_folder;
}
// Presets
export async function getPresets(): Promise<TranscodePreset[]> {
const data = await gql<{ presets: TranscodePreset[] }>(`

View File

@@ -1,111 +1,21 @@
/**
* TypeScript Types - GENERATED FILE
* TypeScript Types — re-exported from common generated types.
*
* Do not edit directly. Regenerate using modelgen.
*/
export type AssetStatus = "pending" | "ready" | "error";
export type JobStatus = "pending" | "processing" | "completed" | "failed" | "cancelled";
export interface MediaAsset {
id: string;
filename: string;
file_path: string;
status: AssetStatus;
error_message: string | null;
file_size: number | null;
duration: number | null;
video_codec: string | null;
audio_codec: string | null;
width: number | null;
height: number | null;
framerate: number | null;
bitrate: number | null;
properties: Record<string, unknown>;
comments: string;
tags: string[];
created_at: string | null;
updated_at: string | null;
}
export interface TranscodePreset {
id: string;
name: string;
description: string;
is_builtin: boolean;
container: string;
video_codec: string;
video_bitrate: string | null;
video_crf: number | null;
video_preset: string | null;
resolution: string | null;
framerate: number | null;
audio_codec: string;
audio_bitrate: string | null;
audio_channels: number | null;
audio_samplerate: number | null;
extra_args: string[];
created_at: string | null;
updated_at: string | null;
}
export interface TranscodeJob {
id: string;
source_asset_id: string;
preset_id: string | null;
preset_snapshot: Record<string, unknown>;
trim_start: number | null;
trim_end: number | null;
output_filename: string;
output_path: string | null;
output_asset_id: string | null;
status: JobStatus;
progress: number;
current_frame: number | null;
current_time: number | null;
speed: string | null;
error_message: string | null;
celery_task_id: string | null;
execution_arn: string | null;
priority: number;
created_at: string | null;
started_at: string | null;
completed_at: string | null;
}
export interface CreateJobRequest {
source_asset_id: string;
preset_id: string | null;
trim_start: number | null;
trim_end: number | null;
output_filename: string | null;
priority: number;
}
export interface UpdateAssetRequest {
comments: string | null;
tags: string[] | null;
}
export interface SystemStatus {
status: string;
version: string;
}
export interface ScanResult {
found: number;
registered: number;
skipped: number;
files: string[];
}
export interface DeleteResult {
ok: boolean;
}
export interface WorkerStatus {
available: boolean;
active_jobs: number;
supported_codecs: string[];
gpu_available: boolean;
}
export type {
AssetStatus,
JobStatus,
ChunkJobStatus,
MediaAsset,
TranscodePreset,
TranscodeJob,
ChunkJob,
CreateJobRequest,
UpdateAssetRequest,
SystemStatus,
ScanResult,
DeleteResult,
WorkerStatus,
} from "../../common/types/generated";

Some files were not shown because too many files have changed in this diff Show More