chunker and ui

This commit is contained in:
2026-03-13 14:29:38 -03:00
parent 3eeedebb15
commit ccc478fbaa
69 changed files with 6481 additions and 282 deletions

30
.dockerignore Normal file
View File

@@ -0,0 +1,30 @@
# Python
.venv/
__pycache__/
*.pyc
*.egg-info/
.pytest_cache/
# Node
node_modules/
ui/*/node_modules/
ui/*/dist/
# Media (9.8GB — mounted via volume, never needed in image)
media/
# Git
.git/
# IDE / OS
.idea/
.vscode/
*.swp
.DS_Store
# Docker
ctrl/docker-compose.yml
# Docs
docs/
*.md

View File

@@ -7,4 +7,4 @@ os.environ.setdefault("DJANGO_SETTINGS_MODULE", "admin.mpr.settings")
 app = Celery("mpr")
 app.config_from_object("django.conf:settings", namespace="CELERY")
 app.autodiscover_tasks()
-app.autodiscover_tasks(["core.task"])
+app.autodiscover_tasks(["core.jobs"])

View File

@@ -19,6 +19,15 @@ class JobStatus(models.TextChoices):
FAILED = "failed", "Failed" FAILED = "failed", "Failed"
CANCELLED = "cancelled", "Cancelled" CANCELLED = "cancelled", "Cancelled"
class ChunkJobStatus(models.TextChoices):
PENDING = "pending", "Pending"
CHUNKING = "chunking", "Chunking"
PROCESSING = "processing", "Processing"
COLLECTING = "collecting", "Collecting"
COMPLETED = "completed", "Completed"
FAILED = "failed", "Failed"
CANCELLED = "cancelled", "Cancelled"
class MediaAsset(models.Model): class MediaAsset(models.Model):
"""A video/audio file registered in the system.""" """A video/audio file registered in the system."""
@@ -108,3 +117,34 @@ class TranscodeJob(models.Model):
     def __str__(self):
         return str(self.id)
+
+
+class ChunkJob(models.Model):
+    """A chunk pipeline job — splits a media file into chunks and processes them."""
+
+    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
+    source_asset_id = models.UUIDField()
+    chunk_duration = models.FloatField(default=10.0)
+    num_workers = models.IntegerField(default=4)
+    max_retries = models.IntegerField(default=3)
+    processor_type = models.CharField(max_length=255)
+    status = models.CharField(max_length=20, choices=ChunkJobStatus.choices, default=ChunkJobStatus.PENDING)
+    progress = models.FloatField(default=0.0)
+    total_chunks = models.IntegerField(default=0)
+    processed_chunks = models.IntegerField(default=0)
+    failed_chunks = models.IntegerField(default=0)
+    retry_count = models.IntegerField(default=0)
+    error_message = models.TextField(blank=True, default='')
+    throughput_mbps = models.FloatField(null=True, blank=True, default=None)
+    elapsed_seconds = models.FloatField(null=True, blank=True, default=None)
+    celery_task_id = models.CharField(max_length=255, null=True, blank=True)
+    priority = models.IntegerField(default=0)
+    created_at = models.DateTimeField(auto_now_add=True)
+    started_at = models.DateTimeField(null=True, blank=True)
+    completed_at = models.DateTimeField(null=True, blank=True)
+
+    class Meta:
+        ordering = ["-created_at"]
+
+    def __str__(self):
+        return str(self.id)

78
core/api/chunker_sse.py Normal file
View File

@@ -0,0 +1,78 @@
"""
SSE endpoint for chunker pipeline events.
Bridges gRPC StreamProgress to browser-native EventSource.
GET /api/chunker/stream/{job_id} → text/event-stream
"""
import asyncio
import json
import logging
import time
from typing import AsyncGenerator
from fastapi import APIRouter
from starlette.responses import StreamingResponse
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/chunker", tags=["chunker"])
async def _event_generator(job_id: str) -> AsyncGenerator[str, None]:
"""
Generate SSE events by polling gRPC job state.
Yields server-sent events in the format:
event: <event_type>
data: <json_payload>
"""
from core.rpc.server import _active_jobs
last_state = None
timeout = time.monotonic() + 600 # 10 min max
while time.monotonic() < timeout:
job_state = _active_jobs.get(job_id)
if job_state is None:
# Job not found yet — may not have started
yield f"event: waiting\ndata: {json.dumps({'job_id': job_id})}\n\n"
await asyncio.sleep(0.5)
continue
# Only send if state changed
if job_state != last_state:
last_state = dict(job_state)
event_type = job_state.get("status", "update")
yield f"event: {event_type}\ndata: {json.dumps({**job_state, 'job_id': job_id})}\n\n"
# End stream when job is terminal
if event_type in ("completed", "failed", "cancelled"):
yield f"event: done\ndata: {json.dumps({'job_id': job_id})}\n\n"
break
await asyncio.sleep(0.2)
yield f"event: timeout\ndata: {json.dumps({'job_id': job_id})}\n\n"
@router.get("/stream/{job_id}")
async def stream_chunk_job(job_id: str):
"""
SSE stream for a chunk pipeline job.
The UI connects via native EventSource:
const es = new EventSource('/api/chunker/stream/<job_id>');
es.addEventListener('processing', (e) => { ... });
"""
return StreamingResponse(
_event_generator(job_id),
media_type="text/event-stream",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"X-Accel-Buffering": "no",
},
)
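
For reference, a minimal Python client for this stream might look like the sketch below. It is not part of the commit; the base URL is an assumption and only the standard library is used.

# sse_client_sketch.py (illustrative only)
import urllib.request

def follow_chunk_job(job_id: str, base_url: str = "http://localhost:8000") -> None:
    """Print chunker events until the stream reaches a terminal event."""
    url = f"{base_url}/api/chunker/stream/{job_id}"
    with urllib.request.urlopen(url) as resp:
        event_type = None
        for raw in resp:  # HTTPResponse iterates line by line
            line = raw.decode("utf-8").rstrip("\n")
            if line.startswith("event: "):
                event_type = line[len("event: "):]
            elif line.startswith("data: "):
                print(event_type, line[len("data: "):])
                if event_type in ("done", "timeout"):
                    return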

View File

@@ -15,6 +15,8 @@ from strawberry.schema.config import StrawberryConfig
 from strawberry.types import Info

 from core.api.schema.graphql import (
+    ChunkJobType,
+    CreateChunkJobInput,
     CreateJobInput,
     DeleteResultType,
     MediaAssetType,
@@ -172,30 +174,31 @@ class Mutation:
             priority=input.priority or 0,
         )

+        payload = {
+            "source_key": source.file_path,
+            "output_key": output_filename,
+            "preset": preset_snapshot or None,
+            "trim_start": input.trim_start,
+            "trim_end": input.trim_end,
+            "duration": source.duration,
+        }
+
         executor_mode = os.environ.get("MPR_EXECUTOR", "local")
         if executor_mode in ("lambda", "gcp"):
-            from core.task.executor import get_executor
+            from core.jobs.executor import get_executor

             get_executor().run(
+                job_type="transcode",
                 job_id=str(job.id),
-                source_path=source.file_path,
-                output_path=output_filename,
-                preset=preset_snapshot or None,
-                trim_start=input.trim_start,
-                trim_end=input.trim_end,
-                duration=source.duration,
+                payload=payload,
             )
         else:
-            from core.task.tasks import run_transcode_job
-            result = run_transcode_job.delay(
+            from core.jobs.task import run_job
+
+            result = run_job.delay(
+                job_type="transcode",
                 job_id=str(job.id),
-                source_key=source.file_path,
-                output_key=output_filename,
-                preset=preset_snapshot or None,
-                trim_start=input.trim_start,
-                trim_end=input.trim_end,
-                duration=source.duration,
+                payload=payload,
             )
         job.celery_task_id = result.id
         job.save(update_fields=["celery_task_id"])
@@ -261,6 +264,62 @@ class Mutation:
         except Exception:
             raise Exception("Asset not found")

+    @strawberry.mutation
+    def create_chunk_job(self, info: Info, input: CreateChunkJobInput) -> ChunkJobType:
+        """Create and dispatch a chunk pipeline job."""
+        import uuid
+
+        from core.db import get_asset
+
+        try:
+            source = get_asset(input.source_asset_id)
+        except Exception:
+            raise Exception("Source asset not found")
+
+        job_id = str(uuid.uuid4())
+        payload = {
+            "source_key": source.file_path,
+            "chunk_duration": input.chunk_duration,
+            "num_workers": input.num_workers,
+            "max_retries": input.max_retries,
+            "processor_type": input.processor_type,
+        }
+
+        executor_mode = os.environ.get("MPR_EXECUTOR", "local")
+        celery_task_id = None
+        if executor_mode in ("lambda", "gcp"):
+            from core.jobs.executor import get_executor
+
+            get_executor().run(
+                job_type="chunk",
+                job_id=job_id,
+                payload=payload,
+            )
+        else:
+            from core.jobs.task import run_job
+
+            result = run_job.delay(
+                job_type="chunk",
+                job_id=job_id,
+                payload=payload,
+            )
+            celery_task_id = result.id
+
+        return ChunkJobType(
+            id=uuid.UUID(job_id),
+            source_asset_id=input.source_asset_id,
+            chunk_duration=input.chunk_duration,
+            num_workers=input.num_workers,
+            max_retries=input.max_retries,
+            processor_type=input.processor_type,
+            status="pending",
+            progress=0.0,
+            priority=input.priority,
+            celery_task_id=celery_task_id,
+        )
+
 # ---------------------------------------------------------------------------
 # Schema

View File

@@ -23,6 +23,7 @@ from fastapi import FastAPI, Header, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from strawberry.fastapi import GraphQLRouter

+from core.api.chunker_sse import router as chunker_router
 from core.api.graphql import schema as graphql_schema

 CALLBACK_API_KEY = os.environ.get("CALLBACK_API_KEY", "")
@@ -48,6 +49,9 @@ app.add_middleware(
 graphql_router = GraphQLRouter(schema=graphql_schema, graphql_ide="graphiql")
 app.include_router(graphql_router, prefix="/graphql")

+# Chunker SSE
+app.include_router(chunker_router)
+

 @app.get("/")
 def root():

View File

@@ -156,3 +156,52 @@ class WorkerStatusType:
     active_jobs: Optional[int] = None
     supported_codecs: Optional[List[str]] = None
     gpu_available: Optional[bool] = None
+
+
+@strawberry.enum
+class ChunkJobStatus(Enum):
+    PENDING = "pending"
+    CHUNKING = "chunking"
+    PROCESSING = "processing"
+    COLLECTING = "collecting"
+    COMPLETED = "completed"
+    FAILED = "failed"
+    CANCELLED = "cancelled"
+
+
+@strawberry.type
+class ChunkJobType:
+    """A chunk pipeline job."""
+
+    id: Optional[UUID] = None
+    source_asset_id: Optional[UUID] = None
+    chunk_duration: Optional[float] = None
+    num_workers: Optional[int] = None
+    max_retries: Optional[int] = None
+    processor_type: Optional[str] = None
+    status: Optional[str] = None
+    progress: Optional[float] = None
+    total_chunks: Optional[int] = None
+    processed_chunks: Optional[int] = None
+    failed_chunks: Optional[int] = None
+    retry_count: Optional[int] = None
+    error_message: Optional[str] = None
+    throughput_mbps: Optional[float] = None
+    elapsed_seconds: Optional[float] = None
+    celery_task_id: Optional[str] = None
+    priority: Optional[int] = None
+    created_at: Optional[datetime] = None
+    started_at: Optional[datetime] = None
+    completed_at: Optional[datetime] = None
+
+
+@strawberry.input
+class CreateChunkJobInput:
+    """Request body for creating a chunk pipeline job."""
+
+    source_asset_id: UUID
+    chunk_duration: float = 10.0
+    num_workers: int = 4
+    max_retries: int = 3
+    processor_type: str = "ffmpeg"
+    priority: int = 0
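
To exercise this input over HTTP, a request might look like the following sketch. It assumes Strawberry's default camelCase field conversion and a GraphQL endpoint at http://localhost:8000/graphql; the asset UUID is a placeholder.

import json
import urllib.request

query = """
mutation($input: CreateChunkJobInput!) {
  createChunkJob(input: $input) { id status processorType }
}
"""
variables = {"input": {"sourceAssetId": "<asset-uuid>", "chunkDuration": 10.0,
                       "numWorkers": 4, "processorType": "ffmpeg"}}
req = urllib.request.Request(
    "http://localhost:8000/graphql",
    data=json.dumps({"query": query, "variables": variables}).encode(),
    headers={"Content-Type": "application/json"},
)
print(json.loads(urllib.request.urlopen(req).read()))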

64
core/chunker/__init__.py Normal file
View File

@@ -0,0 +1,64 @@
"""
Chunker pipeline — splits files into chunks, processes concurrently, reassembles in order.
Public API:
Pipeline — orchestrates the full pipeline
PipelineResult — aggregate result dataclass
Chunker — file → Chunk generator
ChunkQueue — bounded thread-safe queue
WorkerPool — manages N worker threads
ResultCollector — heapq-based ordered reassembly
"""
from .chunker import Chunker
from .collector import ResultCollector
from .exceptions import (
ChunkChecksumError,
ChunkError,
ChunkReadError,
PipelineError,
ProcessingError,
ProcessorFailureError,
ProcessorTimeoutError,
ReassemblyError,
)
from .models import Chunk, ChunkResult, PipelineResult
from .pipeline import Pipeline
from .pool import WorkerPool
from .processor import (
ChecksumProcessor,
CompositeProcessor,
FFmpegExtractProcessor,
Processor,
SimulatedDecodeProcessor,
)
from .queue import ChunkQueue
__all__ = [
# Core
"Pipeline",
"PipelineResult",
# Components
"Chunker",
"ChunkQueue",
"WorkerPool",
"ResultCollector",
# Models
"Chunk",
"ChunkResult",
# Processors
"Processor",
"ChecksumProcessor",
"SimulatedDecodeProcessor",
"CompositeProcessor",
"FFmpegExtractProcessor",
# Exceptions
"PipelineError",
"ChunkError",
"ChunkReadError",
"ChunkChecksumError",
"ProcessingError",
"ProcessorFailureError",
"ProcessorTimeoutError",
"ReassemblyError",
]

86
core/chunker/chunker.py Normal file
View File

@@ -0,0 +1,86 @@
"""
Chunker — probes a media file and yields time-based Chunk objects.
Demonstrates:
- Function parameters and defaults (Interview Topic 1)
- List comprehensions and efficient iteration / generators (Interview Topic 3)
"""
import math
import os
from typing import Generator
from core.ffmpeg.probe import probe_file
from .exceptions import ChunkReadError
from .models import Chunk
class Chunker:
"""
Splits a media file into time-based chunks via a generator.
Uses FFmpeg probe to get duration, then yields Chunk objects
representing time segments (no data read — extraction happens in the processor).
Args:
file_path: Path to the source media file
chunk_duration: Duration of each chunk in seconds (default: 10.0)
"""
def __init__(self, file_path: str, chunk_duration: float = 10.0):
if not os.path.isfile(file_path):
raise ChunkReadError(f"File not found: {file_path}")
if chunk_duration <= 0:
raise ValueError("chunk_duration must be positive")
self.file_path = file_path
self.chunk_duration = chunk_duration
self.file_size = os.path.getsize(file_path)
self.source_duration = self._probe_duration()
def _probe_duration(self) -> float:
"""Get source file duration via FFmpeg probe."""
try:
result = probe_file(self.file_path)
if result.duration is None or result.duration <= 0:
raise ChunkReadError(
f"Cannot determine duration for {self.file_path}"
)
return result.duration
except ChunkReadError:
raise
except Exception as e:
raise ChunkReadError(
f"Failed to probe {self.file_path}: {e}"
) from e
@property
def expected_chunks(self) -> int:
"""Calculate expected number of chunks (last chunk may be shorter)."""
if self.source_duration <= 0:
return 0
return math.ceil(self.source_duration / self.chunk_duration)
def chunks(self) -> Generator[Chunk, None, None]:
"""
Yield Chunk objects representing time segments of the source file.
Generator-based: chunks are yielded on demand.
Each chunk defines a time range — actual extraction is done by the processor.
"""
total = self.expected_chunks
for sequence in range(total):
start_time = sequence * self.chunk_duration
end_time = min(
start_time + self.chunk_duration, self.source_duration
)
duration = end_time - start_time
yield Chunk(
sequence=sequence,
start_time=start_time,
end_time=end_time,
source_path=self.file_path,
duration=duration,
)
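
A usage sketch (assuming ffprobe is available and "media/sample.mp4" is a stand-in path): chunks are produced lazily, so nothing is read from disk here.

from core.chunker import Chunker

chunker = Chunker("media/sample.mp4", chunk_duration=10.0)
print(f"{chunker.expected_chunks} chunks over {chunker.source_duration:.1f}s")
for chunk in chunker.chunks():
    # Each Chunk is just a time range; extraction happens in the processor.
    print(chunk.sequence, chunk.start_time, chunk.end_time)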

98
core/chunker/collector.py Normal file
View File

@@ -0,0 +1,98 @@
"""
ResultCollector — reassembles chunk results in sequence order using a min-heap.
Demonstrates:
- Algorithms and sorting (Interview Topic 6) — heapq for ordered reassembly
- Core data structures (Interview Topic 5) — heap, deque
"""
import heapq
from collections import deque
from typing import List
from .exceptions import ReassemblyError
from .models import ChunkResult
class ResultCollector:
"""
Receives ChunkResults out of order, emits them in sequence order.
Uses a min-heap keyed on sequence number. Only emits a chunk when
all prior sequences have been accounted for.
Args:
total_chunks: Expected total number of chunks
"""
def __init__(self, total_chunks: int):
self.total_chunks = total_chunks
self._heap: List[tuple[int, ChunkResult]] = []
self._next_sequence = 0
self._emitted: List[ChunkResult] = []
self._seen_sequences: set[int] = set()
# Sliding window for throughput calculation
self._recent_times: deque[float] = deque(maxlen=50)
def add(self, result: ChunkResult) -> List[ChunkResult]:
"""
Add a result and return any newly emittable results in order.
Args:
result: A ChunkResult (may arrive out of order)
Returns:
List of results that can now be emitted in sequence order
(may be empty if we're still waiting for earlier sequences)
Raises:
ReassemblyError: If a duplicate sequence is received
"""
if result.sequence in self._seen_sequences:
raise ReassemblyError(
f"Duplicate sequence number: {result.sequence}"
)
self._seen_sequences.add(result.sequence)
# Track processing time for throughput
if result.processing_time > 0:
self._recent_times.append(result.processing_time)
# Push to min-heap
heapq.heappush(self._heap, (result.sequence, result))
# Emit all consecutive results starting from _next_sequence
newly_emitted = []
while self._heap and self._heap[0][0] == self._next_sequence:
_, emitted_result = heapq.heappop(self._heap)
self._emitted.append(emitted_result)
newly_emitted.append(emitted_result)
self._next_sequence += 1
return newly_emitted
@property
def is_complete(self) -> bool:
"""True if all expected chunks have been emitted in order."""
return self._next_sequence == self.total_chunks
@property
def buffered_count(self) -> int:
"""Number of results waiting in the heap (arrived out of order)."""
return len(self._heap)
@property
def emitted_count(self) -> int:
"""Number of results emitted in sequence order."""
return len(self._emitted)
@property
def avg_processing_time(self) -> float:
"""Average processing time from recent results (sliding window)."""
if not self._recent_times:
return 0.0
return sum(self._recent_times) / len(self._recent_times)
def get_ordered_results(self) -> List[ChunkResult]:
"""Get all emitted results in sequence order."""
return list(self._emitted)
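
A quick sketch of the reordering behavior: results added out of order are buffered in the heap until the missing sequence arrives.

from core.chunker import ChunkResult, ResultCollector

collector = ResultCollector(total_chunks=3)
print([r.sequence for r in collector.add(ChunkResult(sequence=2, success=True))])  # [] (waiting for 0)
print([r.sequence for r in collector.add(ChunkResult(sequence=0, success=True))])  # [0]
print([r.sequence for r in collector.add(ChunkResult(sequence=1, success=True))])  # [1, 2]
print(collector.is_complete)  # True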

View File

@@ -0,0 +1,64 @@
"""
Chunker exception hierarchy.
Demonstrates: Managing exceptions and writing resilient code (Interview Topic 7).
"""
class PipelineError(Exception):
"""Base exception for all chunker pipeline errors."""
pass
class ChunkError(PipelineError):
"""Errors related to chunk creation or validation."""
pass
class ChunkReadError(ChunkError):
"""Failed to read chunk data from source file."""
pass
class ChunkChecksumError(ChunkError):
"""Chunk data integrity validation failed."""
def __init__(self, sequence: int, expected: str, actual: str):
self.sequence = sequence
self.expected = expected
self.actual = actual
super().__init__(
f"Chunk {sequence}: checksum mismatch "
f"(expected={expected}, actual={actual})"
)
class ProcessingError(PipelineError):
"""Errors during chunk processing by workers."""
pass
class ProcessorTimeoutError(ProcessingError):
"""Processor exceeded allowed time for a chunk."""
def __init__(self, sequence: int, timeout: float):
self.sequence = sequence
self.timeout = timeout
super().__init__(f"Chunk {sequence}: processor timed out after {timeout}s")
class ProcessorFailureError(ProcessingError):
"""Processor failed to process a chunk after all retries."""
def __init__(self, sequence: int, retries: int, original_error: Exception):
self.sequence = sequence
self.retries = retries
self.original_error = original_error
super().__init__(
f"Chunk {sequence}: failed after {retries} retries — {original_error}"
)
class ReassemblyError(PipelineError):
"""Errors during result collection and ordering."""
pass

54
core/chunker/models.py Normal file
View File

@@ -0,0 +1,54 @@
"""
Internal data models for the chunker pipeline.
These are pipeline-internal dataclasses, not schema models.
Schema-level ChunkJob is in core/schema/models/jobs.py.
Demonstrates: Core data structures (Interview Topic 5).
"""
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
@dataclass
class Chunk:
"""A time-based segment of the source media file."""
sequence: int
start_time: float # seconds
end_time: float # seconds
source_path: str # path to source file
duration: float # end_time - start_time
checksum: str = "" # computed after extraction
@dataclass
class ChunkResult:
"""Result of processing a single chunk."""
sequence: int
success: bool
checksum_valid: bool = True
processing_time: float = 0.0
error: Optional[str] = None
retries: int = 0
worker_id: Optional[str] = None
output_file: Optional[str] = None
@dataclass
class PipelineResult:
"""Aggregate result of the entire pipeline run."""
total_chunks: int = 0
processed: int = 0
failed: int = 0
retries: int = 0
elapsed_time: float = 0.0
throughput_mbps: float = 0.0
worker_stats: Dict[str, Any] = field(default_factory=dict)
errors: List[str] = field(default_factory=list)
chunks_in_order: bool = True
output_dir: Optional[str] = None
chunk_files: List[str] = field(default_factory=list)

244
core/chunker/pipeline.py Normal file
View File

@@ -0,0 +1,244 @@
"""
Pipeline — orchestrates the entire chunker pipeline.
Wires: Chunker → ChunkQueue → WorkerPool → ResultCollector → PipelineResult
Demonstrates:
- Function parameters and defaults (Interview Topic 1) — configurable pipeline
- Concurrency (Interview Topic 2) — producer thread + worker pool
- OOP design (Interview Topic 4) — composition of pipeline components
- Exception handling (Interview Topic 7) — graceful error propagation
"""
import json
import logging
import threading
import time
from pathlib import Path
from typing import Any, Callable, Dict, Optional
from .chunker import Chunker
from .collector import ResultCollector
from .exceptions import PipelineError
from .models import PipelineResult
from .pool import WorkerPool
from .queue import ChunkQueue
logger = logging.getLogger(__name__)
class Pipeline:
"""
Orchestrates the chunk processing pipeline.
The pipeline runs in three stages:
1. Producer thread: Chunker probes file → pushes time-based chunks to ChunkQueue
2. Worker pool: N workers pull from queue → extract mp4 segments → emit results
3. Collector: ResultCollector reassembles results in sequence order
Args:
source: Path to the source media file
chunk_duration: Duration of each chunk in seconds (default: 10.0)
num_workers: Number of concurrent worker threads (default: 4)
max_retries: Max retry attempts per chunk (default: 3)
processor_type: Processor to use — "ffmpeg", "checksum", "simulated_decode", "composite"
queue_size: Max chunks buffered in queue (default: 10)
event_callback: Optional callback for real-time events
output_dir: Directory for output chunk files (required for "ffmpeg" processor)
"""
def __init__(
self,
source: str,
chunk_duration: float = 10.0,
num_workers: int = 4,
max_retries: int = 3,
processor_type: str = "checksum",
queue_size: int = 10,
event_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
output_dir: Optional[str] = None,
):
self.source = source
self.chunk_duration = chunk_duration
self.num_workers = num_workers
self.max_retries = max_retries
self.processor_type = processor_type
self.queue_size = queue_size
self.event_callback = event_callback
self.output_dir = output_dir
def _emit(self, event_type: str, data: Dict[str, Any]) -> None:
"""Emit an event if callback is registered."""
if self.event_callback:
self.event_callback(event_type, data)
def _produce_chunks(
self, chunker: Chunker, chunk_queue: ChunkQueue
) -> None:
"""Producer thread: probe file and enqueue time-based chunks."""
try:
for chunk in chunker.chunks():
chunk_queue.put(chunk, timeout=30.0)
self._emit("chunk_queued", {
"sequence": chunk.sequence,
"start_time": chunk.start_time,
"end_time": chunk.end_time,
"duration": chunk.duration,
"queue_size": chunk_queue.qsize(),
})
except Exception as e:
logger.error(f"Producer error: {e}")
self._emit("producer_error", {"error": str(e)})
finally:
chunk_queue.close()
def _write_manifest(
self, result: PipelineResult, source_duration: float
) -> None:
"""Write manifest.json to output_dir with segment metadata."""
if not self.output_dir:
return
manifest = {
"source": self.source,
"source_duration": source_duration,
"chunk_duration": self.chunk_duration,
"total_chunks": result.total_chunks,
"processed": result.processed,
"failed": result.failed,
"elapsed_time": result.elapsed_time,
"throughput_mbps": result.throughput_mbps,
"segments": [
{
"sequence": i,
"file": f"chunk_{i:04d}.mp4",
"start": i * self.chunk_duration,
"end": min(
(i + 1) * self.chunk_duration, source_duration
),
}
for i in range(result.total_chunks)
],
}
manifest_path = Path(self.output_dir) / "manifest.json"
manifest_path.write_text(json.dumps(manifest, indent=2))
logger.info(f"Manifest written to {manifest_path}")
def run(self) -> PipelineResult:
"""
Execute the full pipeline.
Returns:
PipelineResult with aggregate stats
Raises:
PipelineError: If the pipeline fails catastrophically
"""
start_time = time.monotonic()
self._emit("pipeline_start", {
"source": self.source,
"chunk_duration": self.chunk_duration,
"num_workers": self.num_workers,
"processor_type": self.processor_type,
})
try:
# Stage 1: Set up chunker (probes file for duration)
chunker = Chunker(self.source, self.chunk_duration)
total_chunks = chunker.expected_chunks
if total_chunks == 0:
self._emit("pipeline_complete", {"total_chunks": 0})
return PipelineResult(chunks_in_order=True)
self._emit("pipeline_info", {
"file_size": chunker.file_size,
"source_duration": chunker.source_duration,
"total_chunks": total_chunks,
})
# Stage 2: Set up queue and worker pool
chunk_queue = ChunkQueue(maxsize=self.queue_size)
pool = WorkerPool(
num_workers=self.num_workers,
chunk_queue=chunk_queue,
processor_type=self.processor_type,
max_retries=self.max_retries,
event_callback=self.event_callback,
output_dir=self.output_dir,
)
# Stage 3: Start workers, then produce chunks
pool.start()
producer = threading.Thread(
target=self._produce_chunks,
args=(chunker, chunk_queue),
name="chunk-producer",
daemon=True,
)
producer.start()
# Stage 4: Wait for all workers to finish
all_results = pool.wait()
producer.join(timeout=5.0)
# Stage 5: Collect results in order
collector = ResultCollector(total_chunks)
for r in all_results:
collector.add(r)
self._emit("chunk_collected", {
"sequence": r.sequence,
"success": r.success,
"buffered": collector.buffered_count,
"emitted": collector.emitted_count,
})
# Build result
elapsed = time.monotonic() - start_time
file_size_mb = chunker.file_size / (1024 * 1024)
throughput = file_size_mb / elapsed if elapsed > 0 else 0.0
failed_results = [r for r in all_results if not r.success]
total_retries = sum(r.retries for r in all_results)
chunk_files = [
r.output_file for r in all_results
if r.success and r.output_file
]
result = PipelineResult(
total_chunks=total_chunks,
processed=len(all_results),
failed=len(failed_results),
retries=total_retries,
elapsed_time=elapsed,
throughput_mbps=throughput,
worker_stats=pool.get_worker_stats(),
errors=[r.error for r in failed_results if r.error],
chunks_in_order=collector.is_complete,
output_dir=self.output_dir,
chunk_files=chunk_files,
)
# Write manifest if output_dir is set
self._write_manifest(result, chunker.source_duration)
pool.shutdown()
self._emit("pipeline_complete", {
"total_chunks": result.total_chunks,
"processed": result.processed,
"failed": result.failed,
"elapsed": result.elapsed_time,
"throughput_mbps": result.throughput_mbps,
})
return result
except PipelineError:
raise
except Exception as e:
self._emit("pipeline_error", {"error": str(e)})
raise PipelineError(f"Pipeline failed: {e}") from e
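
End-to-end usage might look like the sketch below ("media/sample.mp4" is a stand-in path; the "checksum" processor avoids real FFmpeg extraction, so no output_dir is needed).

from core.chunker import Pipeline

def on_event(event_type, data):
    print(event_type, data)

pipeline = Pipeline(
    source="media/sample.mp4",
    chunk_duration=10.0,
    num_workers=4,
    processor_type="checksum",
    event_callback=on_event,
)
result = pipeline.run()
print(result.processed, result.failed, f"{result.throughput_mbps:.1f} MB/s")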

125
core/chunker/pool.py Normal file
View File

@@ -0,0 +1,125 @@
"""
WorkerPool — manages N worker threads via ThreadPoolExecutor.
Demonstrates: Python concurrency — threading (Interview Topic 2).
"""
import logging
import threading
from concurrent.futures import Future, ThreadPoolExecutor
from typing import Any, Callable, Dict, List, Optional
from .models import ChunkResult
from .processor import (
ChecksumProcessor,
CompositeProcessor,
FFmpegExtractProcessor,
Processor,
SimulatedDecodeProcessor,
)
from .queue import ChunkQueue
from .worker import Worker
logger = logging.getLogger(__name__)
def create_processor(
processor_type: str = "checksum",
output_dir: Optional[str] = None,
) -> Processor:
"""Factory for processor instances."""
if processor_type == "ffmpeg":
if not output_dir:
raise ValueError("output_dir required for ffmpeg processor")
return FFmpegExtractProcessor(output_dir=output_dir)
elif processor_type == "checksum":
return ChecksumProcessor()
elif processor_type == "simulated_decode":
return SimulatedDecodeProcessor()
elif processor_type == "composite":
return CompositeProcessor([
ChecksumProcessor(),
SimulatedDecodeProcessor(ms_per_second=50.0),
])
else:
raise ValueError(f"Unknown processor type: {processor_type}")
class WorkerPool:
"""
Manages N worker threads that process chunks concurrently.
Args:
num_workers: Number of concurrent worker threads (default: 4)
chunk_queue: Shared queue to pull chunks from
processor_type: Type of processor for each worker (default: "checksum")
max_retries: Max retry attempts per chunk (default: 3)
event_callback: Optional callback for real-time events
"""
def __init__(
self,
num_workers: int = 4,
chunk_queue: Optional[ChunkQueue] = None,
processor_type: str = "checksum",
max_retries: int = 3,
event_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
output_dir: Optional[str] = None,
):
self.num_workers = num_workers
self.chunk_queue = chunk_queue or ChunkQueue()
self.processor_type = processor_type
self.max_retries = max_retries
self.event_callback = event_callback
self.output_dir = output_dir
self.shutdown_event = threading.Event()
self._executor: Optional[ThreadPoolExecutor] = None
self._futures: List[Future] = []
self._workers: List[Worker] = []
def start(self) -> None:
"""Start all worker threads."""
self._executor = ThreadPoolExecutor(
max_workers=self.num_workers,
thread_name_prefix="chunk-worker",
)
for i in range(self.num_workers):
worker = Worker(
worker_id=f"worker-{i}",
chunk_queue=self.chunk_queue,
processor=create_processor(self.processor_type, output_dir=self.output_dir),
max_retries=self.max_retries,
event_callback=self.event_callback,
)
self._workers.append(worker)
future = self._executor.submit(worker.run)
self._futures.append(future)
logger.info(f"WorkerPool started with {self.num_workers} workers")
def wait(self) -> List[ChunkResult]:
"""Wait for all workers to finish and collect results."""
all_results = []
for future in self._futures:
results = future.result()
all_results.extend(results)
return all_results
def shutdown(self) -> None:
"""Signal shutdown and cleanup."""
self.shutdown_event.set()
self.chunk_queue.close()
if self._executor:
self._executor.shutdown(wait=True)
def get_worker_stats(self) -> Dict[str, Any]:
"""Get per-worker statistics."""
return {
w.worker_id: {
"processed": w.processed_count,
"errors": w.error_count,
"retries": w.retry_count,
}
for w in self._workers
}
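
The pool can also be driven directly, as in this sketch with a hand-filled queue and the FFmpeg-free "checksum" processor ("media/sample.mp4" is a stand-in path):

from core.chunker import Chunk, ChunkQueue, WorkerPool

q = ChunkQueue(maxsize=4)
pool = WorkerPool(num_workers=2, chunk_queue=q, processor_type="checksum")
pool.start()
for i in range(3):
    q.put(Chunk(sequence=i, start_time=i * 10.0, end_time=(i + 1) * 10.0,
                source_path="media/sample.mp4", duration=10.0))
q.close()  # sentinel tells workers to drain and stop
results = pool.wait()
pool.shutdown()
print(sorted(r.sequence for r in results))  # [0, 1, 2]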

173
core/chunker/processor.py Normal file
View File

@@ -0,0 +1,173 @@
"""
Processor ABC and concrete implementations.
Demonstrates: OOP design principles — ABC, inheritance, composition (Interview Topic 4).
"""
import hashlib
import time
from abc import ABC, abstractmethod
from pathlib import Path
from typing import List
from .exceptions import ChunkChecksumError
from .models import Chunk, ChunkResult
class Processor(ABC):
"""
Abstract base class for chunk processors.
Each processor defines how a single chunk is processed.
The Worker calls processor.process(chunk) and handles retries.
"""
@abstractmethod
def process(self, chunk: Chunk) -> ChunkResult:
"""Process a single chunk and return the result."""
pass
class FFmpegExtractProcessor(Processor):
"""
Extracts a time segment from the source file using FFmpeg stream copy.
Produces a playable mp4 file per chunk — no re-encoding.
Args:
output_dir: Directory to write chunk mp4 files
"""
def __init__(self, output_dir: str):
self.output_dir = output_dir
Path(output_dir).mkdir(parents=True, exist_ok=True)
def process(self, chunk: Chunk) -> ChunkResult:
from core.ffmpeg.transcode import TranscodeConfig, transcode
start = time.monotonic()
output_file = str(
Path(self.output_dir) / f"chunk_{chunk.sequence:04d}.mp4"
)
config = TranscodeConfig(
input_path=chunk.source_path,
output_path=output_file,
video_codec="copy",
audio_codec="copy",
trim_start=chunk.start_time,
trim_end=chunk.end_time,
)
transcode(config)
# Compute checksum of output file
md5 = hashlib.md5()
with open(output_file, "rb") as f:
for block in iter(lambda: f.read(8192), b""):
md5.update(block)
checksum = md5.hexdigest()
elapsed = time.monotonic() - start
return ChunkResult(
sequence=chunk.sequence,
success=True,
checksum_valid=True,
processing_time=elapsed,
output_file=output_file,
)
class ChecksumProcessor(Processor):
"""
Validates chunk metadata consistency.
For time-based chunks, verifies the time range is valid.
Raises ChunkChecksumError on invalid ranges.
"""
def process(self, chunk: Chunk) -> ChunkResult:
start = time.monotonic()
valid = chunk.duration > 0 and chunk.end_time > chunk.start_time
if not valid:
raise ChunkChecksumError(
sequence=chunk.sequence,
expected="valid time range",
actual=f"{chunk.start_time}-{chunk.end_time}",
)
elapsed = time.monotonic() - start
return ChunkResult(
sequence=chunk.sequence,
success=True,
checksum_valid=True,
processing_time=elapsed,
)
class SimulatedDecodeProcessor(Processor):
"""
Simulates decode work by sleeping proportional to chunk duration.
Useful for demonstrating concurrency behavior without real FFmpeg.
Args:
ms_per_second: Milliseconds of simulated work per second of chunk duration (default: 100)
"""
def __init__(self, ms_per_second: float = 100.0):
self.ms_per_second = ms_per_second
def process(self, chunk: Chunk) -> ChunkResult:
start = time.monotonic()
sleep_time = (self.ms_per_second * chunk.duration) / 1000.0
time.sleep(sleep_time)
elapsed = time.monotonic() - start
return ChunkResult(
sequence=chunk.sequence,
success=True,
checksum_valid=True,
processing_time=elapsed,
)
class CompositeProcessor(Processor):
"""
Chains multiple processors — runs each in sequence on the same chunk.
Demonstrates OOP composition pattern.
Args:
processors: List of processors to chain
"""
def __init__(self, processors: List[Processor]):
if not processors:
raise ValueError("CompositeProcessor requires at least one processor")
self.processors = processors
def process(self, chunk: Chunk) -> ChunkResult:
start = time.monotonic()
last_result = None
for proc in self.processors:
last_result = proc.process(chunk)
if not last_result.success:
return last_result
elapsed = time.monotonic() - start
return ChunkResult(
sequence=chunk.sequence,
success=True,
checksum_valid=last_result.checksum_valid if last_result else True,
processing_time=elapsed,
)
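
Composition in practice, as a sketch: each processor in the chain sees the same chunk, and the composite reports the combined timing.

from core.chunker import (
    ChecksumProcessor,
    Chunk,
    CompositeProcessor,
    SimulatedDecodeProcessor,
)

proc = CompositeProcessor([
    ChecksumProcessor(),                         # validates the time range
    SimulatedDecodeProcessor(ms_per_second=10),  # then simulates decode work
])
chunk = Chunk(sequence=0, start_time=0.0, end_time=10.0,
              source_path="media/sample.mp4", duration=10.0)
print(proc.process(chunk).success)  # True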

76
core/chunker/queue.py Normal file
View File

@@ -0,0 +1,76 @@
"""
ChunkQueue — bounded, thread-safe queue with sentinel-based shutdown.
Demonstrates: Core data structures — queue.Queue (Interview Topic 5).
"""
import queue
from typing import Optional
from .models import Chunk
# Sentinel value to signal workers to stop
_SENTINEL = object()
class ChunkQueue:
"""
Thread-safe bounded queue for chunks.
Provides backpressure: producers block when the queue is full,
preventing unbounded memory usage.
Args:
maxsize: Maximum number of chunks in the queue (default: 10)
"""
def __init__(self, maxsize: int = 10):
self._queue: queue.Queue = queue.Queue(maxsize=maxsize)
self._closed = False
self.maxsize = maxsize
def put(self, chunk: Chunk, timeout: Optional[float] = None) -> None:
"""
Add a chunk to the queue. Blocks if full (backpressure).
Args:
chunk: The chunk to enqueue
timeout: Max seconds to wait (None = block forever)
Raises:
queue.Full: If timeout expires while queue is full
"""
self._queue.put(chunk, timeout=timeout)
def get(self, timeout: Optional[float] = None) -> Optional[Chunk]:
"""
Get next chunk from queue. Returns None if queue is closed.
Args:
timeout: Max seconds to wait (None = block forever)
Returns:
Chunk or None (if sentinel received, meaning queue is closed)
Raises:
queue.Empty: If timeout expires while queue is empty
"""
item = self._queue.get(timeout=timeout)
if item is _SENTINEL:
# Re-put sentinel so other workers also see it
self._queue.put(_SENTINEL)
return None
return item
def close(self) -> None:
"""Signal all consumers to stop by inserting a sentinel."""
self._closed = True
self._queue.put(_SENTINEL)
@property
def is_closed(self) -> bool:
return self._closed
def qsize(self) -> int:
"""Current number of items in the queue (approximate)."""
return self._queue.qsize()
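
A small sketch of the shutdown semantics: because get() re-inserts the sentinel, a single close() stops every consumer.

import threading

from core.chunker import Chunk, ChunkQueue

q = ChunkQueue(maxsize=2)

def consume(name: str) -> None:
    while (item := q.get(timeout=5.0)) is not None:
        print(name, "got chunk", item.sequence)

threads = [threading.Thread(target=consume, args=(f"c{i}",)) for i in range(2)]
for t in threads:
    t.start()
q.put(Chunk(sequence=0, start_time=0.0, end_time=1.0,
            source_path="x.mp4", duration=1.0))
q.close()  # one sentinel, both consumers exit
for t in threads:
    t.join()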

141
core/chunker/worker.py Normal file
View File

@@ -0,0 +1,141 @@
"""
Worker — pulls chunks from queue, processes with retry logic.
Demonstrates:
- Exception handling and resilient code (Interview Topic 7)
- Concurrency (Interview Topic 2) — workers run in thread pool
"""
import logging
import queue
import time
from typing import Any, Callable, Dict, Optional
from .exceptions import ProcessorFailureError
from .models import Chunk, ChunkResult
from .processor import Processor
from .queue import ChunkQueue
logger = logging.getLogger(__name__)
class Worker:
"""
Processes chunks from a queue with retry and exponential backoff.
Args:
worker_id: Identifier for this worker (e.g. "worker-0")
chunk_queue: Source queue to pull chunks from
processor: Processor instance to use
max_retries: Maximum retry attempts per chunk (default: 3)
event_callback: Optional callback for real-time status updates
"""
def __init__(
self,
worker_id: str,
chunk_queue: ChunkQueue,
processor: Processor,
max_retries: int = 3,
event_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
):
self.worker_id = worker_id
self.chunk_queue = chunk_queue
self.processor = processor
self.max_retries = max_retries
self.event_callback = event_callback
self.processed_count = 0
self.error_count = 0
self.retry_count = 0
def _emit(self, event_type: str, data: Dict[str, Any]) -> None:
"""Emit an event if callback is registered."""
if self.event_callback:
self.event_callback(event_type, {"worker_id": self.worker_id, **data})
def _process_with_retry(self, chunk: Chunk) -> ChunkResult:
"""
Process a chunk with exponential backoff retry.
Retry delays: 0.1s, 0.2s, 0.4s, ... (doubles each attempt)
"""
last_error = None
for attempt in range(self.max_retries + 1):
try:
if attempt > 0:
backoff = 0.1 * (2 ** (attempt - 1))
self._emit("chunk_retry", {
"sequence": chunk.sequence,
"attempt": attempt,
"backoff": backoff,
})
time.sleep(backoff)
self.retry_count += 1
result = self.processor.process(chunk)
result.retries = attempt
result.worker_id = self.worker_id
return result
except Exception as e:
last_error = e
logger.warning(
f"{self.worker_id}: chunk {chunk.sequence} "
f"attempt {attempt + 1}/{self.max_retries + 1} failed: {e}"
)
# All retries exhausted
self.error_count += 1
self._emit("chunk_error", {
"sequence": chunk.sequence,
"error": str(last_error),
"retries": self.max_retries,
})
return ChunkResult(
sequence=chunk.sequence,
success=False,
processing_time=0.0,
error=str(last_error),
retries=self.max_retries,
worker_id=self.worker_id,
)
def run(self) -> list[ChunkResult]:
"""
Main worker loop — pull chunks and process until queue is closed.
Returns:
List of ChunkResults processed by this worker
"""
results = []
self._emit("worker_status", {"state": "idle"})
while True:
try:
chunk = self.chunk_queue.get(timeout=1.0)
except queue.Empty:
continue
if chunk is None: # Sentinel received
break
self._emit("chunk_processing", {
"sequence": chunk.sequence,
"state": "processing",
})
result = self._process_with_retry(chunk)
results.append(result)
self.processed_count += 1
self._emit("chunk_done", {
"sequence": chunk.sequence,
"success": result.success,
"processing_time": result.processing_time,
"retries": result.retries,
})
self._emit("worker_status", {"state": "stopped"})
return results
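
A sketch of the retry path using a deliberately flaky processor (hypothetical, for illustration): two transient failures, then success on the third attempt.

from core.chunker import Chunk, ChunkQueue, ChunkResult, Processor
from core.chunker.worker import Worker

class FlakyProcessor(Processor):
    """Fails twice, then succeeds (not part of the commit)."""
    def __init__(self):
        self.calls = 0
    def process(self, chunk):
        self.calls += 1
        if self.calls < 3:
            raise RuntimeError("transient failure")
        return ChunkResult(sequence=chunk.sequence, success=True)

q = ChunkQueue()
worker = Worker("worker-0", q, FlakyProcessor(), max_retries=3)
q.put(Chunk(sequence=0, start_time=0.0, end_time=1.0,
            source_path="x.mp4", duration=1.0))
q.close()
results = worker.run()
print(results[0].success, results[0].retries)  # True 2 (backoffs: 0.1s, 0.2s)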

15
core/jobs/__init__.py Normal file
View File

@@ -0,0 +1,15 @@
"""
MPR Jobs Module
Provides executor abstraction and task dispatch for job processing.
"""
from .executor import Executor, LocalExecutor, get_executor
from .task import run_job
__all__ = [
"Executor",
"LocalExecutor",
"get_executor",
"run_job",
]

View File

@@ -1,17 +1,16 @@
""" """
Executor abstraction for job processing. Executor abstraction for job processing.
Supports different backends: Determines WHERE jobs run:
- LocalExecutor: FFmpeg via Celery (default) - LocalExecutor: delegates to registered Handler (default)
- LambdaExecutor: AWS Lambda (future) - LambdaExecutor: AWS Step Functions
- GCPExecutor: Google Cloud Run Jobs
""" """
import os import os
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import Any, Callable, Dict, Optional from typing import Any, Callable, Dict, Optional
from core.ffmpeg.transcode import TranscodeConfig, transcode
# Configuration from environment # Configuration from environment
MPR_EXECUTOR = os.environ.get("MPR_EXECUTOR", "local") MPR_EXECUTOR = os.environ.get("MPR_EXECUTOR", "local")
@@ -22,26 +21,18 @@ class Executor(ABC):
     @abstractmethod
     def run(
         self,
+        job_type: str,
         job_id: str,
-        source_path: str,
-        output_path: str,
-        preset: Optional[Dict[str, Any]] = None,
-        trim_start: Optional[float] = None,
-        trim_end: Optional[float] = None,
-        duration: Optional[float] = None,
+        payload: Dict[str, Any],
         progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
     ) -> bool:
         """
-        Execute a transcode/trim job.
+        Execute a job.

         Args:
+            job_type: Type of job ("transcode", "chunk", etc.)
             job_id: Unique job identifier
-            source_path: Path to source file
-            output_path: Path for output file
-            preset: Transcode preset dict (optional, None = trim only)
-            trim_start: Trim start time in seconds (optional)
-            trim_end: Trim end time in seconds (optional)
-            duration: Source duration in seconds (for progress calculation)
+            payload: Job-type-specific configuration dict
             progress_callback: Called with (percent, details_dict)

         Returns:
@@ -51,62 +42,25 @@ class Executor(ABC):
 class LocalExecutor(Executor):
-    """Execute jobs locally using FFmpeg."""
+    """Execute jobs locally using registered handlers."""

     def run(
         self,
+        job_type: str,
         job_id: str,
-        source_path: str,
-        output_path: str,
-        preset: Optional[Dict[str, Any]] = None,
-        trim_start: Optional[float] = None,
-        trim_end: Optional[float] = None,
-        duration: Optional[float] = None,
+        payload: Dict[str, Any],
         progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
     ) -> bool:
-        """Execute job using local FFmpeg."""
+        """Execute job using the appropriate local handler."""
+        from .registry import get_handler

-        # Build config from preset or use stream copy for trim-only
-        if preset:
-            config = TranscodeConfig(
-                input_path=source_path,
-                output_path=output_path,
-                video_codec=preset.get("video_codec", "libx264"),
-                video_bitrate=preset.get("video_bitrate"),
-                video_crf=preset.get("video_crf"),
-                video_preset=preset.get("video_preset"),
-                resolution=preset.get("resolution"),
-                framerate=preset.get("framerate"),
-                audio_codec=preset.get("audio_codec", "aac"),
-                audio_bitrate=preset.get("audio_bitrate"),
-                audio_channels=preset.get("audio_channels"),
-                audio_samplerate=preset.get("audio_samplerate"),
-                container=preset.get("container", "mp4"),
-                extra_args=preset.get("extra_args", []),
-                trim_start=trim_start,
-                trim_end=trim_end,
-            )
-        else:
-            # Trim-only: stream copy
-            config = TranscodeConfig(
-                input_path=source_path,
-                output_path=output_path,
-                video_codec="copy",
-                audio_codec="copy",
-                trim_start=trim_start,
-                trim_end=trim_end,
-            )
-
-        # Wrapper to convert float percent to int
-        def wrapped_callback(percent: float, details: Dict[str, Any]) -> None:
-            if progress_callback:
-                progress_callback(int(percent), details)
-
-        return transcode(
-            config,
-            duration=duration,
-            progress_callback=wrapped_callback if progress_callback else None,
-        )
+        handler = get_handler(job_type)
+        result = handler.process(
+            job_id=job_id,
+            payload=payload,
+            progress_callback=progress_callback,
+        )
+        return result.get("status") == "completed"


 class LambdaExecutor(Executor):
@@ -123,26 +77,18 @@ class LambdaExecutor(Executor):
     def run(
         self,
+        job_type: str,
         job_id: str,
-        source_path: str,
-        output_path: str,
-        preset: Optional[Dict[str, Any]] = None,
-        trim_start: Optional[float] = None,
-        trim_end: Optional[float] = None,
-        duration: Optional[float] = None,
+        payload: Dict[str, Any],
         progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
     ) -> bool:
         """Start a Step Functions execution for this job."""
         import json

-        payload = {
+        sfn_payload = {
+            "job_type": job_type,
             "job_id": job_id,
-            "source_key": source_path,
-            "output_key": output_path,
-            "preset": preset,
-            "trim_start": trim_start,
-            "trim_end": trim_end,
-            "duration": duration,
+            **payload,
             "callback_url": self.callback_url,
             "api_key": self.callback_api_key,
         }
@@ -150,10 +96,9 @@ class LambdaExecutor(Executor):
         response = self.sfn.start_execution(
             stateMachineArn=self.state_machine_arn,
             name=f"mpr-{job_id}",
-            input=json.dumps(payload),
+            input=json.dumps(sfn_payload),
         )

-        # Store execution ARN on the job
         execution_arn = response["executionArn"]
         try:
             from core.db import update_job_fields
@@ -179,13 +124,9 @@ class GCPExecutor(Executor):
     def run(
         self,
+        job_type: str,
         job_id: str,
-        source_path: str,
-        output_path: str,
-        preset: Optional[Dict[str, Any]] = None,
-        trim_start: Optional[float] = None,
-        trim_end: Optional[float] = None,
-        duration: Optional[float] = None,
+        payload: Dict[str, Any],
         progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
     ) -> bool:
         """Trigger a Cloud Run Job execution for this job."""
@@ -193,14 +134,10 @@ class GCPExecutor(Executor):
         from google.cloud import run_v2

-        payload = {
+        gcp_payload = {
+            "job_type": job_type,
             "job_id": job_id,
-            "source_key": source_path,
-            "output_key": output_path,
-            "preset": preset,
-            "trim_start": trim_start,
-            "trim_end": trim_end,
-            "duration": duration,
+            **payload,
             "callback_url": self.callback_url,
             "api_key": self.callback_api_key,
         }
@@ -216,7 +153,8 @@ class GCPExecutor(Executor):
                 run_v2.RunJobRequest.Overrides.ContainerOverride(
                     env=[
                         run_v2.EnvVar(
-                            name="MPR_JOB_PAYLOAD", value=json.dumps(payload)
+                            name="MPR_JOB_PAYLOAD",
+                            value=json.dumps(gcp_payload),
                         )
                     ]
                 )
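
Backend selection, sketched below. This is illustrative only: the handlers registered in this commit expect S3/MinIO storage to be reachable, so it shows the shape of the call rather than a demo that runs against an empty environment.

import os
import uuid

os.environ.setdefault("MPR_EXECUTOR", "local")

from core.jobs import get_executor

ok = get_executor().run(
    job_type="transcode",
    job_id=str(uuid.uuid4()),
    payload={"source_key": "in.mp4", "output_key": "out.mp4", "preset": None},
)
print(ok)  # True when the handler reports {"status": "completed"}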

View File

@@ -0,0 +1,5 @@
"""Job handlers — type-specific execution logic."""
from .base import Handler
__all__ = ["Handler"]

View File

@@ -0,0 +1,33 @@
"""
Base Handler ABC — defines the interface for job-type-specific execution logic.
A Handler knows HOW to execute a specific kind of job (transcode, chunk, etc.).
The Executor decides WHERE to run it (local, Lambda, GCP).
"""
from abc import ABC, abstractmethod
from typing import Any, Callable, Dict, Optional
class Handler(ABC):
"""Abstract base class for job handlers."""
@abstractmethod
def process(
self,
job_id: str,
payload: Dict[str, Any],
progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
) -> Dict[str, Any]:
"""
Execute job-specific logic.
Args:
job_id: Unique job identifier
payload: Job-type-specific configuration
progress_callback: Called with (percent, details_dict)
Returns:
Result dict with at least {"status": "completed"} or raises
"""
pass
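
A minimal conforming implementation might look like this sketch (a hypothetical "noop" handler, not part of the commit):

from typing import Any, Callable, Dict, Optional

from core.jobs.handlers.base import Handler

class NoopHandler(Handler):
    """Completes immediately; handy for wiring tests."""

    def process(
        self,
        job_id: str,
        payload: Dict[str, Any],
        progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
    ) -> Dict[str, Any]:
        if progress_callback:
            progress_callback(100, {"note": "nothing to do"})
        return {"status": "completed", "job_id": job_id}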

119
core/jobs/handlers/chunk.py Normal file
View File

@@ -0,0 +1,119 @@
"""
ChunkHandler — job handler that wraps the chunker Pipeline.
Downloads source from S3/MinIO, runs FFmpeg chunking pipeline,
uploads mp4 segments + manifest back to S3/MinIO.
"""
import logging
import os
import shutil
import tempfile
from typing import Any, Callable, Dict, Optional
from core.chunker import Pipeline
from core.storage import BUCKET_IN, BUCKET_OUT, download_to_temp, upload_file
from .base import Handler
logger = logging.getLogger(__name__)
class ChunkHandler(Handler):
"""
Handles chunk processing jobs by delegating to the chunker Pipeline.
Expected payload keys:
source_key: str — S3 key of the source file in BUCKET_IN
chunk_duration: float — seconds per chunk (default: 10.0)
num_workers: int — concurrent workers (default: 4)
max_retries: int — retries per chunk (default: 3)
processor_type: str — "ffmpeg", "checksum", "simulated_decode", "composite"
queue_size: int — max queue depth (default: 10)
"""
def process(
self,
job_id: str,
payload: Dict[str, Any],
progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
) -> Dict[str, Any]:
source_key = payload["source_key"]
processor_type = payload.get("processor_type", "ffmpeg")
logger.info(f"ChunkHandler starting job {job_id}: {source_key}")
# Download source from S3/MinIO
tmp_source = download_to_temp(BUCKET_IN, source_key)
# Create temp output directory for chunks
tmp_output_dir = tempfile.mkdtemp(prefix=f"chunks-{job_id}-")
try:
def event_bridge(event_type: str, data: Dict[str, Any]) -> None:
"""Bridge pipeline events to the job progress callback."""
if progress_callback and event_type == "pipeline_complete":
progress_callback(100, data)
elif progress_callback and event_type == "chunk_done":
total = data.get("total_chunks", 1)
if total > 0:
pct = min(int((data.get("sequence", 0) + 1) / total * 100), 99)
progress_callback(pct, data)
pipeline = Pipeline(
source=tmp_source,
chunk_duration=payload.get("chunk_duration", 10.0),
num_workers=payload.get("num_workers", 4),
max_retries=payload.get("max_retries", 3),
processor_type=processor_type,
queue_size=payload.get("queue_size", 10),
event_callback=event_bridge,
output_dir=tmp_output_dir if processor_type == "ffmpeg" else None,
)
result = pipeline.run()
# Upload chunks + manifest to S3/MinIO
output_prefix = f"chunks/{job_id}"
uploaded_files = []
for chunk_file in result.chunk_files:
filename = os.path.basename(chunk_file)
output_key = f"{output_prefix}/{filename}"
upload_file(chunk_file, BUCKET_OUT, output_key)
uploaded_files.append(output_key)
logger.info(f"Uploaded {output_key}")
# Upload manifest
manifest_path = os.path.join(tmp_output_dir, "manifest.json")
if os.path.exists(manifest_path):
manifest_key = f"{output_prefix}/manifest.json"
upload_file(manifest_path, BUCKET_OUT, manifest_key)
uploaded_files.append(manifest_key)
logger.info(f"Uploaded {manifest_key}")
return {
"status": "completed" if result.failed == 0 else "completed_with_errors",
"total_chunks": result.total_chunks,
"processed": result.processed,
"failed": result.failed,
"retries": result.retries,
"elapsed_time": result.elapsed_time,
"throughput_mbps": result.throughput_mbps,
"worker_stats": result.worker_stats,
"errors": result.errors,
"chunks_in_order": result.chunks_in_order,
"output_prefix": output_prefix,
"uploaded_files": uploaded_files,
}
finally:
# Cleanup temp files
try:
os.unlink(tmp_source)
except OSError:
pass
try:
shutil.rmtree(tmp_output_dir, ignore_errors=True)
except OSError:
pass

View File

@@ -0,0 +1,104 @@
"""
TranscodeHandler — executes transcode/trim jobs using FFmpeg.
Extracted from the old tasks.py Celery task logic.
"""
import logging
import os
import tempfile
from pathlib import Path
from typing import Any, Callable, Dict, Optional
from core.ffmpeg.transcode import TranscodeConfig, transcode
from core.storage import BUCKET_IN, BUCKET_OUT, download_to_temp, upload_file
from .base import Handler
logger = logging.getLogger(__name__)
class TranscodeHandler(Handler):
"""Handle transcode and trim jobs via FFmpeg."""
def process(
self,
job_id: str,
payload: Dict[str, Any],
progress_callback: Optional[Callable[[int, Dict[str, Any]], None]] = None,
) -> Dict[str, Any]:
source_key = payload["source_key"]
output_key = payload["output_key"]
preset = payload.get("preset")
trim_start = payload.get("trim_start")
trim_end = payload.get("trim_end")
duration = payload.get("duration")
logger.info(f"TranscodeHandler: {source_key} -> {output_key}")
# Download source
tmp_source = download_to_temp(BUCKET_IN, source_key)
ext = Path(output_key).suffix or ".mp4"
fd, tmp_output = tempfile.mkstemp(suffix=ext)
os.close(fd)
try:
if preset:
config = TranscodeConfig(
input_path=tmp_source,
output_path=tmp_output,
video_codec=preset.get("video_codec", "libx264"),
video_bitrate=preset.get("video_bitrate"),
video_crf=preset.get("video_crf"),
video_preset=preset.get("video_preset"),
resolution=preset.get("resolution"),
framerate=preset.get("framerate"),
audio_codec=preset.get("audio_codec", "aac"),
audio_bitrate=preset.get("audio_bitrate"),
audio_channels=preset.get("audio_channels"),
audio_samplerate=preset.get("audio_samplerate"),
container=preset.get("container", "mp4"),
extra_args=preset.get("extra_args", []),
trim_start=trim_start,
trim_end=trim_end,
)
else:
config = TranscodeConfig(
input_path=tmp_source,
output_path=tmp_output,
video_codec="copy",
audio_codec="copy",
trim_start=trim_start,
trim_end=trim_end,
)
def wrapped_callback(percent: float, details: Dict[str, Any]) -> None:
if progress_callback:
progress_callback(int(percent), details)
success = transcode(
config,
duration=duration,
progress_callback=wrapped_callback if progress_callback else None,
)
if not success:
raise RuntimeError("Transcode returned False")
# Upload result
logger.info(f"Uploading {output_key} to {BUCKET_OUT}")
upload_file(tmp_output, BUCKET_OUT, output_key)
return {
"status": "completed",
"job_id": job_id,
"output_key": output_key,
}
finally:
for f in [tmp_source, tmp_output]:
try:
os.unlink(f)
except OSError:
pass

33
core/jobs/registry.py Normal file
View File

@@ -0,0 +1,33 @@
"""
Handler registry — maps job_type strings to Handler classes.
"""
from typing import Dict, Type
from .handlers.base import Handler
_handlers: Dict[str, Type[Handler]] = {}
def register_handler(job_type: str, handler_class: Type[Handler]) -> None:
"""Register a handler class for a job type."""
_handlers[job_type] = handler_class
def get_handler(job_type: str) -> Handler:
"""Get an instantiated handler for a job type."""
if job_type not in _handlers:
raise ValueError(f"Unknown job type: {job_type}")
return _handlers[job_type]()
def _register_defaults() -> None:
"""Register built-in handlers."""
from .handlers.chunk import ChunkHandler
from .handlers.transcode import TranscodeHandler
register_handler("transcode", TranscodeHandler)
register_handler("chunk", ChunkHandler)
_register_defaults()
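
Third-party job types can plug in the same way the defaults do, as in this sketch (EchoHandler is hypothetical):

from core.jobs.handlers.base import Handler
from core.jobs.registry import get_handler, register_handler

class EchoHandler(Handler):
    """Returns its payload; used only for this sketch."""
    def process(self, job_id, payload, progress_callback=None):
        return {"status": "completed", "job_id": job_id, "echo": payload}

register_handler("echo", EchoHandler)
print(get_handler("echo").process(job_id="job-1", payload={"k": "v"}))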

64
core/jobs/task.py Normal file
View File

@@ -0,0 +1,64 @@
"""
Celery task for job processing.
Generic dispatcher — routes to the appropriate handler based on job_type.
"""
import logging
from typing import Any, Dict
from celery import shared_task
from core.rpc.server import update_job_progress
logger = logging.getLogger(__name__)
@shared_task(bind=True, max_retries=3, default_retry_delay=60)
def run_job(
self,
job_type: str,
job_id: str,
payload: Dict[str, Any],
) -> Dict[str, Any]:
"""
Generic Celery task — dispatches to the registered handler for job_type.
"""
logger.info(f"Starting {job_type} job {job_id}")
update_job_progress(job_id, progress=0, status="processing")
def progress_callback(percent: int, details: Dict[str, Any]) -> None:
update_job_progress(
job_id,
progress=percent,
current_time=details.get("time", 0.0),
status="processing",
)
try:
from .registry import get_handler
handler = get_handler(job_type)
result = handler.process(
job_id=job_id,
payload=payload,
progress_callback=progress_callback,
)
logger.info(f"Job {job_id} completed successfully")
update_job_progress(job_id, progress=100, status="completed")
return result
except Exception as e:
logger.exception(f"Job {job_id} failed: {e}")
update_job_progress(job_id, progress=0, status="failed", error=str(e))
if self.request.retries < self.max_retries:
raise self.retry(exc=e)
return {
"status": "failed",
"job_id": job_id,
"error": str(e),
}
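
Callers enqueue work in the same shape the GraphQL mutation uses, as in this sketch (requires a running Celery broker; the object key is a stand-in):

import uuid

from core.jobs.task import run_job

result = run_job.delay(
    job_type="chunk",
    job_id=str(uuid.uuid4()),
    payload={
        "source_key": "uploads/sample.mp4",
        "chunk_duration": 10.0,
        "num_workers": 4,
        "max_retries": 3,
        "processor_type": "ffmpeg",
    },
)
print(result.id)  # stored on the job as celery_task_id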

View File

@@ -59,17 +59,24 @@ class WorkerServicer(worker_pb2_grpc.WorkerServiceServicer):
         # Dispatch to Celery if available
         if self.celery_app:
-            from core.task.tasks import run_transcode_job
+            from core.jobs.task import run_job

-            task = run_transcode_job.delay(
-                job_id=job_id,
-                source_path=request.source_path,
-                output_path=request.output_path,
-                preset=preset,
-                trim_start=request.trim_start
-                if request.HasField("trim_start")
-                else None,
-                trim_end=request.trim_end if request.HasField("trim_end") else None,
+            payload = {
+                "source_key": request.source_path,
+                "output_key": request.output_path,
+                "preset": preset,
+                "trim_start": request.trim_start
+                if request.HasField("trim_start")
+                else None,
+                "trim_end": request.trim_end
+                if request.HasField("trim_end")
+                else None,
+            }
+            task = run_job.delay(
+                job_type="transcode",
+                job_id=job_id,
+                payload=payload,
             )
             _active_jobs[job_id]["celery_task_id"] = task.id
@@ -197,11 +204,14 @@ def update_job_progress(
    speed: float = 0.0,
    status: str = "processing",
    error: str = None,
+   **extra,
) -> None:
    """
    Update job progress (called from worker tasks).

    Updates both the in-memory gRPC state and the Django database.
+   Extra kwargs are stored for chunker-specific fields (total_chunks,
+   processed_chunks, failed_chunks, throughput_mbps, etc.).
    """
    if job_id in _active_jobs:
        _active_jobs[job_id].update(
@@ -212,6 +222,7 @@ def update_job_progress(
"speed": speed, "speed": speed,
"status": status, "status": status,
"error": error, "error": error,
**extra,
} }
) )
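
Because **extra flows into _active_jobs unchanged, a chunk job can surface its own counters to the progress stream with no further server changes. A hedged example call; the field names follow the ChunkJob model added in this commit:

from core.rpc.server import update_job_progress

job_id = "00000000-0000-0000-0000-000000000000"  # placeholder
update_job_progress(
    job_id,
    progress=41.7,
    status="processing",
    total_chunks=24,        # chunker-specific fields travel via **extra
    processed_chunks=10,
    failed_chunks=0,
    throughput_mbps=93.5,
)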

View File

@@ -23,12 +23,12 @@ from .grpc import (
    ProgressUpdate,
    WorkerStatus,
)
-from .jobs import JobStatus, TranscodeJob
+from .jobs import ChunkJob, ChunkJobStatus, JobStatus, TranscodeJob
from .media import AssetStatus, MediaAsset
from .presets import BUILTIN_PRESETS, TranscodePreset

# Core domain models - generates Django, Pydantic, TypeScript
-DATACLASSES = [MediaAsset, TranscodePreset, TranscodeJob]
+DATACLASSES = [MediaAsset, TranscodePreset, TranscodeJob, ChunkJob]

# API request/response models - generates TypeScript only (no Django)
# WorkerStatus from grpc.py is reused here
@@ -42,7 +42,7 @@ API_MODELS = [
]

# Status enums - included in generated code
-ENUMS = [AssetStatus, JobStatus]
+ENUMS = [AssetStatus, JobStatus, ChunkJobStatus]

# gRPC messages - generates Proto
GRPC_MESSAGES = [
@@ -61,6 +61,7 @@ __all__ = [
"MediaAsset", "MediaAsset",
"TranscodePreset", "TranscodePreset",
"TranscodeJob", "TranscodeJob",
"ChunkJob",
# API Models # API Models
"CreateJobRequest", "CreateJobRequest",
"UpdateAssetRequest", "UpdateAssetRequest",
@@ -70,6 +71,7 @@ __all__ = [
# Enums
    "AssetStatus",
    "JobStatus",
+   "ChunkJobStatus",
    # gRPC
    "GRPC_SERVICE",
    "JobRequest",

View File

@@ -1,13 +1,14 @@
""" """
TranscodeJob Schema Definition Job Schema Definitions
Source of truth for job data model. Source of truth for job data models.
TranscodeJob and ChunkJob share common lifecycle fields by convention.
""" """
from dataclasses import dataclass, field from dataclasses import dataclass, field
from datetime import datetime from datetime import datetime
from enum import Enum from enum import Enum
from typing import Any, Dict, Optional from typing import Any, Dict, List, Optional
from uuid import UUID from uuid import UUID
@@ -77,3 +78,56 @@ class TranscodeJob:
        return self.preset_id is None and (
            self.trim_start is not None or self.trim_end is not None
        )
class ChunkJobStatus(str, Enum):
"""Status of a chunk pipeline job."""
PENDING = "pending"
CHUNKING = "chunking"
PROCESSING = "processing"
COLLECTING = "collecting"
COMPLETED = "completed"
FAILED = "failed"
CANCELLED = "cancelled"
@dataclass
class ChunkJob:
"""
A chunk pipeline job — splits a media file into chunks and processes them
through a concurrent worker pool.
"""
id: UUID
# Input
source_asset_id: UUID
# Configuration
chunk_duration: float = 10.0 # seconds
num_workers: int = 4
max_retries: int = 3
processor_type: str = "ffmpeg" # "ffmpeg", "checksum", "simulated_decode", "composite"
# Status & Progress
status: ChunkJobStatus = ChunkJobStatus.PENDING
progress: float = 0.0 # 0.0 to 100.0
total_chunks: int = 0
processed_chunks: int = 0
failed_chunks: int = 0
retry_count: int = 0
error_message: Optional[str] = None
# Result stats
throughput_mbps: Optional[float] = None
elapsed_seconds: Optional[float] = None
# Worker tracking
celery_task_id: Optional[str] = None
priority: int = 0 # Lower = higher priority
# Timestamps
created_at: Optional[datetime] = None
started_at: Optional[datetime] = None
completed_at: Optional[datetime] = None

core/task/__init__.py
View File

@@ -1,15 +0,0 @@
"""
MPR Worker Module
Provides executor abstraction and Celery tasks for job processing.
"""
from .executor import Executor, LocalExecutor, get_executor
from .tasks import run_transcode_job
__all__ = [
"Executor",
"LocalExecutor",
"get_executor",
"run_transcode_job",
]

core/task/tasks.py
View File

@@ -1,105 +0,0 @@
"""
Celery tasks for job processing.
"""
import logging
import os
from typing import Any, Dict, Optional
from celery import shared_task
from core.storage import BUCKET_IN, BUCKET_OUT, download_to_temp, upload_file
from core.rpc.server import update_job_progress
from core.task.executor import get_executor
logger = logging.getLogger(__name__)
@shared_task(bind=True, queue="transcode", max_retries=3, default_retry_delay=60)
def run_transcode_job(
self,
job_id: str,
source_key: str,
output_key: str,
preset: Optional[Dict[str, Any]] = None,
trim_start: Optional[float] = None,
trim_end: Optional[float] = None,
duration: Optional[float] = None,
) -> Dict[str, Any]:
"""
Celery task to run a transcode/trim job.
Downloads source from S3, runs FFmpeg, uploads result to S3.
"""
logger.info(f"Starting job {job_id}: {source_key} -> {output_key}")
update_job_progress(job_id, progress=0, status="processing")
# Download source from S3 to temp file
logger.info(f"Downloading {source_key} from {BUCKET_IN}")
tmp_source = download_to_temp(BUCKET_IN, source_key)
# Create temp output path with same extension
import tempfile
from pathlib import Path
ext = Path(output_key).suffix or ".mp4"
fd, tmp_output = tempfile.mkstemp(suffix=ext)
os.close(fd)
def progress_callback(percent: int, details: Dict[str, Any]) -> None:
update_job_progress(
job_id,
progress=percent,
current_time=details.get("time", 0.0),
status="processing",
)
try:
executor = get_executor()
success = executor.run(
job_id=job_id,
source_path=tmp_source,
output_path=tmp_output,
preset=preset,
trim_start=trim_start,
trim_end=trim_end,
duration=duration,
progress_callback=progress_callback,
)
if success:
# Upload result to S3
logger.info(f"Uploading {output_key} to {BUCKET_OUT}")
upload_file(tmp_output, BUCKET_OUT, output_key)
logger.info(f"Job {job_id} completed successfully")
update_job_progress(job_id, progress=100, status="completed")
return {
"status": "completed",
"job_id": job_id,
"output_key": output_key,
}
else:
raise RuntimeError("Executor returned False")
except Exception as e:
logger.exception(f"Job {job_id} failed: {e}")
update_job_progress(job_id, progress=0, status="failed", error=str(e))
if self.request.retries < self.max_retries:
raise self.retry(exc=e)
return {
"status": "failed",
"job_id": job_id,
"error": str(e),
}
finally:
# Clean up temp files
for f in [tmp_source, tmp_output]:
try:
os.unlink(f)
except OSError:
pass

ctrl/Dockerfile
View File

@@ -5,6 +5,7 @@ WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

-COPY . .
+# No COPY . . — code is volume-mounted in dev (..:/app)
+# This image only provides the Python runtime + dependencies

CMD ["python", "admin/manage.py", "runserver", "0.0.0.0:8000"]

View File

@@ -9,6 +9,7 @@ WORKDIR /app
COPY requirements.txt requirements-worker.txt ./
RUN pip install --no-cache-dir -r requirements-worker.txt

-COPY . .
+# No COPY . . — code is volume-mounted in dev (..:/app)
+# This image only provides Python runtime + FFmpeg + dependencies

CMD ["celery", "-A", "admin.mpr", "worker", "--loglevel=info"]

ctrl/docker-compose.yml
View File

@@ -17,6 +17,20 @@ x-healthcheck-defaults: &healthcheck-defaults
    timeout: 5s
    retries: 5

+x-python-service: &python-service
+  build:
+    context: ..
+    dockerfile: ctrl/Dockerfile
+  volumes:
+    - ..:/app
+  environment:
+    <<: *common-env
+  depends_on:
+    postgres:
+      condition: service_healthy
+    redis:
+      condition: service_healthy
+
services:
  # =============================================================================
  # Infrastructure
@@ -92,47 +106,25 @@ services:
  # =============================================================================

  django:
-    build:
-      context: ..
-      dockerfile: ctrl/Dockerfile
+    <<: *python-service
    command: >
      bash -c "python admin/manage.py migrate &&
               python admin/manage.py loadbuiltins || true &&
               python admin/manage.py runserver 0.0.0.0:8701"
    ports:
      - "8701:8701"
-    environment:
-      <<: *common-env
-    volumes:
-      - ..:/app
-    depends_on:
-      postgres:
-        condition: service_healthy
-      redis:
-        condition: service_healthy

  fastapi:
-    build:
-      context: ..
-      dockerfile: ctrl/Dockerfile
+    <<: *python-service
    command: uvicorn core.api.main:app --host 0.0.0.0 --port 8702 --reload
    ports:
      - "8702:8702"
    environment:
      <<: *common-env
      DJANGO_ALLOW_ASYNC_UNSAFE: "true"
-    volumes:
-      - ..:/app
-    depends_on:
-      postgres:
-        condition: service_healthy
-      redis:
-        condition: service_healthy

  grpc:
-    build:
-      context: ..
-      dockerfile: ctrl/Dockerfile
+    <<: *python-service
    command: python -m core.rpc.server
    ports:
      - "50052:50051"
@@ -140,13 +132,6 @@ services:
      <<: *common-env
      GRPC_PORT: 50051
      GRPC_MAX_WORKERS: 10
-    volumes:
-      - ..:/app
-    depends_on:
-      postgres:
-        condition: service_healthy
-      redis:
-        condition: service_healthy

  celery:
    build:
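
The x-python-service block leans on YAML merge keys: << pastes the anchored mapping into each service at parse time, and keys the service sets explicitly override merged ones. A quick sanity check of the semantics with PyYAML (assumed installed), which implements the same YAML 1.1 merge behavior Compose supports:

import yaml

doc = """
x-base: &base
  volumes:
    - ..:/app
  environment:
    TZ: UTC

services:
  django:
    <<: *base
    command: runserver
"""

cfg = yaml.safe_load(doc)
# The loader resolves the merge, so django carries its own copy of the shared keys:
print(cfg["services"]["django"])
# {'volumes': ['..:/app'], 'environment': {'TZ': 'UTC'}, 'command': 'runserver'}  (key order may vary)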

View File

@@ -14,8 +14,8 @@ COPY ctrl/lambda/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
-COPY core/task/lambda_handler.py ${LAMBDA_TASK_ROOT}/core/task/lambda_handler.py
-COPY core/task/__init__.py ${LAMBDA_TASK_ROOT}/core/task/__init__.py
+COPY core/jobs/lambda_handler.py ${LAMBDA_TASK_ROOT}/core/jobs/lambda_handler.py
+COPY core/jobs/__init__.py ${LAMBDA_TASK_ROOT}/core/jobs/__init__.py
COPY core/ ${LAMBDA_TASK_ROOT}/core/

-CMD ["core.task.lambda_handler.handler"]
+CMD ["core.jobs.lambda_handler.handler"]

0
tests/__init__.py Normal file
View File

tests/chunker/__init__.py Normal file
View File

76
tests/chunker/conftest.py Normal file
View File

@@ -0,0 +1,76 @@
"""
Shared fixtures for chunker tests.
Demonstrates: TDD and unit testing best practices (Interview Topic 8) — fixtures, temp files.
"""
import os
import tempfile
import pytest
from core.chunker.models import Chunk, ChunkResult
@pytest.fixture
def temp_file():
"""Create a temporary file with known content, cleaned up after test."""
files = []
def _create(content: bytes = b"x" * 4096):
f = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
f.write(content)
f.close()
files.append(f.name)
return f.name
yield _create
for path in files:
if os.path.exists(path):
os.unlink(path)
@pytest.fixture
def sample_chunk(temp_file):
"""Create a sample time-based Chunk with valid time range."""
path = temp_file(b"x" * 1024)
return Chunk(
sequence=0,
start_time=0.0,
end_time=10.0,
source_path=path,
duration=10.0,
)
@pytest.fixture
def make_chunk(temp_file):
"""Factory fixture for creating time-based chunks with specific sequence numbers."""
path = temp_file(b"x" * 1024)
def _make(sequence: int, duration: float = 10.0) -> Chunk:
start = sequence * duration
return Chunk(
sequence=sequence,
start_time=start,
end_time=start + duration,
source_path=path,
duration=duration,
)
return _make
@pytest.fixture
def make_result():
"""Factory fixture for creating ChunkResults."""
def _make(sequence: int, success: bool = True, processing_time: float = 0.01) -> ChunkResult:
return ChunkResult(
sequence=sequence,
success=success,
processing_time=processing_time,
)
return _make

tests/chunker/test_chunker.py Normal file
View File

@@ -0,0 +1,149 @@
"""
Tests for Chunker — time-based segmentation, chunk counts, sequence numbers, generator behavior.
Demonstrates: TDD (Interview Topic 8) — parametrized tests, edge cases, mocking.
"""
from unittest.mock import patch, MagicMock
import pytest
from core.chunker import Chunker
from core.chunker.exceptions import ChunkReadError
def mock_probe(duration):
"""Create a mock probe_file that returns the given duration."""
result = MagicMock()
result.duration = duration
return result
class TestChunker:
@patch("core.chunker.chunker.probe_file")
def test_basic_chunking(self, mock_pf, temp_file):
"""File splits into expected number of time-based chunks."""
path = temp_file(b"x" * 1000)
mock_pf.return_value = mock_probe(30.0)
chunker = Chunker(path, chunk_duration=10.0)
chunks = list(chunker.chunks())
assert len(chunks) == 3
assert chunks[0].start_time == 0.0
assert chunks[0].end_time == 10.0
assert chunks[0].duration == 10.0
assert chunks[1].start_time == 10.0
assert chunks[2].start_time == 20.0
@patch("core.chunker.chunker.probe_file")
def test_sequence_numbers(self, mock_pf, temp_file):
"""Chunks have sequential sequence numbers starting at 0."""
path = temp_file(b"x" * 100)
mock_pf.return_value = mock_probe(40.0)
chunker = Chunker(path, chunk_duration=10.0)
chunks = list(chunker.chunks())
sequences = [c.sequence for c in chunks]
assert sequences == [0, 1, 2, 3]
@patch("core.chunker.chunker.probe_file")
def test_time_ranges(self, mock_pf, temp_file):
"""Each chunk has correct start_time and end_time."""
path = temp_file(b"x" * 100)
mock_pf.return_value = mock_probe(25.0)
chunker = Chunker(path, chunk_duration=10.0)
chunks = list(chunker.chunks())
assert chunks[0].start_time == 0.0
assert chunks[0].end_time == 10.0
assert chunks[1].start_time == 10.0
assert chunks[1].end_time == 20.0
assert chunks[2].start_time == 20.0
assert chunks[2].end_time == 25.0 # last chunk shorter
assert chunks[2].duration == 5.0
@patch("core.chunker.chunker.probe_file")
def test_expected_chunks_property(self, mock_pf, temp_file):
"""expected_chunks calculates correctly before iteration."""
path = temp_file(b"x" * 100)
mock_pf.return_value = mock_probe(25.0)
chunker = Chunker(path, chunk_duration=10.0)
assert chunker.expected_chunks == 3 # ceil(25/10)
@patch("core.chunker.chunker.probe_file")
def test_source_path_on_chunks(self, mock_pf, temp_file):
"""Each chunk carries the source file path."""
path = temp_file(b"x" * 100)
mock_pf.return_value = mock_probe(10.0)
chunker = Chunker(path, chunk_duration=10.0)
chunks = list(chunker.chunks())
assert all(c.source_path == path for c in chunks)
def test_file_not_found(self):
"""Non-existent file raises ChunkReadError."""
with pytest.raises(ChunkReadError, match="File not found"):
Chunker("/nonexistent/file.mp4")
@patch("core.chunker.chunker.probe_file")
def test_invalid_chunk_duration(self, mock_pf, temp_file):
"""Zero or negative chunk_duration raises ValueError."""
path = temp_file(b"x" * 100)
with pytest.raises(ValueError, match="chunk_duration must be positive"):
Chunker(path, chunk_duration=0)
with pytest.raises(ValueError, match="chunk_duration must be positive"):
Chunker(path, chunk_duration=-1)
@patch("core.chunker.chunker.probe_file")
def test_generator_laziness(self, mock_pf, temp_file):
"""Chunks are yielded lazily, not pre-loaded."""
path = temp_file(b"x" * 100)
mock_pf.return_value = mock_probe(30.0)
chunker = Chunker(path, chunk_duration=10.0)
gen = chunker.chunks()
first = next(gen)
assert first.sequence == 0
# Generator is not exhausted — remaining chunks still pending
@pytest.mark.parametrize("duration,chunk_dur,expected", [
(10.0, 10.0, 1),
(10.1, 10.0, 2),
(1.0, 1.0, 1),
(100.0, 1.0, 100),
(5.0, 100.0, 1),
])
@patch("core.chunker.chunker.probe_file")
def test_expected_chunks_parametrized(self, mock_pf, temp_file, duration, chunk_dur, expected):
"""Parametrized: various duration/chunk_duration combos."""
path = temp_file(b"x" * 100)
mock_pf.return_value = mock_probe(duration)
chunker = Chunker(path, chunk_duration=chunk_dur)
assert chunker.expected_chunks == expected
@patch("core.chunker.chunker.probe_file")
def test_exact_multiple(self, mock_pf, temp_file):
"""Duration exactly divisible by chunk_duration."""
path = temp_file(b"x" * 100)
mock_pf.return_value = mock_probe(30.0)
chunker = Chunker(path, chunk_duration=10.0)
chunks = list(chunker.chunks())
assert len(chunks) == 3
assert all(c.duration == 10.0 for c in chunks)
@patch("core.chunker.chunker.probe_file")
def test_probe_failure(self, mock_pf, temp_file):
"""Probe failure raises ChunkReadError."""
path = temp_file(b"x" * 100)
mock_pf.side_effect = Exception("ffprobe failed")
with pytest.raises(ChunkReadError, match="Failed to probe"):
Chunker(path, chunk_duration=10.0)
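
These tests pin down Chunker's contract without showing the implementation. A sketch consistent with them follows: validation before probing, ceil-based expected_chunks, a lazy generator, and a shorter final chunk. The probe_file import path is an assumption (the tests patch core.chunker.chunker.probe_file), and the real core/chunker/chunker.py may differ:

import math
import os
from typing import Iterator

from core.chunker.exceptions import ChunkReadError
from core.chunker.ffmpeg import probe_file  # assumed module for probe_file
from core.chunker.models import Chunk


class Chunker:
    """Sketch: time-based segmentation of a media file."""

    def __init__(self, source_path: str, chunk_duration: float = 10.0):
        if chunk_duration <= 0:
            raise ValueError("chunk_duration must be positive")
        if not os.path.exists(source_path):
            raise ChunkReadError(f"File not found: {source_path}")
        self.source_path = source_path
        self.chunk_duration = chunk_duration
        try:
            self.duration = probe_file(source_path).duration
        except Exception as exc:
            raise ChunkReadError(f"Failed to probe {source_path}: {exc}") from exc

    @property
    def expected_chunks(self) -> int:
        return math.ceil(self.duration / self.chunk_duration)

    def chunks(self) -> Iterator[Chunk]:
        # Lazy: chunks are produced on demand; the last one may be shorter.
        for seq in range(self.expected_chunks):
            start = seq * self.chunk_duration
            end = min(start + self.chunk_duration, self.duration)
            yield Chunk(sequence=seq, start_time=start, end_time=end,
                        source_path=self.source_path, duration=end - start)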

tests/chunker/test_collector.py Normal file
View File

@@ -0,0 +1,103 @@
"""
Tests for ResultCollector — ordered reassembly, out-of-order buffering, duplicates.
Demonstrates: TDD (Interview Topic 8) — testing algorithms (heapq reassembly).
"""
import pytest
from core.chunker.collector import ResultCollector
from core.chunker.exceptions import ReassemblyError
class TestResultCollector:
def test_in_order_emission(self, make_result):
"""Results arriving in order are emitted immediately."""
collector = ResultCollector(total_chunks=3)
emitted = collector.add(make_result(0))
assert len(emitted) == 1
assert emitted[0].sequence == 0
emitted = collector.add(make_result(1))
assert len(emitted) == 1
emitted = collector.add(make_result(2))
assert len(emitted) == 1
assert collector.is_complete
def test_out_of_order_buffering(self, make_result):
"""Out-of-order results are buffered until gaps fill."""
collector = ResultCollector(total_chunks=3)
# Arrive: 2, 0, 1
emitted = collector.add(make_result(2))
assert len(emitted) == 0
assert collector.buffered_count == 1
emitted = collector.add(make_result(0))
assert len(emitted) == 1 # Only 0 emitted, 1 still missing
emitted = collector.add(make_result(1))
assert len(emitted) == 2 # 1 and 2 now emittable
assert collector.is_complete
def test_reverse_order(self, make_result):
"""All results arrive in reverse — only last add emits everything."""
collector = ResultCollector(total_chunks=4)
for seq in [3, 2, 1]:
emitted = collector.add(make_result(seq))
assert len(emitted) == 0
emitted = collector.add(make_result(0))
assert len(emitted) == 4
assert collector.is_complete
def test_duplicate_raises(self, make_result):
"""Duplicate sequence number raises ReassemblyError."""
collector = ResultCollector(total_chunks=3)
collector.add(make_result(0))
with pytest.raises(ReassemblyError, match="Duplicate"):
collector.add(make_result(0))
def test_emitted_count(self, make_result):
"""emitted_count tracks correctly."""
collector = ResultCollector(total_chunks=3)
assert collector.emitted_count == 0
collector.add(make_result(0))
assert collector.emitted_count == 1
collector.add(make_result(2)) # buffered
assert collector.emitted_count == 1
collector.add(make_result(1)) # releases 1 and 2
assert collector.emitted_count == 3
def test_get_ordered_results(self, make_result):
"""get_ordered_results returns all emitted results in order."""
collector = ResultCollector(total_chunks=3)
collector.add(make_result(2))
collector.add(make_result(0))
collector.add(make_result(1))
ordered = collector.get_ordered_results()
assert [r.sequence for r in ordered] == [0, 1, 2]
def test_avg_processing_time(self, make_result):
"""Average processing time from sliding window."""
collector = ResultCollector(total_chunks=2)
collector.add(make_result(0, processing_time=0.1))
collector.add(make_result(1, processing_time=0.3))
assert abs(collector.avg_processing_time - 0.2) < 0.001
def test_not_complete_when_partial(self, make_result):
"""is_complete is False until all chunks emitted."""
collector = ResultCollector(total_chunks=3)
collector.add(make_result(0))
collector.add(make_result(1))
assert not collector.is_complete
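
A minimal ResultCollector sketch that satisfies these tests, built on a min-heap keyed by sequence as the module docstring suggests; the real collector likely tracks more:

import heapq
from collections import deque
from typing import List

from core.chunker.exceptions import ReassemblyError
from core.chunker.models import ChunkResult


class ResultCollector:
    """Sketch: emit results in sequence order, buffering gaps on a heap."""

    def __init__(self, total_chunks: int, window: int = 50):
        self.total_chunks = total_chunks
        self._heap: list = []                # min-heap of (sequence, result)
        self._seen: set = set()
        self._next = 0                       # next sequence eligible for emission
        self._ordered: List[ChunkResult] = []
        self._times = deque(maxlen=window)   # sliding window for avg_processing_time

    def add(self, result: ChunkResult) -> List[ChunkResult]:
        if result.sequence in self._seen:
            raise ReassemblyError(f"Duplicate sequence: {result.sequence}")
        self._seen.add(result.sequence)
        self._times.append(result.processing_time)
        heapq.heappush(self._heap, (result.sequence, result))
        emitted: List[ChunkResult] = []
        # Drain while the heap's smallest sequence is the next contiguous one.
        while self._heap and self._heap[0][0] == self._next:
            emitted.append(heapq.heappop(self._heap)[1])
            self._next += 1
        self._ordered.extend(emitted)
        return emitted

    @property
    def buffered_count(self) -> int:
        return len(self._heap)

    @property
    def emitted_count(self) -> int:
        return len(self._ordered)

    @property
    def is_complete(self) -> bool:
        return self._next >= self.total_chunks

    @property
    def avg_processing_time(self) -> float:
        return sum(self._times) / len(self._times) if self._times else 0.0

    def get_ordered_results(self) -> List[ChunkResult]:
        return list(self._ordered)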

tests/chunker/test_exceptions.py Normal file
View File

@@ -0,0 +1,69 @@
"""
Tests for exception hierarchy — catch patterns, attributes.
Demonstrates: TDD (Interview Topic 8) — testing exception design.
"""
import pytest
from core.chunker.exceptions import (
ChunkChecksumError,
ChunkError,
ChunkReadError,
PipelineError,
ProcessingError,
ProcessorFailureError,
ProcessorTimeoutError,
ReassemblyError,
)
class TestExceptionHierarchy:
"""Verify the exception class hierarchy and catch patterns."""
def test_pipeline_error_is_base(self):
"""All chunker exceptions inherit from PipelineError."""
assert issubclass(ChunkError, PipelineError)
assert issubclass(ProcessingError, PipelineError)
assert issubclass(ReassemblyError, PipelineError)
def test_chunk_error_subtypes(self):
"""ChunkReadError and ChunkChecksumError are ChunkErrors."""
assert issubclass(ChunkReadError, ChunkError)
assert issubclass(ChunkChecksumError, ChunkError)
def test_processing_error_subtypes(self):
"""ProcessorTimeoutError and ProcessorFailureError are ProcessingErrors."""
assert issubclass(ProcessorTimeoutError, ProcessingError)
assert issubclass(ProcessorFailureError, ProcessingError)
def test_catch_pipeline_error_catches_all(self):
"""Catching PipelineError catches any subtype."""
with pytest.raises(PipelineError):
raise ChunkReadError("test")
with pytest.raises(PipelineError):
raise ReassemblyError("test")
def test_checksum_error_attributes(self):
"""ChunkChecksumError carries sequence, expected, actual."""
err = ChunkChecksumError(sequence=5, expected="aaa", actual="bbb")
assert err.sequence == 5
assert err.expected == "aaa"
assert err.actual == "bbb"
assert "5" in str(err)
def test_timeout_error_attributes(self):
"""ProcessorTimeoutError carries sequence and timeout."""
err = ProcessorTimeoutError(sequence=3, timeout=30.0)
assert err.sequence == 3
assert err.timeout == 30.0
def test_failure_error_attributes(self):
"""ProcessorFailureError carries sequence, retries, original error."""
original = RuntimeError("boom")
err = ProcessorFailureError(sequence=1, retries=3, original_error=original)
assert err.sequence == 1
assert err.retries == 3
assert err.original_error is original
assert "boom" in str(err)

tests/chunker/test_pipeline.py Normal file
View File

@@ -0,0 +1,144 @@
"""
Tests for Pipeline — end-to-end orchestration, stats, error handling.
Demonstrates: TDD (Interview Topic 8) — integration testing with mocked FFmpeg probe.
"""
from unittest.mock import MagicMock, patch
import pytest
from core.chunker import Pipeline
from core.chunker.exceptions import PipelineError
def mock_probe(duration):
"""Create a mock ProbeResult with the given duration."""
result = MagicMock()
result.duration = duration
return result
class TestPipeline:
@patch("core.chunker.chunker.probe_file")
def test_end_to_end(self, mock_pf, temp_file):
"""Full pipeline processes a file successfully."""
path = temp_file(b"x" * 4096)
mock_pf.return_value = mock_probe(40.0)
result = Pipeline(
source=path,
chunk_duration=10.0,
num_workers=2,
processor_type="checksum",
).run()
assert result.total_chunks == 4
assert result.processed == 4
assert result.failed == 0
assert result.elapsed_time > 0
assert result.chunks_in_order is True
@patch("core.chunker.chunker.probe_file")
def test_throughput_calculated(self, mock_pf, temp_file):
"""Pipeline calculates throughput."""
path = temp_file(b"x" * 10000)
mock_pf.return_value = mock_probe(30.0)
result = Pipeline(source=path, chunk_duration=10.0, num_workers=2).run()
assert result.throughput_mbps > 0
@patch("core.chunker.chunker.probe_file")
def test_worker_stats(self, mock_pf, temp_file):
"""Pipeline reports per-worker stats."""
path = temp_file(b"x" * 4000)
mock_pf.return_value = mock_probe(40.0)
result = Pipeline(
source=path, chunk_duration=10.0, num_workers=2
).run()
assert len(result.worker_stats) == 2
for worker_id, stats in result.worker_stats.items():
assert "processed" in stats
assert "errors" in stats
def test_nonexistent_file(self):
"""Non-existent file raises PipelineError."""
with pytest.raises(PipelineError):
Pipeline(source="/nonexistent/file.mp4").run()
@patch("core.chunker.chunker.probe_file")
def test_event_callback(self, mock_pf, temp_file):
"""Pipeline emits events through callback."""
path = temp_file(b"x" * 2048)
mock_pf.return_value = mock_probe(20.0)
events = []
def capture(event_type, data):
events.append(event_type)
Pipeline(
source=path,
chunk_duration=10.0,
num_workers=1,
event_callback=capture,
).run()
assert "pipeline_start" in events
assert "pipeline_complete" in events
assert "chunk_queued" in events
@patch("core.chunker.chunker.probe_file")
def test_simulated_decode_processor(self, mock_pf, temp_file):
"""Pipeline works with simulated_decode processor."""
path = temp_file(b"x" * 2048)
mock_pf.return_value = mock_probe(20.0)
result = Pipeline(
source=path,
chunk_duration=10.0,
num_workers=2,
processor_type="simulated_decode",
).run()
assert result.total_chunks == 2
assert result.failed == 0
@patch("core.chunker.chunker.probe_file")
def test_single_chunk_file(self, mock_pf, temp_file):
"""Duration shorter than chunk_duration produces one chunk."""
path = temp_file(b"x" * 100)
mock_pf.return_value = mock_probe(5.0)
result = Pipeline(source=path, chunk_duration=10.0).run()
assert result.total_chunks == 1
assert result.processed == 1
@patch("core.chunker.chunker.probe_file")
def test_retries_tracked(self, mock_pf, temp_file):
"""Pipeline result tracks total retries."""
path = temp_file(b"x" * 2048)
mock_pf.return_value = mock_probe(20.0)
result = Pipeline(source=path, chunk_duration=10.0).run()
assert result.retries >= 0 # Might be 0 if no failures
@patch("core.chunker.chunker.probe_file")
def test_output_dir_and_chunk_files(self, mock_pf, temp_file):
"""Pipeline tracks output_dir and chunk_files when set."""
path = temp_file(b"x" * 1024)
mock_pf.return_value = mock_probe(10.0)
result = Pipeline(
source=path,
chunk_duration=10.0,
processor_type="checksum",
).run()
# No output_dir set, so chunk_files should be empty
assert result.output_dir is None
assert result.chunk_files == []
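
Condensed, the orchestration these tests exercise looks roughly like this sketch: a producer thread feeds a bounded ChunkQueue, workers drain it, and results are reordered on collection. The internal wiring is an assumption; the real Pipeline also emits events and computes worker stats:

import threading
import time

from core.chunker import Chunker
from core.chunker.collector import ResultCollector
from core.chunker.processor import ChecksumProcessor
from core.chunker.queue import ChunkQueue
from core.chunker.worker import Worker


def run_pipeline(source: str, chunk_duration: float = 10.0, num_workers: int = 4):
    """Sketch: chunk -> bounded queue -> worker pool -> ordered collection."""
    chunker = Chunker(source, chunk_duration=chunk_duration)
    q = ChunkQueue(maxsize=10)
    collector = ResultCollector(total_chunks=chunker.expected_chunks)
    lock = threading.Lock()

    def produce() -> None:
        for chunk in chunker.chunks():  # lazy; put() blocks when full (backpressure)
            q.put(chunk)
        q.close()                       # sentinel fans out to every worker

    def consume(worker: Worker) -> None:
        for result in worker.run():     # run() returns this worker's results
            with lock:
                collector.add(result)   # heap reorders out-of-order arrivals

    workers = [Worker(f"w-{i}", q, ChecksumProcessor()) for i in range(num_workers)]
    threads = [threading.Thread(target=produce)]
    threads += [threading.Thread(target=consume, args=(w,)) for w in workers]
    start = time.monotonic()
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    return collector, time.monotonic() - start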

tests/chunker/test_processor.py Normal file
View File

@@ -0,0 +1,98 @@
"""
Tests for Processor implementations — ChecksumProcessor, SimulatedDecodeProcessor, CompositeProcessor.
Demonstrates: TDD (Interview Topic 8) — ABC contract, parametrized tests.
"""
import pytest
from core.chunker.exceptions import ChunkChecksumError
from core.chunker.models import Chunk
from core.chunker.processor import (
ChecksumProcessor,
CompositeProcessor,
Processor,
SimulatedDecodeProcessor,
)
class TestChecksumProcessor:
def test_valid_time_range(self, sample_chunk):
"""Valid time range passes."""
proc = ChecksumProcessor()
result = proc.process(sample_chunk)
assert result.success is True
assert result.checksum_valid is True
assert result.processing_time > 0
def test_invalid_time_range(self):
"""Invalid time range raises ChunkChecksumError."""
chunk = Chunk(
sequence=0,
start_time=10.0,
end_time=10.0, # zero duration
source_path="/fake.mp4",
duration=0.0,
)
proc = ChecksumProcessor()
with pytest.raises(ChunkChecksumError) as exc_info:
proc.process(chunk)
assert exc_info.value.sequence == 0
def test_sequence_preserved(self, make_chunk):
"""Result carries the chunk's sequence number."""
chunk = make_chunk(42)
proc = ChecksumProcessor()
result = proc.process(chunk)
assert result.sequence == 42
class TestSimulatedDecodeProcessor:
def test_processes_successfully(self, sample_chunk):
"""Simulated decode always succeeds."""
proc = SimulatedDecodeProcessor(ms_per_second=1.0)
result = proc.process(sample_chunk)
assert result.success is True
assert result.processing_time > 0
def test_time_proportional_to_duration(self):
"""Longer chunks take longer."""
short = Chunk(0, 0.0, 1.0, "/fake.mp4", 1.0)
long = Chunk(1, 0.0, 10.0, "/fake.mp4", 10.0)
proc = SimulatedDecodeProcessor(ms_per_second=50.0)
r_short = proc.process(short)
r_long = proc.process(long)
assert r_long.processing_time > r_short.processing_time
class TestCompositeProcessor:
def test_chains_processors(self, sample_chunk):
"""Composite runs all processors in sequence."""
proc = CompositeProcessor([
ChecksumProcessor(),
SimulatedDecodeProcessor(ms_per_second=1.0),
])
result = proc.process(sample_chunk)
assert result.success is True
def test_stops_on_failure(self):
"""If first processor raises, composite propagates the error."""
bad_chunk = Chunk(0, 10.0, 10.0, "/fake.mp4", 0.0) # invalid range
proc = CompositeProcessor([
ChecksumProcessor(),
SimulatedDecodeProcessor(ms_per_second=1.0),
])
with pytest.raises(ChunkChecksumError):
proc.process(bad_chunk)
def test_requires_at_least_one(self):
"""Empty processor list raises ValueError."""
with pytest.raises(ValueError, match="at least one"):
CompositeProcessor([])
def test_is_processor(self):
"""CompositeProcessor is a Processor."""
proc = CompositeProcessor([ChecksumProcessor()])
assert isinstance(proc, Processor)
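
The chaining behavior these tests rely on is small enough to sketch: run children in order, fail fast on the first exception, return the last stage's result. The real CompositeProcessor may combine results differently:

from typing import List, Optional

from core.chunker.models import Chunk, ChunkResult
from core.chunker.processor import Processor


class CompositeProcessor(Processor):
    """Sketch of the composite contract: ordered chain, fail-fast."""

    def __init__(self, processors: List[Processor]):
        if not processors:
            raise ValueError("CompositeProcessor requires at least one processor")
        self.processors = processors

    def process(self, chunk: Chunk) -> ChunkResult:
        result: Optional[ChunkResult] = None
        for proc in self.processors:
            result = proc.process(chunk)  # any stage may raise; the error propagates
        return result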

115
tests/chunker/test_queue.py Normal file
View File

@@ -0,0 +1,115 @@
"""
Tests for ChunkQueue — backpressure, sentinel shutdown, timeout behavior.
Demonstrates: TDD (Interview Topic 8) — concurrency testing.
"""
import queue
import threading
import pytest
from core.chunker.queue import ChunkQueue
class TestChunkQueue:
def test_put_and_get(self, make_chunk):
"""Basic put/get cycle."""
q = ChunkQueue(maxsize=5)
chunk = make_chunk(0)
q.put(chunk)
result = q.get(timeout=1.0)
assert result.sequence == 0
def test_fifo_order(self, make_chunk):
"""Items come out in FIFO order."""
q = ChunkQueue(maxsize=5)
for i in range(3):
q.put(make_chunk(i))
for i in range(3):
assert q.get(timeout=1.0).sequence == i
def test_close_returns_none(self, make_chunk):
"""After close(), get() returns None (sentinel)."""
q = ChunkQueue(maxsize=5)
q.put(make_chunk(0))
q.close()
result = q.get(timeout=1.0)
assert result.sequence == 0
# Next get should hit sentinel
result = q.get(timeout=1.0)
assert result is None
def test_close_propagates_to_multiple_consumers(self, make_chunk):
"""Sentinel propagates: multiple consumers all get None."""
q = ChunkQueue(maxsize=5)
q.close()
# Multiple consumers should all see None
assert q.get(timeout=1.0) is None
assert q.get(timeout=1.0) is None
def test_is_closed(self):
"""is_closed reflects state."""
q = ChunkQueue()
assert not q.is_closed
q.close()
assert q.is_closed
def test_qsize(self, make_chunk):
"""qsize tracks approximate queue depth."""
q = ChunkQueue(maxsize=10)
assert q.qsize() == 0
q.put(make_chunk(0))
q.put(make_chunk(1))
assert q.qsize() == 2
q.get(timeout=1.0)
assert q.qsize() == 1
def test_backpressure_blocks(self, make_chunk):
"""Put blocks when queue is full (backpressure)."""
q = ChunkQueue(maxsize=2)
q.put(make_chunk(0))
q.put(make_chunk(1))
# Queue is full — put with short timeout should raise
with pytest.raises(queue.Full):
q.put(make_chunk(2), timeout=0.05)
def test_get_timeout(self):
"""Get on empty queue with timeout raises Empty."""
q = ChunkQueue(maxsize=5)
with pytest.raises(queue.Empty):
q.get(timeout=0.05)
def test_concurrent_put_get(self, make_chunk):
"""Producer/consumer threads work correctly."""
q = ChunkQueue(maxsize=3)
results = []
def producer():
for i in range(10):
q.put(make_chunk(i))
q.close()
def consumer():
while True:
item = q.get(timeout=2.0)
if item is None:
break
results.append(item.sequence)
t1 = threading.Thread(target=producer)
t2 = threading.Thread(target=consumer)
t1.start()
t2.start()
t1.join(timeout=5.0)
t2.join(timeout=5.0)
assert sorted(results) == list(range(10))
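
The sentinel mechanics under test deserve spelling out: close() enqueues one marker, and every consumer that pops it re-enqueues it before returning None, so a single close reaches all workers. A sketch that matches these tests; the real ChunkQueue may add locking or metrics:

import queue
from typing import Optional

from core.chunker.models import Chunk

_SENTINEL = None  # close() marker; get() surfaces it to callers as None


class ChunkQueue:
    """Sketch: bounded FIFO whose close() sentinel fans out to every consumer."""

    def __init__(self, maxsize: int = 0):
        self._q: queue.Queue = queue.Queue(maxsize=maxsize)
        self._closed = False

    def put(self, chunk: Chunk, timeout: Optional[float] = None) -> None:
        # Blocks (or raises queue.Full on timeout) when full: backpressure.
        self._q.put(chunk, block=True, timeout=timeout)

    def get(self, timeout: Optional[float] = None):
        item = self._q.get(block=True, timeout=timeout)  # raises queue.Empty on timeout
        if item is _SENTINEL:
            self._q.put(_SENTINEL)  # re-queue so the next consumer also sees it
            return None
        return item

    def close(self) -> None:
        # Note: blocks if the queue is at maxsize; acceptable for a sketch.
        self._closed = True
        self._q.put(_SENTINEL)

    @property
    def is_closed(self) -> bool:
        return self._closed

    def qsize(self) -> int:
        return self._q.qsize()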

tests/chunker/test_worker.py Normal file
View File

@@ -0,0 +1,127 @@
"""
Tests for Worker — processing, retry with backoff, error handling.
Demonstrates: TDD (Interview Topic 8) — mocking processors, testing retry logic.
"""
from unittest.mock import MagicMock
import pytest
from core.chunker.models import Chunk, ChunkResult
from core.chunker.processor import Processor
from core.chunker.queue import ChunkQueue
from core.chunker.worker import Worker
class FailNTimesProcessor(Processor):
"""Test processor that fails N times then succeeds."""
def __init__(self, fail_count: int):
self.fail_count = fail_count
self.call_count = 0
def process(self, chunk: Chunk) -> ChunkResult:
self.call_count += 1
if self.call_count <= self.fail_count:
raise RuntimeError(f"Simulated failure #{self.call_count}")
return ChunkResult(
sequence=chunk.sequence,
success=True,
processing_time=0.001,
)
class AlwaysFailProcessor(Processor):
"""Test processor that always fails."""
def process(self, chunk: Chunk) -> ChunkResult:
raise RuntimeError("Always fails")
class TestWorker:
def test_processes_chunks(self, make_chunk):
"""Worker processes all chunks from queue."""
q = ChunkQueue(maxsize=5)
for i in range(3):
q.put(make_chunk(i))
q.close()
from core.chunker.processor import ChecksumProcessor
worker = Worker("w-0", q, ChecksumProcessor(), max_retries=0)
results = worker.run()
assert len(results) == 3
assert all(r.success for r in results)
def test_retry_on_failure(self, make_chunk):
"""Worker retries on processor failure."""
q = ChunkQueue(maxsize=5)
q.put(make_chunk(0))
q.close()
proc = FailNTimesProcessor(fail_count=2)
worker = Worker("w-0", q, proc, max_retries=3)
results = worker.run()
assert len(results) == 1
assert results[0].success is True
assert results[0].retries == 2
assert proc.call_count == 3 # 2 failures + 1 success
def test_max_retries_exceeded(self, make_chunk):
"""Worker gives up after max retries."""
q = ChunkQueue(maxsize=5)
q.put(make_chunk(0))
q.close()
worker = Worker("w-0", q, AlwaysFailProcessor(), max_retries=2)
results = worker.run()
assert len(results) == 1
assert results[0].success is False
assert results[0].error is not None
assert worker.error_count == 1
def test_worker_id_on_results(self, make_chunk):
"""Worker stamps its ID on results."""
q = ChunkQueue(maxsize=5)
q.put(make_chunk(0))
q.close()
from core.chunker.processor import ChecksumProcessor
worker = Worker("worker-7", q, ChecksumProcessor())
results = worker.run()
assert results[0].worker_id == "worker-7"
def test_event_callback(self, make_chunk):
"""Worker emits events via callback."""
q = ChunkQueue(maxsize=5)
q.put(make_chunk(0))
q.close()
events = []
callback = MagicMock(side_effect=lambda t, d: events.append((t, d)))
from core.chunker.processor import ChecksumProcessor
worker = Worker("w-0", q, ChecksumProcessor(), event_callback=callback)
worker.run()
event_types = [e[0] for e in events]
assert "worker_status" in event_types
assert "chunk_processing" in event_types
assert "chunk_done" in event_types
def test_processed_count(self, make_chunk):
"""Worker tracks processed count."""
q = ChunkQueue(maxsize=10)
for i in range(5):
q.put(make_chunk(i))
q.close()
from core.chunker.processor import ChecksumProcessor
worker = Worker("w-0", q, ChecksumProcessor())
worker.run()
assert worker.processed_count == 5
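
Finally, the retry loop these tests fix in place, sketched below: bounded attempts with exponential backoff, and a failure result only once the budget is spent. It assumes ChunkResult carries worker_id, retries, and error fields, per the assertions above:

import time
from typing import Callable, List, Optional

from core.chunker.models import Chunk, ChunkResult
from core.chunker.processor import Processor
from core.chunker.queue import ChunkQueue


class Worker:
    """Sketch: drain the queue until the sentinel, retrying with backoff."""

    def __init__(self, worker_id: str, chunk_queue: ChunkQueue, processor: Processor,
                 max_retries: int = 3, event_callback: Optional[Callable] = None,
                 base_delay: float = 0.01):
        self.worker_id = worker_id
        self.chunk_queue = chunk_queue
        self.processor = processor
        self.max_retries = max_retries
        self.event_callback = event_callback or (lambda _t, _d: None)
        self.base_delay = base_delay
        self.processed_count = 0
        self.error_count = 0

    def run(self) -> List[ChunkResult]:
        results: List[ChunkResult] = []
        self.event_callback("worker_status", {"worker_id": self.worker_id, "state": "idle"})
        while True:
            chunk = self.chunk_queue.get(timeout=5.0)
            if chunk is None:  # sentinel: queue closed and drained
                break
            self.event_callback("chunk_processing",
                                {"worker_id": self.worker_id, "sequence": chunk.sequence})
            results.append(self._attempt(chunk))
        self.event_callback("worker_status", {"worker_id": self.worker_id, "state": "stopped"})
        return results

    def _attempt(self, chunk: Chunk) -> ChunkResult:
        for attempt in range(self.max_retries + 1):
            try:
                result = self.processor.process(chunk)
                result.worker_id = self.worker_id  # assumes mutable result fields
                result.retries = attempt
                self.processed_count += 1
                self.event_callback("chunk_done", {"worker_id": self.worker_id,
                                                   "sequence": chunk.sequence, "success": True})
                return result
            except Exception as exc:
                if attempt == self.max_retries:
                    self.error_count += 1
                    self.event_callback("chunk_done", {"worker_id": self.worker_id,
                                                       "sequence": chunk.sequence,
                                                       "success": False, "error": str(exc)})
                    return ChunkResult(sequence=chunk.sequence, success=False,
                                       processing_time=0.0, error=str(exc))
                time.sleep(self.base_delay * (2 ** attempt))  # exponential backoff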

12
ui/chunker/index.html Normal file
View File

@@ -0,0 +1,12 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>MPR Chunker Pipeline</title>
</head>
<body>
<div id="app"></div>
<script type="module" src="/src/main.tsx"></script>
</body>
</html>

1729
ui/chunker/package-lock.json generated Normal file

File diff suppressed because it is too large

22
ui/chunker/package.json Normal file
View File

@@ -0,0 +1,22 @@
{
"name": "mpr-chunker",
"version": "0.1.0",
"private": true,
"type": "module",
"scripts": {
"dev": "vite",
"build": "tsc && vite build",
"preview": "vite preview"
},
"dependencies": {
"react": "^18.2.0",
"react-dom": "^18.2.0"
},
"devDependencies": {
"@types/react": "^18.2.0",
"@types/react-dom": "^18.2.0",
"@vitejs/plugin-react": "^4.2.0",
"typescript": "^5.3.0",
"vite": "^5.0.0"
}
}

735
ui/chunker/src/App.css Normal file
View File

@@ -0,0 +1,735 @@
* {
box-sizing: border-box;
margin: 0;
padding: 0;
}
body {
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto,
"Fira Code", monospace, sans-serif;
background: #0f0f0f;
color: #e0e0e0;
font-size: 14px;
}
/* ---- Layout ---- */
.app {
display: flex;
flex-direction: column;
height: 100vh;
}
.header {
display: flex;
justify-content: space-between;
align-items: center;
padding: 0.75rem 1.25rem;
background: #1a1a1a;
border-bottom: 1px solid #2a2a2a;
}
.header h1 {
font-size: 1.1rem;
font-weight: 600;
letter-spacing: -0.01em;
}
.connection-status {
display: flex;
align-items: center;
gap: 0.5rem;
font-size: 0.8rem;
color: #666;
}
.dot {
width: 8px;
height: 8px;
border-radius: 50%;
background: #555;
}
.dot.connected {
background: #10b981;
box-shadow: 0 0 6px #10b981;
}
.error-banner {
padding: 0.5rem 1.25rem;
background: #7f1d1d;
color: #fca5a5;
font-size: 0.85rem;
}
.layout {
display: flex;
flex: 1;
overflow: hidden;
}
.sidebar {
width: 300px;
background: #141414;
border-right: 1px solid #2a2a2a;
overflow-y: auto;
}
.main {
flex: 1;
overflow-y: auto;
padding: 1rem;
display: flex;
flex-direction: column;
gap: 1rem;
}
.main-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 1rem;
}
.main-left,
.main-right {
display: flex;
flex-direction: column;
gap: 1rem;
}
/* ---- Panel shared ---- */
.panel-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 0.75rem;
}
.panel-header h2 {
font-size: 0.85rem;
text-transform: uppercase;
letter-spacing: 0.05em;
color: #888;
}
.badge-row {
display: flex;
gap: 0.25rem;
}
/* ---- Topic Badge ---- */
.topic-badge {
display: inline-flex;
align-items: center;
gap: 0.25rem;
padding: 0.15rem 0.5rem;
font-size: 0.65rem;
background: #1e293b;
border: 1px solid #334155;
border-radius: 12px;
cursor: pointer;
transition: all 0.2s;
flex-shrink: 0;
}
.topic-badge:hover {
border-color: #3b82f6;
}
.topic-badge.expanded {
flex-direction: column;
align-items: flex-start;
border-radius: 8px;
padding: 0.5rem;
position: relative;
z-index: 10;
background: #1e293b;
}
.topic-number {
color: #3b82f6;
font-weight: 700;
}
.topic-title {
color: #94a3b8;
}
.topic-detail {
margin-top: 0.25rem;
font-size: 0.7rem;
line-height: 1.4;
}
.topic-detail p {
color: #cbd5e1;
margin-bottom: 0.25rem;
}
.topic-detail code {
color: #10b981;
font-size: 0.65rem;
}
/* ---- Asset List ---- */
.scan-button {
padding: 0.25rem 0.5rem;
font-size: 0.7rem;
background: #1e293b;
color: #94a3b8;
border: 1px solid #334155;
border-radius: 4px;
cursor: pointer;
transition: all 0.2s;
}
.scan-button:hover:not(:disabled) {
background: #334155;
color: #e0e0e0;
}
.scan-button:disabled {
opacity: 0.5;
cursor: not-allowed;
}
.asset-list {
list-style: none;
max-height: 200px;
overflow-y: auto;
margin-bottom: 0.75rem;
}
.asset-item {
padding: 0.4rem 0.5rem;
cursor: pointer;
border-left: 2px solid transparent;
transition: all 0.15s;
display: flex;
flex-direction: column;
gap: 0.1rem;
}
.asset-item:hover {
background: #1a1a1a;
}
.asset-item.selected {
background: #1e293b;
border-left-color: #3b82f6;
}
.asset-filename {
font-size: 0.8rem;
color: #e0e0e0;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.asset-meta {
font-size: 0.65rem;
color: #555;
}
.asset-empty {
font-size: 0.8rem;
color: #444;
padding: 0.75rem 0.5rem;
text-align: center;
}
.selected-asset-info {
padding: 0.5rem;
background: #1e293b;
border: 1px solid #334155;
border-radius: 4px;
margin-bottom: 0.75rem;
}
.asset-detail {
display: block;
font-size: 0.8rem;
color: #e0e0e0;
font-weight: 500;
}
.asset-detail-meta {
display: block;
font-size: 0.65rem;
color: #64748b;
margin-top: 0.15rem;
}
/* ---- Config Panel ---- */
.config-panel {
padding: 1rem;
}
.config-field {
margin-bottom: 0.75rem;
}
.config-field label {
display: block;
font-size: 0.75rem;
color: #888;
margin-bottom: 0.25rem;
}
.config-field .default {
color: #555;
font-style: italic;
}
.config-field input,
.config-field select {
width: 100%;
padding: 0.4rem 0.5rem;
font-size: 0.8rem;
background: #222;
color: #e0e0e0;
border: 1px solid #333;
border-radius: 4px;
}
.config-field input:focus,
.config-field select:focus {
outline: none;
border-color: #3b82f6;
}
.start-button {
width: 100%;
padding: 0.5rem;
font-size: 0.85rem;
background: #10b981;
color: #000;
border: none;
border-radius: 4px;
cursor: pointer;
font-weight: 600;
margin-top: 0.5rem;
transition: background 0.2s;
}
.start-button:hover:not(:disabled) {
background: #059669;
}
.start-button:disabled {
background: #333;
color: #666;
cursor: not-allowed;
}
/* ---- Pipeline Diagram ---- */
.pipeline-diagram {
background: #141414;
border: 1px solid #2a2a2a;
border-radius: 8px;
padding: 1rem;
}
.stage-flow {
display: flex;
align-items: center;
gap: 0;
overflow-x: auto;
}
.stage-wrapper {
display: flex;
align-items: center;
}
.stage {
padding: 0.5rem 0.75rem;
background: #1a1a1a;
border: 1px solid #333;
border-radius: 6px;
text-align: center;
min-width: 120px;
transition: all 0.3s;
}
.stage.active {
border-color: #3b82f6;
background: #1e293b;
box-shadow: 0 0 12px rgba(59, 130, 246, 0.2);
}
.stage-label {
font-size: 0.8rem;
font-weight: 600;
color: #e0e0e0;
}
.stage-sub {
font-size: 0.65rem;
color: #666;
margin-top: 0.15rem;
}
.stage-arrow {
width: 24px;
height: 2px;
background: #444;
position: relative;
}
.stage-arrow::after {
content: "";
position: absolute;
right: 0;
top: -3px;
border: 4px solid transparent;
border-left: 6px solid #444;
}
.processor-hierarchy {
margin-top: 0.75rem;
padding-top: 0.75rem;
border-top: 1px solid #222;
}
.hierarchy-title {
font-size: 0.7rem;
color: #666;
margin-bottom: 0.35rem;
font-style: italic;
}
.hierarchy-children {
display: flex;
gap: 0.5rem;
flex-wrap: wrap;
}
.hierarchy-node {
font-size: 0.7rem;
padding: 0.15rem 0.5rem;
background: #1a1a1a;
border: 1px solid #333;
border-radius: 4px;
color: #94a3b8;
}
/* ---- Chunk Grid ---- */
.chunk-grid-panel {
background: #141414;
border: 1px solid #2a2a2a;
border-radius: 8px;
padding: 1rem;
}
.chunk-count {
font-size: 0.7rem;
color: #555;
font-weight: 400;
}
.chunk-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(32px, 1fr));
gap: 3px;
max-height: 200px;
overflow-y: auto;
}
.chunk-cell {
aspect-ratio: 1;
display: flex;
align-items: center;
justify-content: center;
font-size: 0.55rem;
color: rgba(255, 255, 255, 0.6);
border-radius: 3px;
transition: background 0.3s;
}
.chunk-legend {
display: flex;
gap: 0.75rem;
margin-top: 0.5rem;
flex-wrap: wrap;
}
.legend-item {
display: flex;
align-items: center;
gap: 0.25rem;
font-size: 0.65rem;
color: #888;
}
.legend-dot {
width: 8px;
height: 8px;
border-radius: 2px;
}
/* ---- Worker Panel ---- */
.worker-panel {
background: #141414;
border: 1px solid #2a2a2a;
border-radius: 8px;
padding: 1rem;
}
.worker-cards {
display: flex;
flex-direction: column;
gap: 0.5rem;
}
.worker-card {
padding: 0.5rem 0.75rem;
background: #1a1a1a;
border: 1px solid #2a2a2a;
border-radius: 6px;
}
.worker-header {
display: flex;
justify-content: space-between;
align-items: center;
}
.worker-name {
font-size: 0.8rem;
font-weight: 500;
}
.worker-state {
font-size: 0.7rem;
text-transform: uppercase;
font-weight: 600;
}
.worker-chunk {
font-size: 0.7rem;
color: #555;
margin-top: 0.15rem;
}
.worker-stats {
display: flex;
gap: 0.75rem;
font-size: 0.65rem;
color: #555;
margin-top: 0.25rem;
}
.worker-empty {
font-size: 0.8rem;
color: #444;
text-align: center;
padding: 1rem;
}
/* ---- Queue Gauge ---- */
.queue-gauge {
background: #141414;
border: 1px solid #2a2a2a;
border-radius: 8px;
padding: 1rem;
}
.gauge-row {
margin-bottom: 0.5rem;
}
.gauge-label {
font-size: 0.75rem;
color: #888;
margin-bottom: 0.25rem;
}
.gauge-value {
color: #e0e0e0;
font-weight: 600;
}
.gauge-bar {
height: 8px;
background: #222;
border-radius: 4px;
overflow: hidden;
}
.gauge-fill {
height: 100%;
border-radius: 4px;
transition: width 0.3s, background 0.3s;
}
.gauge-note {
font-size: 0.65rem;
color: #555;
}
/* ---- Stats Panel ---- */
.stats-panel {
background: #141414;
border: 1px solid #2a2a2a;
border-radius: 8px;
padding: 1rem;
}
.stats-grid {
display: grid;
grid-template-columns: repeat(3, 1fr);
gap: 0.5rem;
}
.stat {
text-align: center;
padding: 0.5rem;
background: #1a1a1a;
border-radius: 6px;
}
.stat-value {
font-size: 1.1rem;
font-weight: 700;
color: #e0e0e0;
}
.stat-label {
font-size: 0.6rem;
color: #666;
text-transform: uppercase;
letter-spacing: 0.05em;
margin-top: 0.15rem;
}
.test-info {
margin-top: 0.75rem;
padding-top: 0.5rem;
border-top: 1px solid #222;
display: flex;
align-items: center;
gap: 0.5rem;
}
.test-badge {
font-size: 0.65rem;
padding: 0.15rem 0.4rem;
background: #10b981;
color: #000;
border-radius: 3px;
font-weight: 600;
}
.test-note {
font-size: 0.65rem;
color: #555;
}
/* ---- Error Log ---- */
.error-log {
background: #141414;
border: 1px solid #2a2a2a;
border-radius: 8px;
padding: 1rem;
}
.error-count {
font-size: 0.7rem;
background: #7f1d1d;
color: #fca5a5;
padding: 0.1rem 0.4rem;
border-radius: 8px;
font-weight: 400;
}
.exception-tree {
margin-bottom: 0.75rem;
padding: 0.5rem;
background: #1a1a1a;
border-radius: 6px;
font-size: 0.7rem;
font-family: "Fira Code", monospace;
}
.tree-node {
color: #94a3b8;
padding: 0.1rem 0;
}
.tree-node.root {
color: #f59e0b;
font-weight: 600;
}
.tree-node.leaf {
color: #64748b;
}
.tree-children {
padding-left: 1rem;
border-left: 1px solid #333;
margin-left: 0.5rem;
}
.tree-grandchildren {
padding-left: 1rem;
border-left: 1px solid #333;
margin-left: 0.5rem;
}
.error-entries {
max-height: 150px;
overflow-y: auto;
}
.error-empty {
font-size: 0.8rem;
color: #444;
text-align: center;
padding: 0.5rem;
}
.error-entry {
display: flex;
gap: 0.5rem;
align-items: center;
padding: 0.35rem 0;
border-bottom: 1px solid #1a1a1a;
font-size: 0.7rem;
flex-wrap: wrap;
}
.error-type {
color: #ef4444;
font-weight: 500;
}
.error-seq {
color: #f59e0b;
}
.error-worker {
color: #3b82f6;
}
.error-msg {
color: #888;
flex: 1;
}
.error-retries {
color: #f97316;
font-size: 0.65rem;
}

245
ui/chunker/src/App.tsx Normal file
View File

@@ -0,0 +1,245 @@
import { useCallback, useEffect, useMemo, useState } from "react";
import "./App.css";
import { createChunkJob, getAssets, scanMediaFolder } from "./api";
import { ChunkGrid } from "./components/ChunkGrid";
import { ConfigPanel } from "./components/ConfigPanel";
import { ErrorLog } from "./components/ErrorLog";
import { PipelineDiagram } from "./components/PipelineDiagram";
import { QueueGauge } from "./components/QueueGauge";
import { StatsPanel } from "./components/StatsPanel";
import { WorkerPanel } from "./components/WorkerPanel";
import { useEventStream } from "./hooks/useEventStream";
import type {
ChunkInfo,
ErrorEntry,
MediaAsset,
PipelineConfig,
PipelineStats,
WorkerInfo,
} from "./types";
export default function App() {
const [jobId, setJobId] = useState<string | null>(null);
const [running, setRunning] = useState(false);
const [error, setError] = useState<string | null>(null);
// Asset state
const [assets, setAssets] = useState<MediaAsset[]>([]);
const [selectedAsset, setSelectedAsset] = useState<MediaAsset | null>(null);
const [scanning, setScanning] = useState(false);
const { events, connected, done } = useEventStream(jobId);
// Load assets on mount
useEffect(() => {
getAssets()
.then((data) => setAssets(data.sort((a, b) => a.filename.localeCompare(b.filename))))
.catch((e) => setError(e instanceof Error ? e.message : "Failed to load assets"));
}, []);
const handleScan = useCallback(async () => {
setScanning(true);
setError(null);
try {
await scanMediaFolder();
const data = await getAssets();
setAssets(data.sort((a, b) => a.filename.localeCompare(b.filename)));
} catch (e) {
setError(e instanceof Error ? e.message : "Scan failed");
} finally {
setScanning(false);
}
}, []);
// Derive state from events
const { chunks, workers, stats, errors, activeStage, queueSize } =
useMemo(() => {
const chunkMap = new Map<number, ChunkInfo>();
const workerMap = new Map<string, WorkerInfo>();
const errorList: ErrorEntry[] = [];
let totalChunks = 0;
let processed = 0;
let failed = 0;
let retries = 0;
let elapsed = 0;
let throughput = 0;
let queueSize = 0;
let stage = "pending";
for (const evt of events) {
if (evt.total_chunks) totalChunks = evt.total_chunks;
if (evt.processed_chunks) processed = evt.processed_chunks;
if (evt.failed_chunks) failed = evt.failed_chunks;
if (evt.elapsed) elapsed = evt.elapsed;
if (evt.throughput_mbps) throughput = evt.throughput_mbps;
if (evt.queue_size !== undefined) queueSize = evt.queue_size;
if (evt.status && evt.status !== "waiting") stage = evt.status;
// Track chunks
if (evt.sequence !== undefined) {
const existing = chunkMap.get(evt.sequence) || {
sequence: evt.sequence,
state: "pending" as const,
};
if (evt.status === "chunking" || evt.status === "pending") {
existing.state = "queued";
} else if (evt.status === "processing") {
existing.state = "processing";
if (evt.worker_id) existing.worker_id = evt.worker_id;
} else if (evt.status === "completed") {
existing.state = "done";
if (evt.processing_time)
existing.processing_time = evt.processing_time;
if (evt.retries) existing.retries = evt.retries;
} else if (evt.status === "failed") {
existing.state = "error";
if (evt.error) existing.error = evt.error;
}
if (evt.size) existing.size = evt.size;
chunkMap.set(evt.sequence, existing);
}
// Track workers
if (evt.worker_id) {
const w = workerMap.get(evt.worker_id) || {
worker_id: evt.worker_id,
state: "idle" as const,
processed: 0,
errors: 0,
retries: 0,
};
if (evt.state === "processing") {
w.state = "processing";
w.current_chunk = evt.sequence;
} else if (evt.state === "idle") {
w.state = "idle";
w.current_chunk = undefined;
} else if (evt.state === "stopped") {
w.state = "stopped";
}
if (evt.success !== undefined) {
if (evt.success) w.processed++;
else w.errors++;
}
if (evt.retries) {
retries += evt.retries;
w.retries += evt.retries;
}
workerMap.set(evt.worker_id, w);
}
// Track errors
if (evt.error) {
errorList.push({
timestamp: Date.now(),
sequence: evt.sequence,
worker_id: evt.worker_id,
error: evt.error,
retries: evt.retries,
event_type: evt.status || "error",
});
}
}
const statsObj: PipelineStats = {
total_chunks: totalChunks,
processed,
failed,
retries,
elapsed,
throughput_mbps: throughput,
queue_size: queueSize,
};
return {
chunks: Array.from(chunkMap.values()).sort(
(a, b) => a.sequence - b.sequence
),
workers: Array.from(workerMap.values()),
stats: statsObj,
errors: errorList,
activeStage: stage,
queueSize,
};
}, [events]);
const handleStart = useCallback(async (config: PipelineConfig) => {
setError(null);
setRunning(true);
try {
const result = await createChunkJob(config);
setJobId(result.id);
} catch (e) {
setError(e instanceof Error ? e.message : "Failed to start");
setRunning(false);
}
}, []);
// Reset running state when done
if (done && running) {
setRunning(false);
}
return (
<div className="app">
<header className="header">
<h1>MPR Chunker Pipeline</h1>
<div className="connection-status">
{jobId && (
<span className={`dot ${connected ? "connected" : ""}`} />
)}
<span className="status-text">
{!jobId
? "Configure and launch"
: connected
? "Streaming"
: done
? "Complete"
: "Connecting..."}
</span>
</div>
</header>
{error && <div className="error-banner">{error}</div>}
<div className="layout">
<aside className="sidebar">
<ConfigPanel
onStart={handleStart}
running={running}
assets={assets}
selectedAsset={selectedAsset}
onSelectAsset={setSelectedAsset}
onScan={handleScan}
scanning={scanning}
/>
</aside>
<main className="main">
<PipelineDiagram activeStage={activeStage} />
<div className="main-grid">
<div className="main-left">
<ChunkGrid chunks={chunks} totalChunks={stats.total_chunks} />
<QueueGauge
current={queueSize}
max={10}
buffered={0}
/>
</div>
<div className="main-right">
<WorkerPanel workers={workers} />
<StatsPanel stats={stats} />
<ErrorLog errors={errors} />
</div>
</div>
</main>
</div>
</div>
);
}

72
ui/chunker/src/api.ts Normal file
View File

@@ -0,0 +1,72 @@
/**
* GraphQL API client for the chunker UI.
*/
import type { MediaAsset } from "./types";
const GRAPHQL_URL = "/api/graphql";
async function gql<T>(query: string, variables?: Record<string, unknown>): Promise<T> {
const response = await fetch(GRAPHQL_URL, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ query, variables }),
});
const json = await response.json();
if (json.errors?.length) {
throw new Error(json.errors[0].message);
}
return json.data as T;
}
/** Fetch all media assets. */
export async function getAssets(): Promise<MediaAsset[]> {
const data = await gql<{ assets: MediaAsset[] }>(`
query {
assets {
id filename file_path status error_message file_size duration
video_codec audio_codec width height framerate bitrate
properties comments tags created_at updated_at
}
}
`);
return data.assets;
}
/** Scan media/in/ folder for new files. */
export async function scanMediaFolder(): Promise<{
found: number;
registered: number;
skipped: number;
files: string[];
}> {
const data = await gql<{ scan_media_folder: { found: number; registered: number; skipped: number; files: string[] } }>(`
mutation {
scan_media_folder { found registered skipped files }
}
`);
return data.scan_media_folder;
}
/** Create a chunk job via GraphQL mutation. */
export async function createChunkJob(config: {
source_asset_id: string;
chunk_duration: number;
num_workers: number;
max_retries: number;
processor_type: string;
}): Promise<{ id: string }> {
const data = await gql<{ create_chunk_job: { id: string; status: string } }>(`
mutation CreateChunkJob($input: CreateChunkJobInput!) {
create_chunk_job(input: $input) {
id
status
}
}
`, { input: config });
return data.create_chunk_job;
}

ui/chunker/src/components/ChunkGrid.tsx Normal file
View File

@@ -0,0 +1,59 @@
import type { ChunkInfo } from "../types";
import { TopicBadge, TOPICS } from "./TopicBadge";
interface Props {
chunks: ChunkInfo[];
totalChunks: number;
}
const STATE_COLORS: Record<string, string> = {
pending: "#333",
queued: "#f59e0b",
processing: "#3b82f6",
done: "#10b981",
error: "#ef4444",
retry: "#f97316",
};
/**
* Grid of chunks colored by processing state.
* Chunks appear incrementally as the generator yields them.
* Interview Topic 3: Generators & iteration.
*/
export function ChunkGrid({ chunks, totalChunks }: Props) {
return (
<div className="chunk-grid-panel">
<div className="panel-header">
<h2>
Chunks{" "}
<span className="chunk-count">
{chunks.length} / {totalChunks || "?"}
</span>
</h2>
<TopicBadge topic={TOPICS.iteration} />
</div>
<div className="chunk-grid">
{chunks.map((chunk) => (
<div
key={chunk.sequence}
className="chunk-cell"
style={{ background: STATE_COLORS[chunk.state] || "#333" }}
title={`#${chunk.sequence} · ${chunk.state}${
chunk.worker_id ? ` (${chunk.worker_id})` : ""
}${chunk.retries ? ` retries: ${chunk.retries}` : ""}`}
>
{chunk.sequence}
</div>
))}
</div>
<div className="chunk-legend">
{Object.entries(STATE_COLORS).map(([state, color]) => (
<span key={state} className="legend-item">
<span className="legend-dot" style={{ background: color }} />
{state}
</span>
))}
</div>
</div>
);
}

ui/chunker/src/components/ConfigPanel.tsx Normal file
View File

@@ -0,0 +1,172 @@
import { useState } from "react";
import type { MediaAsset, PipelineConfig } from "../types";
import { TopicBadge, TOPICS } from "./TopicBadge";
interface Props {
onStart: (config: PipelineConfig) => void;
running: boolean;
assets: MediaAsset[];
selectedAsset: MediaAsset | null;
onSelectAsset: (asset: MediaAsset) => void;
onScan: () => void;
scanning: boolean;
}
/**
* Pipeline configuration form with file browser.
* Each parameter shows its default — Interview Topic 1: Function params & defaults.
*/
export function ConfigPanel({
onStart,
running,
assets,
selectedAsset,
onSelectAsset,
onScan,
scanning,
}: Props) {
const [chunkDuration, setChunkDuration] = useState(10.0);
const [numWorkers, setNumWorkers] = useState(4);
const [maxRetries, setMaxRetries] = useState(3);
const [processorType, setProcessorType] = useState<
"ffmpeg" | "checksum" | "simulated_decode" | "composite"
>("ffmpeg");
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
if (!selectedAsset) return;
onStart({
source_asset_id: selectedAsset.id,
chunk_duration: chunkDuration,
num_workers: numWorkers,
max_retries: maxRetries,
processor_type: processorType,
});
};
const formatSize = (bytes: number | null) => {
if (!bytes) return "—";
if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(0)} KB`;
return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
};
const formatDuration = (seconds: number | null) => {
if (!seconds) return "—";
const m = Math.floor(seconds / 60);
const s = Math.floor(seconds % 60);
return `${m}:${s.toString().padStart(2, "0")}`;
};
return (
<div className="config-panel">
{/* Asset Browser */}
<div className="panel-header">
<h2>Assets</h2>
<button
onClick={onScan}
disabled={scanning}
className="scan-button"
>
{scanning ? "Scanning..." : "Scan Folder"}
</button>
</div>
<ul className="asset-list">
{assets.length === 0 ? (
<li className="asset-empty">No assets click Scan Folder</li>
) : (
assets.map((asset) => (
<li
key={asset.id}
className={`asset-item ${selectedAsset?.id === asset.id ? "selected" : ""}`}
onClick={() => onSelectAsset(asset)}
title={asset.filename}
>
<span className="asset-filename">{asset.filename}</span>
<span className="asset-meta">
{formatSize(asset.file_size)} · {formatDuration(asset.duration)}
</span>
</li>
))
)}
</ul>
{selectedAsset && (
<div className="selected-asset-info">
<span className="asset-detail">{selectedAsset.filename}</span>
<span className="asset-detail-meta">
{selectedAsset.video_codec} · {selectedAsset.width}x{selectedAsset.height} · {formatDuration(selectedAsset.duration)}
</span>
</div>
)}
{/* Pipeline Config */}
<div className="panel-header" style={{ marginTop: "1rem" }}>
<h2>Pipeline Config</h2>
<TopicBadge topic={TOPICS.params} />
</div>
<form onSubmit={handleSubmit}>
<div className="config-field">
<label>
Chunk Duration <span className="default">default: 10s</span>
</label>
<select
value={chunkDuration}
onChange={(e) => setChunkDuration(Number(e.target.value))}
>
<option value={5}>5 seconds</option>
<option value={10}>10 seconds</option>
<option value={15}>15 seconds</option>
<option value={30}>30 seconds</option>
<option value={60}>60 seconds</option>
</select>
</div>
<div className="config-field">
<label>
Workers <span className="default">default: 4</span>
</label>
<input
type="number"
min={1}
max={16}
value={numWorkers}
onChange={(e) => setNumWorkers(Number(e.target.value))}
/>
</div>
<div className="config-field">
<label>
Max Retries <span className="default">default: 3</span>
</label>
<input
type="number"
min={0}
max={10}
value={maxRetries}
onChange={(e) => setMaxRetries(Number(e.target.value))}
/>
</div>
<div className="config-field">
<label>
Processor <span className="default">default: ffmpeg</span>
</label>
<select
value={processorType}
onChange={(e) =>
setProcessorType(
e.target.value as "ffmpeg" | "checksum" | "simulated_decode" | "composite"
)
}
>
<option value="ffmpeg">FFmpegExtractProcessor</option>
<option value="checksum">ChecksumProcessor</option>
<option value="simulated_decode">SimulatedDecodeProcessor</option>
<option value="composite">CompositeProcessor</option>
</select>
</div>
<button type="submit" className="start-button" disabled={running || !selectedAsset}>
{running ? "Running..." : "Launch Pipeline"}
</button>
</form>
</div>
);
}


@@ -0,0 +1,63 @@
import type { ErrorEntry } from "../types";
import { TopicBadge, TOPICS } from "./TopicBadge";
interface Props {
errors: ErrorEntry[];
}
/**
* Error and retry event log.
* Shows exception types, retry counts, backoff delays.
* Interview Topic 7: Exception handling & resilient code.
*/
export function ErrorLog({ errors }: Props) {
return (
<div className="error-log">
<div className="panel-header">
<h2>
Errors & Retries{" "}
<span className="error-count">{errors.length}</span>
</h2>
<TopicBadge topic={TOPICS.exceptions} />
</div>
<div className="exception-tree">
<div className="tree-node root">PipelineError</div>
<div className="tree-children">
<div className="tree-node">ChunkError</div>
<div className="tree-grandchildren">
<div className="tree-node leaf">ChunkReadError</div>
<div className="tree-node leaf">ChunkChecksumError</div>
</div>
<div className="tree-node">ProcessingError</div>
<div className="tree-grandchildren">
<div className="tree-node leaf">ProcessorTimeoutError</div>
<div className="tree-node leaf">ProcessorFailureError</div>
</div>
<div className="tree-node">ReassemblyError</div>
</div>
</div>
<div className="error-entries">
{errors.length === 0 && (
<div className="error-empty">No errors recorded</div>
)}
{errors.map((entry, i) => (
<div key={i} className="error-entry">
<span className="error-type">{entry.event_type}</span>
{entry.sequence !== undefined && (
<span className="error-seq">chunk #{entry.sequence}</span>
)}
{entry.worker_id && (
<span className="error-worker">{entry.worker_id}</span>
)}
<span className="error-msg">{entry.error}</span>
{entry.retries !== undefined && entry.retries > 0 && (
<span className="error-retries">
{entry.retries} retries
</span>
)}
</div>
))}
</div>
</div>
);
}
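
One plausible mapping from a failure-carrying PipelineEvent to the ErrorEntry shape rendered above; the field choices are assumptions, since the commit hunk does not show the producer of these entries:

// Assumed converter: build an ErrorEntry from an error/retry SSE event.
import type { ErrorEntry, PipelineEvent } from "../types";

export function toErrorEntry(
  ev: PipelineEvent,
  eventType: string,
): ErrorEntry | null {
  if (!ev.error) return null; // only events carrying an error message qualify
  return {
    timestamp: Date.now(),
    sequence: ev.sequence,
    worker_id: ev.worker_id,
    error: ev.error,
    retries: ev.retries,
    event_type: eventType,
  };
}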


@@ -0,0 +1,50 @@
import { TopicBadge, TOPICS } from "./TopicBadge";
interface Props {
activeStage: string;
}
const STAGES = [
{ id: "chunking", label: "Chunker", sub: "File -> Chunks (generator)" },
{ id: "queued", label: "ChunkQueue", sub: "Bounded queue (backpressure)" },
{ id: "processing", label: "WorkerPool", sub: "ThreadPoolExecutor" },
{ id: "collecting", label: "ResultCollector", sub: "heapq reassembly" },
{ id: "completed", label: "PipelineResult", sub: "Aggregate stats" },
];
/**
* Visual flow diagram of pipeline stages.
* Highlights the currently active stage.
* Interview Topic 4: OOP design — shows class hierarchy.
*/
export function PipelineDiagram({ activeStage }: Props) {
return (
<div className="pipeline-diagram">
<div className="panel-header">
<h2>Pipeline Flow</h2>
<TopicBadge topic={TOPICS.oop} />
</div>
<div className="stage-flow">
{STAGES.map((stage, i) => (
<div key={stage.id} className="stage-wrapper">
<div
className={`stage ${activeStage === stage.id ? "active" : ""}`}
>
<div className="stage-label">{stage.label}</div>
<div className="stage-sub">{stage.sub}</div>
</div>
{i < STAGES.length - 1 && <div className="stage-arrow" />}
</div>
))}
</div>
<div className="processor-hierarchy">
<div className="hierarchy-title">Processor ABC</div>
<div className="hierarchy-children">
<span className="hierarchy-node">ChecksumProcessor</span>
<span className="hierarchy-node">SimulatedDecodeProcessor</span>
<span className="hierarchy-node">CompositeProcessor</span>
</div>
</div>
</div>
);
}


@@ -0,0 +1,46 @@
import { TopicBadge, TOPICS } from "./TopicBadge";
interface Props {
current: number;
max: number;
buffered: number;
}
/**
* Queue fill level gauge + collector heap buffer.
* Interview Topic 5: Data structures — queue.Queue, heapq, deque.
*/
export function QueueGauge({ current, max, buffered }: Props) {
const fillPct = max > 0 ? Math.min((current / max) * 100, 100) : 0;
return (
<div className="queue-gauge">
<div className="panel-header">
<h2>Queue & Buffer</h2>
<TopicBadge topic={TOPICS.datastructures} />
</div>
<div className="gauge-row">
<div className="gauge-label">
Queue <span className="gauge-value">{current}/{max}</span>
</div>
<div className="gauge-bar">
<div
className="gauge-fill"
style={{
width: `${fillPct}%`,
background: fillPct > 80 ? "#ef4444" : "#3b82f6",
}}
/>
</div>
</div>
<div className="gauge-row">
<div className="gauge-label">
Heap Buffer <span className="gauge-value">{buffered}</span>
</div>
<div className="gauge-note">
Out-of-order results waiting for gaps to fill
</div>
</div>
</div>
);
}


@@ -0,0 +1,59 @@
import type { PipelineStats } from "../types";
import { TopicBadge, TOPICS } from "./TopicBadge";
interface Props {
stats: PipelineStats;
}
/**
* Throughput, timing, and error stats.
* Interview Topic 6: Algorithms — throughput calculation over sliding window.
* Interview Topic 8: TDD — test count and coverage.
*/
export function StatsPanel({ stats }: Props) {
return (
<div className="stats-panel">
<div className="panel-header">
<h2>Stats</h2>
<div className="badge-row">
<TopicBadge topic={TOPICS.algorithms} />
<TopicBadge topic={TOPICS.testing} />
</div>
</div>
<div className="stats-grid">
<div className="stat">
<div className="stat-value">{stats.total_chunks}</div>
<div className="stat-label">Total Chunks</div>
</div>
<div className="stat">
<div className="stat-value">{stats.processed}</div>
<div className="stat-label">Processed</div>
</div>
<div className="stat">
<div className="stat-value">{stats.failed}</div>
<div className="stat-label">Failed</div>
</div>
<div className="stat">
<div className="stat-value">{stats.retries}</div>
<div className="stat-label">Retries</div>
</div>
<div className="stat">
<div className="stat-value">
{stats.throughput_mbps.toFixed(2)}
</div>
<div className="stat-label">MB/s</div>
</div>
<div className="stat">
<div className="stat-value">{stats.elapsed.toFixed(2)}s</div>
<div className="stat-label">Elapsed</div>
</div>
</div>
<div className="test-info">
<span className="test-badge">64 tests</span>
<span className="test-note">
7 test files &middot; pytest &middot; parametrized
</span>
</div>
</div>
);
}
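
For Topic 6, a hedged TypeScript rendition of sliding-window throughput; the backend computes this in Python with a deque, so the sample shape and window length here are illustrative:

// Sketch only: samples are assumed ordered by timestamp (seconds).
type Sample = { t: number; bytes: number };

export function slidingThroughputMbps(
  samples: Sample[],
  windowSeconds = 5, // assumed window; the real value lives in the backend
): number {
  if (samples.length === 0) return 0;
  const now = samples[samples.length - 1].t;
  const recent = samples.filter((s) => now - s.t <= windowSeconds);
  if (recent.length < 2) return 0;
  const bytes = recent.reduce((sum, s) => sum + s.bytes, 0);
  const span = recent[recent.length - 1].t - recent[0].t || windowSeconds;
  return bytes / (1024 * 1024) / span; // MB per second
}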


@@ -0,0 +1,86 @@
import { useState } from "react";
import type { InterviewTopic } from "../types";
/**
* Expandable pill badge annotating an interview topic.
* Click to expand and see description + code reference.
*/
export function TopicBadge({ topic }: { topic: InterviewTopic }) {
const [expanded, setExpanded] = useState(false);
return (
<div
className={`topic-badge ${expanded ? "expanded" : ""}`}
onClick={() => setExpanded(!expanded)}
>
<span className="topic-number">#{topic.number}</span>
<span className="topic-title">{topic.title}</span>
{expanded && (
<div className="topic-detail">
<p>{topic.description}</p>
<code>{topic.code_ref}</code>
</div>
)}
</div>
);
}
/** Pre-defined topics mapped to pipeline components. */
export const TOPICS: Record<string, InterviewTopic> = {
params: {
number: 1,
title: "Function Params & Defaults",
description:
"Each pipeline parameter has a sensible default (chunk_duration=10s, num_workers=4, max_retries=3). Tweaking them changes pipeline behavior.",
code_ref: "core/chunker/pipeline.py — Pipeline.__init__()",
},
concurrency: {
number: 2,
title: "Concurrency (Threading)",
description:
"Workers run in a ThreadPoolExecutor. The queue coordinates work between producer and consumer threads.",
code_ref: "core/chunker/pool.py — WorkerPool, ThreadPoolExecutor",
},
iteration: {
number: 3,
title: "Generators & Iteration",
description:
"Chunks are yielded lazily via a generator — the file is never fully loaded into memory.",
code_ref: "core/chunker/chunker.py — Chunker.chunks() generator",
},
oop: {
number: 4,
title: "OOP Design (ABC)",
description:
"Processor is an abstract base class. ChecksumProcessor, SimulatedDecodeProcessor, and CompositeProcessor inherit from it.",
code_ref: "core/chunker/processor.py — Processor ABC hierarchy",
},
datastructures: {
number: 5,
title: "Data Structures",
description:
"Bounded queue.Queue for backpressure, heapq min-heap for ordered reassembly, deque for sliding-window throughput.",
code_ref: "core/chunker/queue.py, collector.py, models.py",
},
algorithms: {
number: 6,
title: "Algorithms & Sorting",
description:
"ResultCollector uses a min-heap to reassemble chunks in sequence order, even when they arrive out of order.",
code_ref: "core/chunker/collector.py — heapq-based reassembly",
},
exceptions: {
number: 7,
title: "Exception Handling",
description:
"PipelineError hierarchy with typed exceptions. Workers retry with exponential backoff before giving up.",
code_ref: "core/chunker/exceptions.py, worker.py — retry logic",
},
testing: {
number: 8,
title: "TDD & Unit Testing",
description:
"64 tests covering every module. Parametrized tests, fixtures, edge cases, concurrency tests.",
code_ref: "tests/chunker/ — 7 test files, pytest",
},
};
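
A sketch of the reassembly idea behind TOPICS.algorithms, transposed to TypeScript: the Python collector uses heapq, so a Map keyed by sequence plus a next-expected cursor is an equivalent illustration, not the actual implementation:

// Assumes sequences start at 0 and are contiguous.
export class ReassemblyBuffer<T extends { sequence: number }> {
  private buffer = new Map<number, T>();
  private next = 0;

  /** Accept a result in any order; return the run now emittable in order. */
  push(item: T): T[] {
    this.buffer.set(item.sequence, item);
    const ready: T[] = [];
    while (this.buffer.has(this.next)) {
      ready.push(this.buffer.get(this.next)!);
      this.buffer.delete(this.next);
      this.next += 1;
    }
    return ready;
  }

  /** Out-of-order results waiting for gaps — the QueueGauge "Heap Buffer" count. */
  get buffered(): number {
    return this.buffer.size;
  }
}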


@@ -0,0 +1,55 @@
import type { WorkerInfo } from "../types";
import { TopicBadge, TOPICS } from "./TopicBadge";
interface Props {
workers: WorkerInfo[];
}
const STATE_COLORS: Record<string, string> = {
idle: "#6b7280",
processing: "#3b82f6",
retry: "#f97316",
stopped: "#ef4444",
};
/**
* Worker thread status cards.
* Shows each worker's real-time state and which chunk it's processing.
* Interview Topic 2: Concurrency (threading).
*/
export function WorkerPanel({ workers }: Props) {
return (
<div className="worker-panel">
<div className="panel-header">
<h2>Workers</h2>
<TopicBadge topic={TOPICS.concurrency} />
</div>
<div className="worker-cards">
{workers.map((w) => (
<div key={w.worker_id} className="worker-card">
<div className="worker-header">
<span className="worker-name">{w.worker_id}</span>
<span
className="worker-state"
style={{ color: STATE_COLORS[w.state] || "#888" }}
>
{w.state}
</span>
</div>
{w.current_chunk !== undefined && (
<div className="worker-chunk">chunk #{w.current_chunk}</div>
)}
<div className="worker-stats">
<span>done: {w.processed}</span>
<span>err: {w.errors}</span>
<span>retry: {w.retries}</span>
</div>
</div>
))}
{workers.length === 0 && (
<div className="worker-empty">No workers started</div>
)}
</div>
</div>
);
}


@@ -0,0 +1,81 @@
import { useCallback, useEffect, useRef, useState } from "react";
import type { PipelineEvent } from "../types";
/**
* SSE hook — connects to /api/chunker/stream/{jobId} via native EventSource.
*
* Demonstrates: real-time event streaming from backend to UI.
*/
export function useEventStream(jobId: string | null) {
const [events, setEvents] = useState<PipelineEvent[]>([]);
const [connected, setConnected] = useState(false);
const [done, setDone] = useState(false);
const esRef = useRef<EventSource | null>(null);
const close = useCallback(() => {
if (esRef.current) {
esRef.current.close();
esRef.current = null;
setConnected(false);
}
}, []);
useEffect(() => {
if (!jobId) return;
setEvents([]);
setDone(false);
const es = new EventSource(`/api/chunker/stream/${jobId}`);
esRef.current = es;
es.onopen = () => setConnected(true);
es.onerror = () => setConnected(false);
const handleEvent = (eventType: string) => (e: MessageEvent) => {
try {
const data = JSON.parse(e.data) as PipelineEvent;
setEvents((prev) => [...prev, { ...data, status: eventType }]);
} catch {
// ignore parse errors
}
};
// Listen to all chunker event types
const eventTypes = [
"waiting",
"pending",
"chunking",
"processing",
"collecting",
"completed",
"failed",
"cancelled",
"done",
"timeout",
];
for (const type of eventTypes) {
es.addEventListener(type, handleEvent(type));
}
es.addEventListener("done", () => {
setDone(true);
es.close();
setConnected(false);
});
es.addEventListener("timeout", () => {
setDone(true);
es.close();
setConnected(false);
});
return () => {
es.close();
esRef.current = null;
};
}, [jobId]);
return { events, connected, done, close };
}
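
A hedged usage sketch for the hook; the component name, rendering, and the assumption that progress is reported in [0, 1] are illustrative, not from this commit:

// Illustrative consumer — assumes useEventStream is imported from this module.
import { useMemo } from "react";

export function JobProgress({ jobId }: { jobId: string | null }) {
  const { events, connected, done } = useEventStream(jobId);
  // Most recent event carrying a job-level progress value wins.
  const progress = useMemo(() => {
    for (let i = events.length - 1; i >= 0; i--) {
      const p = events[i].progress;
      if (p !== undefined) return p;
    }
    return 0;
  }, [events]);
  return (
    <div>
      {connected ? "live" : done ? "finished" : "connecting"} ·{" "}
      {(progress * 100).toFixed(0)}%
    </div>
  );
}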

9
ui/chunker/src/main.tsx Normal file

@@ -0,0 +1,9 @@
import React from "react";
import ReactDOM from "react-dom/client";
import App from "./App";
ReactDOM.createRoot(document.getElementById("app")!).render(
<React.StrictMode>
<App />
</React.StrictMode>
);

114
ui/chunker/src/types.ts Normal file

@@ -0,0 +1,114 @@
/** Pipeline configuration sent to the backend. */
export interface PipelineConfig {
source_asset_id: string;
chunk_duration: number;
num_workers: number;
max_retries: number;
processor_type: "ffmpeg" | "checksum" | "simulated_decode" | "composite";
}
/** Media asset from the backend. */
export interface MediaAsset {
id: string;
filename: string;
file_path: string;
status: string;
error_message: string | null;
file_size: number | null;
duration: number | null;
video_codec: string | null;
audio_codec: string | null;
width: number | null;
height: number | null;
framerate: number | null;
bitrate: number | null;
properties: Record<string, unknown>;
comments: string;
tags: string[];
created_at: string | null;
updated_at: string | null;
}
/** State of an individual chunk. */
export type ChunkState =
| "pending"
| "queued"
| "processing"
| "done"
| "error"
| "retry";
/** Tracked chunk in the UI grid. */
export interface ChunkInfo {
sequence: number;
state: ChunkState;
size?: number;
worker_id?: string;
retries?: number;
processing_time?: number;
error?: string;
}
/** Worker thread status. */
export interface WorkerInfo {
worker_id: string;
state: "idle" | "processing" | "retry" | "stopped";
current_chunk?: number;
processed: number;
errors: number;
retries: number;
}
/** SSE event from the backend. */
export interface PipelineEvent {
job_id: string;
status?: string;
progress?: number;
total_chunks?: number;
processed_chunks?: number;
failed_chunks?: number;
throughput_mbps?: number;
elapsed?: number;
error?: string;
// Chunk-level fields
sequence?: number;
size?: number;
worker_id?: string;
success?: boolean;
processing_time?: number;
retries?: number;
queue_size?: number;
// Worker-level fields
state?: string;
attempt?: number;
backoff?: number;
}
/** Aggregate pipeline stats. */
export interface PipelineStats {
total_chunks: number;
processed: number;
failed: number;
retries: number;
elapsed: number;
throughput_mbps: number;
queue_size: number;
}
/** Error log entry. */
export interface ErrorEntry {
timestamp: number;
sequence?: number;
worker_id?: string;
error: string;
retries?: number;
event_type: string;
}
/** Interview topic for annotation badges. */
export interface InterviewTopic {
number: number;
title: string;
description: string;
code_ref: string;
}

1
ui/chunker/src/vite-env.d.ts vendored Normal file

@@ -0,0 +1 @@
/// <reference types="vite/client" />

21
ui/chunker/tsconfig.json Normal file

@@ -0,0 +1,21 @@
{
"compilerOptions": {
"target": "ES2020",
"useDefineForClassFields": true,
"module": "ESNext",
"lib": ["ES2020", "DOM", "DOM.Iterable"],
"skipLibCheck": true,
"moduleResolution": "bundler",
"allowImportingTsExtensions": true,
"resolveJsonModule": true,
"isolatedModules": true,
"noEmit": true,
"jsx": "react-jsx",
"strict": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"noFallthroughCasesInSwitch": true
},
"include": ["src/**/*.ts", "src/**/*.tsx"],
"references": [{ "path": "./tsconfig.node.json" }]
}


@@ -0,0 +1,10 @@
{
"compilerOptions": {
"composite": true,
"skipLibCheck": true,
"module": "ESNext",
"moduleResolution": "bundler",
"allowSyntheticDefaultImports": true
},
"include": ["vite.config.ts"]
}

21
ui/chunker/vite.config.ts Normal file

@@ -0,0 +1,21 @@
import { defineConfig } from "vite";
import react from "@vitejs/plugin-react";
export default defineConfig({
plugins: [react()],
server: {
host: "0.0.0.0",
port: 5174,
allowedHosts: process.env.VITE_ALLOWED_HOSTS?.split(",") || [],
proxy: {
"/api": {
target: "http://fastapi:8702",
changeOrigin: true,
},
"/graphql": {
target: "http://fastapi:8702",
changeOrigin: true,
},
},
},
});


@@ -0,0 +1,2 @@
node_modules/
dist/


@@ -6,6 +6,7 @@
export type AssetStatus = "pending" | "ready" | "error";
export type JobStatus = "pending" | "processing" | "completed" | "failed" | "cancelled";
export type ChunkJobStatus = "pending" | "chunking" | "processing" | "collecting" | "completed" | "failed" | "cancelled";
export interface MediaAsset {
id: string;
@@ -73,6 +74,29 @@ export interface TranscodeJob {
completed_at: string | null;
}
export interface ChunkJob {
id: string;
source_asset_id: string;
chunk_duration: number;
num_workers: number;
max_retries: number;
processor_type: string;
status: ChunkJobStatus;
progress: number;
total_chunks: number;
processed_chunks: number;
failed_chunks: number;
retry_count: number;
error_message: string | null;
throughput_mbps: number | null;
elapsed_seconds: number | null;
celery_task_id: string | null;
priority: number;
created_at: string | null;
started_at: string | null;
completed_at: string | null;
}
export interface CreateJobRequest {
source_asset_id: string;
preset_id: string | null;