Files
mediaproc/core/chunker/worker.py
2026-03-13 14:29:38 -03:00

142 lines
4.3 KiB
Python

"""
Worker — pulls chunks from queue, processes with retry logic.
Demonstrates:
- Exception handling and resilient code (Interview Topic 7)
- Concurrency (Interview Topic 2) — workers run in thread pool
"""
import logging
import queue
import time
from typing import Any, Callable, Dict, Optional
from .exceptions import ProcessorFailureError
from .models import Chunk, ChunkResult
from .processor import Processor
from .queue import ChunkQueue
logger = logging.getLogger(__name__)
class Worker:
    """
    Processes chunks from a queue with retry and exponential backoff.

    Pulls chunks from ``chunk_queue`` until a ``None`` sentinel arrives,
    delegates each chunk to ``processor``, and retries failures with
    exponentially increasing delays. Failures that exhaust all retries are
    reported as failed ChunkResults rather than raised, so the loop keeps
    draining the queue.

    Args:
        worker_id: Identifier for this worker (e.g. "worker-0")
        chunk_queue: Source queue to pull chunks from
        processor: Processor instance to use
        max_retries: Maximum retry attempts per chunk (default: 3)
        event_callback: Optional callback for real-time status updates;
            invoked as ``event_callback(event_type, data_dict)``
    """

    def __init__(
        self,
        worker_id: str,
        chunk_queue: ChunkQueue,
        processor: Processor,
        max_retries: int = 3,
        event_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
    ):
        self.worker_id = worker_id
        self.chunk_queue = chunk_queue
        self.processor = processor
        self.max_retries = max_retries
        self.event_callback = event_callback
        # Lifetime counters. Not synchronized — each Worker instance is
        # expected to run on a single thread of the pool.
        self.processed_count = 0  # chunks pulled and attempted (failures included)
        self.error_count = 0      # chunks that exhausted every retry
        self.retry_count = 0      # total retry attempts across all chunks

    def _emit(self, event_type: str, data: Dict[str, Any]) -> None:
        """Emit an event if a callback is registered, tagging it with worker_id."""
        if self.event_callback:
            self.event_callback(event_type, {"worker_id": self.worker_id, **data})

    def _process_with_retry(self, chunk: Chunk) -> ChunkResult:
        """
        Process a chunk with exponential-backoff retry.

        Retry delays: 0.1s, 0.2s, 0.4s, ... (doubles each attempt).

        Returns:
            The successful ChunkResult (with ``retries``/``worker_id`` filled
            in), or a failed ChunkResult carrying the last error message once
            all ``max_retries`` retries are exhausted.
        """
        last_error: Optional[Exception] = None
        for attempt in range(self.max_retries + 1):
            try:
                if attempt > 0:
                    # Sleep before each retry: 0.1 * 2^(attempt-1) seconds.
                    backoff = 0.1 * (2 ** (attempt - 1))
                    self._emit("chunk_retry", {
                        "sequence": chunk.sequence,
                        "attempt": attempt,
                        "backoff": backoff,
                    })
                    time.sleep(backoff)
                    self.retry_count += 1
                result = self.processor.process(chunk)
                result.retries = attempt
                result.worker_id = self.worker_id
                return result
            except Exception as e:
                # NOTE(review): deliberately broad — every processor error is
                # treated as transient and retried. If some errors are
                # permanent, consider catching ProcessorFailureError (already
                # imported in this module) separately — confirm intent.
                last_error = e
                # Lazy %-formatting: the message string is only built when the
                # warning is actually emitted by a handler.
                logger.warning(
                    "%s: chunk %s attempt %d/%d failed: %s",
                    self.worker_id,
                    chunk.sequence,
                    attempt + 1,
                    self.max_retries + 1,
                    e,
                )
        # All retries exhausted — synthesize a failed result instead of
        # raising, so run() continues with the next chunk.
        self.error_count += 1
        self._emit("chunk_error", {
            "sequence": chunk.sequence,
            "error": str(last_error),
            "retries": self.max_retries,
        })
        return ChunkResult(
            sequence=chunk.sequence,
            success=False,
            processing_time=0.0,
            error=str(last_error),
            retries=self.max_retries,
            worker_id=self.worker_id,
        )

    def run(self) -> list[ChunkResult]:
        """
        Main worker loop — pull chunks and process until the queue is closed.

        A ``None`` sentinel on the queue ends the loop; queue-empty timeouts
        are swallowed so the worker keeps polling until the sentinel arrives.

        Returns:
            List of ChunkResults processed by this worker, in pull order.
        """
        results: list[ChunkResult] = []
        self._emit("worker_status", {"state": "idle"})
        while True:
            try:
                chunk = self.chunk_queue.get(timeout=1.0)
            except queue.Empty:
                continue  # nothing available yet — poll again
            if chunk is None:  # Sentinel received
                break
            self._emit("chunk_processing", {
                "sequence": chunk.sequence,
                "state": "processing",
            })
            result = self._process_with_retry(chunk)
            results.append(result)
            self.processed_count += 1  # counts failed chunks too ("attempted")
            self._emit("chunk_done", {
                "sequence": chunk.sequence,
                "success": result.success,
                "processing_time": result.processing_time,
                "retries": result.retries,
            })
        self._emit("worker_status", {"state": "stopped"})
        return results