"""
Worker — pulls chunks from queue, processes with retry logic.

Demonstrates:
- Exception handling and resilient code (Interview Topic 7)
- Concurrency (Interview Topic 2) — workers run in thread pool
"""

import logging
import queue
import time
from typing import Any, Callable, Dict, Optional

from .exceptions import ProcessorFailureError
from .models import Chunk, ChunkResult
from .processor import Processor
from .queue import ChunkQueue

logger = logging.getLogger(__name__)


class Worker:
    """
    Processes chunks from a queue with retry and exponential backoff.

    Args:
        worker_id: Identifier for this worker (e.g. "worker-0")
        chunk_queue: Source queue to pull chunks from
        processor: Processor instance to use
        max_retries: Maximum retry attempts per chunk (default: 3)
        event_callback: Optional callback for real-time status updates
    """

    def __init__(
        self,
        worker_id: str,
        chunk_queue: ChunkQueue,
        processor: Processor,
        max_retries: int = 3,
        event_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
    ):
        self.worker_id = worker_id
        self.chunk_queue = chunk_queue
        self.processor = processor
        self.max_retries = max_retries
        self.event_callback = event_callback
        # Lifetime counters, readable by the coordinator for reporting.
        self.processed_count = 0  # chunks pulled and handled (includes failures)
        self.error_count = 0      # chunks that exhausted all retries
        self.retry_count = 0      # individual retry attempts across all chunks

    def _emit(self, event_type: str, data: Dict[str, Any]) -> None:
        """Emit an event if a callback is registered, tagging it with worker_id."""
        if self.event_callback:
            self.event_callback(event_type, {"worker_id": self.worker_id, **data})

    def _process_with_retry(self, chunk: Chunk) -> ChunkResult:
        """
        Process a chunk with exponential backoff retry.

        Retry delays: 0.1s, 0.2s, 0.4s, ... (doubles each attempt)

        Returns:
            The successful ChunkResult from the processor (annotated with
            this worker's id and the attempt count), or a failed ChunkResult
            carrying the last error once all retries are exhausted.
        """
        last_error = None
        # attempt 0 is the initial try; attempts 1..max_retries are retries.
        for attempt in range(self.max_retries + 1):
            try:
                if attempt > 0:
                    # Exponential backoff: 0.1 * 2**(attempt-1) seconds.
                    backoff = 0.1 * (2 ** (attempt - 1))
                    self._emit("chunk_retry", {
                        "sequence": chunk.sequence,
                        "attempt": attempt,
                        "backoff": backoff,
                    })
                    time.sleep(backoff)
                    self.retry_count += 1

                result = self.processor.process(chunk)
                result.retries = attempt
                result.worker_id = self.worker_id
                return result
            except Exception as e:
                # Deliberately broad: the worker must survive any processor
                # failure and keep draining the queue (resilience is the
                # point of this module). Lazy %-args avoid formatting cost
                # on the hot path; rendered message matches the original.
                last_error = e
                logger.warning(
                    "%s: chunk %s attempt %s/%s failed: %s",
                    self.worker_id,
                    chunk.sequence,
                    attempt + 1,
                    self.max_retries + 1,
                    e,
                )

        # All retries exhausted — report the failure and synthesize a
        # failed result so the caller still gets one result per chunk.
        self.error_count += 1
        self._emit("chunk_error", {
            "sequence": chunk.sequence,
            "error": str(last_error),
            "retries": self.max_retries,
        })
        return ChunkResult(
            sequence=chunk.sequence,
            success=False,
            processing_time=0.0,
            error=str(last_error),
            retries=self.max_retries,
            worker_id=self.worker_id,
        )

    def run(self) -> list[ChunkResult]:
        """
        Main worker loop — pull chunks and process until queue is closed.

        A ``None`` pulled from the queue is the shutdown sentinel; a
        ``queue.Empty`` timeout simply re-polls so the worker notices the
        sentinel promptly without busy-waiting.

        Returns:
            List of ChunkResults processed by this worker
        """
        results = []
        self._emit("worker_status", {"state": "idle"})

        while True:
            try:
                chunk = self.chunk_queue.get(timeout=1.0)
            except queue.Empty:
                continue

            if chunk is None:  # Sentinel received
                break

            self._emit("chunk_processing", {
                "sequence": chunk.sequence,
                "state": "processing",
                "queue_size": self.chunk_queue.qsize(),
            })

            result = self._process_with_retry(chunk)
            results.append(result)
            self.processed_count += 1

            self._emit("chunk_done", {
                "sequence": chunk.sequence,
                "success": result.success,
                "processing_time": result.processing_time,
                "retries": result.retries,
                "queue_size": self.chunk_queue.qsize(),
            })

        self._emit("worker_status", {"state": "stopped"})
        return results