"""
Worker — pulls chunks from queue, processes with retry logic.

Demonstrates:
- Exception handling and resilient code (Interview Topic 7)
- Concurrency (Interview Topic 2) — workers run in thread pool
"""
|
|
|
|
import logging
|
|
import queue
|
|
import time
|
|
from typing import Any, Callable, Dict, Optional
|
|
|
|
from .exceptions import ProcessorFailureError
|
|
from .models import Chunk, ChunkResult
|
|
from .processor import Processor
|
|
from .queue import ChunkQueue
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class Worker:
    """
    Processes chunks from a queue with retry and exponential backoff.

    Each worker is intended to run on its own thread; its counters are not
    independently synchronized.

    Args:
        worker_id: Identifier for this worker (e.g. "worker-0")
        chunk_queue: Source queue to pull chunks from
        processor: Processor instance to use
        max_retries: Maximum retry attempts per chunk (default: 3)
        event_callback: Optional callback for real-time status updates;
            called as ``callback(event_type, data)`` with ``worker_id``
            merged into ``data``
    """

    def __init__(
        self,
        worker_id: str,
        chunk_queue: ChunkQueue,
        processor: Processor,
        max_retries: int = 3,
        event_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
    ) -> None:
        self.worker_id = worker_id
        self.chunk_queue = chunk_queue
        self.processor = processor
        self.max_retries = max_retries
        self.event_callback = event_callback
        # Lifetime counters for reporting: chunks pulled (success or failure),
        # chunks that exhausted all retries, and individual retry attempts.
        self.processed_count = 0
        self.error_count = 0
        self.retry_count = 0

    def _emit(self, event_type: str, data: Dict[str, Any]) -> None:
        """Emit an event if a callback is registered, tagging it with worker_id."""
        if self.event_callback:
            self.event_callback(event_type, {"worker_id": self.worker_id, **data})

    def _process_with_retry(self, chunk: Chunk) -> ChunkResult:
        """
        Process a chunk with exponential backoff retry.

        Retry delays: 0.1s, 0.2s, 0.4s, ... (doubles each attempt).

        Returns:
            The processor's ChunkResult on success (with ``retries`` and
            ``worker_id`` stamped on), or a failed ChunkResult carrying the
            last error once all retries are exhausted. Never raises.
        """
        last_error: Optional[Exception] = None

        for attempt in range(self.max_retries + 1):
            try:
                if attempt > 0:
                    # Exponential backoff: 0.1 * 2^(attempt - 1).
                    backoff = 0.1 * (2 ** (attempt - 1))
                    self._emit("chunk_retry", {
                        "sequence": chunk.sequence,
                        "attempt": attempt,
                        "backoff": backoff,
                    })
                    time.sleep(backoff)
                    self.retry_count += 1

                result = self.processor.process(chunk)
                result.retries = attempt
                result.worker_id = self.worker_id
                return result

            except Exception as e:
                # NOTE(review): deliberately broad so any processor failure is
                # retried; ProcessorFailureError is imported but unused —
                # confirm whether only that type should be retryable.
                last_error = e
                # Lazy %-style args: formatting is skipped when WARNING is
                # filtered out (preferred over f-strings for logging calls).
                logger.warning(
                    "%s: chunk %s attempt %d/%d failed: %s",
                    self.worker_id,
                    chunk.sequence,
                    attempt + 1,
                    self.max_retries + 1,
                    e,
                )

        # All retries exhausted — report the failure and return a failed result.
        self.error_count += 1
        self._emit("chunk_error", {
            "sequence": chunk.sequence,
            "error": str(last_error),
            "retries": self.max_retries,
        })

        return ChunkResult(
            sequence=chunk.sequence,
            success=False,
            processing_time=0.0,
            error=str(last_error),
            retries=self.max_retries,
            worker_id=self.worker_id,
        )

    def run(self) -> list[ChunkResult]:
        """
        Main worker loop — pull chunks and process until queue is closed.

        A ``None`` sentinel on the queue signals shutdown; queue-empty
        timeouts are ignored so the worker keeps polling until the
        sentinel arrives.

        Returns:
            List of ChunkResults processed by this worker
        """
        results: list[ChunkResult] = []
        self._emit("worker_status", {"state": "idle"})

        while True:
            try:
                chunk = self.chunk_queue.get(timeout=1.0)
            except queue.Empty:
                continue  # keep polling; producer may still be filling the queue

            if chunk is None:  # Sentinel received
                break

            self._emit("chunk_processing", {
                "sequence": chunk.sequence,
                "state": "processing",
            })

            result = self._process_with_retry(chunk)
            results.append(result)
            # Counts every chunk pulled, successful or not; failures are
            # additionally tracked in error_count.
            self.processed_count += 1

            self._emit("chunk_done", {
                "sequence": chunk.sequence,
                "success": result.success,
                "processing_time": result.processing_time,
                "retries": result.retries,
            })

        self._emit("worker_status", {"state": "stopped"})
        return results
|