chunker and ui
This commit is contained in:
141
core/chunker/worker.py
Normal file
141
core/chunker/worker.py
Normal file
@@ -0,0 +1,141 @@
|
||||
"""
|
||||
Worker — pulls chunks from queue, processes with retry logic.
|
||||
|
||||
Demonstrates:
|
||||
- Exception handling and resilient code (Interview Topic 7)
|
||||
- Concurrency (Interview Topic 2) — workers run in thread pool
|
||||
"""
|
||||
|
||||
import logging
|
||||
import queue
|
||||
import time
|
||||
from typing import Any, Callable, Dict, Optional
|
||||
|
||||
from .exceptions import ProcessorFailureError
|
||||
from .models import Chunk, ChunkResult
|
||||
from .processor import Processor
|
||||
from .queue import ChunkQueue
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Worker:
|
||||
"""
|
||||
Processes chunks from a queue with retry and exponential backoff.
|
||||
|
||||
Args:
|
||||
worker_id: Identifier for this worker (e.g. "worker-0")
|
||||
chunk_queue: Source queue to pull chunks from
|
||||
processor: Processor instance to use
|
||||
max_retries: Maximum retry attempts per chunk (default: 3)
|
||||
event_callback: Optional callback for real-time status updates
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
worker_id: str,
|
||||
chunk_queue: ChunkQueue,
|
||||
processor: Processor,
|
||||
max_retries: int = 3,
|
||||
event_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
|
||||
):
|
||||
self.worker_id = worker_id
|
||||
self.chunk_queue = chunk_queue
|
||||
self.processor = processor
|
||||
self.max_retries = max_retries
|
||||
self.event_callback = event_callback
|
||||
self.processed_count = 0
|
||||
self.error_count = 0
|
||||
self.retry_count = 0
|
||||
|
||||
def _emit(self, event_type: str, data: Dict[str, Any]) -> None:
|
||||
"""Emit an event if callback is registered."""
|
||||
if self.event_callback:
|
||||
self.event_callback(event_type, {"worker_id": self.worker_id, **data})
|
||||
|
||||
def _process_with_retry(self, chunk: Chunk) -> ChunkResult:
|
||||
"""
|
||||
Process a chunk with exponential backoff retry.
|
||||
|
||||
Retry delays: 0.1s, 0.2s, 0.4s, ... (doubles each attempt)
|
||||
"""
|
||||
last_error = None
|
||||
|
||||
for attempt in range(self.max_retries + 1):
|
||||
try:
|
||||
if attempt > 0:
|
||||
backoff = 0.1 * (2 ** (attempt - 1))
|
||||
self._emit("chunk_retry", {
|
||||
"sequence": chunk.sequence,
|
||||
"attempt": attempt,
|
||||
"backoff": backoff,
|
||||
})
|
||||
time.sleep(backoff)
|
||||
self.retry_count += 1
|
||||
|
||||
result = self.processor.process(chunk)
|
||||
result.retries = attempt
|
||||
result.worker_id = self.worker_id
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
last_error = e
|
||||
logger.warning(
|
||||
f"{self.worker_id}: chunk {chunk.sequence} "
|
||||
f"attempt {attempt + 1}/{self.max_retries + 1} failed: {e}"
|
||||
)
|
||||
|
||||
# All retries exhausted
|
||||
self.error_count += 1
|
||||
self._emit("chunk_error", {
|
||||
"sequence": chunk.sequence,
|
||||
"error": str(last_error),
|
||||
"retries": self.max_retries,
|
||||
})
|
||||
|
||||
return ChunkResult(
|
||||
sequence=chunk.sequence,
|
||||
success=False,
|
||||
processing_time=0.0,
|
||||
error=str(last_error),
|
||||
retries=self.max_retries,
|
||||
worker_id=self.worker_id,
|
||||
)
|
||||
|
||||
def run(self) -> list[ChunkResult]:
|
||||
"""
|
||||
Main worker loop — pull chunks and process until queue is closed.
|
||||
|
||||
Returns:
|
||||
List of ChunkResults processed by this worker
|
||||
"""
|
||||
results = []
|
||||
self._emit("worker_status", {"state": "idle"})
|
||||
|
||||
while True:
|
||||
try:
|
||||
chunk = self.chunk_queue.get(timeout=1.0)
|
||||
except queue.Empty:
|
||||
continue
|
||||
|
||||
if chunk is None: # Sentinel received
|
||||
break
|
||||
|
||||
self._emit("chunk_processing", {
|
||||
"sequence": chunk.sequence,
|
||||
"state": "processing",
|
||||
})
|
||||
|
||||
result = self._process_with_retry(chunk)
|
||||
results.append(result)
|
||||
self.processed_count += 1
|
||||
|
||||
self._emit("chunk_done", {
|
||||
"sequence": chunk.sequence,
|
||||
"success": result.success,
|
||||
"processing_time": result.processing_time,
|
||||
"retries": result.retries,
|
||||
})
|
||||
|
||||
self._emit("worker_status", {"state": "stopped"})
|
||||
return results
|
||||
Reference in New Issue
Block a user