chunker and ui
This commit is contained in:
98
core/chunker/collector.py
Normal file
98
core/chunker/collector.py
Normal file
@@ -0,0 +1,98 @@
|
||||
"""
|
||||
ResultCollector — reassembles chunk results in sequence order using a min-heap.
|
||||
|
||||
Demonstrates:
|
||||
- Algorithms and sorting (Interview Topic 6) — heapq for ordered reassembly
|
||||
- Core data structures (Interview Topic 5) — heap, deque
|
||||
"""
|
||||
|
||||
import heapq
|
||||
from collections import deque
|
||||
from typing import List
|
||||
|
||||
from .exceptions import ReassemblyError
|
||||
from .models import ChunkResult
|
||||
|
||||
|
||||
class ResultCollector:
|
||||
"""
|
||||
Receives ChunkResults out of order, emits them in sequence order.
|
||||
|
||||
Uses a min-heap keyed on sequence number. Only emits a chunk when
|
||||
all prior sequences have been accounted for.
|
||||
|
||||
Args:
|
||||
total_chunks: Expected total number of chunks
|
||||
"""
|
||||
|
||||
def __init__(self, total_chunks: int):
|
||||
self.total_chunks = total_chunks
|
||||
self._heap: List[tuple[int, ChunkResult]] = []
|
||||
self._next_sequence = 0
|
||||
self._emitted: List[ChunkResult] = []
|
||||
self._seen_sequences: set[int] = set()
|
||||
# Sliding window for throughput calculation
|
||||
self._recent_times: deque[float] = deque(maxlen=50)
|
||||
|
||||
def add(self, result: ChunkResult) -> List[ChunkResult]:
|
||||
"""
|
||||
Add a result and return any newly emittable results in order.
|
||||
|
||||
Args:
|
||||
result: A ChunkResult (may arrive out of order)
|
||||
|
||||
Returns:
|
||||
List of results that can now be emitted in sequence order
|
||||
(may be empty if we're still waiting for earlier sequences)
|
||||
|
||||
Raises:
|
||||
ReassemblyError: If a duplicate sequence is received
|
||||
"""
|
||||
if result.sequence in self._seen_sequences:
|
||||
raise ReassemblyError(
|
||||
f"Duplicate sequence number: {result.sequence}"
|
||||
)
|
||||
self._seen_sequences.add(result.sequence)
|
||||
|
||||
# Track processing time for throughput
|
||||
if result.processing_time > 0:
|
||||
self._recent_times.append(result.processing_time)
|
||||
|
||||
# Push to min-heap
|
||||
heapq.heappush(self._heap, (result.sequence, result))
|
||||
|
||||
# Emit all consecutive results starting from _next_sequence
|
||||
newly_emitted = []
|
||||
while self._heap and self._heap[0][0] == self._next_sequence:
|
||||
_, emitted_result = heapq.heappop(self._heap)
|
||||
self._emitted.append(emitted_result)
|
||||
newly_emitted.append(emitted_result)
|
||||
self._next_sequence += 1
|
||||
|
||||
return newly_emitted
|
||||
|
||||
@property
|
||||
def is_complete(self) -> bool:
|
||||
"""True if all expected chunks have been emitted in order."""
|
||||
return self._next_sequence == self.total_chunks
|
||||
|
||||
@property
|
||||
def buffered_count(self) -> int:
|
||||
"""Number of results waiting in the heap (arrived out of order)."""
|
||||
return len(self._heap)
|
||||
|
||||
@property
|
||||
def emitted_count(self) -> int:
|
||||
"""Number of results emitted in sequence order."""
|
||||
return len(self._emitted)
|
||||
|
||||
@property
|
||||
def avg_processing_time(self) -> float:
|
||||
"""Average processing time from recent results (sliding window)."""
|
||||
if not self._recent_times:
|
||||
return 0.0
|
||||
return sum(self._recent_times) / len(self._recent_times)
|
||||
|
||||
def get_ordered_results(self) -> List[ChunkResult]:
|
||||
"""Get all emitted results in sequence order."""
|
||||
return list(self._emitted)
|
||||
Reference in New Issue
Block a user