chunker ui redo

This commit is contained in:
2026-03-15 16:03:53 -03:00
parent d5a3372d6b
commit b40bd68411
62 changed files with 5460 additions and 1493 deletions

View File

@@ -57,6 +57,8 @@ class Pipeline:
queue_size: int = 10,
event_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
output_dir: Optional[str] = None,
start_time: Optional[float] = None,
end_time: Optional[float] = None,
):
self.source = source
self.chunk_duration = chunk_duration
@@ -66,6 +68,8 @@ class Pipeline:
self.queue_size = queue_size
self.event_callback = event_callback
self.output_dir = output_dir
self.start_time = start_time
self.end_time = end_time
def _emit(self, event_type: str, data: Dict[str, Any]) -> None:
"""Emit an event if callback is registered."""
@@ -92,6 +96,19 @@ class Pipeline:
finally:
chunk_queue.close()
def _monitor_progress(
self, start_time: float, file_size: int, stop_event: threading.Event
) -> None:
"""Monitor thread: emit pipeline_progress every 500ms."""
while not stop_event.is_set():
elapsed = time.monotonic() - start_time
mb = file_size / (1024 * 1024)
self._emit("pipeline_progress", {
"elapsed": round(elapsed, 2),
"throughput_mbps": round(mb / elapsed, 2) if elapsed > 0 else 0,
})
stop_event.wait(0.5)
def _write_manifest(
self, result: PipelineResult, source_duration: float
) -> None:
@@ -146,7 +163,12 @@ class Pipeline:
try:
# Stage 1: Set up chunker (probes file for duration)
chunker = Chunker(self.source, self.chunk_duration)
chunker = Chunker(
self.source,
self.chunk_duration,
start_time=self.start_time,
end_time=self.end_time,
)
total_chunks = chunker.expected_chunks
if total_chunks == 0:
@@ -170,9 +192,18 @@ class Pipeline:
output_dir=self.output_dir,
)
# Stage 3: Start workers, then produce chunks
# Stage 3: Start workers, monitor, then produce chunks
pool.start()
monitor_stop = threading.Event()
monitor = threading.Thread(
target=self._monitor_progress,
args=(start_time, chunker.file_size, monitor_stop),
name="progress-monitor",
daemon=True,
)
monitor.start()
producer = threading.Thread(
target=self._produce_chunks,
args=(chunker, chunk_queue),
@@ -185,6 +216,10 @@ class Pipeline:
all_results = pool.wait()
producer.join(timeout=5.0)
# Stop monitor
monitor_stop.set()
monitor.join(timeout=2.0)
# Stage 5: Collect results in order
collector = ResultCollector(total_chunks)
for r in all_results: