chunker ui redo

This commit is contained in:
2026-03-15 16:03:53 -03:00
parent d5a3372d6b
commit b40bd68411
62 changed files with 5460 additions and 1493 deletions

View File

@@ -28,7 +28,13 @@ class Chunker:
chunk_duration: Duration of each chunk in seconds (default: 10.0)
"""
def __init__(self, file_path: str, chunk_duration: float = 10.0):
def __init__(
self,
file_path: str,
chunk_duration: float = 10.0,
start_time: float | None = None,
end_time: float | None = None,
):
if not os.path.isfile(file_path):
raise ChunkReadError(f"File not found: {file_path}")
if chunk_duration <= 0:
@@ -37,7 +43,16 @@ class Chunker:
self.file_path = file_path
self.chunk_duration = chunk_duration
self.file_size = os.path.getsize(file_path)
self.source_duration = self._probe_duration()
full_duration = self._probe_duration()
# Apply time range
self.range_start = max(start_time or 0.0, 0.0)
self.range_end = min(end_time or full_duration, full_duration)
if self.range_start >= self.range_end:
raise ValueError(
f"Invalid range: start={self.range_start} >= end={self.range_end}"
)
self.source_duration = self.range_end - self.range_start
def _probe_duration(self) -> float:
"""Get source file duration via FFmpeg probe."""
@@ -71,9 +86,9 @@ class Chunker:
"""
total = self.expected_chunks
for sequence in range(total):
start_time = sequence * self.chunk_duration
start_time = self.range_start + sequence * self.chunk_duration
end_time = min(
start_time + self.chunk_duration, self.source_duration
start_time + self.chunk_duration, self.range_end
)
duration = end_time - start_time

View File

@@ -57,6 +57,8 @@ class Pipeline:
queue_size: int = 10,
event_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
output_dir: Optional[str] = None,
start_time: Optional[float] = None,
end_time: Optional[float] = None,
):
self.source = source
self.chunk_duration = chunk_duration
@@ -66,6 +68,8 @@ class Pipeline:
self.queue_size = queue_size
self.event_callback = event_callback
self.output_dir = output_dir
self.start_time = start_time
self.end_time = end_time
def _emit(self, event_type: str, data: Dict[str, Any]) -> None:
"""Emit an event if callback is registered."""
@@ -92,6 +96,19 @@ class Pipeline:
finally:
chunk_queue.close()
def _monitor_progress(
self, start_time: float, file_size: int, stop_event: threading.Event
) -> None:
"""Monitor thread: emit pipeline_progress every 500ms."""
while not stop_event.is_set():
elapsed = time.monotonic() - start_time
mb = file_size / (1024 * 1024)
self._emit("pipeline_progress", {
"elapsed": round(elapsed, 2),
"throughput_mbps": round(mb / elapsed, 2) if elapsed > 0 else 0,
})
stop_event.wait(0.5)
def _write_manifest(
self, result: PipelineResult, source_duration: float
) -> None:
@@ -146,7 +163,12 @@ class Pipeline:
try:
# Stage 1: Set up chunker (probes file for duration)
chunker = Chunker(self.source, self.chunk_duration)
chunker = Chunker(
self.source,
self.chunk_duration,
start_time=self.start_time,
end_time=self.end_time,
)
total_chunks = chunker.expected_chunks
if total_chunks == 0:
@@ -170,9 +192,18 @@ class Pipeline:
output_dir=self.output_dir,
)
# Stage 3: Start workers, then produce chunks
# Stage 3: Start workers, monitor, then produce chunks
pool.start()
monitor_stop = threading.Event()
monitor = threading.Thread(
target=self._monitor_progress,
args=(start_time, chunker.file_size, monitor_stop),
name="progress-monitor",
daemon=True,
)
monitor.start()
producer = threading.Thread(
target=self._produce_chunks,
args=(chunker, chunk_queue),
@@ -185,6 +216,10 @@ class Pipeline:
all_results = pool.wait()
producer.join(timeout=5.0)
# Stop monitor
monitor_stop.set()
monitor.join(timeout=2.0)
# Stage 5: Collect results in order
collector = ResultCollector(total_chunks)
for r in all_results:

View File

@@ -124,6 +124,7 @@ class Worker:
self._emit("chunk_processing", {
"sequence": chunk.sequence,
"state": "processing",
"queue_size": self.chunk_queue.qsize(),
})
result = self._process_with_retry(chunk)
@@ -135,6 +136,7 @@ class Worker:
"success": result.success,
"processing_time": result.processing_time,
"retries": result.retries,
"queue_size": self.chunk_queue.qsize(),
})
self._emit("worker_status", {"state": "stopped"})