chunker ui redo
This commit is contained in:
@@ -28,7 +28,13 @@ class Chunker:
|
||||
chunk_duration: Duration of each chunk in seconds (default: 10.0)
|
||||
"""
|
||||
|
||||
def __init__(self, file_path: str, chunk_duration: float = 10.0):
|
||||
def __init__(
|
||||
self,
|
||||
file_path: str,
|
||||
chunk_duration: float = 10.0,
|
||||
start_time: float | None = None,
|
||||
end_time: float | None = None,
|
||||
):
|
||||
if not os.path.isfile(file_path):
|
||||
raise ChunkReadError(f"File not found: {file_path}")
|
||||
if chunk_duration <= 0:
|
||||
@@ -37,7 +43,16 @@ class Chunker:
|
||||
self.file_path = file_path
|
||||
self.chunk_duration = chunk_duration
|
||||
self.file_size = os.path.getsize(file_path)
|
||||
self.source_duration = self._probe_duration()
|
||||
full_duration = self._probe_duration()
|
||||
|
||||
# Apply time range
|
||||
self.range_start = max(start_time or 0.0, 0.0)
|
||||
self.range_end = min(end_time or full_duration, full_duration)
|
||||
if self.range_start >= self.range_end:
|
||||
raise ValueError(
|
||||
f"Invalid range: start={self.range_start} >= end={self.range_end}"
|
||||
)
|
||||
self.source_duration = self.range_end - self.range_start
|
||||
|
||||
def _probe_duration(self) -> float:
|
||||
"""Get source file duration via FFmpeg probe."""
|
||||
@@ -71,9 +86,9 @@ class Chunker:
|
||||
"""
|
||||
total = self.expected_chunks
|
||||
for sequence in range(total):
|
||||
start_time = sequence * self.chunk_duration
|
||||
start_time = self.range_start + sequence * self.chunk_duration
|
||||
end_time = min(
|
||||
start_time + self.chunk_duration, self.source_duration
|
||||
start_time + self.chunk_duration, self.range_end
|
||||
)
|
||||
duration = end_time - start_time
|
||||
|
||||
|
||||
@@ -57,6 +57,8 @@ class Pipeline:
|
||||
queue_size: int = 10,
|
||||
event_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
|
||||
output_dir: Optional[str] = None,
|
||||
start_time: Optional[float] = None,
|
||||
end_time: Optional[float] = None,
|
||||
):
|
||||
self.source = source
|
||||
self.chunk_duration = chunk_duration
|
||||
@@ -66,6 +68,8 @@ class Pipeline:
|
||||
self.queue_size = queue_size
|
||||
self.event_callback = event_callback
|
||||
self.output_dir = output_dir
|
||||
self.start_time = start_time
|
||||
self.end_time = end_time
|
||||
|
||||
def _emit(self, event_type: str, data: Dict[str, Any]) -> None:
|
||||
"""Emit an event if callback is registered."""
|
||||
@@ -92,6 +96,19 @@ class Pipeline:
|
||||
finally:
|
||||
chunk_queue.close()
|
||||
|
||||
def _monitor_progress(
    self, start_time: float, file_size: int, stop_event: threading.Event
) -> None:
    """Emit a ``pipeline_progress`` event about every 500 ms until stopped.

    Meant to be used as the target of a monitor thread.

    Args:
        start_time: Reference timestamp on the ``time.monotonic()`` clock;
            elapsed time is measured from it.
        file_size: Size in bytes used for the throughput figure.
        stop_event: Event the caller sets to end the loop.

    Each emitted payload contains:
        ``elapsed``: seconds since ``start_time``, rounded to 2 decimals.
        ``throughput_mbps``: ``file_size`` in MiB divided by ``elapsed``,
            rounded to 2 decimals, or 0 when no time has elapsed.
    """
    # The size does not change while monitoring, so convert bytes -> MiB once
    # instead of on every poll.
    size_mb = file_size / (1024 * 1024)

    while not stop_event.is_set():
        elapsed = time.monotonic() - start_time
        # NOTE(review): this divides the *whole* file size by wall time, not
        # the bytes processed so far -- confirm that is the intended metric.
        self._emit("pipeline_progress", {
            "elapsed": round(elapsed, 2),
            "throughput_mbps": round(size_mb / elapsed, 2) if elapsed > 0 else 0,
        })
        # wait() serves as the 500 ms sleep and returns early once the event
        # is set, so shutdown is not delayed by a full interval.
        stop_event.wait(0.5)
|
||||
|
||||
def _write_manifest(
|
||||
self, result: PipelineResult, source_duration: float
|
||||
) -> None:
|
||||
@@ -146,7 +163,12 @@ class Pipeline:
|
||||
|
||||
try:
|
||||
# Stage 1: Set up chunker (probes file for duration)
|
||||
chunker = Chunker(self.source, self.chunk_duration)
|
||||
chunker = Chunker(
|
||||
self.source,
|
||||
self.chunk_duration,
|
||||
start_time=self.start_time,
|
||||
end_time=self.end_time,
|
||||
)
|
||||
total_chunks = chunker.expected_chunks
|
||||
|
||||
if total_chunks == 0:
|
||||
@@ -170,9 +192,18 @@ class Pipeline:
|
||||
output_dir=self.output_dir,
|
||||
)
|
||||
|
||||
# Stage 3: Start workers, then produce chunks
|
||||
# Stage 3: Start workers, monitor, then produce chunks
|
||||
pool.start()
|
||||
|
||||
monitor_stop = threading.Event()
|
||||
monitor = threading.Thread(
|
||||
target=self._monitor_progress,
|
||||
args=(start_time, chunker.file_size, monitor_stop),
|
||||
name="progress-monitor",
|
||||
daemon=True,
|
||||
)
|
||||
monitor.start()
|
||||
|
||||
producer = threading.Thread(
|
||||
target=self._produce_chunks,
|
||||
args=(chunker, chunk_queue),
|
||||
@@ -185,6 +216,10 @@ class Pipeline:
|
||||
all_results = pool.wait()
|
||||
producer.join(timeout=5.0)
|
||||
|
||||
# Stop monitor
|
||||
monitor_stop.set()
|
||||
monitor.join(timeout=2.0)
|
||||
|
||||
# Stage 5: Collect results in order
|
||||
collector = ResultCollector(total_chunks)
|
||||
for r in all_results:
|
||||
|
||||
@@ -124,6 +124,7 @@ class Worker:
|
||||
self._emit("chunk_processing", {
|
||||
"sequence": chunk.sequence,
|
||||
"state": "processing",
|
||||
"queue_size": self.chunk_queue.qsize(),
|
||||
})
|
||||
|
||||
result = self._process_with_retry(chunk)
|
||||
@@ -135,6 +136,7 @@ class Worker:
|
||||
"success": result.success,
|
||||
"processing_time": result.processing_time,
|
||||
"retries": result.retries,
|
||||
"queue_size": self.chunk_queue.qsize(),
|
||||
})
|
||||
|
||||
self._emit("worker_status", {"state": "stopped"})
|
||||
|
||||
Reference in New Issue
Block a user