chunker and ui
This commit is contained in:
86
core/chunker/chunker.py
Normal file
86
core/chunker/chunker.py
Normal file
@@ -0,0 +1,86 @@
|
||||
"""
|
||||
Chunker — probes a media file and yields time-based Chunk objects.
|
||||
|
||||
Demonstrates:
|
||||
- Function parameters and defaults (Interview Topic 1)
|
||||
- List comprehensions and efficient iteration / generators (Interview Topic 3)
|
||||
"""
|
||||
|
||||
import math
|
||||
import os
|
||||
from typing import Generator
|
||||
|
||||
from core.ffmpeg.probe import probe_file
|
||||
|
||||
from .exceptions import ChunkReadError
|
||||
from .models import Chunk
|
||||
|
||||
|
||||
class Chunker:
|
||||
"""
|
||||
Splits a media file into time-based chunks via a generator.
|
||||
|
||||
Uses FFmpeg probe to get duration, then yields Chunk objects
|
||||
representing time segments (no data read — extraction happens in the processor).
|
||||
|
||||
Args:
|
||||
file_path: Path to the source media file
|
||||
chunk_duration: Duration of each chunk in seconds (default: 10.0)
|
||||
"""
|
||||
|
||||
def __init__(self, file_path: str, chunk_duration: float = 10.0):
|
||||
if not os.path.isfile(file_path):
|
||||
raise ChunkReadError(f"File not found: {file_path}")
|
||||
if chunk_duration <= 0:
|
||||
raise ValueError("chunk_duration must be positive")
|
||||
|
||||
self.file_path = file_path
|
||||
self.chunk_duration = chunk_duration
|
||||
self.file_size = os.path.getsize(file_path)
|
||||
self.source_duration = self._probe_duration()
|
||||
|
||||
def _probe_duration(self) -> float:
|
||||
"""Get source file duration via FFmpeg probe."""
|
||||
try:
|
||||
result = probe_file(self.file_path)
|
||||
if result.duration is None or result.duration <= 0:
|
||||
raise ChunkReadError(
|
||||
f"Cannot determine duration for {self.file_path}"
|
||||
)
|
||||
return result.duration
|
||||
except ChunkReadError:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise ChunkReadError(
|
||||
f"Failed to probe {self.file_path}: {e}"
|
||||
) from e
|
||||
|
||||
@property
|
||||
def expected_chunks(self) -> int:
|
||||
"""Calculate expected number of chunks (last chunk may be shorter)."""
|
||||
if self.source_duration <= 0:
|
||||
return 0
|
||||
return math.ceil(self.source_duration / self.chunk_duration)
|
||||
|
||||
def chunks(self) -> Generator[Chunk, None, None]:
|
||||
"""
|
||||
Yield Chunk objects representing time segments of the source file.
|
||||
|
||||
Generator-based: chunks are yielded on demand.
|
||||
Each chunk defines a time range — actual extraction is done by the processor.
|
||||
"""
|
||||
total = self.expected_chunks
|
||||
for sequence in range(total):
|
||||
start_time = sequence * self.chunk_duration
|
||||
end_time = min(
|
||||
start_time + self.chunk_duration, self.source_duration
|
||||
)
|
||||
duration = end_time - start_time
|
||||
|
||||
yield Chunk(
|
||||
sequence=sequence,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
source_path=self.file_path,
|
||||
duration=duration,
|
||||
)
|
||||
Reference in New Issue
Block a user