chunker and ui

This commit is contained in:
2026-03-13 14:29:38 -03:00
parent 3eeedebb15
commit ccc478fbaa
69 changed files with 6481 additions and 282 deletions

86
core/chunker/chunker.py Normal file
View File

@@ -0,0 +1,86 @@
"""
Chunker — probes a media file and yields time-based Chunk objects.
Demonstrates:
- Function parameters and defaults (Interview Topic 1)
- List comprehensions and efficient iteration / generators (Interview Topic 3)
"""
import math
import os
from typing import Generator
from core.ffmpeg.probe import probe_file
from .exceptions import ChunkReadError
from .models import Chunk
class Chunker:
    """
    Splits a media file into time-based chunks via a generator.

    Uses FFmpeg probe to get duration, then yields Chunk objects
    representing time segments (no data read — extraction happens in the processor).

    Args:
        file_path: Path to the source media file
        chunk_duration: Duration of each chunk in seconds (default: 10.0)

    Raises:
        ChunkReadError: If the file does not exist or its duration cannot
            be determined.
        ValueError: If chunk_duration is not a positive, finite number.
    """

    def __init__(self, file_path: str, chunk_duration: float = 10.0):
        if not os.path.isfile(file_path):
            raise ChunkReadError(f"File not found: {file_path}")
        if chunk_duration <= 0:
            raise ValueError("chunk_duration must be positive")
        # NaN slips past the comparison above (and would later blow up inside
        # math.ceil), while +inf would silently produce zero chunks.
        if not math.isfinite(chunk_duration):
            raise ValueError("chunk_duration must be a finite number")

        self.file_path = file_path
        self.chunk_duration = chunk_duration
        self.file_size = os.path.getsize(file_path)
        self.source_duration = self._probe_duration()

    def _probe_duration(self) -> float:
        """
        Get source file duration via FFmpeg probe.

        Returns:
            The duration reported by the probe result.

        Raises:
            ChunkReadError: If the probe fails for any reason, or reports a
                missing or non-positive duration.
        """
        try:
            result = probe_file(self.file_path)
            if result.duration is None or result.duration <= 0:
                raise ChunkReadError(
                    f"Cannot determine duration for {self.file_path}"
                )
            return result.duration
        except ChunkReadError:
            # Already the right exception type; don't re-wrap it below.
            raise
        except Exception as e:
            raise ChunkReadError(
                f"Failed to probe {self.file_path}: {e}"
            ) from e

    @property
    def expected_chunks(self) -> int:
        """
        Calculate expected number of chunks (last chunk may be shorter).

        Returns:
            0 when the source duration is not positive; otherwise the number
            of chunks needed to cover the whole duration, where every chunk
            starts strictly before the end of the source.
        """
        if self.source_duration <= 0:
            return 0

        # A positive duration always needs at least one chunk, even if the
        # quotient underflows to 0.0.
        count = max(1, math.ceil(self.source_duration / self.chunk_duration))

        # Floating-point division can land a hair above an exact integer
        # (e.g. 7.000000000000001 for what is really 7 chunks), which would
        # add a trailing chunk that starts at the very end of the file and
        # has zero duration. Drop any such chunk.
        while (
            count > 1
            and (count - 1) * self.chunk_duration >= self.source_duration
        ):
            count -= 1
        return count

    def chunks(self) -> Generator["Chunk", None, None]:
        """
        Yield Chunk objects representing time segments of the source file.

        Generator-based: chunks are yielded on demand.
        Each chunk defines a time range — actual extraction is done by the processor.

        Yields:
            Chunk objects in sequence order, each with a positive duration.
            The final chunk ends exactly at the source duration.
        """
        total = self.expected_chunks
        last = total - 1
        for sequence in range(total):
            start_time = sequence * self.chunk_duration
            if sequence == last:
                # Pin the tail to the real end so rounding in
                # start_time + chunk_duration can neither overshoot nor
                # leave a sliver of the source uncovered.
                end_time = self.source_duration
            else:
                end_time = min(
                    start_time + self.chunk_duration, self.source_duration
                )
            duration = end_time - start_time
            yield Chunk(
                sequence=sequence,
                start_time=start_time,
                end_time=end_time,
                source_path=self.file_path,
                duration=duration,
            )