scrub optimization
This commit is contained in:
@@ -1,12 +1,102 @@
|
||||
"""Session data loading — reads frame/transcript indexes, returns plain data."""
|
||||
"""Session data loading — reads frame/transcript indexes and segment manifests."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import ffmpeg as ffmpeg_lib
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Segment manifest — maps each recording segment to its global time offset
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def probe_duration(path: Path) -> float:
    """Probe a media file's duration via ffprobe. Returns 0.0 on failure.

    Checks the container-level duration first, then each stream's
    duration. ffprobe can report non-numeric durations (e.g. "N/A");
    those are skipped per value instead of raising out of the loop, so
    one bad stream no longer hides a valid duration on a later stream.
    Falls back to a rough size-based estimate when probing fails.
    """
    def _as_float(value) -> float:
        # ffprobe emits durations as strings; "N/A"/None parse to 0.0.
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    try:
        info = ffmpeg_lib.probe(str(path))
        dur = _as_float(info.get("format", {}).get("duration", 0))
        if dur > 0:
            return dur
        for s in info.get("streams", []):
            sdur = _as_float(s.get("duration", 0))
            if sdur > 0:
                return sdur
    except Exception as e:
        log.debug("probe_duration failed for %s: %s", path, e)
    # Fallback: rough estimate from file size (65 kB/s ~= 520 kbps).
    try:
        return path.stat().st_size / 65_000
    except Exception:
        return 0.0
|
||||
|
||||
|
||||
def build_segment_manifest(stream_dir: Path) -> list[dict]:
    """Probe all recording_*.mp4 in *stream_dir* and return a manifest.

    Each entry: {path, index, duration, global_offset}.
    Sorted by segment index. Recomputable from files at any time.
    """
    manifest: list[dict] = []
    running_offset = 0.0
    for idx, segment_path in enumerate(sorted(stream_dir.glob("recording_*.mp4"))):
        seg_duration = probe_duration(segment_path)
        manifest.append(
            {
                "path": str(segment_path),
                "index": idx,
                "duration": seg_duration,
                "global_offset": running_offset,
            }
        )
        running_offset += seg_duration
    return manifest
|
||||
|
||||
|
||||
def write_segment_manifest(session_dir: Path, manifest: list[dict]) -> None:
    """Write segments.json to *session_dir*."""
    target = session_dir / "segments.json"
    serialized = json.dumps(manifest, indent=2)
    target.write_text(serialized)
|
||||
|
||||
|
||||
def load_segment_manifest(session_dir: Path) -> list[dict]:
    """Read segments.json. Returns [] if missing."""
    manifest_path = session_dir / "segments.json"
    try:
        raw = manifest_path.read_text()
    except OSError:
        # Missing or unreadable file behaves the same as no manifest.
        return []
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        # Corrupt manifest: pretend it does not exist.
        return []
|
||||
|
||||
|
||||
def rebuild_manifest(session_dir: Path) -> list[dict]:
    """Recalculate segment manifest from actual files and write it."""
    manifest = build_segment_manifest(session_dir / "stream")
    write_segment_manifest(session_dir, manifest)
    total_duration = sum(entry["duration"] for entry in manifest)
    log.info("Rebuilt manifest: %d segments, total %.1fs",
             len(manifest), total_duration)
    return manifest
|
||||
|
||||
|
||||
def global_time_to_segment(manifest: list[dict], global_time: float):
    """Map a global timestamp to (segment_entry, local_time).

    Returns the segment containing *global_time* and the time offset
    within that segment. Returns (None, 0.0) if manifest is empty.
    """
    if not manifest:
        return None, 0.0
    # Scanning from the last segment backwards, the first entry whose
    # global_offset does not exceed global_time is the containing one.
    containing = next(
        (seg for seg in reversed(manifest) if global_time >= seg["global_offset"]),
        None,
    )
    if containing is not None:
        return containing, global_time - containing["global_offset"]
    # global_time precedes every offset (negative time): pin to segment 0.
    return manifest[0], global_time
|
||||
|
||||
|
||||
def load_frame_index(frames_dir: Path) -> list[dict]:
|
||||
"""Read frames/index.json and return list of {id, path, timestamp}.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user