126 lines
4.0 KiB
Python
126 lines
4.0 KiB
Python
"""Session data loading — reads frame/transcript indexes and segment manifests."""
|
|
|
|
import json
|
|
import logging
|
|
from pathlib import Path
|
|
|
|
import ffmpeg as ffmpeg_lib
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Segment manifest — maps each recording segment to its global time offset
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _positive_seconds(raw) -> float:
    """Return *raw* as a positive, finite float, or 0.0 if it is not one."""
    try:
        seconds = float(raw)
    except (TypeError, ValueError):
        # Missing (None) or non-numeric (e.g. "N/A") values mean "no duration".
        return 0.0
    # NaN fails both comparisons; infinity fails the upper bound.
    return seconds if 0 < seconds < float("inf") else 0.0


def probe_duration(path: Path) -> float:
    """Probe a media file's duration (in seconds) via ffprobe.

    The container-level ("format") duration is preferred; if it is absent or
    not a positive number, the first stream reporting a positive duration is
    used. A single unparsable duration value no longer aborts the probe: it
    is skipped and the remaining candidates are still checked.

    When probing fails or yields no usable duration, a rough estimate is
    derived from the file size instead.

    Args:
        path: Media file to inspect.

    Returns:
        The duration in seconds, or 0.0 when neither probing nor the
        file-size estimate is possible.
    """
    try:
        info = ffmpeg_lib.probe(str(path))
        dur = _positive_seconds((info.get("format") or {}).get("duration"))
        if dur > 0:
            return dur
        for stream in info.get("streams") or []:
            dur = _positive_seconds(stream.get("duration"))
            if dur > 0:
                return dur
    except Exception as e:
        # Best effort: any probe problem falls through to the size estimate.
        log.debug("probe_duration failed for %s: %s", path, e)
    # Fallback: rough estimate from file size (~500kbps, i.e. ~65 kB/s)
    try:
        return path.stat().st_size / 65_000
    except Exception:
        return 0.0
|
|
|
|
|
|
def build_segment_manifest(stream_dir: Path) -> list[dict]:
    """Build the manifest for every recording_*.mp4 found in *stream_dir*.

    Files are taken in sorted path order. Each one is probed for its
    duration and given a running start offset equal to the sum of the
    durations of the files before it.

    Returns a list of {path, index, duration, global_offset} dicts, one per
    file, in that same order. The result is derived purely from the files,
    so it can be recomputed at any time.
    """
    # NOTE(review): sorting is lexicographic on the path, so unpadded numeric
    # suffixes (…_10 before …_2) would be misordered — confirm file naming.
    manifest = []
    elapsed = 0.0
    for position, segment_path in enumerate(sorted(stream_dir.glob("recording_*.mp4"))):
        length = probe_duration(segment_path)
        entry = {
            "path": str(segment_path),
            "index": position,
            "duration": length,
            "global_offset": elapsed,
        }
        manifest.append(entry)
        elapsed += length
    return manifest
|
|
|
|
|
|
def write_segment_manifest(session_dir: Path, manifest: list[dict]) -> None:
    """Serialize *manifest* as indented JSON into <session_dir>/segments.json.

    An existing segments.json is overwritten.
    """
    serialized = json.dumps(manifest, indent=2)
    session_dir.joinpath("segments.json").write_text(serialized)
|
|
|
|
|
|
def load_segment_manifest(session_dir: Path) -> list[dict]:
    """Read segments.json from *session_dir*.

    Returns:
        The parsed manifest list. [] is returned when the file is missing,
        cannot be read or decoded, is not valid JSON, or does not hold a
        JSON array.
    """
    path = session_dir / "segments.json"
    try:
        manifest = json.loads(path.read_text())
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file; ValueError covers both
        # json.JSONDecodeError and the UnicodeDecodeError read_text() can raise.
        return []
    # Callers iterate the manifest as a list of entries; reject anything else.
    return manifest if isinstance(manifest, list) else []
|
|
|
|
|
|
def rebuild_manifest(session_dir: Path) -> list[dict]:
    """Regenerate segments.json from the files under <session_dir>/stream.

    Builds a fresh manifest from the stream directory, writes it into
    *session_dir*, logs the segment count and summed duration, and returns
    the manifest.
    """
    manifest = build_segment_manifest(session_dir / "stream")
    write_segment_manifest(session_dir, manifest)
    segment_count = len(manifest)
    total_seconds = sum(entry["duration"] for entry in manifest)
    log.info("Rebuilt manifest: %d segments, total %.1fs", segment_count, total_seconds)
    return manifest
|
|
|
|
|
|
def global_time_to_segment(manifest: list[dict], global_time: float):
    """Locate *global_time* within the manifest.

    Walks the manifest from its last entry backwards and picks the first
    entry whose "global_offset" is at or before *global_time*.

    Returns a (segment_entry, local_time) pair, where local_time is
    *global_time* minus that entry's offset. An empty manifest yields
    (None, 0.0). If no entry starts at or before *global_time*, the first
    entry is returned together with the unmodified *global_time*.
    """
    if not manifest:
        return None, 0.0
    for position in range(len(manifest) - 1, -1, -1):
        entry = manifest[position]
        start = entry["global_offset"]
        if global_time >= start:
            return entry, global_time - start
    # Earlier than every segment start: fall back to the first segment.
    return manifest[0], global_time
|
|
|
|
|
|
def load_frame_index(frames_dir: Path) -> list[dict]:
    """Read frames/index.json and return a list of {id, path, timestamp}.

    Only entries whose image file exists on disk are returned. An entry's
    recorded path is used as-is when it exists; otherwise a file with the
    same name directly inside *frames_dir* is tried. "timestamp" defaults
    to 0 when absent.

    Malformed entries (not a JSON object, or lacking an "id" or a non-empty
    string "path") are skipped instead of aborting the whole load.

    Returns:
        The surviving entries in index order, with "path" as a Path. [] is
        returned when index.json is missing, cannot be read or decoded, is
        not valid JSON, or does not hold a JSON array.
    """
    index_path = frames_dir / "index.json"
    try:
        index = json.loads(index_path.read_text())
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file; ValueError covers both
        # json.JSONDecodeError and the UnicodeDecodeError read_text() can raise.
        return []
    if not isinstance(index, list):
        return []

    result = []
    for entry in index:
        if not isinstance(entry, dict) or "id" not in entry:
            continue
        raw_path = entry.get("path")
        # An empty string would resolve to the current directory, which
        # "exists" but is never a frame image.
        if not isinstance(raw_path, str) or not raw_path:
            continue
        fpath = Path(raw_path)
        if not fpath.exists():
            # Recorded location is gone; look for the same file name in frames_dir.
            fpath = frames_dir / fpath.name
            if not fpath.exists():
                continue
        result.append({
            "id": entry["id"],
            "path": fpath,
            "timestamp": entry.get("timestamp", 0),
        })
    return result
|