"""Session data loading — reads frame/transcript indexes and segment manifests."""

import json
import logging
import re
from pathlib import Path
from typing import Optional

import ffmpeg as ffmpeg_lib

log = logging.getLogger(__name__)

# Rough bytes-per-second used to guess a duration from file size when probing
# fails (~500kbps).
_FALLBACK_BYTES_PER_SECOND = 65_000

# Splits a file name into alternating non-digit / digit runs for natural sort.
_DIGIT_RUN = re.compile(r"([0-9]+)")


# ---------------------------------------------------------------------------
# Segment manifest — maps each recording segment to its global time offset
# ---------------------------------------------------------------------------

def _positive_float(value) -> float:
    """Return *value* as a float if it parses to a positive number, else 0.0."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        # ffprobe may report a non-numeric duration (e.g. "N/A") or omit it.
        return 0.0
    return number if number > 0 else 0.0


def probe_duration(path: Path) -> float:
    """Probe a media file's duration via ffprobe.

    The container-level ``format.duration`` is preferred; if it is missing or
    not a positive number, the first stream with a positive ``duration`` is
    used. If probing fails or yields nothing usable, the duration is estimated
    from the file size.

    Args:
        path: Media file to probe.

    Returns:
        Duration in seconds, or 0.0 if neither probing nor the size-based
        fallback succeeds.
    """
    try:
        info = ffmpeg_lib.probe(str(path))
        dur = _positive_float(info.get("format", {}).get("duration", 0))
        if dur > 0:
            return dur
        for stream in info.get("streams", []):
            sdur = _positive_float(stream.get("duration", 0))
            if sdur > 0:
                return sdur
    except Exception as e:
        # Deliberately broad: probing is best-effort and must never raise.
        log.debug("probe_duration failed for %s: %s", path, e)

    # Fallback: rough estimate from file size (~500kbps)
    try:
        return path.stat().st_size / _FALLBACK_BYTES_PER_SECOND
    except Exception:
        # Best-effort contract: any failure here (missing file, bad path
        # object, permissions) means "unknown duration".
        return 0.0


def _segment_sort_key(path: Path) -> tuple:
    """Natural-sort key so that recording_2 orders before recording_10.

    ``re.split`` with a capturing group always yields non-digit text at even
    positions and digit runs at odd positions, so keys from different names
    compare element-by-element without mixing ``int`` and ``str``.
    """
    parts = _DIGIT_RUN.split(path.stem)
    natural = [int(part) if i % 2 else part for i, part in enumerate(parts)]
    # The raw name breaks ties such as "recording_1" vs "recording_01".
    return natural, path.name


def build_segment_manifest(stream_dir: Path) -> list[dict]:
    """Probe all recording_*.mp4 in *stream_dir* and return a manifest.

    Each entry: {path, index, duration, global_offset}. Sorted by segment
    index, with numbers in the file name compared numerically (so unpadded
    names like ``recording_10.mp4`` follow ``recording_2.mp4``).
    Recomputable from files at any time.

    Args:
        stream_dir: Directory containing the recording segments.

    Returns:
        One dict per segment; ``global_offset`` is the sum of the durations
        of all preceding segments. Empty if no segments are found.
    """
    segments = sorted(stream_dir.glob("recording_*.mp4"), key=_segment_sort_key)
    manifest = []
    offset = 0.0
    for i, seg in enumerate(segments):
        dur = probe_duration(seg)
        manifest.append({
            "path": str(seg),
            "index": i,
            "duration": dur,
            "global_offset": offset,
        })
        offset += dur
    return manifest


def write_segment_manifest(session_dir: Path, manifest: list[dict]) -> None:
    """Write segments.json to *session_dir*.

    Args:
        session_dir: Session directory that receives ``segments.json``.
        manifest: Manifest entries as produced by build_segment_manifest().
    """
    path = session_dir / "segments.json"
    # json.dumps escapes non-ASCII by default, so the payload is plain ASCII;
    # the explicit encoding just removes any dependence on the locale.
    path.write_text(json.dumps(manifest, indent=2), encoding="utf-8")


def load_segment_manifest(session_dir: Path) -> list[dict]:
    """Read segments.json.

    Args:
        session_dir: Session directory expected to contain ``segments.json``.

    Returns:
        The stored manifest, or [] if the file is missing, unreadable, not
        valid JSON, or does not contain a JSON list.
    """
    path = session_dir / "segments.json"
    try:
        manifest = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return []
    if not isinstance(manifest, list):
        log.warning("Ignoring %s: expected a JSON list", path)
        return []
    return manifest


def rebuild_manifest(session_dir: Path) -> list[dict]:
    """Recalculate segment manifest from actual files and write it.

    Args:
        session_dir: Session directory; segments are read from its
            ``stream`` subdirectory and ``segments.json`` is written to it.

    Returns:
        The freshly built manifest.
    """
    stream_dir = session_dir / "stream"
    manifest = build_segment_manifest(stream_dir)
    write_segment_manifest(session_dir, manifest)
    log.info("Rebuilt manifest: %d segments, total %.1fs",
             len(manifest), sum(s["duration"] for s in manifest))
    return manifest


def global_time_to_segment(
    manifest: list[dict], global_time: float
) -> tuple[Optional[dict], float]:
    """Map a global timestamp to (segment_entry, local_time).

    Returns the segment containing *global_time* and the time offset within
    that segment. Returns (None, 0.0) if manifest is empty.

    The manifest is scanned from the end, so entries are expected in
    ascending ``global_offset`` order. A time past the end of the recording
    maps to the last segment (local time may exceed its duration); a time
    before every segment's offset maps to the first segment with
    *global_time* returned unchanged as the local time.
    """
    if not manifest:
        return None, 0.0
    for seg in reversed(manifest):
        if global_time >= seg["global_offset"]:
            local = global_time - seg["global_offset"]
            return seg, local
    return manifest[0], global_time


# ---------------------------------------------------------------------------
# Frame index
# ---------------------------------------------------------------------------

def load_frame_index(frames_dir: Path) -> list[dict]:
    """Read frames/index.json and return list of {id, path, timestamp}.

    Returns only entries where the image file exists on disk. Paths are
    resolved relative to *frames_dir* if needed: when the recorded path does
    not exist as given, the file name alone is looked up inside *frames_dir*.

    Entries that are not objects, or that lack ``path`` or ``id``, are
    skipped rather than aborting the whole load. A missing ``timestamp``
    defaults to 0.

    Args:
        frames_dir: Directory containing ``index.json`` and the frame images.

    Returns:
        Usable frame entries with ``path`` as a ``Path``; [] if the index is
        missing, unreadable, not valid JSON, or not a JSON list.
    """
    index_path = frames_dir / "index.json"
    try:
        index = json.loads(index_path.read_text())
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return []
    if not isinstance(index, list):
        log.warning("Ignoring %s: expected a JSON list", index_path)
        return []

    result = []
    for entry in index:
        if not isinstance(entry, dict):
            log.debug("Skipping malformed frame index entry: %r", entry)
            continue
        try:
            fpath = Path(entry["path"])
            frame_id = entry["id"]
        except (KeyError, TypeError):
            log.debug("Skipping malformed frame index entry: %r", entry)
            continue
        if not fpath.exists():
            # The index may hold a path that is stale or relative to another
            # working directory; retry by file name inside frames_dir.
            fpath = frames_dir / fpath.name
            if not fpath.exists():
                continue
        result.append({
            "id": frame_id,
            "path": fpath,
            "timestamp": entry.get("timestamp", 0),
        })
    return result