Files
mitus/cht/session.py
2026-04-03 06:40:08 -03:00

126 lines
4.0 KiB
Python

"""Session data loading — reads frame/transcript indexes and segment manifests."""
import json
import logging
from pathlib import Path
import ffmpeg as ffmpeg_lib
log = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Segment manifest — maps each recording segment to its global time offset
# ---------------------------------------------------------------------------
def _positive_duration(value) -> float:
    """Coerce a probed ``duration`` field to a positive float, else 0.0."""
    try:
        seconds = float(value)
    except (TypeError, ValueError):
        # Non-numeric or null field: treat as "unknown" rather than failing.
        return 0.0
    return seconds if seconds > 0 else 0.0


def probe_duration(path: Path) -> float:
    """Return the duration of the media file at *path*, in seconds.

    Lookup order:

    1. The container-level ``format.duration`` reported by
       ``ffmpeg_lib.probe``.
    2. The first entry in ``streams`` that carries a positive ``duration``.
    3. A rough estimate derived from the file size.

    Returns 0.0 only when probing yields nothing usable *and* the file
    cannot be stat'ed.
    """
    try:
        info = ffmpeg_lib.probe(str(path))
        dur = _positive_duration(info.get("format", {}).get("duration", 0))
        if dur > 0:
            return dur
        for stream in info.get("streams", []):
            # Each value is parsed on its own so that one unparseable
            # duration does not abort the scan of the remaining streams.
            dur = _positive_duration(stream.get("duration", 0))
            if dur > 0:
                return dur
    except Exception as e:
        # Best effort: any probe failure falls through to the size estimate.
        log.debug("probe_duration failed for %s: %s", path, e)
    # Fallback: rough estimate from file size, assuming 65_000 bytes per
    # second of media (roughly 500 kbps).
    try:
        return path.stat().st_size / 65_000
    except Exception:
        return 0.0
def build_segment_manifest(stream_dir: Path) -> list[dict]:
    """Build the segment manifest for every ``recording_*.mp4`` in *stream_dir*.

    Each manifest entry is a dict with the keys ``path`` (string form of the
    file path), ``index`` (position in the sorted file list), ``duration``
    (as returned by ``probe_duration``) and ``global_offset`` (sum of the
    durations of all preceding segments).

    Nothing is cached; the manifest is derived purely from the files
    currently present, so it can be recomputed at any time.

    NOTE(review): files are ordered by sorting their paths, which is
    lexicographic — confirm that segment numbers in file names are
    zero-padded, otherwise ``recording_10`` sorts before ``recording_2``.
    """
    entries: list[dict] = []
    elapsed = 0.0  # running total of the durations seen so far
    ordered_files = sorted(stream_dir.glob("recording_*.mp4"))
    for position, segment_path in enumerate(ordered_files):
        length = probe_duration(segment_path)
        entries.append(
            dict(
                path=str(segment_path),
                index=position,
                duration=length,
                global_offset=elapsed,
            )
        )
        elapsed += length
    return entries
def write_segment_manifest(session_dir: Path, manifest: list[dict]) -> None:
    """Serialize *manifest* as 2-space-indented JSON into ``segments.json``.

    The file is created (or overwritten) directly inside *session_dir*.
    """
    payload = json.dumps(manifest, indent=2)
    target = session_dir / "segments.json"
    target.write_text(payload)
def load_segment_manifest(session_dir: Path) -> list[dict]:
    """Read ``segments.json`` from *session_dir*.

    This is a best-effort loader: it returns ``[]`` when the file is
    missing or unreadable, when its bytes cannot be decoded, when it is not
    valid JSON, or when the top-level JSON value is not a list.
    """
    path = session_dir / "segments.json"
    try:
        data = json.loads(path.read_text())
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError (corrupt bytes).
        return []
    # Callers iterate the manifest as a list of dicts; reject other shapes.
    return data if isinstance(data, list) else []
def rebuild_manifest(session_dir: Path) -> list[dict]:
    """Regenerate ``segments.json`` from the files under ``<session_dir>/stream``.

    Builds a fresh manifest from the recordings on disk, writes it into
    *session_dir*, logs a one-line summary, and returns the manifest.
    """
    manifest = build_segment_manifest(session_dir / "stream")
    write_segment_manifest(session_dir, manifest)

    segment_count = len(manifest)
    total_seconds = sum(entry["duration"] for entry in manifest)
    log.info(
        "Rebuilt manifest: %d segments, total %.1fs",
        segment_count,
        total_seconds,
    )
    return manifest
def global_time_to_segment(manifest: list[dict], global_time: float):
    """Resolve *global_time* to a ``(segment_entry, local_time)`` pair.

    The chosen segment is the last manifest entry whose ``global_offset``
    does not exceed *global_time*; ``local_time`` is the distance from that
    offset. No check is made against the segment's ``duration``, so a time
    past the end of the recording resolves to the final segment.

    Special cases:
      * empty manifest -> ``(None, 0.0)``
      * *global_time* before every offset -> ``(manifest[0], global_time)``
    """
    if not manifest:
        return None, 0.0

    # Walk backwards so the first hit is the latest segment already started.
    containing = next(
        (
            entry
            for entry in reversed(manifest)
            if global_time >= entry["global_offset"]
        ),
        None,
    )
    if containing is None:
        return manifest[0], global_time
    return containing, global_time - containing["global_offset"]
def load_frame_index(frames_dir: Path) -> list[dict]:
    """Read ``index.json`` in *frames_dir* and return the usable frame entries.

    Each returned dict has the keys ``id``, ``path`` (a ``Path`` to an
    existing file) and ``timestamp`` (defaults to 0 when absent).

    Path resolution: the recorded ``path`` is used as-is if it exists;
    otherwise a file with the same name is looked up directly inside
    *frames_dir*. Entries whose image cannot be found either way are dropped.

    This is a best-effort loader: it returns ``[]`` when the index is
    missing, unreadable, undecodable, not valid JSON, or not a JSON list.
    Individual entries that are not mappings or lack ``id``/``path`` are
    skipped instead of aborting the whole load.
    """
    index_path = frames_dir / "index.json"
    try:
        index = json.loads(index_path.read_text())
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError (corrupt bytes).
        return []
    if not isinstance(index, list):
        return []

    result = []
    for entry in index:
        try:
            frame_id = entry["id"]
            fpath = Path(entry["path"])
        except (KeyError, TypeError):
            # Malformed entry (wrong type or missing required key).
            continue
        if not fpath.exists():
            # Recorded location is gone; retry by file name in frames_dir.
            fpath = frames_dir / fpath.name
            if not fpath.exists():
                continue
        result.append({
            "id": frame_id,
            "path": fpath,
            "timestamp": entry.get("timestamp", 0),
        })
    return result