"""Format merged segments as a sequential LLM-ready transcript. Direct port of mts/meetus/transcript_merger.py:_format_detailed (line 249). """ import json import logging from pathlib import Path log = logging.getLogger(__name__) def format_detailed(merged_segments: list[dict], *, frames_relative_to: Path | None = None) -> str: """Render the interleaved transcript. If `frames_relative_to` is given, frame paths are rewritten relative to it. """ lines = [] lines.append("=" * 80) lines.append("ENHANCED MEETING TRANSCRIPT") lines.append("Audio transcript + Screen frames") lines.append("=" * 80) lines.append("") for seg in merged_segments: ts = _format_timestamp(seg["timestamp"]) if seg["type"] == "audio": speaker = seg.get("speaker") or "SPEAKER" lines.append(f"[{ts}] {speaker}:") lines.append(f" {seg['text']}") lines.append("") else: lines.append(f"[{ts}] SCREEN CONTENT:") fp = seg.get("frame_path") if fp: if frames_relative_to is not None: try: fp = str(Path(fp).resolve().relative_to(frames_relative_to.resolve())) except ValueError: fp = str(fp) else: fp = str(fp) lines.append(f" Frame: {fp}") lines.append("") return "\n".join(lines) def _format_timestamp(seconds: float) -> str: seconds = int(seconds) h, rem = divmod(seconds, 3600) m, s = divmod(rem, 60) if h: return f"{h:02d}:{m:02d}:{s:02d}" return f"{m:02d}:{s:02d}" def write_outputs(session_dir: Path, merged: list[dict], *, name: str | None = None) -> Path: """Write `_enhanced.txt` and `merged.json` under `session_dir/summary`. Returns the path of the enhanced transcript. """ summary_dir = session_dir / "summary" summary_dir.mkdir(parents=True, exist_ok=True) name = name or session_dir.name text = format_detailed(merged, frames_relative_to=session_dir) text_path = summary_dir / f"{name}_enhanced.txt" text_path.write_text(text) merged_path = summary_dir / "merged.json" merged_path.write_text(json.dumps(merged, indent=2, default=str)) log.info("Wrote %s (%d entries)", text_path, len(merged)) return text_path