Files
mitus/cht/summary/output.py
2026-05-07 13:04:40 -03:00

76 lines
2.4 KiB
Python

"""Format merged segments as a sequential LLM-ready transcript.
Direct port of mts/meetus/transcript_merger.py:_format_detailed (line 249).
"""
import json
import logging
from pathlib import Path
# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
def format_detailed(merged_segments: list[dict], *, frames_relative_to: Path | None = None) -> str:
    """Render the interleaved transcript.

    Emits a fixed banner followed by one entry per segment, in the order
    given. A segment whose ``type`` is ``"audio"`` becomes a speaker line
    plus its text; any other segment is rendered as screen content with an
    optional frame path. Every entry is followed by a blank line.

    Args:
        merged_segments: Segment dicts. Each must carry ``timestamp`` and
            ``type``. Audio segments must also carry ``text`` and may carry
            ``speaker`` (falls back to ``"SPEAKER"`` when missing or empty).
            Other segments may carry ``frame_path``.
        frames_relative_to: If given, frame paths are rewritten relative to
            it. A frame that does not live under it is emitted as given.

    Returns:
        The transcript as one newline-joined string.

    Raises:
        KeyError: If a segment lacks a required key.
    """
    # Resolve the base once: it is identical for every frame, and resolve()
    # consults the filesystem each time it is called.
    base = frames_relative_to.resolve() if frames_relative_to is not None else None

    lines = [
        "=" * 80,
        "ENHANCED MEETING TRANSCRIPT",
        "Audio transcript + Screen frames",
        "=" * 80,
        "",
    ]
    for seg in merged_segments:
        ts = _format_timestamp(seg["timestamp"])

        if seg["type"] == "audio":
            speaker = seg.get("speaker") or "SPEAKER"
            lines.append(f"[{ts}] {speaker}:")
            lines.append(f" {seg['text']}")
            lines.append("")
            continue

        # Anything that is not audio is treated as a screen capture.
        lines.append(f"[{ts}] SCREEN CONTENT:")
        fp = seg.get("frame_path")
        if fp:
            shown = str(fp)
            if base is not None:
                try:
                    shown = str(Path(fp).resolve().relative_to(base))
                except ValueError:
                    # Frame lives outside the base directory; keep it as given.
                    pass
            lines.append(f" Frame: {shown}")
        lines.append("")
    return "\n".join(lines)
def _format_timestamp(seconds: float) -> str:
    """Format a second count as ``MM:SS``, or ``HH:MM:SS`` from one hour up.

    Fractional seconds are truncated toward zero. Negative values are
    rendered as their magnitude with a leading ``-``: passing a negative
    number straight to ``divmod`` floors toward negative infinity and would
    yield nonsense such as ``-1:59:55`` for -5 seconds.
    """
    total = int(seconds)
    sign = "-" if total < 0 else ""
    h, rem = divmod(abs(total), 3600)
    m, s = divmod(rem, 60)
    if h:
        return f"{sign}{h:02d}:{m:02d}:{s:02d}"
    return f"{sign}{m:02d}:{s:02d}"
def write_outputs(session_dir: Path, merged: list[dict], *, name: str | None = None) -> Path:
    """Write `<name>_enhanced.txt` and `merged.json` under `session_dir/summary`.

    The summary directory is created if it does not exist. Both files are
    written as UTF-8 so the result does not depend on the platform's default
    encoding (transcript text may contain non-ASCII characters).

    Args:
        session_dir: Session root. Frame paths in the transcript are made
            relative to it.
        merged: Merged segment dicts; rendered via ``format_detailed`` and
            also dumped verbatim as JSON.
        name: Stem for the transcript file. Defaults to ``session_dir.name``
            when omitted or empty.

    Returns:
        The path of the enhanced transcript.
    """
    summary_dir = session_dir / "summary"
    summary_dir.mkdir(parents=True, exist_ok=True)
    name = name or session_dir.name

    text = format_detailed(merged, frames_relative_to=session_dir)
    text_path = summary_dir / f"{name}_enhanced.txt"
    text_path.write_text(text, encoding="utf-8")

    merged_path = summary_dir / "merged.json"
    # default=str stringifies any value json cannot encode natively.
    merged_path.write_text(json.dumps(merged, indent=2, default=str), encoding="utf-8")

    log.info("Wrote %s (%d entries)", text_path, len(merged))
    return text_path