76 lines
2.4 KiB
Python
76 lines
2.4 KiB
Python
"""Format merged segments as a sequential LLM-ready transcript.

Direct port of mts/meetus/transcript_merger.py:_format_detailed (line 249).
"""
|
|
|
|
import json
|
|
import logging
|
|
from pathlib import Path
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
def format_detailed(merged_segments: list[dict], *, frames_relative_to: Path | None = None) -> str:
|
|
"""Render the interleaved transcript.
|
|
|
|
If `frames_relative_to` is given, frame paths are rewritten relative to it.
|
|
"""
|
|
lines = []
|
|
lines.append("=" * 80)
|
|
lines.append("ENHANCED MEETING TRANSCRIPT")
|
|
lines.append("Audio transcript + Screen frames")
|
|
lines.append("=" * 80)
|
|
lines.append("")
|
|
|
|
for seg in merged_segments:
|
|
ts = _format_timestamp(seg["timestamp"])
|
|
if seg["type"] == "audio":
|
|
speaker = seg.get("speaker") or "SPEAKER"
|
|
lines.append(f"[{ts}] {speaker}:")
|
|
lines.append(f" {seg['text']}")
|
|
lines.append("")
|
|
else:
|
|
lines.append(f"[{ts}] SCREEN CONTENT:")
|
|
fp = seg.get("frame_path")
|
|
if fp:
|
|
if frames_relative_to is not None:
|
|
try:
|
|
fp = str(Path(fp).resolve().relative_to(frames_relative_to.resolve()))
|
|
except ValueError:
|
|
fp = str(fp)
|
|
else:
|
|
fp = str(fp)
|
|
lines.append(f" Frame: {fp}")
|
|
lines.append("")
|
|
|
|
return "\n".join(lines)
|
|
|
|
|
|
def _format_timestamp(seconds: float) -> str:
    """Format a second offset as ``MM:SS``, or ``HH:MM:SS`` from one hour up.

    Fractional seconds are truncated toward zero. Negative offsets are
    rendered as the formatted magnitude with a leading ``-`` instead of the
    borrowed fields that ``divmod`` yields for negative operands.

    Args:
        seconds: Offset in seconds.

    Returns:
        The zero-padded timestamp string.
    """
    total = int(seconds)
    sign = "-" if total < 0 else ""
    # Work on the magnitude so divmod never borrows across fields.
    hours, remainder = divmod(abs(total), 3600)
    minutes, secs = divmod(remainder, 60)
    if hours:
        return f"{sign}{hours:02d}:{minutes:02d}:{secs:02d}"
    return f"{sign}{minutes:02d}:{secs:02d}"
|
|
|
|
|
|
def write_outputs(session_dir: Path, merged: list[dict], *, name: str | None = None) -> Path:
    """Write `<name>_enhanced.txt` and `merged.json` under `session_dir/summary`.

    The summary directory is created if it does not exist. The transcript is
    rendered with frame paths made relative to `session_dir`.

    Args:
        session_dir: Session directory; outputs go in its ``summary`` subfolder.
        merged: Merged segments to render and to dump as JSON.
        name: Prefix for the transcript file; defaults to the session
            directory's name when omitted or empty.

    Returns the path of the enhanced transcript.
    """
    summary_dir = session_dir / "summary"
    summary_dir.mkdir(parents=True, exist_ok=True)
    name = name or session_dir.name

    text = format_detailed(merged, frames_relative_to=session_dir)
    text_path = summary_dir / f"{name}_enhanced.txt"
    # Explicit UTF-8: the locale default encoding can fail on non-ASCII
    # transcript text, and the file content should not depend on the host.
    text_path.write_text(text, encoding="utf-8")

    merged_path = summary_dir / "merged.json"
    # default=str stringifies values json cannot serialize natively.
    merged_path.write_text(json.dumps(merged, indent=2, default=str), encoding="utf-8")

    log.info("Wrote %s (%d entries)", text_path, len(merged))
    return text_path
|