add root readme
This commit is contained in:
75
cht/summary/output.py
Normal file
75
cht/summary/output.py
Normal file
@@ -0,0 +1,75 @@
|
||||
"""Format merged segments as a sequential LLM-ready transcript.
|
||||
|
||||
Direct port of mts/meetus/transcript_merger.py:_format_detailed (line 249).
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def format_detailed(merged_segments: list[dict], *, frames_relative_to: Path | None = None) -> str:
    """Render the interleaved transcript.

    Emits a fixed banner followed by one stanza per segment, in the order
    given. A segment whose ``type`` is ``"audio"`` becomes a speaker line
    plus its text; every other segment is rendered as screen content, with
    a ``Frame:`` line when it carries a non-empty ``frame_path``.

    Args:
        merged_segments: Segment dicts. Each must have ``timestamp``
            (seconds) and ``type``. Audio segments must also have ``text``
            and may have ``speaker`` (falls back to ``"SPEAKER"``).
        frames_relative_to: If given, frame paths are rewritten relative to
            it. A frame path that is not located under it is emitted as
            given.

    Returns:
        The transcript as one newline-joined string.

    Raises:
        KeyError: If a segment is missing one of the required keys.
    """
    # The base directory is the same for every frame, so resolve it a single
    # time instead of once per segment (resolve() hits the filesystem).
    base = frames_relative_to.resolve() if frames_relative_to is not None else None

    rule = "=" * 80
    lines = [
        rule,
        "ENHANCED MEETING TRANSCRIPT",
        "Audio transcript + Screen frames",
        rule,
        "",
    ]

    for seg in merged_segments:
        ts = _format_timestamp(seg["timestamp"])

        if seg["type"] == "audio":
            speaker = seg.get("speaker") or "SPEAKER"
            lines.append(f"[{ts}] {speaker}:")
            lines.append(f" {seg['text']}")
            lines.append("")
            continue

        # Anything that is not audio is treated as a screen capture.
        lines.append(f"[{ts}] SCREEN CONTENT:")
        fp = seg.get("frame_path")
        if fp:
            shown = str(fp)
            if base is not None:
                try:
                    shown = str(Path(fp).resolve().relative_to(base))
                except ValueError:
                    # Frame is outside the base directory: keep the path as given.
                    pass
            lines.append(f" Frame: {shown}")
        lines.append("")

    return "\n".join(lines)
|
||||
|
||||
|
||||
def _format_timestamp(seconds: float) -> str:
|
||||
seconds = int(seconds)
|
||||
h, rem = divmod(seconds, 3600)
|
||||
m, s = divmod(rem, 60)
|
||||
if h:
|
||||
return f"{h:02d}:{m:02d}:{s:02d}"
|
||||
return f"{m:02d}:{s:02d}"
|
||||
|
||||
|
||||
def write_outputs(session_dir: Path, merged: list[dict], *, name: str | None = None) -> Path:
    """Write `<name>_enhanced.txt` and `merged.json` under `session_dir/summary`.

    The summary directory is created if it does not exist. Both files are
    written as UTF-8 explicitly, so the result does not depend on the
    platform's locale encoding and non-ASCII transcript text cannot fail to
    encode.

    Args:
        session_dir: Session directory; frame paths in the transcript are
            made relative to it.
        merged: Merged segments, passed to `format_detailed` and dumped to
            `merged.json`. Values that `json` cannot serialise natively are
            converted with `str`.
        name: Stem for the transcript file. Defaults to the name of
            `session_dir` when omitted or empty.

    Returns the path of the enhanced transcript.
    """
    summary_dir = session_dir / "summary"
    summary_dir.mkdir(parents=True, exist_ok=True)
    name = name or session_dir.name

    text = format_detailed(merged, frames_relative_to=session_dir)
    text_path = summary_dir / f"{name}_enhanced.txt"
    # Explicit encoding: the default is locale-dependent and may not be able
    # to represent every character in the transcript.
    text_path.write_text(text, encoding="utf-8")

    merged_path = summary_dir / "merged.json"
    merged_path.write_text(json.dumps(merged, indent=2, default=str), encoding="utf-8")

    log.info("Wrote %s (%d entries)", text_path, len(merged))
    return text_path
|
||||
Reference in New Issue
Block a user