73 lines
2.4 KiB
Python
73 lines
2.4 KiB
Python
"""End-to-end orchestrator for the summarization export.
|
|
|
|
Two operations:
|
|
run_diarization(...) — heavy: assembles audio, runs whisperx, caches diarized.json.
|
|
export(...) — cheap: merges cached diarization with selected frames and
|
|
writes <session>_enhanced.txt. Re-run any time the user
|
|
tweaks frame selection or speaker names.
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
from pathlib import Path
|
|
|
|
from cht.session import load_frame_index
|
|
from cht.summary import audio, diarize, merger, output
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
def diarized_path(session_dir: Path) -> Path:
    """Return the location of the cached diarization JSON for *session_dir*.

    The path is ``<session_dir>/summary/diarized.json``; nothing is created
    or checked on disk.
    """
    return session_dir.joinpath("summary", "diarized.json")
|
|
|
|
|
|
def has_diarization(session_dir: Path) -> bool:
    """Tell whether a cached diarization file is present for *session_dir*."""
    cache_file = diarized_path(session_dir)
    return cache_file.exists()
|
|
|
|
|
|
def load_diarization(session_dir: Path) -> dict:
    """Read and parse the cached diarization JSON for *session_dir*.

    Args:
        session_dir: Session directory whose ``summary/diarized.json`` is read.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the cache file cannot be read (e.g. ``FileNotFoundError``
            when no diarization has been cached yet).
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # Decode explicitly as UTF-8 (the JSON interchange encoding) so parsing
    # does not depend on the platform's locale default. Plain-ASCII files,
    # which is what json.dumps emits by default, decode identically.
    raw = diarized_path(session_dir).read_text(encoding="utf-8")
    return json.loads(raw)
|
|
|
|
|
|
def run_diarization(session_dir: Path, *, num_speakers: int, on_progress=None) -> dict:
    """Assemble audio, run whisperx, cache and return the JSON.

    Args:
        session_dir: Session directory. The result is cached at
            ``<session_dir>/summary/diarized.json``.
        num_speakers: Speaker count forwarded to ``diarize.run_whisperx``.
        on_progress: Optional callback invoked as
            ``on_progress(message, fraction)``. Stage updates emitted here
            carry a float fraction (0.05, 0.15, 1.0); lines relayed from
            ``diarize.run_whisperx`` carry ``None`` as the fraction.

    Returns:
        The value returned by ``diarize.run_whisperx`` (also written to the
        cache file as JSON).
    """

    def report(message, fraction):
        # Single place that tolerates a missing callback.
        if on_progress:
            on_progress(message, fraction)

    def relay(line, _frac):
        # The fraction supplied by run_whisperx is dropped; only the text
        # line is forwarded, with None as the fraction.
        if on_progress:
            return on_progress(line, None)
        return None

    report("assembling audio", 0.05)
    wav = audio.assemble_session_wav(session_dir)

    report("running whisperx", 0.15)
    summary_dir = session_dir / "summary"
    diarized = diarize.run_whisperx(
        wav, summary_dir,
        num_speakers=num_speakers,
        on_progress=relay,
    )

    target = diarized_path(session_dir)
    # Make sure the cache directory exists before writing into it.
    target.parent.mkdir(parents=True, exist_ok=True)
    # Write to a sibling temp file and rename it over the target so an
    # interrupted write never leaves a truncated diarized.json behind;
    # the mere existence of that file is what marks the cache as usable.
    tmp = target.with_name(target.name + ".tmp")
    tmp.write_text(json.dumps(diarized, indent=2), encoding="utf-8")
    tmp.replace(target)

    report("diarization done", 1.0)
    return diarized
|
|
|
|
|
|
def export(session_dir: Path,
           *,
           selected_frame_ids: set[str] | None = None,
           name_map: dict[str, str] | None = None) -> Path:
    """Combine the cached diarization with the chosen frames and write the result.

    Args:
        session_dir: Session directory holding ``summary/diarized.json`` and
            a ``frames`` subdirectory.
        selected_frame_ids: When given, only frames whose ``"id"`` is in this
            set are included; ``None`` keeps every frame from the index.
        name_map: Passed through unchanged to ``merger.merge``.

    Returns:
        Whatever ``output.write_outputs`` returns for the merged segments.

    Raises:
        RuntimeError: If no cached diarization exists for the session.
    """
    if not has_diarization(session_dir):
        raise RuntimeError("No diarization available — run diarization first.")

    cached = load_diarization(session_dir)
    audio_segs = merger.whisperx_to_audio_segments(cached)

    chosen = load_frame_index(session_dir / "frames")
    if selected_frame_ids is not None:
        chosen = [frame for frame in chosen if frame["id"] in selected_frame_ids]

    # Reduce each frame record to the two fields handed to the merger.
    frame_segs = []
    for frame in chosen:
        frame_segs.append(
            {"timestamp": frame["timestamp"], "frame_path": str(frame["path"])}
        )

    merged = merger.merge(audio_segs, frame_segs, name_map=name_map)
    return output.write_outputs(session_dir, merged)
|