add root readme
This commit is contained in:
102
cht/summary/diarize.py
Normal file
102
cht/summary/diarize.py
Normal file
@@ -0,0 +1,102 @@
|
||||
"""WhisperX subprocess wrapper for offline diarized transcription.
|
||||
|
||||
Runs whisperx CLI on a full-session WAV file, with min/max speakers pinned
|
||||
to the user-provided count. Streams combined stdout/stderr to a progress callback. Loads the
|
||||
resulting JSON and returns it.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import threading
|
||||
from pathlib import Path
|
||||
|
||||
from cht import config
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _cudnn_lib_for(whisperx_bin: str) -> str | None:
|
||||
"""Find nvidia/cudnn/lib inside the venv that owns *whisperx_bin*.
|
||||
|
||||
whisperx ships with `nvidia-cudnn-cu12`; the runtime needs the .so files
|
||||
on LD_LIBRARY_PATH or it dies with a missing-symbol error.
|
||||
"""
|
||||
bin_path = Path(whisperx_bin).resolve()
|
||||
venv_root = bin_path.parent.parent # .../venv/def
|
||||
if not venv_root.exists():
|
||||
return None
|
||||
matches = list(venv_root.glob("lib/python*/site-packages/nvidia/cudnn/lib"))
|
||||
return str(matches[0]) if matches else None
|
||||
|
||||
|
||||
def run_whisperx(
    wav_path: Path,
    output_dir: Path,
    *,
    num_speakers: int,
    on_progress=None,
) -> dict:
    """Run whisperx diarization on `wav_path`. Returns parsed JSON.

    Writes whisperx outputs into `output_dir`. Caller is responsible for
    persisting the relevant artifact elsewhere if desired.

    Args:
        wav_path: Audio file passed to the whisperx CLI.
        output_dir: Directory for whisperx outputs; created if missing.
        num_speakers: Passed as both --min_speakers and --max_speakers.
        on_progress: Optional callable invoked as ``on_progress(text, None)``
            once before launch and then for every non-empty output line.
            Per-line calls happen on a background thread; exceptions raised
            there are logged and do not abort the run.

    Returns:
        The parsed contents of ``<output_dir>/<wav_path.stem>.json``.

    Raises:
        RuntimeError: If HF_TOKEN is not configured, whisperx exits with a
            non-zero status, or the expected JSON file is missing.
    """
    from collections import deque

    if not config.HF_TOKEN:
        raise RuntimeError(
            "HF_TOKEN environment variable is required for whisperx diarization."
        )
    output_dir.mkdir(parents=True, exist_ok=True)

    cmd = [
        config.WHISPERX_BIN,
        str(wav_path),
        "--model", config.WHISPERX_MODEL,
        "--device", config.WHISPERX_DEVICE,
        "--compute_type", config.WHISPERX_COMPUTE_TYPE,
        "--diarize",
        "--min_speakers", str(num_speakers),
        "--max_speakers", str(num_speakers),
        "--hf_token", config.HF_TOKEN,
        "--output_format", "json",
        "--output_dir", str(output_dir),
    ]

    env = os.environ.copy()
    cudnn_path = config.WHISPERX_LD_LIBRARY_PATH or _cudnn_lib_for(config.WHISPERX_BIN)
    if cudnn_path:
        # Join only non-empty parts: an empty LD_LIBRARY_PATH entry (e.g. a
        # trailing separator) makes the loader search the current directory.
        inherited = env.get("LD_LIBRARY_PATH", "")
        env["LD_LIBRARY_PATH"] = os.pathsep.join(
            part for part in (cudnn_path, inherited) if part
        )

    # Keep the token value out of the log.
    log.info("whisperx: %s", " ".join(c for c in cmd if c != config.HF_TOKEN))
    if on_progress:
        on_progress("whisperx: starting", None)

    proc = subprocess.Popen(
        cmd, env=env,
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
        # errors="replace": an undecodable byte must not kill the drain
        # thread, or the child would block on a full pipe.
        text=True, bufsize=1, errors="replace",
    )

    # Last few output lines, kept for the error message on failure.
    tail = deque(maxlen=20)

    # Drain stderr/stdout combined; report progress lines.
    def _drain():
        for line in proc.stdout:
            line = line.rstrip()
            if not line:
                continue
            tail.append(line)
            log.debug("[whisperx] %s", line)
            if on_progress:
                try:
                    on_progress(line, None)
                except Exception:
                    # Keep draining: if this thread stopped reading, the
                    # child could fill the pipe and proc.wait() would hang.
                    log.exception("whisperx progress callback failed")

    t = threading.Thread(target=_drain, daemon=True, name="whisperx_drain")
    t.start()
    proc.wait()
    t.join(timeout=2)

    # Only touch the pipe and the tail buffer once the reader has finished;
    # if it is still running, leave both to the daemon thread.
    drained = not t.is_alive()
    if drained:
        proc.stdout.close()

    if proc.returncode != 0:
        message = f"whisperx exited with status {proc.returncode}"
        if drained and tail:
            message += "; last output:\n" + "\n".join(tail)
        raise RuntimeError(message)

    out_json = output_dir / f"{wav_path.stem}.json"
    if not out_json.exists():
        raise RuntimeError(f"whisperx finished but {out_json.name} not found")
    # Read explicitly as UTF-8 rather than the locale's default encoding.
    return json.loads(out_json.read_text(encoding="utf-8"))
|
||||
Reference in New Issue
Block a user