"""Assemble a single WAV file covering the entire session audio.

Prefers the recording source (fMP4 or raw AAC) over the live-extracted WAV
chunks: a single decode pass gives whisperx contiguous audio with no
chunk-boundary artifacts. Chunks are a fallback when the recording source is
missing.
"""

import logging
import tempfile
from pathlib import Path

import ffmpeg

from cht.stream import ffmpeg as ff

log = logging.getLogger(__name__)

# `audio.aac` files of this size or smaller are skipped as a source
# (presumably empty/truncated stubs -- confirm against the Rust transport).
_MIN_AAC_BYTES = 100


def assemble_session_wav(session_dir: Path, *, force: bool = False) -> Path:
    """Build `summary/full.wav` covering the whole session audio.

    Sources are tried in order of preference:

    1. `stream/audio.aac` (Rust transport), if larger than a stub.
    2. `stream/recording_*.mp4` fMP4 segments (Python transport).
    3. `audio/chunk_*.wav` live chunks, as a last resort.

    The output is built in a sibling temporary file and renamed into place
    only on success, so a failed or interrupted run never leaves a truncated
    `full.wav` that later calls would return as cached.

    Args:
        session_dir: Root directory of the session.
        force: Rebuild even if `summary/full.wav` already exists.

    Returns:
        Path to `summary/full.wav` (the cached file if already present and
        `force` is False).

    Raises:
        FileNotFoundError: If no usable audio source exists.
    """
    summary_dir = session_dir / "summary"
    summary_dir.mkdir(parents=True, exist_ok=True)
    out = summary_dir / "full.wav"
    if out.exists() and not force:
        log.info("assemble_session_wav: cached %s", out)
        return out

    stream_dir = session_dir / "stream"

    # 1. Rust transport: standalone audio.aac.
    aac = stream_dir / "audio.aac"
    if aac.exists() and aac.stat().st_size > _MIN_AAC_BYTES:
        _build_atomically(ff.extract_audio_chunk, aac, out)
        log.info("assemble_session_wav: from audio.aac → %s", out)
        return out

    # 2. fMP4 segments (Python transport). Single segment is the common case.
    segments = sorted(stream_dir.glob("recording_*.mp4")) if stream_dir.exists() else []
    if len(segments) == 1:
        _build_atomically(ff.extract_audio_chunk, segments[0], out)
        log.info("assemble_session_wav: from %s → %s", segments[0].name, out)
        return out
    if len(segments) > 1:
        _build_atomically(_concat_segments_audio, segments, out)
        log.info("assemble_session_wav: concatenated %d segments → %s", len(segments), out)
        return out

    # 3. Fallback: concat the live audio chunks. Last resort -- chunk seams may
    #    introduce minor artifacts; whisperx still works but precision can suffer.
    audio_dir = session_dir / "audio"
    chunks = sorted(audio_dir.glob("chunk_*.wav")) if audio_dir.exists() else []
    if chunks:
        log.warning("assemble_session_wav: no recording source, falling back to %d chunks", len(chunks))
        _build_atomically(_concat_chunks, chunks, out)
        return out

    raise FileNotFoundError(f"No audio source found in {session_dir}")


def _build_atomically(build, source, out: Path) -> None:
    """Run `build(source, partial)` and rename the result onto `out`.

    `partial` lives next to `out` and keeps its suffix so ffmpeg still infers
    the container from the file name. On any failure the partial file is
    removed and the exception propagates; an existing `out` is left untouched.
    """
    partial = out.with_name(f"{out.stem}.partial{out.suffix}")
    # A previous crashed run may have left a stale partial file behind.
    _remove_quietly(partial)
    try:
        build(source, partial)
        partial.replace(out)
    except BaseException:
        # Includes KeyboardInterrupt: never leave half-written audio around.
        _remove_quietly(partial)
        raise


def _remove_quietly(path: Path) -> None:
    """Best-effort delete of `path`; a failure here must not mask the real error."""
    try:
        path.unlink()
    except FileNotFoundError:
        pass
    except OSError as exc:
        log.debug("could not remove %s: %s", path, exc)


def _run_ffmpeg(node, label: str) -> None:
    """Log and run an ffmpeg-python node, surfacing stderr when it fails.

    stderr is captured by the run, so without this it would be lost: the
    `ffmpeg.Error` message itself carries no diagnostics.
    """
    log.info("%s: %s", label, " ".join(node.compile()))
    try:
        node.run(capture_stdout=True, capture_stderr=True)
    except ffmpeg.Error as exc:
        detail = exc.stderr.decode("utf-8", errors="replace").strip() if exc.stderr else ""
        log.error("%s: ffmpeg failed: %s", label, detail)
        raise


def _concat_segments_audio(segments: list[Path], out: Path) -> None:
    """Decode + concatenate audio tracks from multiple fMP4 segments into 16kHz mono WAV."""
    audio_streams = [ffmpeg.input(str(segment)).audio for segment in segments]
    node = (
        ffmpeg.concat(*audio_streams, v=0, a=1)
        .output(str(out), acodec="pcm_s16le", ac=1, ar=16000)
        .overwrite_output()
        .global_args("-hide_banner", "-loglevel", "warning")
    )
    _run_ffmpeg(node, "concat_segments_audio")


def _concat_list_entry(path: Path) -> str:
    """Return one `file '...'` line for an ffmpeg concat-demuxer list.

    A single quote cannot appear inside a quoted string, so each one is
    written as `'\\''` (close quote, escaped quote, reopen quote).
    """
    escaped = str(path.resolve()).replace("'", "'\\''")
    return f"file '{escaped}'\n"


def _concat_chunks(chunks: list[Path], out: Path) -> None:
    """Concat already-PCM 16kHz mono WAV files via the concat demuxer (no re-decode)."""
    # delete=False: the file must outlive the `with` so ffmpeg can read it
    # after it has been flushed and closed. UTF-8 is set explicitly so
    # non-ASCII paths are not written in the locale encoding.
    with tempfile.NamedTemporaryFile(
        "w", suffix=".txt", delete=False, encoding="utf-8"
    ) as f:
        listfile = Path(f.name)
        f.writelines(_concat_list_entry(chunk) for chunk in chunks)
    try:
        node = (
            ffmpeg.input(str(listfile), format="concat", safe=0)
            .output(str(out), c="copy")
            .overwrite_output()
            .global_args("-hide_banner", "-loglevel", "warning")
        )
        _run_ffmpeg(node, "concat_chunks")
    finally:
        _remove_quietly(listfile)