audio and transcript

2026-04-02 22:57:21 -03:00
parent 0b5575f3b3
commit d61e2a5492
13 changed files with 556 additions and 11 deletions
--- a/cht/stream/manager.py
+++ b/cht/stream/manager.py
@@ -20,6 +20,8 @@ from cht.config import (
    RELAY_PORT,
    SCENE_THRESHOLD,
    SESSIONS_DIR,
+    AUDIO_EXTRACT_INTERVAL,
+    AUDIO_SAFETY_MARGIN,
 )
 from cht.stream import ffmpeg as ff

@@ -46,6 +48,7 @@ class StreamManager:
        self.stream_dir = self.session_dir / "stream"
        self.frames_dir = self.session_dir / "frames"
        self.transcript_dir = self.session_dir / "transcript"
+        self.audio_dir = self.session_dir / "audio"
        self.agent_dir = self.session_dir / "agent"

        self._procs = {}
@@ -103,7 +106,7 @@ class StreamManager:
        return total

    def setup_dirs(self):
-        for d in (self.stream_dir, self.frames_dir, self.transcript_dir, self.agent_dir):
+        for d in (self.stream_dir, self.frames_dir, self.transcript_dir, self.audio_dir, self.agent_dir):
            d.mkdir(parents=True, exist_ok=True)

    @property
@@ -349,6 +352,77 @@ class StreamManager:

        Thread(target=_capture, daemon=True, name="capture_now").start()

+    # -- Audio Extraction --
+
+    def start_audio_extractor(self, on_new_audio=None):
+        """Periodically extract audio from the growing recording as WAV chunks.
+
+        Same incremental pattern as scene detector: a daemon thread polls
+        ``self.recording_path`` every AUDIO_EXTRACT_INTERVAL seconds, extracts
+        the not-yet-processed time range (held back by AUDIO_SAFETY_MARGIN from
+        ``self._estimate_safe_duration()``) into ``self.audio_dir`` and calls
+        back with (wav_path, start_time, duration).
+
+        Progress and chunk numbering restart from zero whenever the recording
+        path changes. The loop ends once "stop" appears in ``self._stop_flags``;
+        the thread is registered as ``self._threads["audio_extractor"]``.
+
+        Args:
+            on_new_audio: optional callback(wav_path, start_time, duration),
+                invoked on the extractor thread. Exceptions it raises are
+                logged and do not stop extraction.
+        """
+        self._on_new_audio = on_new_audio
+        self.audio_dir.mkdir(parents=True, exist_ok=True)
+
+        def _extract():
+            processed_time = 0.0
+            chunk_num = 0
+            current_segment = None
+
+            while "stop" not in self._stop_flags:
+                time.sleep(AUDIO_EXTRACT_INTERVAL)
+
+                seg = self.recording_path
+                # Single stat() instead of exists()+stat(): the recording can
+                # disappear between the two calls and would kill this thread.
+                try:
+                    seg_size = seg.stat().st_size
+                except OSError:
+                    continue
+
+                if seg != current_segment:
+                    current_segment = seg
+                    processed_time = 0.0
+                    # NOTE(review): restarting at 0 reuses names like
+                    # chunk_0000.wav, so chunks of an earlier segment may be
+                    # overwritten — confirm this is intended.
+                    chunk_num = 0
+                    log.info("Audio extractor: switched to %s", seg.name)
+
+                # Skip recordings that are still too small to process.
+                if seg_size < 100_000:
+                    continue
+
+                safe_duration = self._estimate_safe_duration()
+                if safe_duration is None or safe_duration <= 0:
+                    continue
+
+                process_to = safe_duration - AUDIO_SAFETY_MARGIN
+                # Wait until more than one second of new audio is available.
+                if process_to <= processed_time + 1.0:
+                    continue
+
+                chunk_duration = process_to - processed_time
+                wav_path = self.audio_dir / f"chunk_{chunk_num:04d}.wav"
+
+                try:
+                    ff.extract_audio_chunk(
+                        seg, wav_path,
+                        start_time=processed_time,
+                        duration=chunk_duration,
+                    )
+                except Exception as e:
+                    # processed_time is left unchanged, so the same range
+                    # (grown by any new audio) is retried on the next poll.
+                    log.error("Audio extraction failed: %s", e)
+                    continue
+
+                if wav_path.exists() and wav_path.stat().st_size > 100:
+                    log.info("Audio chunk: %s (%.1fs → %.1fs)",
+                             wav_path.name, processed_time, process_to)
+                    if self._on_new_audio:
+                        try:
+                            self._on_new_audio(wav_path, processed_time, chunk_duration)
+                        except Exception as e:
+                            # A failing consumer must not terminate the
+                            # extractor thread for the rest of the session.
+                            log.error("Audio callback failed for %s: %s",
+                                      wav_path.name, e)
+                    chunk_num += 1
+
+                # Advance even when no usable WAV was produced, so an empty
+                # range is not re-extracted forever.
+                processed_time = process_to
+
+            log.info("Audio extractor stopped")
+
+        t = Thread(target=_extract, daemon=True, name="audio_extractor")
+        t.start()
+        self._threads["audio_extractor"] = t
+
    # -- Lifecycle --

    def stop_all(self):