audio and transcript

This commit is contained in:
2026-04-02 22:57:21 -03:00
parent 0b5575f3b3
commit d61e2a5492
13 changed files with 556 additions and 11 deletions

View File

@@ -20,6 +20,8 @@ from cht.config import (
RELAY_PORT,
SCENE_THRESHOLD,
SESSIONS_DIR,
AUDIO_EXTRACT_INTERVAL,
AUDIO_SAFETY_MARGIN,
)
from cht.stream import ffmpeg as ff
@@ -46,6 +48,7 @@ class StreamManager:
self.stream_dir = self.session_dir / "stream"
self.frames_dir = self.session_dir / "frames"
self.transcript_dir = self.session_dir / "transcript"
self.audio_dir = self.session_dir / "audio"
self.agent_dir = self.session_dir / "agent"
self._procs = {}
@@ -103,7 +106,7 @@ class StreamManager:
return total
def setup_dirs(self):
for d in (self.stream_dir, self.frames_dir, self.transcript_dir, self.agent_dir):
for d in (self.stream_dir, self.frames_dir, self.transcript_dir, self.audio_dir, self.agent_dir):
d.mkdir(parents=True, exist_ok=True)
@property
@@ -349,6 +352,77 @@ class StreamManager:
Thread(target=_capture, daemon=True, name="capture_now").start()
# -- Audio Extraction --
def start_audio_extractor(self, on_new_audio=None):
"""Periodically extract audio from the growing recording as WAV chunks.
Same incremental pattern as scene detector: polls recording, extracts
new time range, calls back with (wav_path, start_time, duration).
Args:
on_new_audio: callback(wav_path, start_time, duration)
"""
self._on_new_audio = on_new_audio
self.audio_dir.mkdir(parents=True, exist_ok=True)
def _extract():
processed_time = 0.0
chunk_num = 0
current_segment = None
while "stop" not in self._stop_flags:
time.sleep(AUDIO_EXTRACT_INTERVAL)
seg = self.recording_path
if not seg.exists():
continue
if seg != current_segment:
current_segment = seg
processed_time = 0.0
chunk_num = 0
log.info("Audio extractor: switched to %s", seg.name)
if seg.stat().st_size < 100_000:
continue
safe_duration = self._estimate_safe_duration()
if safe_duration is None or safe_duration <= 0:
continue
process_to = safe_duration - AUDIO_SAFETY_MARGIN
if process_to <= processed_time + 1.0:
continue
chunk_duration = process_to - processed_time
wav_path = self.audio_dir / f"chunk_{chunk_num:04d}.wav"
try:
ff.extract_audio_chunk(
seg, wav_path,
start_time=processed_time,
duration=chunk_duration,
)
except Exception as e:
log.error("Audio extraction failed: %s", e)
continue
if wav_path.exists() and wav_path.stat().st_size > 100:
log.info("Audio chunk: %s (%.1fs → %.1fs)",
wav_path.name, processed_time, process_to)
if self._on_new_audio:
self._on_new_audio(wav_path, processed_time, chunk_duration)
chunk_num += 1
processed_time = process_to
log.info("Audio extractor stopped")
t = Thread(target=_extract, daemon=True, name="audio_extractor")
t.start()
self._threads["audio_extractor"] = t
# -- Lifecycle --
def stop_all(self):