audio and transcript
This commit is contained in:
@@ -122,6 +122,35 @@ def extract_scene_frames(input_path, output_dir, scene_threshold=0.10,
|
||||
return stdout.decode("utf-8", errors="replace"), stderr.decode("utf-8", errors="replace")
|
||||
|
||||
|
||||
def extract_audio_chunk(input_path, output_path, start_time=0.0, duration=None):
|
||||
"""Extract audio from recording as 16kHz mono WAV (optimal for Whisper).
|
||||
|
||||
Uses input-level seeking (-ss before -i) for fast keyframe-based seek.
|
||||
Returns (stdout, stderr) as decoded strings.
|
||||
"""
|
||||
kwargs = {"ss": start_time}
|
||||
if duration is not None:
|
||||
kwargs["t"] = duration
|
||||
stream = ffmpeg.input(str(input_path), **kwargs)
|
||||
output = (
|
||||
ffmpeg.output(
|
||||
stream, str(output_path),
|
||||
acodec="pcm_s16le", ac=1, ar=16000,
|
||||
vn=None,
|
||||
)
|
||||
.overwrite_output()
|
||||
.global_args(*QUIET_ARGS)
|
||||
)
|
||||
log.info("extract_audio_chunk: %s", " ".join(output.compile()))
|
||||
try:
|
||||
stdout, stderr = output.run(capture_stdout=True, capture_stderr=True)
|
||||
except ffmpeg.Error as e:
|
||||
stderr = e.stderr or b""
|
||||
log.debug("ffmpeg audio error: %s", stderr.decode("utf-8", errors="replace").strip().split("\n")[-1])
|
||||
stdout = e.stdout or b""
|
||||
return stdout.decode("utf-8", errors="replace"), stderr.decode("utf-8", errors="replace")
|
||||
|
||||
|
||||
def extract_frame_at(input_path, output_path, timestamp):
|
||||
"""Extract a single frame at the given timestamp."""
|
||||
output = (
|
||||
|
||||
@@ -20,6 +20,8 @@ from cht.config import (
|
||||
RELAY_PORT,
|
||||
SCENE_THRESHOLD,
|
||||
SESSIONS_DIR,
|
||||
AUDIO_EXTRACT_INTERVAL,
|
||||
AUDIO_SAFETY_MARGIN,
|
||||
)
|
||||
from cht.stream import ffmpeg as ff
|
||||
|
||||
@@ -46,6 +48,7 @@ class StreamManager:
|
||||
self.stream_dir = self.session_dir / "stream"
|
||||
self.frames_dir = self.session_dir / "frames"
|
||||
self.transcript_dir = self.session_dir / "transcript"
|
||||
self.audio_dir = self.session_dir / "audio"
|
||||
self.agent_dir = self.session_dir / "agent"
|
||||
|
||||
self._procs = {}
|
||||
@@ -103,7 +106,7 @@ class StreamManager:
|
||||
return total
|
||||
|
||||
def setup_dirs(self):
|
||||
for d in (self.stream_dir, self.frames_dir, self.transcript_dir, self.agent_dir):
|
||||
for d in (self.stream_dir, self.frames_dir, self.transcript_dir, self.audio_dir, self.agent_dir):
|
||||
d.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
@property
|
||||
@@ -349,6 +352,77 @@ class StreamManager:
|
||||
|
||||
Thread(target=_capture, daemon=True, name="capture_now").start()
|
||||
|
||||
# -- Audio Extraction --
|
||||
|
||||
def start_audio_extractor(self, on_new_audio=None):
|
||||
"""Periodically extract audio from the growing recording as WAV chunks.
|
||||
|
||||
Same incremental pattern as scene detector: polls recording, extracts
|
||||
new time range, calls back with (wav_path, start_time, duration).
|
||||
|
||||
Args:
|
||||
on_new_audio: callback(wav_path, start_time, duration)
|
||||
"""
|
||||
self._on_new_audio = on_new_audio
|
||||
self.audio_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def _extract():
|
||||
processed_time = 0.0
|
||||
chunk_num = 0
|
||||
current_segment = None
|
||||
|
||||
while "stop" not in self._stop_flags:
|
||||
time.sleep(AUDIO_EXTRACT_INTERVAL)
|
||||
|
||||
seg = self.recording_path
|
||||
if not seg.exists():
|
||||
continue
|
||||
|
||||
if seg != current_segment:
|
||||
current_segment = seg
|
||||
processed_time = 0.0
|
||||
chunk_num = 0
|
||||
log.info("Audio extractor: switched to %s", seg.name)
|
||||
|
||||
if seg.stat().st_size < 100_000:
|
||||
continue
|
||||
|
||||
safe_duration = self._estimate_safe_duration()
|
||||
if safe_duration is None or safe_duration <= 0:
|
||||
continue
|
||||
|
||||
process_to = safe_duration - AUDIO_SAFETY_MARGIN
|
||||
if process_to <= processed_time + 1.0:
|
||||
continue
|
||||
|
||||
chunk_duration = process_to - processed_time
|
||||
wav_path = self.audio_dir / f"chunk_{chunk_num:04d}.wav"
|
||||
|
||||
try:
|
||||
ff.extract_audio_chunk(
|
||||
seg, wav_path,
|
||||
start_time=processed_time,
|
||||
duration=chunk_duration,
|
||||
)
|
||||
except Exception as e:
|
||||
log.error("Audio extraction failed: %s", e)
|
||||
continue
|
||||
|
||||
if wav_path.exists() and wav_path.stat().st_size > 100:
|
||||
log.info("Audio chunk: %s (%.1fs → %.1fs)",
|
||||
wav_path.name, processed_time, process_to)
|
||||
if self._on_new_audio:
|
||||
self._on_new_audio(wav_path, processed_time, chunk_duration)
|
||||
chunk_num += 1
|
||||
|
||||
processed_time = process_to
|
||||
|
||||
log.info("Audio extractor stopped")
|
||||
|
||||
t = Thread(target=_extract, daemon=True, name="audio_extractor")
|
||||
t.start()
|
||||
self._threads["audio_extractor"] = t
|
||||
|
||||
# -- Lifecycle --
|
||||
|
||||
def stop_all(self):
|
||||
|
||||
Reference in New Issue
Block a user