audio and transcript

This commit is contained in:
2026-04-02 22:57:21 -03:00
parent 0b5575f3b3
commit d61e2a5492
13 changed files with 556 additions and 11 deletions

View File

@@ -122,6 +122,35 @@ def extract_scene_frames(input_path, output_dir, scene_threshold=0.10,
return stdout.decode("utf-8", errors="replace"), stderr.decode("utf-8", errors="replace")
def extract_audio_chunk(input_path, output_path, start_time=0.0, duration=None):
    """Write the audio of a recording to a 16 kHz mono 16-bit PCM WAV file.

    This is the format the pipeline treats as optimal for Whisper. The
    video stream is dropped (``vn``) and an existing ``output_path`` is
    overwritten.

    Args:
        input_path: Source recording; converted with ``str()``.
        output_path: Destination WAV file; converted with ``str()``.
        start_time: Seek offset, passed as the input option ``ss`` so that
            it lands before ``-i`` on the command line (input-level seek,
            chosen for fast keyframe-based seeking).
        duration: Optional length limit, passed as the input option ``t``.
            Omitted from the command entirely when ``None``.

    Returns:
        A ``(stdout, stderr)`` tuple of strings decoded as UTF-8 with
        undecodable bytes replaced.

    An ``ffmpeg.Error`` is not propagated. The last line of its stderr is
    logged at debug level, and the output captured on the exception is
    returned (empty when missing).
    """
    # Seek/trim options are given to the input so they precede -i.
    input_options = {"ss": start_time}
    if duration is not None:
        input_options["t"] = duration
    source = ffmpeg.input(str(input_path), **input_options)

    command = ffmpeg.output(
        source,
        str(output_path),
        acodec="pcm_s16le",
        ac=1,
        ar=16000,
        vn=None,
    )
    command = command.overwrite_output()
    command = command.global_args(*QUIET_ARGS)

    log.info("extract_audio_chunk: %s", " ".join(command.compile()))

    try:
        raw_out, raw_err = command.run(capture_stdout=True, capture_stderr=True)
    except ffmpeg.Error as exc:
        # Best effort: surface only the final stderr line and fall through
        # to the normal return with whatever ffmpeg produced.
        raw_err = exc.stderr or b""
        last_line = raw_err.decode("utf-8", errors="replace").strip().split("\n")[-1]
        log.debug("ffmpeg audio error: %s", last_line)
        raw_out = exc.stdout or b""

    return (
        raw_out.decode("utf-8", errors="replace"),
        raw_err.decode("utf-8", errors="replace"),
    )
def extract_frame_at(input_path, output_path, timestamp):
"""Extract a single frame at the given timestamp."""
output = (