""" Thin wrapper around ffmpeg-python for building and running ffmpeg pipelines. All ffmpeg command construction goes through this module. Uses ffmpeg-python's own run/run_async for subprocess management. """ import logging import signal import subprocess import ffmpeg log = logging.getLogger(__name__) GLOBAL_ARGS = ("-hide_banner",) # Note: scene detection needs -loglevel info for showinfo filter output. # Individual pipelines can override with .global_args() QUIET_ARGS = ("-hide_banner", "-loglevel", "warning") def receive_and_record(stream_url, output_path): """Receive mpegts stream and write to MKV file. MKV (Matroska) is used because: - Handles incomplete writes gracefully (like OBS default) - Proper timestamps for seeking and duration detection - mpv plays growing MKV files better than mpegts """ stream = ffmpeg.input(stream_url, fflags="nobuffer", flags="low_delay") return ( ffmpeg.output( stream, str(output_path), c="copy", f="matroska", flush_packets=1, ) .global_args(*QUIET_ARGS) ) def receive_record_and_relay(stream_url, output_path, relay_url): """Receive TCP stream, write to fragmented MP4, and relay to UDP loopback. Fragmented MP4 (frag_keyframe+empty_moov) avoids MKV tail corruption: each keyframe boundary closes a self-contained fragment, so the file is always valid up to the last complete fragment (~1 keyframe interval ≈ 2s). Uses ffmpeg tee via merge_outputs: one process, identical timestamps. """ stream = ffmpeg.input(stream_url, fflags="nobuffer", flags="low_delay") file_out = ffmpeg.output( stream, str(output_path), c="copy", f="mp4", movflags="frag_keyframe+empty_moov+default_base_moof", flush_packets=1, **{"bsf:a": "aac_adtstoasc"}, ) relay_out = ffmpeg.output( stream, relay_url, c="copy", f="mpegts", ) return ffmpeg.merge_outputs(file_out, relay_out).global_args(*QUIET_ARGS) def receive_record_relay_and_detect(stream_url, output_path, relay_url, scene_threshold=0.10, flush_frames=2): """Single process: receive TCP → record fMP4 + relay UDP + scene detect. One ffmpeg process, three output branches from the same TCP input: 1. File output — c=copy to fMP4 (raw packets, no decode) 2. UDP relay — c=copy to mpegts for live display (raw packets) 3. Scene frames — CUDA decode (GPU) → select(scene) + showinfo (CPU) → MJPEG piped to stdout Scene frames are piped to stdout as image2pipe/mjpeg to avoid the image2 muxer's one-frame buffering delay. The caller reads JPEG data from stdout and writes files itself. Stderr carries showinfo lines with timestamps. Both stdout and stderr must be read continuously. """ stream = ffmpeg.input( stream_url, fflags="nobuffer", flags="low_delay", hwaccel="cuda", ) # Copy outputs (raw packet remux, no decode) file_out = ffmpeg.output( stream, str(output_path), c="copy", f="mp4", movflags="frag_keyframe+empty_moov+default_base_moof", flush_packets=1, **{"bsf:a": "aac_adtstoasc"}, ) relay_out = ffmpeg.output( stream, relay_url, c="copy", f="mpegts", ) # Scene detection: CUDA decode (GPU) → select filter (CPU, lightweight) # → showinfo → MJPEG piped to stdout # scene_expr = f"gt(scene,{scene_threshold})" if flush_frames > 0: # Flush trick: select extra frames after each scene change to push # the real frame through the encoder+muxer buffer pipeline. # mod(selected_n, 1+flush_frames) prevents chaining. mod_val = 1 + flush_frames flush_expr = f"eq(n,prev_selected_n+1)*mod(selected_n,{mod_val})" select_expr = f"{scene_expr}+{flush_expr}" else: select_expr = scene_expr scene_stream = stream.filter("select", select_expr).filter("showinfo") scene_out = ffmpeg.output( scene_stream, "pipe:1", f="image2pipe", vcodec="mjpeg", flush_packets=1, **{"q:v": "2", "fps_mode": "passthrough"}, ) return ffmpeg.merge_outputs(file_out, relay_out, scene_out).global_args(*GLOBAL_ARGS) def extract_scene_frames(input_path, output_dir, scene_threshold=0.10, start_number=1, start_time=0.0, duration=None): """Extract frames from a file on scene change only (no interval fallback). Frames are a chronological storyboard — captured whenever content changes meaningfully vs the previous frame. No periodic fallback so static content produces no spurious frames. start_time/duration: applied via the select filter expression (NOT as -ss/-t input options, which break scene detection on fragmented MP4). Returns (stdout, stderr) as decoded strings for timestamp parsing. """ scene_expr = f"gt(scene,{scene_threshold})" time_conditions = [] if start_time > 0: time_conditions.append(f"gte(t,{start_time})") if duration is not None: time_conditions.append(f"lte(t,{start_time + duration})") if time_conditions: time_filter = "*".join(time_conditions) select_expr = f"({scene_expr})*{time_filter}" else: select_expr = scene_expr # CUDA hardware decode — GPU does h264 parsing, frames auto-transfer # to CPU for the scene filter. Falls back to software if unavailable. stream = ffmpeg.input(str(input_path), hwaccel="cuda") stream = stream.filter("select", select_expr).filter("showinfo") output = ( ffmpeg.output( stream, str(output_dir / "F%04d.jpg"), vsync="vfr", **{"q:v": "2"}, start_number=start_number, ) .global_args(*GLOBAL_ARGS) ) log.info("extract_scene_frames: %s", " ".join(output.compile())) try: stdout, stderr = output.run(capture_stdout=True, capture_stderr=True) except ffmpeg.Error as e: # ffmpeg may exit non-zero on growing files (corrupt tail) but still # produce valid frames. Return the stderr for parsing anyway. stderr = e.stderr or b"" err_text = stderr.decode("utf-8", errors="replace") # Log the last meaningful line so we can see the real cause for line in reversed(err_text.splitlines()): if line.strip() and not line.startswith(" "): log.debug("ffmpeg scene error: %s", line.strip()) break stdout = e.stdout or b"" return stdout.decode("utf-8", errors="replace"), stderr.decode("utf-8", errors="replace") def detect_scenes_from_pipe(scene_threshold=0.10, flush_frames=2, fps=30): """Scene-detect from piped raw H.264 on stdin. Returns a node for run_async. Used when Rust server provides a live H.264 stream via Unix socket. Caller bridges the socket to ffmpeg's stdin and reads stdout/stderr: - stdin: raw H.264 from the socket - stdout: MJPEG pipe (JPEG frames on scene change) - stderr: showinfo lines with pts_time timestamps """ stream = ffmpeg.input("pipe:0", f="h264", framerate=fps, hwaccel="cuda") scene_expr = f"gt(scene,{scene_threshold})" if flush_frames > 0: mod_val = 1 + flush_frames flush_expr = f"eq(n,prev_selected_n+1)*mod(selected_n,{mod_val})" select_expr = f"{scene_expr}+{flush_expr}" else: select_expr = scene_expr scene_stream = stream.filter("select", select_expr).filter("showinfo") return ffmpeg.output( scene_stream, "pipe:1", f="image2pipe", vcodec="mjpeg", flush_packets=1, strict="unofficial", **{"q:v": "2", "fps_mode": "passthrough"}, ).global_args(*GLOBAL_ARGS) def extract_audio_chunk(input_path, output_path, start_time=0.0, duration=None): """Extract audio from recording as 16kHz mono WAV (optimal for Whisper). Uses input-level seeking (-ss before -i) for fast keyframe-based seek. Supports fMP4 (auto-detect) and raw AAC files (explicit format hint). Returns (stdout, stderr) as decoded strings. """ kwargs = {"ss": start_time} if duration is not None: kwargs["t"] = duration # Raw AAC files need explicit format hint if str(input_path).endswith(".aac"): kwargs["f"] = "aac" stream = ffmpeg.input(str(input_path), **kwargs) output = ( ffmpeg.output( stream, str(output_path), acodec="pcm_s16le", ac=1, ar=16000, vn=None, ) .overwrite_output() .global_args(*QUIET_ARGS) ) log.info("extract_audio_chunk: %s", " ".join(output.compile())) try: stdout, stderr = output.run(capture_stdout=True, capture_stderr=True) except ffmpeg.Error as e: stderr = e.stderr or b"" log.debug("ffmpeg audio error: %s", stderr.decode("utf-8", errors="replace").strip().split("\n")[-1]) stdout = e.stdout or b"" return stdout.decode("utf-8", errors="replace"), stderr.decode("utf-8", errors="replace") def extract_frame_at(input_path, output_path, timestamp): """Extract a single frame at the given timestamp.""" output = ( ffmpeg.input(str(input_path), ss=timestamp, hwaccel="cuda") .output(str(output_path), vframes=1, **{"q:v": "2"}) .overwrite_output() .global_args(*QUIET_ARGS) ) log.info("extract_frame_at: %s", " ".join(output.compile())) output.run(capture_stdout=True, capture_stderr=True) def run_async(output_node, pipe_stdin=False, pipe_stdout=False, pipe_stderr=False): """Start an ffmpeg pipeline asynchronously via ffmpeg-python's run_async.""" log.info("run_async: %s", " ".join(output_node.compile())) return output_node.run_async( pipe_stdin=pipe_stdin, pipe_stdout=pipe_stdout, pipe_stderr=pipe_stderr, ) def stop_proc(proc, timeout=5): """Gracefully stop an ffmpeg subprocess.""" if proc and proc.poll() is None: proc.send_signal(signal.SIGINT) try: proc.wait(timeout=timeout) except subprocess.TimeoutExpired: proc.kill()