235 lines
8.5 KiB
Python
235 lines
8.5 KiB
Python
"""
|
|
Thin wrapper around ffmpeg-python for building and running ffmpeg pipelines.
|
|
|
|
All ffmpeg command construction goes through this module.
|
|
Uses ffmpeg-python's own run/run_async for subprocess management.
|
|
"""
|
|
|
|
import logging
|
|
import signal
|
|
import subprocess
|
|
|
|
import ffmpeg
|
|
|
|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Baseline global flags: only the ffmpeg startup banner is suppressed.
# Note: scene detection needs -loglevel info for showinfo filter output,
# so pipelines that parse showinfo lines use this set.
GLOBAL_ARGS = ("-hide_banner",)

# Baseline flags plus a reduced log level.
# Individual pipelines can override with .global_args()
QUIET_ARGS = (*GLOBAL_ARGS, "-loglevel", "warning")
|
|
|
|
|
|
def receive_and_record(stream_url, output_path):
    """Build a pipeline that records an incoming mpegts stream to an MKV file.

    The stream is remuxed without re-encoding (``c="copy"``) into a Matroska
    container. Matroska is used because:

    - it handles incomplete writes gracefully (like the OBS default),
    - it carries proper timestamps for seeking and duration detection,
    - mpv plays a growing MKV file better than a growing mpegts file.

    Args:
        stream_url: URL of the stream to receive; handed to ``ffmpeg.input``.
        output_path: Destination file; converted with ``str()``.

    Returns:
        The ffmpeg-python output node with ``QUIET_ARGS`` applied. Nothing is
        executed here; running the node is left to the caller.
    """
    # Low-latency input flags: do not buffer incoming packets.
    source = ffmpeg.input(stream_url, fflags="nobuffer", flags="low_delay")

    mkv_options = {
        "c": "copy",
        "f": "matroska",
        # Flush after every packet so the file on disk stays current.
        "flush_packets": 1,
    }
    recording = ffmpeg.output(source, str(output_path), **mkv_options)
    return recording.global_args(*QUIET_ARGS)
|
|
|
|
|
|
def receive_record_and_relay(stream_url, output_path, relay_url):
    """Build a pipeline that records to fragmented MP4 and relays as mpegts.

    Fragmented MP4 (``frag_keyframe+empty_moov``) avoids MKV tail corruption:
    each keyframe boundary closes a self-contained fragment, so the file is
    always valid up to the last complete fragment (about one keyframe
    interval, roughly 2 s).

    Both outputs are combined with ``ffmpeg.merge_outputs`` so that a single
    ffmpeg process serves them from the same input.

    Args:
        stream_url: URL of the stream to receive; handed to ``ffmpeg.input``.
        output_path: Destination MP4 file; converted with ``str()``.
        relay_url: Target URL for the mpegts relay output.

    Returns:
        The merged ffmpeg-python output node with ``QUIET_ARGS`` applied.
        Nothing is executed here.
    """
    source = ffmpeg.input(stream_url, fflags="nobuffer", flags="low_delay")

    # Recording branch: packet copy into fragmented MP4.
    recording_options = {
        "c": "copy",
        "f": "mp4",
        "movflags": "frag_keyframe+empty_moov+default_base_moof",
        "flush_packets": 1,
        # Bitstream filter on the audio stream (ADTS -> MP4-style AAC).
        "bsf:a": "aac_adtstoasc",
    }
    recording = ffmpeg.output(source, str(output_path), **recording_options)

    # Relay branch: packet copy, re-wrapped as mpegts.
    relay = ffmpeg.output(source, relay_url, c="copy", f="mpegts")

    combined = ffmpeg.merge_outputs(recording, relay)
    return combined.global_args(*QUIET_ARGS)
|
|
|
|
|
|
def receive_record_relay_and_detect(stream_url, output_path, relay_url,
                                    scene_threshold=0.10):
    """Build one pipeline: record fMP4 + relay mpegts + detect scene changes.

    One ffmpeg process, three output branches from the same input:

    1. File output: ``c=copy`` to fragmented MP4 (raw packets, no decode).
    2. Relay: ``c=copy`` to mpegts at ``relay_url`` for live display.
    3. Scene frames: Vulkan decode, then ``scdet_vulkan`` with ``sc_pass=1``
       (so only scene-change frames continue down the chain), then
       ``hwdownload``, ``format=yuv420p``, ``showinfo``, and finally MJPEG
       written to stdout.

    Scene frames are piped to stdout as image2pipe/mjpeg to avoid the image2
    muxer's one-frame buffering delay. The caller reads the JPEG data from
    stdout and writes files itself. Stderr carries the showinfo lines with
    timestamps, which is why ``GLOBAL_ARGS`` (no reduced log level) is used.
    Both stdout and stderr must be read continuously.

    Args:
        stream_url: URL of the stream to receive.
        output_path: Destination MP4 file; converted with ``str()``.
        relay_url: Target URL for the mpegts relay output.
        scene_threshold: Scene-change threshold on a 0-1 scale; multiplied by
            100 before being passed to ``scdet_vulkan``.

    Returns:
        The merged ffmpeg-python output node with ``GLOBAL_ARGS`` applied.
        Nothing is executed here.
    """
    source = ffmpeg.input(
        stream_url,
        fflags="nobuffer",
        flags="low_delay",
        hwaccel="vulkan",
        hwaccel_output_format="vulkan",
    )

    # --- Branches 1 and 2: raw packet remux, no decode -------------------
    recording_options = {
        "c": "copy",
        "f": "mp4",
        "movflags": "frag_keyframe+empty_moov+default_base_moof",
        "flush_packets": 1,
        "bsf:a": "aac_adtstoasc",
    }
    recording = ffmpeg.output(source, str(output_path), **recording_options)
    relay = ffmpeg.output(source, relay_url, c="copy", f="mpegts")

    # --- Branch 3: scene detection on the Vulkan device ------------------
    # The config value is on a 0-1 scale; scdet expects 0-100.
    vulkan_threshold = scene_threshold * 100
    detected = source.filter(
        "scdet_vulkan", threshold=vulkan_threshold, sc_pass=1
    )
    downloaded = detected.filter("hwdownload")
    converted = downloaded.filter("format", "yuv420p")
    annotated = converted.filter("showinfo")

    scene_options = {
        "f": "image2pipe",
        "vcodec": "mjpeg",
        "vsync": "vfr",
        "q:v": "2",
    }
    scene_frames = ffmpeg.output(annotated, "pipe:1", **scene_options)

    combined = ffmpeg.merge_outputs(recording, relay, scene_frames)
    return combined.global_args(*GLOBAL_ARGS)
|
|
|
|
|
|
def extract_scene_frames(input_path, output_dir, scene_threshold=0.10,
                         start_number=1, start_time=0.0, duration=None):
    """Extract frames from a file on scene change only (no interval fallback).

    Frames are a chronological storyboard, captured whenever content changes
    meaningfully versus the previous frame. There is no periodic fallback, so
    static content produces no spurious frames.

    ``start_time``/``duration`` are applied via the select filter expression
    (NOT as -ss/-t input options, which break scene detection on fragmented
    MP4).

    Args:
        input_path: File to read; converted with ``str()``.
        output_dir: Directory receiving ``F%04d.jpg`` files. May be a
            ``pathlib.Path`` or a plain string.
        scene_threshold: Threshold compared against the ``scene`` score in
            the select expression.
        start_number: First number used in the output file name pattern.
        start_time: Lower time bound in seconds; only applied when > 0.
        duration: Optional window length in seconds; the upper bound is
            ``start_time + duration``.

    Returns:
        ``(stdout, stderr)`` as decoded strings for timestamp parsing. When
        ffmpeg exits non-zero the captured output is still returned rather
        than raising.
    """
    # Local import so this function is self-contained.
    from pathlib import Path

    scene_expr = f"gt(scene,{scene_threshold})"

    time_conditions = []
    if start_time > 0:
        time_conditions.append(f"gte(t,{start_time})")
    if duration is not None:
        time_conditions.append(f"lte(t,{start_time + duration})")

    if time_conditions:
        # Multiplying the sub-expressions acts as a logical AND.
        time_filter = "*".join(time_conditions)
        select_expr = f"({scene_expr})*{time_filter}"
    else:
        select_expr = scene_expr

    # CUDA hardware decode is requested for the input; the select/showinfo
    # filters then run on the decoded frames. (Per the original note, ffmpeg
    # falls back to software decoding if CUDA is unavailable.)
    stream = ffmpeg.input(str(input_path), hwaccel="cuda")
    stream = stream.filter("select", select_expr).filter("showinfo")

    # Path() accepts both str and Path, so a plain-string directory no longer
    # fails on the "/" operator.
    frame_pattern = str(Path(output_dir) / "F%04d.jpg")

    output = (
        ffmpeg.output(
            stream,
            frame_pattern,
            vsync="vfr",
            **{"q:v": "2"},
            start_number=start_number,
        )
        # showinfo lines are parsed by the caller, so the log level is not
        # reduced here.
        .global_args(*GLOBAL_ARGS)
    )

    log.info("extract_scene_frames: %s", " ".join(output.compile()))

    failed = False
    try:
        stdout, stderr = output.run(capture_stdout=True, capture_stderr=True)
    except ffmpeg.Error as e:
        # ffmpeg may exit non-zero on growing files (corrupt tail) but still
        # produce valid frames. Return the captured output for parsing anyway.
        stdout = e.stdout or b""
        stderr = e.stderr or b""
        failed = True

    # Decode each stream exactly once.
    out_text = stdout.decode("utf-8", errors="replace")
    err_text = stderr.decode("utf-8", errors="replace")

    if failed:
        # Log the last meaningful (non-blank, non-indented) line so the real
        # cause is visible.
        for line in reversed(err_text.splitlines()):
            if line.strip() and not line.startswith(" "):
                log.debug("ffmpeg scene error: %s", line.strip())
                break

    return out_text, err_text
|
|
|
|
|
|
def extract_audio_chunk(input_path, output_path, start_time=0.0, duration=None):
    """Extract audio from a recording as 16 kHz mono WAV (optimal for Whisper).

    Seeking is done at input level (``-ss`` before ``-i``) for a fast
    keyframe-based seek. An existing output file is overwritten.

    Args:
        input_path: Recording to read; converted with ``str()``.
        output_path: WAV file to write; converted with ``str()``.
        start_time: Seek position passed as the input ``ss`` option.
        duration: Optional length passed as the input ``t`` option; omitted
            when ``None``.

    Returns:
        ``(stdout, stderr)`` as decoded strings. When ffmpeg exits non-zero
        the captured output is returned instead of raising.
    """
    if duration is None:
        input_options = {"ss": start_time}
    else:
        input_options = {"ss": start_time, "t": duration}

    source = ffmpeg.input(str(input_path), **input_options)
    wav = ffmpeg.output(
        source,
        str(output_path),
        acodec="pcm_s16le",  # 16-bit PCM
        ac=1,                # mono
        ar=16000,            # 16 kHz sample rate
        vn=None,             # emits a bare -vn: no video in the output
    )
    pipeline = wav.overwrite_output().global_args(*QUIET_ARGS)

    log.info("extract_audio_chunk: %s", " ".join(pipeline.compile()))

    try:
        raw_out, raw_err = pipeline.run(capture_stdout=True, capture_stderr=True)
    except ffmpeg.Error as exc:
        # Best effort: keep whatever ffmpeg produced and log its final line.
        raw_err = exc.stderr or b""
        last_line = raw_err.decode("utf-8", errors="replace").strip().split("\n")[-1]
        log.debug("ffmpeg audio error: %s", last_line)
        raw_out = exc.stdout or b""

    return (
        raw_out.decode("utf-8", errors="replace"),
        raw_err.decode("utf-8", errors="replace"),
    )
|
|
|
|
|
|
def extract_frame_at(input_path, output_path, timestamp):
    """Extract a single frame at the given timestamp.

    Args:
        input_path: File to read; converted with ``str()``.
        output_path: Image file to write; converted with ``str()``. An
            existing file is overwritten.
        timestamp: Seek position passed as the input ``ss`` option.

    Returns:
        None. An ``ffmpeg.Error`` raised by ``run()`` is not caught here and
        propagates to the caller.
    """
    # Input-level seek with CUDA hardware decode requested.
    source = ffmpeg.input(str(input_path), ss=timestamp, hwaccel="cuda")

    # Exactly one video frame at quality level 2.
    still = source.output(str(output_path), vframes=1, **{"q:v": "2"})
    pipeline = still.overwrite_output().global_args(*QUIET_ARGS)

    log.info("extract_frame_at: %s", " ".join(pipeline.compile()))
    pipeline.run(capture_stdout=True, capture_stderr=True)
|
|
|
|
|
|
def run_async(output_node, pipe_stdout=False, pipe_stderr=False):
    """Start an ffmpeg pipeline asynchronously via ffmpeg-python's run_async.

    The compiled command line is logged at INFO level before the process is
    started.

    Args:
        output_node: ffmpeg-python output node to run.
        pipe_stdout: Forwarded to ``run_async`` as ``pipe_stdout``.
        pipe_stderr: Forwarded to ``run_async`` as ``pipe_stderr``.

    Returns:
        Whatever ``output_node.run_async`` returns (the started process).
    """
    command_line = " ".join(output_node.compile())
    log.info("run_async: %s", command_line)

    process = output_node.run_async(
        pipe_stdout=pipe_stdout, pipe_stderr=pipe_stderr
    )
    return process
|
|
|
|
|
|
def stop_proc(proc, timeout=5):
    """Gracefully stop an ffmpeg subprocess.

    Sends SIGINT and waits up to ``timeout`` seconds for the process to exit.
    If it is still running after that, the process is killed and then waited
    on so that it is reaped and its return code is populated.

    Args:
        proc: Process object exposing ``poll``, ``send_signal``, ``wait`` and
            ``kill`` (e.g. ``subprocess.Popen``). A falsy value or an already
            finished process is a no-op.
        timeout: Seconds to wait after SIGINT before killing.

    Returns:
        None.
    """
    # Nothing to do for a missing or already-exited process.
    if not proc or proc.poll() is not None:
        return

    proc.send_signal(signal.SIGINT)
    try:
        proc.wait(timeout=timeout)
    except subprocess.TimeoutExpired:
        proc.kill()
        # Reap the killed child; without this it lingers as a zombie and
        # proc.returncode stays unset.
        proc.wait()
|