asdfasdfawds

This commit is contained in:
2026-04-03 10:21:51 -03:00
parent 3f76670169
commit fbf9984a5d
7 changed files with 92 additions and 35 deletions

View File

@@ -39,15 +39,15 @@ def receive_and_record(stream_url, output_path):
)
def receive_record_and_relay(stream_url, output_path, relay_url):
def receive_record_and_relay(stream_url, output_path, relay_url, scene_relay_url=None):
"""Receive TCP stream, write to fragmented MP4, and relay to UDP loopback.
Fragmented MP4 (frag_keyframe+empty_moov) avoids MKV tail corruption:
each keyframe boundary closes a self-contained fragment, so the file is
always valid up to the last complete fragment (~1 keyframe interval ≈ 2s).
This allows the scene detector to use a 2s safety margin instead of 6s.
Uses ffmpeg tee via merge_outputs: one process, identical timestamps.
Optionally sends a second relay for the scene detector.
"""
stream = ffmpeg.input(stream_url, fflags="nobuffer", flags="low_delay")
file_out = ffmpeg.output(
@@ -61,7 +61,50 @@ def receive_record_and_relay(stream_url, output_path, relay_url):
stream, relay_url,
c="copy", f="mpegts",
)
return ffmpeg.merge_outputs(file_out, relay_out).global_args(*QUIET_ARGS)
outputs = [file_out, relay_out]
if scene_relay_url:
scene_out = ffmpeg.output(
stream, scene_relay_url,
c="copy", f="mpegts",
)
outputs.append(scene_out)
return ffmpeg.merge_outputs(*outputs).global_args(*QUIET_ARGS)
def start_live_scene_detector(stream_url, output_dir, scene_threshold=0.10,
                              start_number=1):
    """Start a persistent ffmpeg process that detects scenes from a live stream.

    Reads from the UDP relay in real-time — no file seeking, no restart overhead.
    Writes frame JPEGs and emits showinfo on stderr as scenes are detected.
    Returns the async process (stderr must be read continuously).
    """
    # Only frames whose scene-change score exceeds the threshold survive
    # the select filter; showinfo then reports them on stderr.
    expr = f"gt(scene,{scene_threshold})"

    # Low-latency live input: no buffering, tiny probe window, zero analyze
    # duration, CUDA hardware decode requested on the input side.
    src = ffmpeg.input(
        stream_url,
        fflags="nobuffer+flush_packets",
        flags="low_delay",
        probesize="32000",
        analyzeduration="0",
        hwaccel="cuda",
    )

    detected = src.filter("select", expr).filter("showinfo")

    # Numbered JPEG sequence; vfr so only selected frames produce output,
    # q:v 2 for high-quality JPEGs. start_number lets restarts continue a
    # previous numbering sequence.
    sink = (
        ffmpeg.output(
            detected,
            str(output_dir / "F%04d.jpg"),
            vsync="vfr",
            flush_packets=1,
            **{"q:v": "2"},
            start_number=start_number,
        )
        .global_args(*GLOBAL_ARGS)
    )

    log.info("start_live_scene_detector: %s", " ".join(sink.compile()))
    return run_async(sink, pipe_stderr=True)
def extract_scene_frames(input_path, output_dir, scene_threshold=0.10,
@@ -72,23 +115,28 @@ def extract_scene_frames(input_path, output_dir, scene_threshold=0.10,
meaningfully vs the previous frame. No periodic fallback so static content
produces no spurious frames.
Uses -ss input seeking for O(1) startup regardless of file size.
pts_time in showinfo output is relative to the seek point.
start_time/duration: applied via the select filter expression (NOT as -ss/-t
input options, which break scene detection on fragmented MP4).
Returns (stdout, stderr) as decoded strings for timestamp parsing.
"""
scene_expr = f"gt(scene,{scene_threshold})"
# With -ss input seeking, t starts at 0 from the seek point.
# Only need end boundary (duration), start is handled by -ss.
if duration is not None:
scene_expr = f"({scene_expr})*lte(t,{duration})"
input_kwargs = {}
time_conditions = []
if start_time > 0:
input_kwargs["ss"] = start_time
time_conditions.append(f"gte(t,{start_time})")
if duration is not None:
time_conditions.append(f"lte(t,{start_time + duration})")
stream = ffmpeg.input(str(input_path), **input_kwargs)
stream = stream.filter("select", scene_expr).filter("showinfo")
if time_conditions:
time_filter = "*".join(time_conditions)
select_expr = f"({scene_expr})*{time_filter}"
else:
select_expr = scene_expr
# CUDA hardware decode — GPU does h264 parsing, frames auto-transfer
# to CPU for the scene filter. Falls back to software if unavailable.
stream = ffmpeg.input(str(input_path), hwaccel="cuda")
stream = stream.filter("select", select_expr).filter("showinfo")
output = (
ffmpeg.output(
@@ -150,7 +198,7 @@ def extract_audio_chunk(input_path, output_path, start_time=0.0, duration=None):
def extract_frame_at(input_path, output_path, timestamp):
"""Extract a single frame at the given timestamp."""
output = (
ffmpeg.input(str(input_path), ss=timestamp)
ffmpeg.input(str(input_path), ss=timestamp, hwaccel="cuda")
.output(str(output_path), vframes=1, **{"q:v": "2"})
.overwrite_output()
.global_args(*QUIET_ARGS)