# mitus/cht/stream/ffmpeg.py
"""
Thin wrapper around ffmpeg-python for building and running ffmpeg pipelines.
All ffmpeg command construction goes through this module.
Uses ffmpeg-python's own run/run_async for subprocess management.
"""
import logging
import signal
import subprocess
import ffmpeg
log = logging.getLogger(__name__)
# Baseline global args: suppress the banner but keep the default -loglevel
# (info) — the scene-detection pipelines parse the showinfo filter's stderr
# output, which ffmpeg only emits at info level.
GLOBAL_ARGS = ("-hide_banner",)
# Quieter variant for pipelines that do not parse stderr.
# Individual pipelines can override with .global_args()
QUIET_ARGS = ("-hide_banner", "-loglevel", "warning")
def receive_and_record(stream_url, output_path):
    """Build an ffmpeg pipeline that pulls an mpegts stream and records it to MKV.

    Matroska is the container of choice because:
    - it tolerates incomplete writes gracefully (same rationale as OBS's default),
    - it carries proper timestamps, so seeking and duration detection work,
    - mpv plays a still-growing MKV better than raw mpegts.

    Returns an unstarted output node; pass it to run_async() to launch.
    """
    src = ffmpeg.input(stream_url, fflags="nobuffer", flags="low_delay")
    sink = ffmpeg.output(
        src,
        str(output_path),
        c="copy",
        f="matroska",
        flush_packets=1,
    )
    return sink.global_args(*QUIET_ARGS)
def receive_record_and_relay(stream_url, output_path, relay_url):
    """Receive a TCP stream and fan it out: fragmented-MP4 file plus UDP relay.

    A single ffmpeg process (tee via merge_outputs) feeds both sinks, so the
    file and the relay carry identical timestamps. Fragmented MP4
    (frag_keyframe+empty_moov) sidesteps MKV tail corruption: each keyframe
    boundary closes a self-contained fragment, so the file stays valid up to
    the last complete fragment (~one keyframe interval, roughly 2s).
    """
    src = ffmpeg.input(stream_url, fflags="nobuffer", flags="low_delay")
    to_file = ffmpeg.output(
        src,
        str(output_path),
        c="copy",
        f="mp4",
        movflags="frag_keyframe+empty_moov+default_base_moof",
        flush_packets=1,
        **{"bsf:a": "aac_adtstoasc"},
    )
    to_relay = ffmpeg.output(src, relay_url, c="copy", f="mpegts")
    merged = ffmpeg.merge_outputs(to_file, to_relay)
    return merged.global_args(*QUIET_ARGS)
def receive_record_relay_and_detect(stream_url, output_path, relay_url,
                                    scene_threshold=0.10, flush_frames=2):
    """One ffmpeg process: receive TCP → record fMP4 + relay UDP + scene detect.

    Three output branches hang off the same TCP input:
      1. File output — c=copy remux to fragmented MP4 (raw packets, no decode).
      2. UDP relay   — c=copy remux to mpegts for live display (raw packets).
      3. Scene frames — CUDA decode (GPU) → select(scene) + showinfo (CPU)
         → MJPEG piped to stdout.

    MJPEG over image2pipe avoids the image2 muxer's one-frame buffering
    delay; the caller splits the JPEG stream into files itself. showinfo
    lines with timestamps arrive on stderr, so the caller must drain both
    stdout and stderr continuously.
    """
    src = ffmpeg.input(
        stream_url, fflags="nobuffer", flags="low_delay",
        hwaccel="cuda",
    )

    # Branches 1+2: pure packet remux, no decode involved.
    to_file = ffmpeg.output(
        src, str(output_path),
        c="copy", f="mp4",
        movflags="frag_keyframe+empty_moov+default_base_moof",
        flush_packets=1,
        **{"bsf:a": "aac_adtstoasc"},
    )
    to_relay = ffmpeg.output(src, relay_url, c="copy", f="mpegts")

    # Branch 3: scene detection. When flush_frames > 0 the select expression
    # gains a "flush" term — pass a few extra frames after each scene change
    # to push the real frame through the encoder+muxer buffers;
    # mod(selected_n, 1+flush_frames) keeps flush frames from chaining.
    select_expr = f"gt(scene,{scene_threshold})"
    if flush_frames > 0:
        period = 1 + flush_frames
        select_expr = (
            f"{select_expr}+eq(n,prev_selected_n+1)*mod(selected_n,{period})"
        )
    frames = src.filter("select", select_expr).filter("showinfo")
    to_pipe = ffmpeg.output(
        frames, "pipe:1",
        f="image2pipe", vcodec="mjpeg",
        flush_packets=1, **{"q:v": "2", "fps_mode": "passthrough"},
    )

    merged = ffmpeg.merge_outputs(to_file, to_relay, to_pipe)
    return merged.global_args(*GLOBAL_ARGS)
def extract_scene_frames(input_path, output_dir, scene_threshold=0.10,
                         start_number=1, start_time=0.0, duration=None):
    """Extract frames from a file on scene change only (no interval fallback).

    Frames form a chronological storyboard — captured whenever content changes
    meaningfully vs the previous frame. No periodic fallback, so static
    content produces no spurious frames.

    start_time/duration are applied inside the select filter expression (NOT
    as -ss/-t input options, which break scene detection on fragmented MP4).

    Args:
        input_path: source media file.
        output_dir: pathlib.Path directory; frames written as F%04d.jpg.
        scene_threshold: scene-change score threshold (0..1).
        start_number: first frame-file sequence number.
        start_time: only select frames at t >= start_time (seconds).
        duration: only select frames at t <= start_time + duration.

    Returns:
        (stdout, stderr) decoded strings for showinfo timestamp parsing.
        ffmpeg failures are swallowed: a growing file's corrupt tail can make
        ffmpeg exit non-zero after producing valid frames, so stderr is
        returned for parsing either way.
    """
    scene_expr = f"gt(scene,{scene_threshold})"
    time_conditions = []
    if start_time > 0:
        time_conditions.append(f"gte(t,{start_time})")
    if duration is not None:
        time_conditions.append(f"lte(t,{start_time + duration})")
    if time_conditions:
        time_filter = "*".join(time_conditions)
        select_expr = f"({scene_expr})*{time_filter}"
    else:
        select_expr = scene_expr
    # CUDA hardware decode — GPU does h264 parsing, frames auto-transfer
    # to CPU for the scene filter. Falls back to software if unavailable.
    stream = ffmpeg.input(str(input_path), hwaccel="cuda")
    stream = stream.filter("select", select_expr).filter("showinfo")
    output = (
        ffmpeg.output(
            stream,
            str(output_dir / "F%04d.jpg"),
            # fps_mode replaces the deprecated -vsync option; "vfr" drops
            # frame duplication so only the selected frames are written.
            # Matches the fps_mode usage in the other pipelines here.
            fps_mode="vfr",
            **{"q:v": "2"},
            start_number=start_number,
        )
        .global_args(*GLOBAL_ARGS)
    )
    log.info("extract_scene_frames: %s", " ".join(output.compile()))
    try:
        stdout, stderr = output.run(capture_stdout=True, capture_stderr=True)
    except ffmpeg.Error as e:
        # ffmpeg may exit non-zero on growing files (corrupt tail) but still
        # produce valid frames. Return the stderr for parsing anyway.
        stderr = e.stderr or b""
        err_text = stderr.decode("utf-8", errors="replace")
        # Log the last meaningful line so we can see the real cause
        for line in reversed(err_text.splitlines()):
            if line.strip() and not line.startswith(" "):
                log.debug("ffmpeg scene error: %s", line.strip())
                break
        stdout = e.stdout or b""
    return stdout.decode("utf-8", errors="replace"), stderr.decode("utf-8", errors="replace")
def detect_scenes_from_pipe(scene_threshold=0.10, flush_frames=2, fps=30):
    """Build a scene-detection pipeline reading raw H.264 from stdin.

    Used when the Rust server provides a live H.264 stream via a Unix
    socket. Returns an unstarted output node for run_async; the caller
    bridges the socket to ffmpeg's stdin and reads:
      - stdin:  raw H.264 from the socket
      - stdout: MJPEG pipe (JPEG frames on scene change)
      - stderr: showinfo lines with pts_time timestamps
    """
    src = ffmpeg.input(
        "pipe:0", f="h264", framerate=fps, hwaccel="cuda",
        fflags="nobuffer", probesize=32, analyzeduration=0,
    )
    # Same flush trick as the TCP pipeline: after each scene change, select
    # a few extra frames to push the real frame through the encoder buffers;
    # the mod() term prevents flush frames from triggering further flushes.
    select_expr = f"gt(scene,{scene_threshold})"
    if flush_frames > 0:
        period = 1 + flush_frames
        select_expr = (
            f"{select_expr}+eq(n,prev_selected_n+1)*mod(selected_n,{period})"
        )
    frames = src.filter("select", select_expr).filter("showinfo")
    return ffmpeg.output(
        frames, "pipe:1",
        f="image2pipe", vcodec="mjpeg",
        flush_packets=1, strict="unofficial",
        **{"q:v": "2", "fps_mode": "passthrough"},
    ).global_args(*GLOBAL_ARGS)
def extract_audio_chunk(input_path, output_path, start_time=0.0, duration=None):
    """Extract audio from a recording as 16 kHz mono WAV (optimal for Whisper).

    Seeks at the input level (-ss before -i) for fast keyframe-based seeking.
    fMP4 is auto-detected; raw .aac files get an explicit format hint.

    Returns:
        (stdout, stderr) decoded strings. ffmpeg failures are swallowed —
        the last stderr line is logged at debug and output returned as-is.
    """
    in_opts = {"ss": start_time}
    if duration is not None:
        in_opts["t"] = duration
    if str(input_path).endswith(".aac"):
        # Raw AAC carries no container metadata, so the format must be named.
        in_opts["f"] = "aac"
    src = ffmpeg.input(str(input_path), **in_opts)
    pipeline = (
        ffmpeg.output(
            src, str(output_path),
            acodec="pcm_s16le", ac=1, ar=16000,
            vn=None,
        )
        .overwrite_output()
        .global_args(*QUIET_ARGS)
    )
    log.info("extract_audio_chunk: %s", " ".join(pipeline.compile()))
    try:
        stdout, stderr = pipeline.run(capture_stdout=True, capture_stderr=True)
    except ffmpeg.Error as e:
        stderr = e.stderr or b""
        log.debug("ffmpeg audio error: %s", stderr.decode("utf-8", errors="replace").strip().split("\n")[-1])
        stdout = e.stdout or b""
    return stdout.decode("utf-8", errors="replace"), stderr.decode("utf-8", errors="replace")
def extract_frame_at(input_path, output_path, timestamp):
    """Extract a single frame at the given timestamp.

    Unlike the chunk extractors, errors are not caught here — a failed run
    propagates as ffmpeg.Error to the caller.
    """
    src = ffmpeg.input(str(input_path), ss=timestamp, hwaccel="cuda")
    pipeline = (
        src.output(str(output_path), vframes=1, **{"q:v": "2"})
        .overwrite_output()
        .global_args(*QUIET_ARGS)
    )
    log.info("extract_frame_at: %s", " ".join(pipeline.compile()))
    pipeline.run(capture_stdout=True, capture_stderr=True)
def run_async(output_node, pipe_stdin=False, pipe_stdout=False, pipe_stderr=False):
    """Start an ffmpeg pipeline asynchronously via ffmpeg-python's run_async.

    Logs the compiled command line, then returns the subprocess handle.
    """
    log.info("run_async: %s", " ".join(output_node.compile()))
    proc = output_node.run_async(
        pipe_stdin=pipe_stdin,
        pipe_stdout=pipe_stdout,
        pipe_stderr=pipe_stderr,
    )
    return proc
def stop_proc(proc, timeout=5):
    """Gracefully stop an ffmpeg subprocess.

    Sends SIGINT first so ffmpeg can flush and finalize its outputs, then
    escalates to SIGKILL if the process has not exited within *timeout*
    seconds.

    Args:
        proc: subprocess.Popen handle; None or an already-exited process
            is a no-op.
        timeout: seconds to wait after SIGINT before force-killing.
    """
    if not proc or proc.poll() is not None:
        return
    proc.send_signal(signal.SIGINT)
    try:
        proc.wait(timeout=timeout)
    except subprocess.TimeoutExpired:
        proc.kill()
        # Reap the killed process; without this wait() it would linger
        # as a zombie until the interpreter exits.
        proc.wait()