Almost back to a working state with the Rust transport

This commit is contained in:
2026-04-09 22:15:16 -03:00
parent ff96dcb4f7
commit 512d8ecef8
13 changed files with 1504 additions and 488 deletions

View File

@@ -178,15 +178,45 @@ def extract_scene_frames(input_path, output_dir, scene_threshold=0.10,
return stdout.decode("utf-8", errors="replace"), stderr.decode("utf-8", errors="replace")
def detect_scenes_from_pipe(scene_threshold=0.10, flush_frames=2, fps=30):
    """Build an ffmpeg node that scene-detects raw H.264 arriving on stdin.

    Returns a node for run_async(). Used when the Rust server exposes a
    live H.264 stream via a Unix socket; the caller pumps the socket into
    ffmpeg's stdin and consumes:
      - stdout: MJPEG pipe (one JPEG per selected frame)
      - stderr: showinfo lines carrying pts_time timestamps
    """
    source = ffmpeg.input("pipe:0", f="h264", framerate=fps, hwaccel="cuda")
    select_expr = f"gt(scene,{scene_threshold})"
    if flush_frames > 0:
        # Also pass a few frames right after each selected one — presumably
        # to force the muxer to flush promptly; downstream readers de-dupe
        # them by pts (TODO confirm intent).
        period = 1 + flush_frames
        select_expr = (
            f"{select_expr}+eq(n,prev_selected_n+1)*mod(selected_n,{period})"
        )
    filtered = source.filter("select", select_expr).filter("showinfo")
    extra_opts = {"q:v": "2", "fps_mode": "passthrough"}
    node = ffmpeg.output(
        filtered, "pipe:1",
        f="image2pipe", vcodec="mjpeg",
        flush_packets=1, strict="unofficial",
        **extra_opts,
    )
    return node.global_args(*GLOBAL_ARGS)
def extract_audio_chunk(input_path, output_path, start_time=0.0, duration=None):
"""Extract audio from recording as 16kHz mono WAV (optimal for Whisper).
Uses input-level seeking (-ss before -i) for fast keyframe-based seek.
Supports fMP4 (auto-detect) and raw AAC files (explicit format hint).
Returns (stdout, stderr) as decoded strings.
"""
kwargs = {"ss": start_time}
if duration is not None:
kwargs["t"] = duration
# Raw AAC files need explicit format hint
if str(input_path).endswith(".aac"):
kwargs["f"] = "aac"
stream = ffmpeg.input(str(input_path), **kwargs)
output = (
ffmpeg.output(
@@ -219,10 +249,11 @@ def extract_frame_at(input_path, output_path, timestamp):
output.run(capture_stdout=True, capture_stderr=True)
def run_async(output_node, pipe_stdin=False, pipe_stdout=False, pipe_stderr=False):
    """Launch an ffmpeg pipeline in the background via ffmpeg-python.

    The pipe_* flags choose which standard streams are exposed as pipes
    on the returned process handle.
    """
    cmd = " ".join(output_node.compile())
    log.info("run_async: %s", cmd)
    kwargs = dict(
        pipe_stdin=pipe_stdin,
        pipe_stdout=pipe_stdout,
        pipe_stderr=pipe_stderr,
    )
    return output_node.run_async(**kwargs)

View File

@@ -57,14 +57,29 @@ class StreamLifecycle:
def tracker(self) -> RecordingTracker | None:
return self._tracker
def start(self, session_id=None) -> StreamManager:
"""Start recording and all background processes. Returns the StreamManager."""
def start(self, session_id=None, rust_transport=False) -> StreamManager:
"""Start recording and all background processes. Returns the StreamManager.
rust_transport=True: skip StreamRecorder (Rust cht-server handles TCP +
fMP4 + UDP relay). Session dir is discovered from data/active-session
written by cht-server on first client connection.
"""
self._streaming = True
self._gone_live = False
self._rust_transport = rust_transport
self._stream_mgr = StreamManager(session_id=session_id)
self._stream_mgr.setup_dirs()
self._stream_mgr.start_recorder()
if rust_transport:
# Wait for cht-server to write the active session path.
session_dir = self._wait_for_rust_session()
if session_dir is None:
log.error("Timed out waiting for cht-server session")
self._streaming = False
return None
self._stream_mgr = StreamManager.from_rust_session(session_dir)
else:
self._stream_mgr = StreamManager(session_id=session_id)
self._stream_mgr.setup_dirs()
self._stream_mgr.start_recorder()
self._tracker = RecordingTracker(
get_segments=lambda: self._stream_mgr.recording_segments if self._stream_mgr else [],
@@ -76,10 +91,28 @@ class StreamLifecycle:
self._stream_mgr.start_audio_extractor(on_new_audio=self._handle_new_audio)
GLib.timeout_add(1000, self._tick_live)
GLib.timeout_add(2000, self._check_recorder)
if not rust_transport:
GLib.timeout_add(2000, self._check_recorder)
return self._stream_mgr
def _wait_for_rust_session(self, timeout=30, poll_interval=0.5):
    """Poll data/active-session until cht-server writes it.

    cht-server writes the session directory path to DATA_DIR/active-session
    on first client connection. Returns the session dir as a Path once both
    the marker file and the directory it points at exist, or None on timeout.
    """
    import time
    from pathlib import Path
    from cht.config import DATA_DIR

    marker = DATA_DIR / "active-session"
    # Monotonic deadline instead of `elapsed += poll_interval`: the old
    # accumulator ignored time spent in exists()/read_text(), so the real
    # wait could overshoot the requested timeout.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if marker.exists():
            session_dir = Path(marker.read_text().strip())
            if session_dir.exists():
                log.info("Rust session dir: %s", session_dir)
                return session_dir
        time.sleep(poll_interval)
    return None
def stop(self):
"""Stop all processes and reset state. Does NOT touch UI — caller handles that."""
if self._tracker:

View File

@@ -1,30 +1,25 @@
"""
StreamManager: orchestrates ffmpeg for recording and scene detection.
"""StreamManager: coordinates StreamRecorder and SessionProcessor.
Architecture:
sender → TCP:4444 → single ffmpeg process:
1. writes fMP4 to disk (c=copy)
2. relays UDP for live display (c=copy)
3. CUDA decode → scene filter → JPEG frames (real-time)
Thin facade that keeps the existing public API intact while delegating
to two focused classes:
StreamRecorder — ffmpeg network receiver + fMP4 recorder + scene detection
(to be replaced by cht-server in Rust in a future phase)
SessionProcessor — audio extraction from fMP4
(stays Python; reads files regardless of how they were written)
Callers (lifecycle.py, window.py) use StreamManager as before — no changes
needed there.
"""
import json
import logging
import re
import time
from threading import Thread
from pathlib import Path
from cht.config import (
STREAM_HOST,
STREAM_PORT,
RELAY_PORT,
SCENE_THRESHOLD,
SCENE_FLUSH_FRAMES,
SESSIONS_DIR,
AUDIO_EXTRACT_INTERVAL,
AUDIO_SAFETY_MARGIN,
)
from cht.stream import ffmpeg as ff
from cht.config import SCENE_THRESHOLD, SESSIONS_DIR
from cht.stream.recorder import StreamRecorder
from cht.stream.processor import SessionProcessor
log = logging.getLogger(__name__)
@@ -62,16 +57,55 @@ class StreamManager:
self.audio_dir = self.session_dir / "audio"
self.agent_dir = self.session_dir / "agent"
self._procs = {}
self._threads = {}
self._stop_flags = set()
self._segment = 0
self._segment_offsets = {0: 0.0} # segment_index → global_offset
self.scene_threshold = SCENE_THRESHOLD
self.readonly = False # True when loaded from existing session
self.telemetry = None # set by window after start
self.readonly = False
self.telemetry = None
self.recorder = StreamRecorder(self.session_dir)
self.processor = SessionProcessor(self.session_dir)
self.processor.attach(
get_recording_path=lambda: self.recorder.recording_path,
get_current_global_offset=lambda: self.recorder.current_global_offset,
)
# Wire recorder pipe output → processor frame handling
self.recorder.set_on_raw_frame(self.processor.on_raw_frame)
log.info("Session: %s", session_id)
@classmethod
def from_rust_session(cls, session_dir: Path):
    """Attach to a live session being recorded by cht-server (Rust).

    The Rust server owns the TCP + fMP4 + UDP relay, so no StreamRecorder
    is started. SessionProcessor handles audio extraction from the growing
    fMP4. The scene detection pipe is also skipped (Rust will handle it
    eventually).
    """
    mgr = cls.__new__(cls)
    mgr.session_id = session_dir.name
    mgr.session_dir = session_dir
    mgr.stream_dir = session_dir / "stream"
    mgr.frames_dir = session_dir / "frames"
    mgr.transcript_dir = session_dir / "transcript"
    mgr.audio_dir = session_dir / "audio"
    mgr.agent_dir = session_dir / "agent"
    mgr.readonly = False
    mgr.telemetry = None
    mgr.recorder = None  # Rust server owns transport + recording.

    def _earliest_recording():
        # First recording_*.mp4 in the stream dir, or None if none yet.
        if not mgr.stream_dir.exists():
            return None
        return next(iter(sorted(mgr.stream_dir.glob("recording_*.mp4"))), None)

    mgr.processor = SessionProcessor(session_dir)
    mgr.processor.attach(
        get_recording_path=_earliest_recording,
        get_current_global_offset=lambda: 0.0,
    )
    for d in (mgr.stream_dir, mgr.frames_dir, mgr.transcript_dir,
              mgr.audio_dir, mgr.agent_dir):
        d.mkdir(parents=True, exist_ok=True)
    log.info("Attached to Rust session: %s", mgr.session_id)
    return mgr
@classmethod
def from_existing(cls, session_id):
"""Load an existing session without starting any ffmpeg processes."""
@@ -80,51 +114,83 @@ class StreamManager:
if not mgr.session_dir.exists():
raise FileNotFoundError(f"Session not found: {session_id}")
mgr.readonly = True
# Point _segment to last recording segment
segments = mgr.recording_segments
if segments:
mgr._segment = len(segments) - 1
mgr._rebuild_offsets()
mgr.recorder._segment = max(0, len(mgr.recorder.recording_segments) - 1)
mgr.recorder._rebuild_offsets()
rebuild_manifest(mgr.session_dir)
log.info("Loaded existing session: %s (%d segments, %d frames)",
session_id, len(segments), mgr.frame_count)
session_id, len(mgr.recorder.recording_segments), mgr.frame_count)
return mgr
# -- Recorder delegation --
@property
def scene_threshold(self) -> float:
    """Active scene-change threshold.

    Delegates to the local recorder when one exists; in Rust transport
    mode falls back to the configured default. Uses SCENE_THRESHOLD
    rather than a hard-coded 0.10 so the fallback cannot drift from the
    project configuration.
    """
    if self.recorder:
        return self.recorder.scene_threshold
    return SCENE_THRESHOLD
@property
def relay_url(self) -> str:
    """UDP relay URL used for the live display."""
    if self.recorder is not None:
        return self.recorder.relay_url
    # Rust transport relays on the default local port.
    return "udp://127.0.0.1:4445"
@property
def recording_path(self) -> Path:
    """Current recording segment; in Rust mode, the earliest segment on
    disk, or None when nothing has been written yet."""
    if self.recorder is not None:
        return self.recorder.recording_path
    found = sorted(self.stream_dir.glob("recording_*.mp4"))
    return found[0] if found else None
@property
def recording_segments(self) -> list[Path]:
    """All recording segments on disk, in filename order."""
    if self.recorder is not None:
        return self.recorder.recording_segments
    return sorted(self.stream_dir.glob("recording_*.mp4"))
@property
def current_global_offset(self) -> float:
"""Global time offset for the current recording segment."""
return self._segment_offsets.get(self._segment, 0.0)
def _rebuild_offsets(self):
"""Compute global offsets from all segments on disk."""
from cht.session import probe_duration
offset = 0.0
self._segment_offsets = {}
for i, seg in enumerate(self.recording_segments):
self._segment_offsets[i] = offset
offset += probe_duration(seg)
def _advance_segment_offset(self, completed_segment_path):
"""Update offsets after a segment completes and a new one begins."""
from cht.session import probe_duration
dur = probe_duration(completed_segment_path)
prev_offset = self._segment_offsets.get(self._segment, 0.0)
self._segment_offsets[self._segment + 1] = prev_offset + dur
log.info("Segment %d completed (%.1fs), next offset: %.1fs",
self._segment, dur, prev_offset + dur)
return self.recorder.current_global_offset if self.recorder else 0.0
@property
def frame_count(self):
index_path = self.frames_dir / "index.json"
if index_path.exists():
try:
return len(json.loads(index_path.read_text()))
except Exception:
pass
return 0
def frame_count(self) -> int:
return self.processor.frame_count
def total_duration(self):
"""Probe total duration across all segments (for completed sessions)."""
def setup_dirs(self):
    """Create every session subdirectory (idempotent)."""
    subdirs = (self.stream_dir, self.frames_dir, self.transcript_dir,
               self.audio_dir, self.agent_dir)
    for directory in subdirs:
        directory.mkdir(parents=True, exist_ok=True)
def start_recorder(self):
    """Start the local ffmpeg receiver/recorder; no-op in Rust mode."""
    if self.recorder is not None:
        self.recorder.start()
def restart_recorder(self):
    """Rotate the recorder into a new segment; no-op in Rust mode."""
    if self.recorder is not None:
        self.recorder.restart()
def recorder_alive(self) -> bool:
    """Health check for the recorder process.

    Always True in Rust transport mode — the Rust server owns the
    transport, so there is no local process to watch.
    """
    if self.recorder is None:
        return True
    return self.recorder.alive()
def start_scene_detector(self, on_new_frames=None):
    """Wire up scene-frame callbacks for whichever side detects scenes.

    With a local recorder, detection already runs inside its ffmpeg
    process — only the callback is registered. In Rust mode the
    processor runs its own socket-fed detector, started here.
    """
    if self.recorder is not None:
        self.recorder.set_on_new_scene_frames(on_new_frames)
    else:
        self.processor.set_on_new_frames(on_new_frames)
        self.processor.start_scene_detector(threshold=SCENE_THRESHOLD)
def capture_now(self, on_new_frames=None):
    """Capture a single frame on demand.

    The processor gets the callback either way; only a local recorder
    can actually extract a frame right now.
    """
    self.processor.set_on_new_frames(on_new_frames)
    if self.recorder is not None:
        self.recorder.capture_now(on_raw_frame=self.processor.on_captured_frame)
def update_scene_threshold(self, new_threshold: float):
    """Forward a threshold change to the recorder (no-op in Rust mode)."""
    if self.recorder is not None:
        self.recorder.update_scene_threshold(new_threshold)
# -- Processor delegation --
def start_audio_extractor(self, on_new_audio=None):
    """Delegate periodic WAV-chunk extraction to the SessionProcessor."""
    self.processor.start_audio_extractor(on_new_audio=on_new_audio)
# -- Session-level --
def total_duration(self) -> float:
total = 0.0
for seg in self.recording_segments:
try:
@@ -144,323 +210,8 @@ class StreamManager:
total += seg.stat().st_size / 65_000
return total
def setup_dirs(self):
for d in (self.stream_dir, self.frames_dir, self.transcript_dir, self.audio_dir, self.agent_dir):
d.mkdir(parents=True, exist_ok=True)
@property
def stream_url(self):
return f"tcp://{STREAM_HOST}:{STREAM_PORT}?listen"
@property
def relay_url(self):
return f"udp://127.0.0.1:{RELAY_PORT}"
@property
def recording_path(self):
"""Current recording segment path."""
return self.stream_dir / f"recording_{self._segment:03d}.mp4"
@property
def recording_segments(self):
"""All recording segments in order."""
return sorted(self.stream_dir.glob("recording_*.mp4"))
# -- Recording --
def start_recorder(self):
"""Start ffmpeg to receive TCP stream, write to fMP4, and relay to UDP."""
# Start after existing segments (for resumed sessions)
existing = self.recording_segments
self._segment = len(existing)
self._rebuild_offsets()
self._launch_recorder()
def restart_recorder(self):
"""Restart recorder into a new segment. Session stays alive."""
old = self._procs.pop("recorder", None)
if old:
ff.stop_proc(old)
completed_path = self.recording_path
self._advance_segment_offset(completed_path)
self._segment += 1
log.info("Restarting recorder → segment %d (offset %.1fs)",
self._segment, self.current_global_offset)
self._launch_recorder()
def recorder_alive(self):
"""Check if the recorder process is still running."""
proc = self._procs.get("recorder")
return proc is not None and proc.poll() is None
def _launch_recorder(self):
start_number = self._next_frame_number()
node = ff.receive_record_relay_and_detect(
self.stream_url, self.recording_path, self.relay_url,
scene_threshold=self.scene_threshold,
flush_frames=SCENE_FLUSH_FRAMES,
)
proc = ff.run_async(node, pipe_stdout=True, pipe_stderr=True)
self._procs["recorder"] = proc
log.info("Recorder+scene: pid=%s%s (threshold=%.2f, start_number=%d)",
proc.pid, self.recording_path, self.scene_threshold, start_number)
self._start_scene_readers(proc, start_number)
# -- Scene Detection --
def start_scene_detector(self, on_new_frames=None):
"""Register callback for new scene frames.
Scene detection runs inside the recorder process (single ffmpeg).
The stderr reader thread parses showinfo lines and fires this callback.
"""
self._on_new_frames = on_new_frames
def update_scene_threshold(self, new_threshold):
"""Update scene threshold. Restarts the recorder to apply new filter."""
self.scene_threshold = new_threshold
log.info("Threshold changed → %.2f, restarting recorder", new_threshold)
self.restart_recorder()
def _next_frame_number(self):
"""Determine next frame number from the index (source of truth)."""
index_path = self.frames_dir / "index.json"
if index_path.exists():
index = json.loads(index_path.read_text())
return len(index) + 1
return 1
def _append_frame_index(self, entry):
"""Append a frame entry to index.json."""
index_path = self.frames_dir / "index.json"
index = json.loads(index_path.read_text()) if index_path.exists() else []
index.append(entry)
index_path.write_text(json.dumps(index, indent=2))
def _start_scene_readers(self, proc, start_number):
"""Read scene frames from stdout (MJPEG pipe) and timestamps from stderr.
Two threads:
- stderr: parses showinfo lines, queues pts_time values
- stdout: reads JPEG frames from pipe, pairs with queued timestamps,
writes files to disk, fires callbacks immediately
"""
from queue import Queue, Empty
import os
ts_queue = Queue()
def _read_stderr():
for raw in proc.stderr:
line = raw.decode("utf-8", errors="replace").rstrip()
if not line:
continue
if "showinfo" not in line:
log.debug("[recorder] %s", line)
continue
pts_match = re.search(r"pts_time:\s*([\d.]+)", line)
if pts_match:
ts_queue.put(float(pts_match.group(1)))
log.info("[recorder] stderr closed, exit=%s", proc.poll())
def _read_stdout():
frame_num = start_number
offset = self.current_global_offset
last_pts = -1.0
buf = b""
raw_fd = proc.stdout.fileno()
while True:
chunk = os.read(raw_fd, 65536)
if not chunk:
break
buf += chunk
while True:
soi = buf.find(b"\xff\xd8")
if soi < 0:
buf = b""
break
eoi = buf.find(b"\xff\xd9", soi + 2)
if eoi < 0:
buf = buf[soi:]
break
jpeg_data = buf[soi:eoi + 2]
buf = buf[eoi + 2:]
try:
pts_time = ts_queue.get(timeout=2.0)
except Empty:
log.warning("No timestamp for scene frame %d", frame_num)
pts_time = 0.0
# Skip flush frames (within 100ms of previous = duplicate)
if pts_time - last_pts < 0.1:
log.debug("Skipping flush frame at pts=%.3f", pts_time)
continue
last_pts = pts_time
frame_id = f"F{frame_num:04d}"
frame_path = self.frames_dir / f"{frame_id}.jpg"
frame_path.write_bytes(jpeg_data)
entry = {
"id": frame_id,
"timestamp": pts_time + offset,
"path": str(frame_path),
"sent_to_agent": False,
}
self._append_frame_index(entry)
log.info("Scene frame: %s at %.1fs (pts=%.1f + offset=%.1f)",
frame_id, entry["timestamp"], pts_time, offset)
if self._on_new_frames:
self._on_new_frames([entry])
frame_num += 1
log.info("[recorder] stdout closed")
Thread(target=_read_stderr, daemon=True, name="recorder_stderr").start()
Thread(target=_read_stdout, daemon=True, name="recorder_stdout").start()
def _probe_safe_duration(self):
"""Probe current recording duration via ffprobe. Returns seconds or None."""
try:
import ffmpeg as ffmpeg_lib
info = ffmpeg_lib.probe(str(self.recording_path))
dur = float(info.get("format", {}).get("duration", 0))
if dur > 0:
return dur
for stream in info.get("streams", []):
sdur = float(stream.get("duration", 0))
if sdur > 0:
return sdur
except Exception:
pass
try:
return self.recording_path.stat().st_size / 65_000
except Exception:
return None
def capture_now(self, on_new_frames=None):
"""Capture a single frame from the current recording position.
Grabs the latest available frame (safe_duration - 1s) and adds it
to the index. Runs in a thread to avoid blocking the UI.
"""
def _capture():
safe_duration = self._probe_safe_duration()
if not safe_duration or safe_duration < 1:
log.warning("capture_now: recording too short")
return
local_timestamp = safe_duration - 1
timestamp = local_timestamp + self.current_global_offset
frame_num = self._next_frame_number()
frame_id = f"F{frame_num:04d}"
frame_path = self.frames_dir / f"{frame_id}.jpg"
try:
ff.extract_frame_at(self.recording_path, frame_path, local_timestamp)
except Exception as e:
log.error("capture_now failed: %s", e)
return
if not frame_path.exists():
log.warning("capture_now: frame not written")
return
entry = {
"id": frame_id,
"timestamp": timestamp,
"path": str(frame_path),
"sent_to_agent": False,
}
self._append_frame_index(entry)
log.info("Manual capture: %s at %.1fs", frame_id, timestamp)
if on_new_frames:
on_new_frames([entry])
Thread(target=_capture, daemon=True, name="capture_now").start()
# -- Audio Extraction --
def start_audio_extractor(self, on_new_audio=None):
"""Periodically extract audio from the growing recording as WAV chunks.
Same incremental pattern as scene detector: polls recording, extracts
new time range, calls back with (wav_path, start_time, duration).
Args:
on_new_audio: callback(wav_path, start_time, duration)
"""
self._on_new_audio = on_new_audio
self.audio_dir.mkdir(parents=True, exist_ok=True)
def _extract():
processed_time = 0.0
chunk_num = 0
current_segment = None
while "stop" not in self._stop_flags:
time.sleep(AUDIO_EXTRACT_INTERVAL)
seg = self.recording_path
if not seg.exists():
continue
if seg != current_segment:
current_segment = seg
processed_time = 0.0
chunk_num = 0
log.info("Audio extractor: switched to %s", seg.name)
if seg.stat().st_size < 100_000:
continue
safe_duration = self._probe_safe_duration()
if safe_duration is None or safe_duration <= 0:
continue
process_to = safe_duration - AUDIO_SAFETY_MARGIN
if process_to <= processed_time + 1.0:
continue
chunk_duration = process_to - processed_time
wav_path = self.audio_dir / f"chunk_{chunk_num:04d}.wav"
try:
ff.extract_audio_chunk(
seg, wav_path,
start_time=processed_time,
duration=chunk_duration,
)
except Exception as e:
log.error("Audio extraction failed: %s", e)
continue
if wav_path.exists() and wav_path.stat().st_size > 100:
global_start = processed_time + self.current_global_offset
log.info("Audio chunk: %s (%.1fs → %.1fs, global %.1fs)",
wav_path.name, processed_time, process_to, global_start)
if self._on_new_audio:
self._on_new_audio(
wav_path, global_start, chunk_duration,
segment_path=seg, local_start=processed_time,
)
chunk_num += 1
processed_time = process_to
log.info("Audio extractor stopped")
t = Thread(target=_extract, daemon=True, name="audio_extractor")
t.start()
self._threads["audio_extractor"] = t
# -- Lifecycle --
def stop_all(self):
log.info("Stopping all...")
self._stop_flags.add("stop")
for name, proc in self._procs.items():
log.info("Stopping %s", name)
ff.stop_proc(proc)
self._procs.clear()
self.processor.stop()
if self.recorder:
self.recorder.stop()

365
cht/stream/processor.py Normal file
View File

@@ -0,0 +1,365 @@
"""SessionProcessor: processes raw frame data and audio from recordings.
Receives raw JPEG frames from StreamRecorder (via on_raw_frame callback) and
handles all frame processing: file writing, frame index, GUI callbacks.
Also extracts audio from fMP4 files by polling (latency-insensitive).
The boundary with StreamRecorder:
Recorder: reads pipe → fires on_raw_frame(jpeg_bytes, global_ts)
Processor: writes JPEG to disk, updates index, fires on_new_frames to GUI
When Rust owns transport, SessionProcessor connects to the server's Unix
domain socket (scene.sock) for a live H.264 stream, pipes it to ffmpeg
for GPU scene detection. Continuous stream — no polling, no restarts.
"""
import json
import logging
import os
import re
import socket
import time
from pathlib import Path
from queue import Queue, Empty
from threading import Thread
from cht.config import (
AUDIO_EXTRACT_INTERVAL,
AUDIO_SAFETY_MARGIN,
SCENE_THRESHOLD,
SCENE_FLUSH_FRAMES,
)
from cht.stream import ffmpeg as ff
log = logging.getLogger(__name__)
class SessionProcessor:
    """Writes scene frames to disk and extracts audio from fMP4.

    Receives raw JPEG frames from StreamRecorder (via the on_raw_frame
    callback) and handles all frame processing: file writing, the frame
    index, and GUI callbacks. Also extracts audio from fMP4 files by
    polling (latency-insensitive).

    Boundary with StreamRecorder:
        Recorder:  reads pipe → fires on_raw_frame(jpeg_bytes, global_ts)
        Processor: writes JPEG, updates index, fires on_new_frames to GUI

    When Rust owns transport, SessionProcessor connects to the server's
    Unix domain socket (scene.sock) for a live H.264 stream and pipes it
    to ffmpeg for GPU scene detection — continuous stream, no polling,
    no restarts.
    """

    def __init__(self, session_dir: Path):
        self.session_dir = session_dir
        self.frames_dir = session_dir / "frames"
        self.audio_dir = session_dir / "audio"
        self._stop_flags: set[str] = set()
        self._threads: dict[str, Thread] = {}
        # Child ffmpeg processes (scene detector). Created eagerly so
        # stop() and _run_scene_session() never need hasattr/getattr
        # probing for a lazily-created attribute.
        self._procs: dict = {}
        self._on_new_frames = None   # cb(entries: list[dict]) → GUI
        self._on_new_audio = None    # cb(wav_path, start, duration, ...)
        self._get_recording_path = None
        self._get_current_global_offset = None

    def attach(self, get_recording_path, get_current_global_offset):
        """Wire up callbacks to query the recorder's current state."""
        self._get_recording_path = get_recording_path
        self._get_current_global_offset = get_current_global_offset

    # -- Scene frame handling (called from recorder's pipe thread) --

    def on_raw_frame(self, jpeg_bytes: bytes, global_ts: float):
        """Receive a raw JPEG frame from the recorder pipe. Write and index it."""
        frame_num = self._next_frame_number()
        frame_id = f"F{frame_num:04d}"
        frame_path = self.frames_dir / f"{frame_id}.jpg"
        frame_path.write_bytes(jpeg_bytes)
        entry = {
            "id": frame_id,
            "timestamp": global_ts,
            "path": str(frame_path),
            "sent_to_agent": False,
        }
        self._append_frame_index(entry)
        log.info("Scene frame: %s at %.1fs", frame_id, global_ts)
        if self._on_new_frames:
            self._on_new_frames([entry])

    def set_on_new_frames(self, cb):
        """Register cb(entries: list[dict]) fired once per new frame."""
        self._on_new_frames = cb

    # -- On-demand capture (recorder extracts bytes, processor indexes) --

    def on_captured_frame(self, jpeg_bytes: bytes, global_ts: float):
        """Receive a manually captured frame. Write and index it."""
        self.on_raw_frame(jpeg_bytes, global_ts)

    # -- Frame index --

    def _load_index(self) -> list:
        """Best-effort read of frames/index.json; [] if missing/corrupt.

        Shared by frame_count and _next_frame_number, which previously
        duplicated this parse-and-swallow logic.
        """
        index_path = self.frames_dir / "index.json"
        if index_path.exists():
            try:
                return json.loads(index_path.read_text())
            except Exception:
                pass
        return []

    @property
    def frame_count(self) -> int:
        """Number of frames recorded in the index (0 if none/unreadable)."""
        return len(self._load_index())

    def _next_frame_number(self) -> int:
        """1-based number for the next frame; the index is the source of truth."""
        return len(self._load_index()) + 1

    def _append_frame_index(self, entry: dict):
        """Append one entry to frames/index.json (read-modify-write).

        Reads strictly (no exception swallowing): silently replacing a
        corrupt index with a single entry would discard prior frames.
        """
        index_path = self.frames_dir / "index.json"
        index = json.loads(index_path.read_text()) if index_path.exists() else []
        index.append(entry)
        index_path.write_text(json.dumps(index, indent=2))

    # -- Scene detection via Unix socket (Rust transport mode) --

    def start_scene_detector(self, threshold=None):
        """Connect to Rust server's scene socket and run GPU scene detection.

        The server provides a live H.264 stream via a Unix domain socket at
        stream/scene.sock. We pipe it to ffmpeg for CUDA scene detection —
        continuous stream, no polling, no restarts.
        """
        # Explicit None check: the previous `threshold or SCENE_THRESHOLD`
        # silently replaced a legitimate caller-supplied 0.0.
        if threshold is None:
            threshold = SCENE_THRESHOLD
        t = Thread(target=self._scene_detect_loop, daemon=True,
                   name="scene_detector", args=(threshold,))
        t.start()
        self._threads["scene_detector"] = t

    def _scene_detect_loop(self, threshold):
        """Connect to scene socket, pipe H.264 to ffmpeg, read scene frames.

        Retries on failure (e.g. ffmpeg dies from bad initial frames).
        The server buffers the latest keyframe so reconnects start clean.
        """
        socket_path = self.session_dir / "stream" / "scene.sock"
        # Wait for the socket to appear (server creates it on session start).
        while "stop" not in self._stop_flags:
            if socket_path.exists():
                break
            time.sleep(0.5)
        if "stop" in self._stop_flags:
            return
        while "stop" not in self._stop_flags:
            try:
                self._run_scene_session(socket_path, threshold)
            except Exception:
                log.exception("Scene detector error")
            if "stop" in self._stop_flags:
                break
            log.info("Scene detector: reconnecting in 2s...")
            time.sleep(2.0)
        log.info("Scene detector stopped")

    def _run_scene_session(self, socket_path, threshold):
        """Single scene detection session: connect, run ffmpeg, read frames."""
        log.info("Scene detector: connecting to %s", socket_path)
        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        try:
            sock.connect(str(socket_path))
        except OSError as e:
            log.error("Scene detector: connect failed: %s", e)
            return
        log.info("Scene detector: connected, starting ffmpeg")
        node = ff.detect_scenes_from_pipe(
            scene_threshold=threshold, flush_frames=SCENE_FLUSH_FRAMES,
        )
        proc = ff.run_async(node, pipe_stdin=True, pipe_stdout=True, pipe_stderr=True)
        self._procs["scene_detector"] = proc

        # Thread: socket → ffmpeg stdin
        def _feed_stdin():
            try:
                while "stop" not in self._stop_flags:
                    data = sock.recv(65536)
                    if not data:
                        break
                    try:
                        proc.stdin.write(data)
                        proc.stdin.flush()
                    except (BrokenPipeError, OSError):
                        break
            finally:
                # Close stdin so ffmpeg sees EOF and drains cleanly.
                try:
                    proc.stdin.close()
                except OSError:
                    pass
                sock.close()
                log.debug("Scene detector: stdin feeder stopped")

        stdin_t = Thread(target=_feed_stdin, daemon=True, name="scene_stdin")
        stdin_t.start()

        # Thread: ffmpeg stderr → parse showinfo timestamps
        ts_queue = Queue()
        offset = self._get_current_global_offset() if self._get_current_global_offset else 0.0

        def _read_stderr():
            for raw in proc.stderr:
                line = raw.decode("utf-8", errors="replace").rstrip()
                if not line:
                    continue
                if "showinfo" in line:
                    pts_match = re.search(r"pts_time:\s*([\d.]+)", line)
                    if pts_match:
                        ts_queue.put(float(pts_match.group(1)))
                elif line.startswith("[") or "error" in line.lower() or "warning" in line.lower():
                    log.debug("[scene] %s", line)
            log.debug("[scene] stderr closed")

        stderr_t = Thread(target=_read_stderr, daemon=True, name="scene_stderr")
        stderr_t.start()

        # Main: ffmpeg stdout → extract JPEG frames by SOI/EOI markers.
        last_pts = 0.0
        buf = b""
        raw_fd = proc.stdout.fileno()
        while True:
            chunk = os.read(raw_fd, 65536)
            if not chunk:
                break
            buf += chunk
            while True:
                soi = buf.find(b"\xff\xd8")            # JPEG start-of-image
                if soi < 0:
                    buf = b""
                    break
                eoi = buf.find(b"\xff\xd9", soi + 2)   # JPEG end-of-image
                if eoi < 0:
                    buf = buf[soi:]   # keep partial frame for the next chunk
                    break
                jpeg_data = buf[soi:eoi + 2]
                buf = buf[eoi + 2:]
                try:
                    pts_time = ts_queue.get(timeout=2.0)
                except Empty:
                    log.warning("No timestamp for scene frame")
                    pts_time = 0.0
                # Skip flush frames (within 100ms of previous = duplicate)
                if pts_time - last_pts < 0.1:
                    log.debug("Skipping flush frame at pts=%.3f", pts_time)
                    continue
                last_pts = pts_time
                global_ts = pts_time + offset
                self.on_raw_frame(jpeg_data, global_ts)
        ff.stop_proc(proc, timeout=3)
        log.info("Scene detector: ffmpeg exited (last_pts=%.1f)", last_pts)

    # -- Audio extraction --

    def start_audio_extractor(self, on_new_audio=None):
        """Periodically extract audio from the growing fMP4 as WAV chunks."""
        self._on_new_audio = on_new_audio
        self.audio_dir.mkdir(parents=True, exist_ok=True)
        t = Thread(target=self._audio_loop, daemon=True, name="audio_extractor")
        t.start()
        self._threads["audio_extractor"] = t

    def stop(self):
        """Signal all loops to stop and terminate child ffmpeg processes."""
        self._stop_flags.add("stop")
        # _procs is always a dict (set in __init__); the old hasattr/getattr
        # dance and the no-op `{} if hasattr(...) else {}` reset are gone.
        for proc in self._procs.values():
            ff.stop_proc(proc, timeout=3)
        self._procs.clear()

    def _has_audio_stream(self, seg: Path) -> bool:
        """True if ffprobe reports at least one audio stream in `seg`."""
        try:
            import ffmpeg as ffmpeg_lib
            info = ffmpeg_lib.probe(str(seg))
            return any(s.get("codec_type") == "audio" for s in info.get("streams", []))
        except Exception:
            return False

    def _find_audio_source(self):
        """Find audio source: fMP4 with audio track, or standalone audio.aac
        from the Rust server. Returns a Path or None."""
        seg = self._get_recording_path() if self._get_recording_path else None
        if seg and seg.exists() and self._has_audio_stream(seg):
            return seg
        # Rust server writes raw AAC alongside the fMP4
        aac_path = self.session_dir / "stream" / "audio.aac"
        if aac_path.exists() and aac_path.stat().st_size > 100:
            return aac_path
        return None

    def _audio_loop(self):
        """Poll the audio source and extract newly-safe ranges as WAV chunks.

        Tracks processed_time per source; switching sources (e.g. fMP4 →
        audio.aac) resets the chunk counter and offset.
        """
        processed_time = 0.0
        chunk_num = 0
        current_source = None
        while "stop" not in self._stop_flags:
            time.sleep(AUDIO_EXTRACT_INTERVAL)
            source = self._find_audio_source()
            if not source:
                continue
            if source != current_source:
                current_source = source
                processed_time = 0.0
                chunk_num = 0
                log.info("Audio extractor: using %s", source.name)
            if source.stat().st_size < 100_000:
                continue   # too small to probe reliably yet
            safe_duration = self._probe_safe_duration(source)
            if safe_duration is None or safe_duration <= 0:
                continue
            # Stay behind the write head; only extract once at least ~1s
            # of new audio is available.
            process_to = safe_duration - AUDIO_SAFETY_MARGIN
            if process_to <= processed_time + 1.0:
                continue
            chunk_duration = process_to - processed_time
            wav_path = self.audio_dir / f"chunk_{chunk_num:04d}.wav"
            try:
                ff.extract_audio_chunk(source, wav_path,
                                       start_time=processed_time,
                                       duration=chunk_duration)
            except Exception as e:
                log.error("Audio extraction failed: %s", e)
                continue
            if wav_path.exists() and wav_path.stat().st_size > 100:
                offset = self._get_current_global_offset() if self._get_current_global_offset else 0.0
                global_start = processed_time + offset
                log.info("Audio chunk: %s (%.1fs → %.1fs, global %.1fs)",
                         wav_path.name, processed_time, process_to, global_start)
                if self._on_new_audio:
                    self._on_new_audio(
                        wav_path, global_start, chunk_duration,
                        segment_path=source, local_start=processed_time,
                    )
                chunk_num += 1
                processed_time = process_to
        log.info("Audio extractor stopped")

    def _probe_safe_duration(self, seg: Path):
        """Duration of `seg` in seconds via ffprobe.

        Falls back to a rough size-based estimate (bytes / 65_000) when
        probing fails, and returns None when even stat() fails.
        """
        try:
            import ffmpeg as ffmpeg_lib
            info = ffmpeg_lib.probe(str(seg))
            dur = float(info.get("format", {}).get("duration", 0))
            if dur > 0:
                return dur
            for stream in info.get("streams", []):
                sdur = float(stream.get("duration", 0))
                if sdur > 0:
                    return sdur
        except Exception:
            pass
        try:
            return seg.stat().st_size / 65_000
        except Exception:
            return None

256
cht/stream/recorder.py Normal file
View File

@@ -0,0 +1,256 @@
"""StreamRecorder: ffmpeg-based network receiver and session recorder.
Responsible for transport + real-time scene detection:
- TCP listen (receives mpegts from sender)
- Writing fragmented MP4 to disk
- UDP relay for live display
- Scene frame detection via ffmpeg stdout pipe (low-latency, ~same-second)
- Segment rotation
Scene detection lives here — not in SessionProcessor — because it reads from
the recorder's ffmpeg stdout pipe directly. Moving it to poll fMP4 would add
3-5s latency (disk IPC vs kernel pipe). When Rust replaces this class, scene
detection moves in-process (zero IPC, even faster).
SessionProcessor reads from the fMP4 files this class produces for
non-latency-sensitive work (audio extraction).
"""
import logging
import re
from pathlib import Path
from threading import Thread
from cht.config import (
STREAM_HOST,
STREAM_PORT,
RELAY_PORT,
SCENE_THRESHOLD,
SCENE_FLUSH_FRAMES,
)
from cht.stream import ffmpeg as ff
log = logging.getLogger(__name__)
class StreamRecorder:
"""Owns the ffmpeg recording process, relay, and real-time scene detection."""
def __init__(self, session_dir: Path, scene_threshold: float = SCENE_THRESHOLD):
    """Prepare recorder state for *session_dir*; no processes start here.

    Args:
        session_dir: Session root; recordings go under its ``stream/``
            subdirectory.
        scene_threshold: ffmpeg scene-change score threshold used by the
            select filter.
    """
    self.session_dir = session_dir
    self.stream_dir = session_dir / "stream"
    self.scene_threshold = scene_threshold
    # Live child processes keyed by role (currently only "recorder").
    self._procs: dict = {}
    # Index of the segment currently being written.
    self._segment = 0
    # Global start time (seconds) of each segment, keyed by segment index.
    self._segment_offsets: dict[int, float] = {0: 0.0}
    self._on_raw_frame = None  # cb(jpeg_bytes: bytes, pts_time: float)
    self._on_segment_complete = None  # cb(completed_segment_path)
def set_on_raw_frame(self, cb):
    """Register *cb*, called with (jpeg_bytes, pts_time) for each
    scene-change frame. pts_time is on the global session timeline
    (segment-local pts plus the segment's offset).
    """
    self._on_raw_frame = cb
def set_on_segment_complete(self, cb):
    """Register *cb*, called with the completed segment's Path on rotation."""
    self._on_segment_complete = cb
# -- Lifecycle --
def start(self):
    """Resume after any segments left by a previous run, then launch ffmpeg."""
    # Continue numbering past the segments already on disk.
    self._segment = len(self.recording_segments)
    self._rebuild_offsets()
    self._launch()
def stop(self):
    """Terminate every child process and drop the handles."""
    for handle in list(self._procs.values()):
        ff.stop_proc(handle)
    self._procs.clear()
def restart(self):
    """Rotate to a new segment and relaunch.

    Stops the current ffmpeg process, records the finished segment's
    duration (so later timestamps stay on the global timeline), notifies
    the segment-complete callback, then starts a fresh process writing
    the next segment file.
    """
    old = self._procs.pop("recorder", None)
    if old:
        ff.stop_proc(old)
    # Capture the path BEFORE bumping the counter — recording_path is
    # derived from self._segment.
    completed_path = self.recording_path
    self._advance_segment_offset(completed_path)
    self._segment += 1
    log.info("Restarting recorder → segment %d (offset %.1fs)",
             self._segment, self.current_global_offset)
    if self._on_segment_complete:
        self._on_segment_complete(completed_path)
    self._launch()
def alive(self) -> bool:
    """True while the recorder ffmpeg process exists and is still running."""
    proc = self._procs.get("recorder")
    if proc is None:
        return False
    return proc.poll() is None
def update_scene_threshold(self, new_threshold: float):
    """Update threshold and restart recorder (restarts ffmpeg filter)."""
    # The threshold is baked into the ffmpeg select-filter expression,
    # so applying it requires a full segment rotation via restart().
    self.scene_threshold = new_threshold
    log.info("Scene threshold → %.2f, restarting recorder", new_threshold)
    self.restart()
# -- Properties --
@property
def stream_url(self) -> str:
    """TCP listen URL the sender connects to (incoming mpegts)."""
    return f"tcp://{STREAM_HOST}:{STREAM_PORT}?listen"
@property
def relay_url(self) -> str:
    """Local UDP URL the recorder relays to for live display."""
    return f"udp://127.0.0.1:{RELAY_PORT}"
@property
def recording_path(self) -> Path:
    """Path of the segment currently being written (zero-padded index)."""
    return self.stream_dir / f"recording_{self._segment:03d}.mp4"
@property
def recording_segments(self) -> list[Path]:
    """All segment files on disk, name-sorted (== sorted by index)."""
    return sorted(self.stream_dir.glob("recording_*.mp4"))
@property
def current_global_offset(self) -> float:
    """Global start time (seconds) of the current segment."""
    return self._segment_offsets.get(self._segment, 0.0)
# -- Internal --
def _launch(self):
    """Start the combined receive/record/relay/scene-detect ffmpeg process.

    Both pipes are required: stdout carries the MJPEG scene frames and
    stderr carries showinfo timestamp lines — consumed by the reader
    threads started in _start_scene_readers.
    """
    node = ff.receive_record_relay_and_detect(
        self.stream_url, self.recording_path, self.relay_url,
        scene_threshold=self.scene_threshold,
        flush_frames=SCENE_FLUSH_FRAMES,
    )
    proc = ff.run_async(node, pipe_stdout=True, pipe_stderr=True)
    self._procs["recorder"] = proc
    # BUGFIX: format string was "pid=%s%s" — the pid and the recording
    # path were concatenated with no separator in the log output.
    log.info("Recorder+scene: pid=%s → %s (threshold=%.2f)",
             proc.pid, self.recording_path, self.scene_threshold)
    self._start_scene_readers(proc)
def _rebuild_offsets(self):
    """Recompute every segment's global start offset from on-disk durations."""
    from cht.session import probe_duration
    self._segment_offsets = {}
    elapsed = 0.0
    for index, segment in enumerate(self.recording_segments):
        self._segment_offsets[index] = elapsed
        elapsed += probe_duration(segment)
def _advance_segment_offset(self, completed_path: Path):
    """Record where the next segment starts on the global timeline."""
    from cht.session import probe_duration
    duration = probe_duration(completed_path)
    start = self._segment_offsets.get(self._segment, 0.0)
    next_start = start + duration
    self._segment_offsets[self._segment + 1] = next_start
    log.info("Segment %d completed (%.1fs), next offset: %.1fs",
             self._segment, duration, next_start)
def _probe_safe_duration(self):
    """Best-effort duration (seconds) of the current recording, or None.

    ffprobe metadata first (container, then per-stream); falls back to a
    rough size-based estimate (~65 kB/s of muxed data) when probing fails.
    """
    try:
        import ffmpeg as ffmpeg_lib
        meta = ffmpeg_lib.probe(str(self.recording_path))
        # Container duration first, then per-stream durations, in order.
        candidates = [meta.get("format", {}).get("duration", 0)]
        candidates += [s.get("duration", 0) for s in meta.get("streams", [])]
        for candidate in candidates:
            value = float(candidate)
            if value > 0:
                return value
    except Exception:
        pass
    try:
        return self.recording_path.stat().st_size / 65_000
    except Exception:
        return None
def capture_now(self, on_raw_frame=None):
    """Extract a single frame from the current recording position.
    Calls on_raw_frame(jpeg_bytes, pts_time) — SessionProcessor handles
    file writing and index updates.

    Runs asynchronously on a daemon thread; pts_time passed to the
    callback is on the global session timeline.
    """
    def _capture():
        # Probe how far the recording has safely progressed; bail if the
        # file is too short to step back one second from the end.
        safe_duration = self._probe_safe_duration()
        if not safe_duration or safe_duration < 1:
            log.warning("capture_now: recording too short")
            return
        # Grab the frame 1s before the probed end (avoids the partial tail).
        local_timestamp = safe_duration - 1
        import tempfile, os
        # delete=False: ffmpeg writes to the path after the handle closes;
        # the file is unlinked manually in the finally block below.
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
            tmp_path = Path(tmp.name)
        try:
            ff.extract_frame_at(self.recording_path, tmp_path, local_timestamp)
            if not tmp_path.exists():
                log.warning("capture_now: frame not written")
                return
            jpeg_bytes = tmp_path.read_bytes()
        except Exception as e:
            log.error("capture_now failed: %s", e)
            return
        finally:
            # Always remove the temp file, even on early return/error.
            try:
                os.unlink(tmp_path)
            except Exception:
                pass
        if on_raw_frame:
            # Translate segment-local timestamp to the global timeline.
            on_raw_frame(jpeg_bytes, local_timestamp + self.current_global_offset)
    # Off-thread: probing + ffmpeg extraction would block the caller.
    Thread(target=_capture, daemon=True, name="capture_now").start()
def _start_scene_readers(self, proc):
    """Spawn daemon threads that consume the recorder's stderr and stdout.

    stderr: ffmpeg showinfo lines — pts_time values are parsed and queued.
    stdout: raw MJPEG byte stream — each complete JPEG is paired with the
    next queued pts_time and forwarded to the raw-frame callback.
    """
    from queue import Queue, Empty
    import os
    # Timestamps flow stderr-reader → stdout-reader: ffmpeg emits one
    # showinfo line per selected frame, so the queue pairs them 1:1.
    ts_queue = Queue()
    def _read_stderr():
        for raw in proc.stderr:
            line = raw.decode("utf-8", errors="replace").rstrip()
            if not line:
                continue
            if "showinfo" not in line:
                # Ordinary ffmpeg chatter — keep at debug level.
                log.debug("[recorder] %s", line)
                continue
            m = re.search(r"pts_time:\s*([\d.]+)", line)
            if m:
                ts_queue.put(float(m.group(1)))
        log.info("[recorder] stderr closed, exit=%s", proc.poll())
    def _read_stdout():
        # Offset is fixed for the segment's lifetime; restart() launches a
        # new process and fresh readers with the updated offset.
        offset = self.current_global_offset
        last_pts = -1.0
        buf = b""
        # Read the raw fd directly to bypass Python-level pipe buffering.
        raw_fd = proc.stdout.fileno()
        while True:
            chunk = os.read(raw_fd, 65536)
            if not chunk:
                break
            buf += chunk
            while True:
                # Scan for a complete JPEG: SOI (FFD8) .. EOI (FFD9).
                soi = buf.find(b"\xff\xd8")
                if soi < 0:
                    # No frame start in buffer — discard and wait for more.
                    buf = b""
                    break
                eoi = buf.find(b"\xff\xd9", soi + 2)
                if eoi < 0:
                    # Partial frame — keep from SOI onward for the next chunk.
                    buf = buf[soi:]
                    break
                jpeg_data = buf[soi:eoi + 2]
                buf = buf[eoi + 2:]
                try:
                    pts_time = ts_queue.get(timeout=2.0)
                except Empty:
                    log.warning("No timestamp for scene frame, using 0")
                    pts_time = 0.0
                # Frames <100ms apart are treated as pipe-flush extras
                # (see flush_frames in the select expression) and dropped.
                if pts_time - last_pts < 0.1:
                    log.debug("Skipping flush frame at pts=%.3f", pts_time)
                    continue
                last_pts = pts_time
                global_ts = pts_time + offset
                log.debug("Raw scene frame at pts=%.3f (global=%.3f)", pts_time, global_ts)
                if self._on_raw_frame:
                    self._on_raw_frame(jpeg_data, global_ts)
        log.info("[recorder] stdout closed")
    Thread(target=_read_stderr, daemon=True, name="recorder_stderr").start()
    Thread(target=_read_stdout, daemon=True, name="recorder_stdout").start()