First try at the new single-process approach

This commit is contained in:
2026-04-03 10:47:27 -03:00
parent fbf9984a5d
commit 2a049d8c2b
5 changed files with 164 additions and 254 deletions

View File

@@ -13,7 +13,6 @@ SESSIONS_DIR = Path(os.environ.get("CHT_SESSIONS_DIR", DATA_DIR / "sessions"))
STREAM_HOST = "0.0.0.0"
STREAM_PORT = 4444
RELAY_PORT = 4445 # UDP loopback relay for live display
SCENE_RELAY_PORT = 4446 # UDP loopback relay for scene detector
# Frame extraction — scene-only, no interval fallback
SCENE_THRESHOLD = 0.10 # 0-1, lower = more sensitive; 0.1 catches slide/window changes

View File

@@ -39,7 +39,7 @@ def receive_and_record(stream_url, output_path):
)
def receive_record_and_relay(stream_url, output_path, relay_url, scene_relay_url=None):
def receive_record_and_relay(stream_url, output_path, relay_url):
"""Receive TCP stream, write to fragmented MP4, and relay to UDP loopback.
Fragmented MP4 (frag_keyframe+empty_moov) avoids MKV tail corruption:
@@ -47,7 +47,6 @@ def receive_record_and_relay(stream_url, output_path, relay_url, scene_relay_url
always valid up to the last complete fragment (~1 keyframe interval ≈ 2s).
Uses ffmpeg tee via merge_outputs: one process, identical timestamps.
Optionally sends a second relay for the scene detector.
"""
stream = ffmpeg.input(stream_url, fflags="nobuffer", flags="low_delay")
file_out = ffmpeg.output(
@@ -61,50 +60,49 @@ def receive_record_and_relay(stream_url, output_path, relay_url, scene_relay_url
stream, relay_url,
c="copy", f="mpegts",
)
outputs = [file_out, relay_out]
if scene_relay_url:
scene_out = ffmpeg.output(
stream, scene_relay_url,
c="copy", f="mpegts",
)
outputs.append(scene_out)
return ffmpeg.merge_outputs(*outputs).global_args(*QUIET_ARGS)
return ffmpeg.merge_outputs(file_out, relay_out).global_args(*QUIET_ARGS)
def start_live_scene_detector(stream_url, output_dir, scene_threshold=0.10,
start_number=1):
"""Start a persistent ffmpeg process that detects scenes from a live stream.
def receive_record_relay_and_detect(stream_url, output_path, relay_url,
frames_dir, scene_threshold=0.10,
start_number=1):
"""Single process: receive TCP → record fMP4 + relay UDP + scene detect.
Reads from the UDP relay in real-time — no file seeking, no restart overhead.
Writes frame JPEGs and emits showinfo on stderr as scenes are detected.
Returns the async process (stderr must be read continuously).
One ffmpeg process, three output branches from the same TCP input:
1. File output — c=copy to fMP4
2. UDP relay — c=copy to mpegts for live display
3. Scene frames — CUDA decode → select(scene) → showinfo → JPEG files
The scene filter runs on decoded frames in-process, so detection latency
is near-zero (no polling, no file re-reading, no separate process).
Stderr must be read continuously to parse showinfo lines.
"""
stream = ffmpeg.input(stream_url, fflags="nobuffer", flags="low_delay",
hwaccel="cuda")
# Copy outputs (raw packet remux, no decode)
file_out = ffmpeg.output(
stream, str(output_path),
c="copy", f="mp4",
movflags="frag_keyframe+empty_moov+default_base_moof",
flush_packets=1,
**{"bsf:a": "aac_adtstoasc"},
)
relay_out = ffmpeg.output(
stream, relay_url,
c="copy", f="mpegts",
)
# Scene detection output (decode + filter → JPEG)
select_expr = f"gt(scene,{scene_threshold})"
stream = ffmpeg.input(
stream_url,
fflags="nobuffer+flush_packets",
flags="low_delay",
probesize="32000",
analyzeduration="0",
hwaccel="cuda",
)
stream = stream.filter("select", select_expr).filter("showinfo")
output = (
ffmpeg.output(
stream,
str(output_dir / "F%04d.jpg"),
vsync="vfr",
flush_packets=1,
**{"q:v": "2"},
start_number=start_number,
)
.global_args(*GLOBAL_ARGS)
scene_stream = stream.filter("select", select_expr).filter("showinfo")
scene_out = ffmpeg.output(
scene_stream, str(frames_dir / "F%04d.jpg"),
vsync="vfr", flush_packets=1, **{"q:v": "2"},
start_number=start_number,
)
log.info("start_live_scene_detector: %s", " ".join(output.compile()))
return run_async(output, pipe_stderr=True)
return ffmpeg.merge_outputs(file_out, relay_out, scene_out).global_args(*GLOBAL_ARGS)
def extract_scene_frames(input_path, output_dir, scene_threshold=0.10,

View File

@@ -2,23 +2,22 @@
StreamManager: orchestrates ffmpeg for recording and scene detection.
Architecture:
sender → TCP:4444 → ffmpeg (writes recording.ts)
recording.ts → mpv (plays via Timeline)
recording.ts → ffmpeg scene detection (periodic, incremental)
sender → TCP:4444 → single ffmpeg process:
1. writes fMP4 to disk (c=copy)
2. relays UDP for live display (c=copy)
3. CUDA decode → scene filter → JPEG frames (real-time)
"""
import json
import logging
import re
import time
from pathlib import Path
from threading import Thread
from cht.config import (
STREAM_HOST,
STREAM_PORT,
RELAY_PORT,
SCENE_RELAY_PORT,
SCENE_THRESHOLD,
SESSIONS_DIR,
AUDIO_EXTRACT_INTERVAL,
@@ -156,10 +155,6 @@ class StreamManager:
def relay_url(self):
return f"udp://127.0.0.1:{RELAY_PORT}"
@property
def scene_relay_url(self):
return f"udp://127.0.0.1:{SCENE_RELAY_PORT}"
@property
def recording_path(self):
"""Current recording segment path."""
@@ -198,108 +193,33 @@ class StreamManager:
return proc is not None and proc.poll() is None
def _launch_recorder(self):
node = ff.receive_record_and_relay(
start_number = self._next_frame_number()
node = ff.receive_record_relay_and_detect(
self.stream_url, self.recording_path, self.relay_url,
self.frames_dir, scene_threshold=self.scene_threshold,
start_number=start_number,
)
proc = ff.run_async(node, pipe_stderr=True)
self._procs["recorder"] = proc
log.info("Recorder: pid=%s%s", proc.pid, self.recording_path)
self._start_stderr_reader("recorder", proc)
log.info("Recorder+scene: pid=%s%s (threshold=%.2f, start_number=%d)",
proc.pid, self.recording_path, self.scene_threshold, start_number)
self._start_scene_stderr_reader(proc, start_number)
# -- Scene Detection --
def start_scene_detector(self, on_new_frames=None):
"""Periodically run scene detection on new portions of the recording.
"""Register callback for new scene frames.
Args:
on_new_frames: callback(list of {id, timestamp, path}) for new frames
Scene detection runs inside the recorder process (single ffmpeg).
The stderr reader thread parses showinfo lines and fires this callback.
"""
self._on_new_frames = on_new_frames
def _detect():
processed_time = 0.0
current_segment = None
last_threshold = self.scene_threshold
while "stop" not in self._stop_flags:
time.sleep(1.0)
# Threshold changed — reset to re-process recent content
if self.scene_threshold != last_threshold:
log.info("Threshold changed %.2f%.2f, resetting",
last_threshold, self.scene_threshold)
last_threshold = self.scene_threshold
# Back up a bit to re-scan with new sensitivity
processed_time = max(0.0, processed_time - 10)
seg = self.recording_path
if not seg.exists():
continue
if seg != current_segment:
current_segment = seg
processed_time = 0.0
log.info("Scene detector: switched to %s", seg.name)
size = seg.stat().st_size
if size < 100_000:
continue
safe_duration = self._estimate_safe_duration()
if safe_duration is None or safe_duration <= 0:
continue
process_to = safe_duration - 1
if process_to <= processed_time + 0.5:
continue
log.info("Scene detection: %.1fs → %.1fs", processed_time, process_to)
new_frames = self._detect_scenes(
start_time=processed_time,
end_time=process_to,
)
if new_frames:
log.info("Found %d new scene frames (total: %d)",
len(new_frames), self._next_frame_number() - 1)
if self._on_new_frames:
self._on_new_frames(new_frames)
processed_time = process_to
log.info("Scene detector stopped")
t = Thread(target=_detect, daemon=True, name="scene_detector")
t.start()
self._threads["scene_detector"] = t
def _estimate_safe_duration(self):
"""Estimate recording duration. Uses ffprobe, falls back to file size.
For fragmented MP4 (empty_moov), format-level duration is 0 so we
check stream duration from the last video stream instead.
"""
try:
import ffmpeg as ffmpeg_lib
info = ffmpeg_lib.probe(str(self.recording_path))
# Format duration works for non-fragmented; 0 for empty_moov fMP4
dur = float(info.get("format", {}).get("duration", 0))
if dur > 0:
return dur
# Fragmented MP4: check video stream duration
for stream in info.get("streams", []):
sdur = float(stream.get("duration", 0))
if sdur > 0:
return sdur
except Exception:
pass
# Fallback: rough estimate from file size (~500kbit/s typical for this stream)
try:
size = self.recording_path.stat().st_size
return size / 65_000 # ~500kbps → 62.5 KB/s
except Exception:
return None
def update_scene_threshold(self, new_threshold):
"""Update scene threshold. Restarts the recorder to apply new filter."""
self.scene_threshold = new_threshold
log.info("Threshold changed → %.2f, restarting recorder", new_threshold)
self.restart_recorder()
def _next_frame_number(self):
"""Determine next frame number from the index (source of truth)."""
@@ -309,69 +229,82 @@ class StreamManager:
return len(index) + 1
return 1
def _detect_scenes(self, start_time, end_time):
"""Run ffmpeg scene detection on a time range. Returns list of new frame entries."""
import time as _time
t0 = _time.monotonic()
duration = end_time - start_time
start_number = self._next_frame_number()
try:
_stdout, stderr = ff.extract_scene_frames(
self.recording_path,
self.frames_dir,
scene_threshold=self.scene_threshold,
start_number=start_number,
start_time=start_time,
duration=duration,
)
except Exception as e:
log.error("Scene detection failed: %s", e)
return []
# Parse new frames from showinfo output — match each showinfo line
# to the corresponding file ffmpeg wrote (sequential from start_number)
new_frames = []
def _append_frame_index(self, entry):
"""Append a frame entry to index.json."""
index_path = self.frames_dir / "index.json"
index = json.loads(index_path.read_text()) if index_path.exists() else []
index.append(entry)
index_path.write_text(json.dumps(index, indent=2))
offset = self.current_global_offset
frame_num = start_number
for line in stderr.splitlines():
if "showinfo" not in line:
continue
pts_match = re.search(r"pts_time:\s*([\d.]+)", line)
if pts_match:
def _start_scene_stderr_reader(self, proc, start_number):
"""Read stderr continuously, parsing showinfo lines for scene frames.
Each showinfo line corresponds to a JPEG that ffmpeg writes. We wait
briefly for the file to appear on disk (showinfo fires before the
muxer flushes), then update the index and fire the callback.
"""
def _read():
frame_num = start_number
offset = self.current_global_offset
for raw in proc.stderr:
line = raw.decode("utf-8", errors="replace").rstrip()
if not line:
continue
if "showinfo" not in line:
log.debug("[recorder] %s", line)
continue
pts_match = re.search(r"pts_time:\s*([\d.]+)", line)
if not pts_match:
continue
pts_time = float(pts_match.group(1))
frame_id = f"F{frame_num:04d}"
frame_path = self.frames_dir / f"{frame_id}.jpg"
if frame_path.exists():
entry = {
"id": frame_id,
"timestamp": pts_time + offset,
"path": str(frame_path),
"sent_to_agent": False,
}
index.append(entry)
new_frames.append(entry)
# Wait for ffmpeg to flush the JPEG (showinfo fires before mux)
for _ in range(20): # up to ~200ms
if frame_path.exists() and frame_path.stat().st_size > 0:
break
time.sleep(0.01)
if not frame_path.exists():
log.warning("Scene frame %s not found on disk", frame_id)
continue
entry = {
"id": frame_id,
"timestamp": pts_time + offset,
"path": str(frame_path),
"sent_to_agent": False,
}
self._append_frame_index(entry)
log.info("Scene frame: %s at %.1fs (pts=%.1f + offset=%.1f)",
frame_id, entry["timestamp"], pts_time, offset)
if self._on_new_frames:
self._on_new_frames([entry])
frame_num += 1
index_path.write_text(json.dumps(index, indent=2))
log.info("[recorder] stderr closed, exit=%s", proc.poll())
elapsed_ms = (_time.monotonic() - t0) * 1000
tel = getattr(self, "telemetry", None)
if tel:
tel.metric("scene_detection", {
"start": start_time, "end": end_time,
"duration": duration,
"frames_found": len(new_frames),
"total_frames": len(index),
"threshold": self.scene_threshold,
"elapsed_ms": round(elapsed_ms),
"file_duration": self._estimate_safe_duration() or 0,
})
Thread(target=_read, daemon=True, name="recorder_stderr").start()
return new_frames
def _probe_safe_duration(self):
"""Probe current recording duration via ffprobe. Returns seconds or None."""
try:
import ffmpeg as ffmpeg_lib
info = ffmpeg_lib.probe(str(self.recording_path))
dur = float(info.get("format", {}).get("duration", 0))
if dur > 0:
return dur
for stream in info.get("streams", []):
sdur = float(stream.get("duration", 0))
if sdur > 0:
return sdur
except Exception:
pass
try:
return self.recording_path.stat().st_size / 65_000
except Exception:
return None
def capture_now(self, on_new_frames=None):
"""Capture a single frame from the current recording position.
@@ -380,16 +313,14 @@ class StreamManager:
to the index. Runs in a thread to avoid blocking the UI.
"""
def _capture():
safe_duration = self._estimate_safe_duration()
safe_duration = self._probe_safe_duration()
if not safe_duration or safe_duration < 1:
log.warning("capture_now: recording too short")
return
local_timestamp = safe_duration - 1
timestamp = local_timestamp + self.current_global_offset
index_path = self.frames_dir / "index.json"
index = json.loads(index_path.read_text()) if index_path.exists() else []
frame_num = len(index) + 1
frame_num = self._next_frame_number()
frame_id = f"F{frame_num:04d}"
frame_path = self.frames_dir / f"{frame_id}.jpg"
@@ -409,8 +340,7 @@ class StreamManager:
"path": str(frame_path),
"sent_to_agent": False,
}
index.append(entry)
index_path.write_text(json.dumps(index, indent=2))
self._append_frame_index(entry)
log.info("Manual capture: %s at %.1fs", frame_id, timestamp)
if on_new_frames:
@@ -453,7 +383,7 @@ class StreamManager:
if seg.stat().st_size < 100_000:
continue
safe_duration = self._estimate_safe_duration()
safe_duration = self._probe_safe_duration()
if safe_duration is None or safe_duration <= 0:
continue
@@ -503,12 +433,3 @@ class StreamManager:
ff.stop_proc(proc)
self._procs.clear()
def _start_stderr_reader(self, name, proc):
def _read():
for line in proc.stderr:
text = line.decode("utf-8", errors="replace").rstrip()
if text:
log.debug("[%s] %s", name, text)
log.info("[%s] exited: %s", name, proc.poll())
Thread(target=_read, daemon=True, name=f"{name}_stderr").start()

View File

@@ -159,9 +159,9 @@ class ChtWindow(Adw.ApplicationWindow):
)
def _on_scene_threshold(self, val):
if self._lifecycle.stream_mgr:
if self._lifecycle.stream_mgr and not self._lifecycle.stream_mgr.readonly:
old = self._lifecycle.stream_mgr.scene_threshold
self._lifecycle.stream_mgr.scene_threshold = val
self._lifecycle.stream_mgr.update_scene_threshold(val)
if self._telemetry:
self._telemetry.event("scene_threshold_changed", {"from": old, "to": val})