"""SessionProcessor: processes raw frame data and audio from recordings.

Receives raw JPEG frames from StreamRecorder (via on_raw_frame callback) and
handles all frame processing: file writing, frame index, GUI callbacks.

Also extracts audio from fMP4 files by polling (latency-insensitive).

The boundary with StreamRecorder:
    Recorder: reads pipe → fires on_raw_frame(jpeg_bytes, global_ts)
    Processor: writes JPEG to disk, updates index, fires on_new_frames to GUI

When Rust owns transport, SessionProcessor connects to the server's Unix
domain socket (scene.sock) for a live H.264 stream, pipes it to ffmpeg
for GPU scene detection. Continuous stream — no polling, no restarts.
"""
import json
|
|
import logging
|
|
import os
|
|
import re
|
|
import socket
|
|
import time
|
|
from pathlib import Path
|
|
from queue import Queue, Empty
|
|
from threading import Thread
|
|
|
|
from cht.config import (
|
|
AUDIO_EXTRACT_INTERVAL,
|
|
AUDIO_SAFETY_MARGIN,
|
|
SCENE_THRESHOLD,
|
|
SCENE_FLUSH_FRAMES,
|
|
)
|
|
from cht.stream import ffmpeg as ff
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
class SessionProcessor:
    """Writes scene frames to disk and extracts audio from fMP4."""

    def __init__(self, session_dir: Path):
        # Root of the recording session; frames/ and audio/ live under it.
        self.session_dir = session_dir
        self.frames_dir = session_dir / "frames"
        self.audio_dir = session_dir / "audio"

        # Cooperative shutdown: worker loops poll this set for the "stop" token.
        self._stop_flags: set[str] = set()
        # Background worker threads, keyed by role ("scene_detector", ...).
        self._threads: dict[str, Thread] = {}
        # GUI callbacks; wired later via set_on_new_frames / start_audio_extractor.
        self._on_new_frames = None
        self._on_new_audio = None

        # Recorder-state accessors; wired via attach().
        self._get_recording_path = None
        self._get_current_global_offset = None
def attach(self, get_recording_path, get_current_global_offset):
|
|
"""Wire up callbacks to query the recorder's current state."""
|
|
self._get_recording_path = get_recording_path
|
|
self._get_current_global_offset = get_current_global_offset
|
|
|
|
# -- Scene frame handling (called from recorder's pipe thread) --
|
|
|
|
def on_raw_frame(self, jpeg_bytes: bytes, global_ts: float):
|
|
"""Receive a raw JPEG frame from the recorder pipe. Write and index it."""
|
|
frame_num = self._next_frame_number()
|
|
frame_id = f"F{frame_num:04d}"
|
|
frame_path = self.frames_dir / f"{frame_id}.jpg"
|
|
frame_path.write_bytes(jpeg_bytes)
|
|
|
|
entry = {
|
|
"id": frame_id,
|
|
"timestamp": global_ts,
|
|
"path": str(frame_path),
|
|
"sent_to_agent": False,
|
|
}
|
|
self._append_frame_index(entry)
|
|
log.info("Scene frame: %s at %.1fs", frame_id, global_ts)
|
|
|
|
if self._on_new_frames:
|
|
self._on_new_frames([entry])
|
|
|
|
def set_on_new_frames(self, cb):
|
|
self._on_new_frames = cb
|
|
|
|
# -- On-demand capture (recorder extracts bytes, processor indexes) --
|
|
|
|
def on_captured_frame(self, jpeg_bytes: bytes, global_ts: float):
|
|
"""Receive a manually captured frame. Write and index it."""
|
|
self.on_raw_frame(jpeg_bytes, global_ts)
|
|
|
|
# -- Frame index --
|
|
|
|
@property
|
|
def frame_count(self) -> int:
|
|
index_path = self.frames_dir / "index.json"
|
|
if index_path.exists():
|
|
try:
|
|
return len(json.loads(index_path.read_text()))
|
|
except Exception:
|
|
pass
|
|
return 0
|
|
|
|
def _next_frame_number(self) -> int:
|
|
index_path = self.frames_dir / "index.json"
|
|
if index_path.exists():
|
|
try:
|
|
return len(json.loads(index_path.read_text())) + 1
|
|
except Exception:
|
|
pass
|
|
return 1
|
|
|
|
def _append_frame_index(self, entry: dict):
|
|
index_path = self.frames_dir / "index.json"
|
|
index = json.loads(index_path.read_text()) if index_path.exists() else []
|
|
index.append(entry)
|
|
index_path.write_text(json.dumps(index, indent=2))
|
|
|
|
# -- Scene detection via Unix socket (Rust transport mode) --
|
|
|
|
def start_scene_detector(self, threshold=None):
|
|
"""Connect to Rust server's scene socket and run GPU scene detection.
|
|
|
|
The server provides a live H.264 stream via a Unix domain socket at
|
|
stream/scene.sock. We pipe it to ffmpeg for CUDA scene detection —
|
|
continuous stream, no polling, no restarts.
|
|
"""
|
|
threshold = threshold or SCENE_THRESHOLD
|
|
t = Thread(target=self._scene_detect_loop, daemon=True,
|
|
name="scene_detector", args=(threshold,))
|
|
t.start()
|
|
self._threads["scene_detector"] = t
|
|
|
|
def _scene_detect_loop(self, threshold):
|
|
"""Connect to scene socket, pipe H.264 to ffmpeg, read scene frames.
|
|
|
|
Retries on failure (e.g. ffmpeg dies from bad initial frames).
|
|
The server buffers the latest keyframe so reconnects start clean.
|
|
"""
|
|
socket_path = self.session_dir / "stream" / "scene.sock"
|
|
|
|
# Wait for the socket to appear (server creates it on session start).
|
|
while "stop" not in self._stop_flags:
|
|
if socket_path.exists():
|
|
break
|
|
time.sleep(0.5)
|
|
if "stop" in self._stop_flags:
|
|
return
|
|
|
|
while "stop" not in self._stop_flags:
|
|
try:
|
|
self._run_scene_session(socket_path, threshold)
|
|
except Exception:
|
|
log.exception("Scene detector error")
|
|
if "stop" in self._stop_flags:
|
|
break
|
|
log.info("Scene detector: reconnecting in 2s...")
|
|
time.sleep(2.0)
|
|
|
|
log.info("Scene detector stopped")
|
|
|
|
    def _run_scene_session(self, socket_path, threshold):
        """Single scene detection session: connect, run ffmpeg, read frames.

        Spawns two helper threads (socket→ffmpeg stdin feeder, stderr
        timestamp parser) and runs the stdout JPEG splitter on the calling
        thread until ffmpeg's stdout closes. Raised exceptions propagate to
        _scene_detect_loop, which retries.
        """
        log.info("Scene detector: connecting to %s", socket_path)
        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        try:
            sock.connect(str(socket_path))
        except OSError as e:
            log.error("Scene detector: connect failed: %s", e)
            return

        log.info("Scene detector: connected, starting ffmpeg")
        node = ff.detect_scenes_from_pipe(
            scene_threshold=threshold, flush_frames=SCENE_FLUSH_FRAMES,
        )
        proc = ff.run_async(node, pipe_stdin=True, pipe_stdout=True, pipe_stderr=True)
        # _procs may not exist yet (lazily created); stop() also tolerates this.
        self._procs = getattr(self, "_procs", {})
        self._procs["scene_detector"] = proc

        # Thread: socket → ffmpeg stdin.
        def _feed_stdin():
            try:
                while "stop" not in self._stop_flags:
                    data = sock.recv(65536)
                    if not data:
                        # Server closed the socket — end the session.
                        break
                    try:
                        proc.stdin.write(data)
                        proc.stdin.flush()
                    except (BrokenPipeError, OSError):
                        # ffmpeg exited; closing stdin below unblocks everything.
                        break
            finally:
                try:
                    proc.stdin.close()
                except OSError:
                    pass
                sock.close()
                log.debug("Scene detector: stdin feeder stopped")

        stdin_t = Thread(target=_feed_stdin, daemon=True, name="scene_stdin")
        stdin_t.start()

        # Thread: ffmpeg stderr → parse showinfo timestamps.
        # NOTE(review): frame/timestamp pairing relies on showinfo lines and
        # stdout JPEGs arriving in the same order — confirm ffmpeg guarantees
        # this for the filter graph in use.
        ts_queue = Queue()
        # Offset is sampled once per session, at start — assumes it is stable
        # for the session's lifetime.
        offset = self._get_current_global_offset() if self._get_current_global_offset else 0.0

        def _read_stderr():
            for raw in proc.stderr:
                line = raw.decode("utf-8", errors="replace").rstrip()
                if not line:
                    continue
                if "showinfo" in line:
                    # showinfo emits one line per output frame with pts_time.
                    pts_match = re.search(r"pts_time:\s*([\d.]+)", line)
                    if pts_match:
                        ts_queue.put(float(pts_match.group(1)))
                elif line.startswith("[") or "error" in line.lower() or "warning" in line.lower():
                    log.debug("[scene] %s", line)
            log.debug("[scene] stderr closed")

        stderr_t = Thread(target=_read_stderr, daemon=True, name="scene_stderr")
        stderr_t.start()

        # Main: ffmpeg stdout → extract JPEG frames by scanning for the
        # JPEG SOI (FFD8) / EOI (FFD9) markers in the byte stream.
        last_pts = 0.0
        buf = b""
        raw_fd = proc.stdout.fileno()
        while True:
            chunk = os.read(raw_fd, 65536)
            if not chunk:
                # stdout closed: ffmpeg exited — session over.
                break
            buf += chunk
            while True:
                soi = buf.find(b"\xff\xd8")
                if soi < 0:
                    # No frame start in buffer; discard and wait for more.
                    buf = b""
                    break
                eoi = buf.find(b"\xff\xd9", soi + 2)
                if eoi < 0:
                    # Partial frame; keep from SOI onward and read more.
                    buf = buf[soi:]
                    break
                jpeg_data = buf[soi:eoi + 2]
                buf = buf[eoi + 2:]

                try:
                    pts_time = ts_queue.get(timeout=2.0)
                except Empty:
                    log.warning("No timestamp for scene frame")
                    pts_time = 0.0

                # Skip flush frames (within 100ms of previous = duplicate).
                # NOTE(review): with last_pts starting at 0.0, a legitimate
                # first frame with pts < 0.1 is also skipped — confirm intended.
                if pts_time - last_pts < 0.1:
                    log.debug("Skipping flush frame at pts=%.3f", pts_time)
                    continue
                last_pts = pts_time

                global_ts = pts_time + offset
                self.on_raw_frame(jpeg_data, global_ts)

        ff.stop_proc(proc, timeout=3)
        log.info("Scene detector: ffmpeg exited (last_pts=%.1f)", last_pts)
def start_audio_extractor(self, on_new_audio=None):
|
|
"""Periodically extract audio from the growing fMP4 as WAV chunks."""
|
|
self._on_new_audio = on_new_audio
|
|
self.audio_dir.mkdir(parents=True, exist_ok=True)
|
|
t = Thread(target=self._audio_loop, daemon=True, name="audio_extractor")
|
|
t.start()
|
|
self._threads["audio_extractor"] = t
|
|
|
|
def stop(self):
|
|
self._stop_flags.add("stop")
|
|
for name, proc in getattr(self, "_procs", {}).items():
|
|
ff.stop_proc(proc, timeout=3)
|
|
self._procs = {} if hasattr(self, "_procs") else {}
|
|
|
|
def _has_audio_stream(self, seg: Path) -> bool:
|
|
try:
|
|
import ffmpeg as ffmpeg_lib
|
|
info = ffmpeg_lib.probe(str(seg))
|
|
return any(s.get("codec_type") == "audio" for s in info.get("streams", []))
|
|
except Exception:
|
|
return False
|
|
|
|
def _find_audio_source(self):
|
|
"""Find audio source: fMP4 with audio track, or standalone audio.aac from Rust server."""
|
|
seg = self._get_recording_path() if self._get_recording_path else None
|
|
if seg and seg.exists() and self._has_audio_stream(seg):
|
|
return seg
|
|
# Rust server writes raw AAC alongside the fMP4
|
|
stream_dir = self.session_dir / "stream"
|
|
aac_path = stream_dir / "audio.aac"
|
|
if aac_path.exists() and aac_path.stat().st_size > 100:
|
|
return aac_path
|
|
return None
|
|
|
|
    def _audio_loop(self):
        """Polling worker: extract newly-written audio as sequential WAV chunks.

        Runs on a daemon thread until stop() sets the "stop" flag. Each pass
        finds the current audio source, probes how much of it is safely
        readable, and extracts the span since the last pass into
        audio/chunk_NNNN.wav, notifying the on_new_audio callback.
        """
        processed_time = 0.0   # seconds of the source already extracted
        chunk_num = 0          # sequence number for the next chunk file
        current_source = None  # source file currently being tracked

        while "stop" not in self._stop_flags:
            time.sleep(AUDIO_EXTRACT_INTERVAL)

            source = self._find_audio_source()
            if not source:
                continue

            # Source changed (e.g. recorder restarted) — restart extraction
            # from the beginning of the new file.
            if source != current_source:
                current_source = source
                processed_time = 0.0
                chunk_num = 0
                log.info("Audio extractor: using %s", source.name)

            # Too small to be worth probing yet.
            if source.stat().st_size < 100_000:
                continue

            safe_duration = self._probe_safe_duration(source)
            if safe_duration is None or safe_duration <= 0:
                continue

            # Stay a safety margin behind the growing end of the file, and
            # only extract once at least ~1s of new audio has accumulated.
            process_to = safe_duration - AUDIO_SAFETY_MARGIN
            if process_to <= processed_time + 1.0:
                continue

            chunk_duration = process_to - processed_time
            wav_path = self.audio_dir / f"chunk_{chunk_num:04d}.wav"

            try:
                ff.extract_audio_chunk(source, wav_path,
                                       start_time=processed_time,
                                       duration=chunk_duration)
            except Exception as e:
                # Best-effort: log and retry the same span next pass
                # (processed_time is not advanced on this path).
                log.error("Audio extraction failed: %s", e)
                continue

            if wav_path.exists() and wav_path.stat().st_size > 100:
                offset = self._get_current_global_offset() if self._get_current_global_offset else 0.0
                global_start = processed_time + offset
                log.info("Audio chunk: %s (%.1fs → %.1fs, global %.1fs)",
                         wav_path.name, processed_time, process_to, global_start)
                if self._on_new_audio:
                    self._on_new_audio(
                        wav_path, global_start, chunk_duration,
                        segment_path=source, local_start=processed_time,
                    )
                chunk_num += 1

            # NOTE(review): processed_time advances even when the produced WAV
            # was missing/tiny and therefore not reported — that audio window
            # is skipped permanently. Confirm this is intended.
            processed_time = process_to

        log.info("Audio extractor stopped")
def _probe_safe_duration(self, seg: Path):
|
|
try:
|
|
import ffmpeg as ffmpeg_lib
|
|
info = ffmpeg_lib.probe(str(seg))
|
|
dur = float(info.get("format", {}).get("duration", 0))
|
|
if dur > 0:
|
|
return dur
|
|
for stream in info.get("streams", []):
|
|
sdur = float(stream.get("duration", 0))
|
|
if sdur > 0:
|
|
return sdur
|
|
except Exception:
|
|
pass
|
|
try:
|
|
return seg.stat().st_size / 65_000
|
|
except Exception:
|
|
return None
|