restructure and test, pure python and rust transport both _work_

This commit is contained in:
2026-04-10 16:25:54 -03:00
parent 9d3ff2c6ba
commit e906b0a963
11 changed files with 1146 additions and 67 deletions

View File

@@ -10,8 +10,10 @@ from cht.config import (
STREAM_HOST,
STREAM_PORT,
SCENE_THRESHOLD,
MAX_FRAME_INTERVAL,
SCENE_FLUSH_FRAMES,
SEGMENT_DURATION,
AUDIO_EXTRACT_INTERVAL,
AUDIO_SAFETY_MARGIN,
)
@@ -41,8 +43,13 @@ def test_scene_threshold_range():
assert 0 < SCENE_THRESHOLD < 1
def test_max_frame_interval_positive():
    """MAX_FRAME_INTERVAL must be strictly greater than zero."""
    assert 0 < MAX_FRAME_INTERVAL
def test_scene_flush_frames_non_negative():
    """SCENE_FLUSH_FRAMES may be zero but must never be negative."""
    assert 0 <= SCENE_FLUSH_FRAMES
def test_audio_intervals_positive():
    """Both audio timing constants must be strictly greater than zero."""
    assert 0 < AUDIO_EXTRACT_INTERVAL
    assert 0 < AUDIO_SAFETY_MARGIN
def test_segment_duration_positive():

View File

@@ -86,6 +86,105 @@ class TestRunAsync:
assert "tcp://0.0.0.0:4444?listen" in " ".join(cmd)
class TestReceiveRecordRelayAndDetect:
    """P0 regression: single-process pipeline with 3 outputs + scene detection."""

    @staticmethod
    def _render(node):
        """Join the compiled argument list of *node* into one searchable string."""
        return " ".join(map(str, node.compile()))

    def _pipeline_cmd(self, tmp_path, **options):
        """Build the pipeline with the fixed test endpoints and render its command.

        Extra keyword arguments are forwarded unchanged to
        ``ff.receive_record_relay_and_detect``.
        """
        pipeline = ff.receive_record_relay_and_detect(
            "tcp://0.0.0.0:4444?listen",
            tmp_path / "rec.mp4",
            "udp://127.0.0.1:4445",
            **options,
        )
        return self._render(pipeline)

    def test_compiles_three_outputs(self, tmp_path):
        rendered = self._pipeline_cmd(tmp_path)
        # fMP4 recording
        assert "rec.mp4" in rendered
        # UDP relay
        assert "udp://127.0.0.1:4445" in rendered
        # MJPEG pipe (scene detection output)
        assert "pipe:1" in rendered

    def test_fmp4_flags(self, tmp_path):
        rendered = self._pipeline_cmd(tmp_path)
        assert "frag_keyframe" in rendered
        assert "empty_moov" in rendered

    def test_scene_filter_uses_threshold(self, tmp_path):
        rendered = self._pipeline_cmd(tmp_path, scene_threshold=0.25)
        assert "0.25" in rendered
        assert "scene" in rendered
        assert "showinfo" in rendered

    def test_flush_expression_included_when_flush_frames_gt_0(self, tmp_path):
        """P0 regression: flush trick must be present to push real frame through buffer."""
        rendered = self._pipeline_cmd(tmp_path, flush_frames=2)
        # Flush expression: eq(n,prev_selected_n+1)*mod(selected_n,N)
        assert "prev_selected_n" in rendered
        assert "mod" in rendered

    def test_no_flush_expression_when_flush_frames_zero(self, tmp_path):
        """P0 regression: flush=0 should produce a clean scene-only filter."""
        rendered = self._pipeline_cmd(tmp_path, flush_frames=0)
        assert "prev_selected_n" not in rendered

    def test_flush_mod_value_matches_flush_frames(self, tmp_path):
        """P0 regression: mod value must be flush_frames+1 to prevent chaining.

        ffmpeg-python escapes commas in filtergraph as \\, so we check the
        escaped form in the compiled command.
        """
        # NOTE(review): this exercises ff.detect_scenes_from_pipe rather than
        # ff.receive_record_relay_and_detect, and tmp_path is unused — confirm
        # both builders share the same filter expression.
        for flush_frames in (1, 2, 3):
            rendered = self._render(
                ff.detect_scenes_from_pipe(flush_frames=flush_frames)
            )
            # Commas in filter expressions are escaped as \, in filtergraph
            assert f"mod(selected_n\\,{flush_frames + 1})" in rendered
class TestDetectScenesFromPipe:
    """Checks on the command compiled by ``ff.detect_scenes_from_pipe``."""

    @staticmethod
    def _cmd(**options):
        """Compile the detector built with *options* and join its arguments."""
        compiled = ff.detect_scenes_from_pipe(**options).compile()
        return " ".join(map(str, compiled))

    def test_reads_from_stdin(self):
        assert "pipe:0" in self._cmd()

    def test_writes_mjpeg_to_stdout(self):
        rendered = self._cmd()
        for token in ("pipe:1", "mjpeg"):
            assert token in rendered

    def test_includes_flush_expression(self):
        assert "prev_selected_n" in self._cmd(flush_frames=2)

    def test_h264_input_format(self):
        assert "h264" in self._cmd()
class TestStopProc:
def test_sends_sigint_then_waits(self):
proc = MagicMock()

View File

@@ -1,7 +1,6 @@
"""Tests for cht.stream.manager — StreamManager."""
import json
import time
from unittest.mock import patch, MagicMock
import pytest
@@ -21,12 +20,15 @@ class TestInit:
def test_session_id_custom(self, manager):
assert manager.session_id == "test_session"
def test_recording_path(self, manager):
def test_recording_path_delegates_to_recorder(self, manager):
assert manager.recording_path.name == "recording_000.mp4"
def test_dirs_not_created_on_init(self, manager):
assert not manager.stream_dir.exists()
def test_relay_url_from_recorder(self, manager):
assert "4445" in manager.relay_url
class TestSetupDirs:
def test_creates_all_subdirs(self, manager):
@@ -34,61 +36,74 @@ class TestSetupDirs:
assert manager.stream_dir.is_dir()
assert manager.frames_dir.is_dir()
assert manager.transcript_dir.is_dir()
assert manager.audio_dir.is_dir()
assert manager.agent_dir.is_dir()
class TestStopAll:
@patch("cht.stream.manager.ff.stop_proc")
def test_stops_all_procs(self, mock_stop, manager):
proc = MagicMock()
manager._procs = {"recorder": proc}
def test_stop_all_calls_processor_and_recorder(self, manager):
manager.processor.stop = MagicMock()
manager.recorder.stop = MagicMock()
manager.stop_all()
mock_stop.assert_called_with(proc)
assert len(manager._procs) == 0
def test_sets_stop_flag(self, manager):
manager.stop_all()
assert "stop" in manager._stop_flags
class TestFrameIndex:
def test_next_frame_number_empty(self, manager):
manager.setup_dirs()
assert manager._next_frame_number() == 1
def test_next_frame_number_with_existing(self, manager):
manager.setup_dirs()
index = [{"id": "F0001"}, {"id": "F0002"}]
(manager.frames_dir / "index.json").write_text(json.dumps(index))
assert manager._next_frame_number() == 3
def test_append_frame_index(self, manager):
manager.setup_dirs()
entry = {"id": "F0001", "timestamp": 5.0, "path": "/tmp/F0001.jpg", "sent_to_agent": False}
manager._append_frame_index(entry)
index = json.loads((manager.frames_dir / "index.json").read_text())
assert len(index) == 1
assert index[0]["id"] == "F0001"
def test_append_frame_index_accumulates(self, manager):
manager.setup_dirs()
for i in range(3):
entry = {"id": f"F{i+1:04d}", "timestamp": float(i), "path": f"/tmp/F{i+1:04d}.jpg", "sent_to_agent": False}
manager._append_frame_index(entry)
index = json.loads((manager.frames_dir / "index.json").read_text())
assert len(index) == 3
manager.processor.stop.assert_called_once()
manager.recorder.stop.assert_called_once()
class TestSceneDetector:
def test_start_scene_detector_stores_callback(self, manager):
def test_python_path_sets_processor_callback(self, manager):
"""Python path (recorder present): on_new_frames goes to processor, not recorder."""
cb = MagicMock()
manager.start_scene_detector(on_new_frames=cb)
assert manager._on_new_frames is cb
assert manager.processor._on_new_frames is cb
def test_update_scene_threshold(self, manager):
manager.setup_dirs()
# Mock restart_recorder to avoid launching ffmpeg
manager.restart_recorder = MagicMock()
def test_python_path_does_not_start_processor_scene_detector(self, manager):
    """Python path: recorder owns scene detection — processor.start_scene_detector not called."""
    detector_spy = MagicMock()
    manager.processor.start_scene_detector = detector_spy
    frames_callback = MagicMock()
    manager.start_scene_detector(on_new_frames=frames_callback)
    detector_spy.assert_not_called()
def test_rust_path_sets_callback_and_starts_detector(self, tmp_path):
"""Rust path (no recorder): processor owns scene detection."""
# SESSIONS_DIR is redirected to the pytest temp dir while the manager is built.
with patch("cht.stream.manager.SESSIONS_DIR", tmp_path):
# __new__ creates the instance without running StreamManager.__init__;
# only the two attributes assigned below are set by this test.
mgr = StreamManager.__new__(StreamManager)
# recorder=None marks the Rust path described in the docstring.
mgr.recorder = None
mgr.processor = MagicMock()
# Imported from the manager module so the expected threshold is the value it uses.
from cht.stream.manager import SCENE_THRESHOLD
cb = MagicMock()
mgr.start_scene_detector(on_new_frames=cb)
# The processor must receive the callback and be started with the default threshold.
mgr.processor.set_on_new_frames.assert_called_once_with(cb)
mgr.processor.start_scene_detector.assert_called_once_with(threshold=SCENE_THRESHOLD)
def test_update_scene_threshold_restarts_recorder(self, manager):
manager.recorder.update_scene_threshold = MagicMock()
manager.update_scene_threshold(0.25)
assert manager.scene_threshold == 0.25
manager.restart_recorder.assert_called_once()
manager.recorder.update_scene_threshold.assert_called_once_with(0.25)
class TestFromRustSession:
    """Checks on managers built with ``StreamManager.from_rust_session``."""

    SESSION_NAME = "20260410_120000"

    def _attach(self, tmp_path):
        """Create a session dir containing ``stream/`` and attach a manager to it."""
        session_dir = tmp_path / self.SESSION_NAME
        for directory in (session_dir, session_dir / "stream"):
            directory.mkdir()
        return StreamManager.from_rust_session(session_dir)

    def test_attaches_without_recorder(self, tmp_path):
        attached = self._attach(tmp_path)
        assert attached.recorder is None
        assert attached.session_id == "20260410_120000"

    def test_relay_url_fallback_without_recorder(self, tmp_path):
        attached = self._attach(tmp_path)
        assert "4445" in attached.relay_url

    def test_recorder_alive_returns_true_without_recorder(self, tmp_path):
        attached = self._attach(tmp_path)
        assert attached.recorder_alive() is True

231
tests/test_processor.py Normal file
View File

@@ -0,0 +1,231 @@
"""Tests for cht.stream.processor — SessionProcessor.
P0: regression tests for known bugs (flush dedup, index contract, audio callback crash)
P2: scene detection pipeline unit tests
"""
import json
import time
from pathlib import Path
from threading import Event
from unittest.mock import MagicMock, patch
import pytest
from cht.stream.processor import SessionProcessor
@pytest.fixture
def processor(tmp_path):
    """Yield an attached SessionProcessor in a temp session dir; stop it afterwards.

    The frames and audio directories are created up front, and the processor
    is attached with a recording path of ``None`` and a global offset of 0.0.
    """
    root = tmp_path / "20260410_120000"
    root.mkdir()
    instance = SessionProcessor(root)
    for directory in (instance.frames_dir, instance.audio_dir):
        directory.mkdir(parents=True, exist_ok=True)
    instance.attach(
        get_recording_path=lambda: None,
        get_current_global_offset=lambda: 0.0,
    )
    yield instance
    instance.stop()
# -- P2: on_raw_frame / index contract --
class TestOnRawFrame:
    """``on_raw_frame`` output: the JPEG file, the index.json entry and the callback."""

    # JPEG-shaped payload: SOI + APP0 markers, zero padding, EOI marker.
    FAKE_JPEG = b"\xff\xd8\xff\xe0" + b"\x00" * 100 + b"\xff\xd9"

    @staticmethod
    def _load_index(processor):
        """Parse and return the contents of ``frames_dir/index.json``."""
        index_file = processor.frames_dir / "index.json"
        return json.loads(index_file.read_text())

    def test_writes_jpeg_to_frames_dir(self, processor, tmp_path):
        processor.on_raw_frame(self.FAKE_JPEG, global_ts=5.0)
        written = list(processor.frames_dir.glob("*.jpg"))
        assert len(written) == 1

    def test_index_entry_has_required_fields(self, processor):
        """P2: index.json must match {id, timestamp, path, sent_to_agent} contract."""
        processor.on_raw_frame(self.FAKE_JPEG, global_ts=12.5)
        entries = self._load_index(processor)
        assert len(entries) == 1
        entry = entries[0]
        for key in ("id", "timestamp", "path", "sent_to_agent"):
            assert key in entry
        assert entry["sent_to_agent"] is False
        assert entry["timestamp"] == 12.5

    def test_id_format_is_F_zero_padded(self, processor):
        processor.on_raw_frame(self.FAKE_JPEG, global_ts=1.0)
        first_entry = self._load_index(processor)[0]
        assert first_entry["id"] == "F0001"

    def test_sequential_ids(self, processor):
        for ts in (0.0, 1.0, 2.0):
            processor.on_raw_frame(self.FAKE_JPEG, global_ts=ts)
        ids = [entry["id"] for entry in self._load_index(processor)]
        assert ids == ["F0001", "F0002", "F0003"]

    def test_fires_on_new_frames_callback(self, processor):
        listener = MagicMock()
        processor.set_on_new_frames(listener)
        processor.on_raw_frame(self.FAKE_JPEG, global_ts=3.0)
        listener.assert_called_once()
        # First positional argument of the single call is the list of frames.
        delivered = listener.call_args[0][0]
        assert len(delivered) == 1
        assert delivered[0]["timestamp"] == 3.0

    def test_path_in_index_is_absolute(self, processor):
        processor.on_raw_frame(self.FAKE_JPEG, global_ts=1.0)
        stored_path = Path(self._load_index(processor)[0]["path"])
        assert stored_path.is_absolute()
# -- P0: audio callback crash protection --
class TestAudioCallbackCrash:
    """P0 regression guard for exceptions raised by the ``on_new_audio`` callback."""

    def test_crashing_callback_does_not_kill_audio_thread(self, processor, tmp_path):
        """P0 regression: uncaught exception in on_new_audio must not kill the extractor thread.

        The callback raises on its first invocation only. The test invokes it
        the way the extractor would, first expecting the crash and then
        checking that a second chunk is still delivered.

        NOTE(review): this calls the callback directly instead of driving
        ``_audio_loop``, so it pins the callback contract rather than the
        loop's own exception handling — confirm whether a loop-level test
        is wanted.
        """
        calls = []

        def bad_callback(*args, **kwargs):
            calls.append((args, kwargs))
            if len(calls) == 1:
                raise RuntimeError("simulated callback crash")

        processor._on_new_audio = bad_callback

        # Write a fake WAV that passes the size check
        first_chunk = tmp_path / "chunk_0000.wav"
        first_chunk.write_bytes(b"\x00" * 200)

        # First delivery must fail with exactly the simulated error, rather
        # than having any exception silently swallowed.
        with pytest.raises(RuntimeError, match="simulated callback crash"):
            processor._on_new_audio(
                first_chunk, 0.0, 3.0, segment_path=first_chunk, local_start=0.0
            )

        second_chunk = tmp_path / "chunk_0001.wav"
        second_chunk.write_bytes(b"\x00" * 200)

        # Second delivery must go through; an exception here fails the test.
        processor._on_new_audio(
            second_chunk, 3.0, 3.0, segment_path=second_chunk, local_start=3.0
        )

        # Callback was called twice — the first crash did not stop delivery
        assert len(calls) == 2
# -- P0: wall-clock offset --
class TestWallClockOffset:
    """``_wall_clock_offset`` computed from the session directory name."""

    @staticmethod
    def _offset_for(tmp_path, dir_name):
        """Create ``tmp_path/dir_name``, attach a processor there, return its offset."""
        session_dir = tmp_path / dir_name
        session_dir.mkdir()
        proc = SessionProcessor(session_dir)
        proc.attach(
            get_recording_path=lambda: None,
            get_current_global_offset=lambda: 0.0,
        )
        return proc._wall_clock_offset()

    def test_offset_from_session_dir_name(self, tmp_path):
        """P0: wall-clock offset from session dir name must be close to actual elapsed time."""
        from datetime import datetime

        # Session directory named after the current time.
        started = datetime.now()
        offset = self._offset_for(tmp_path, started.strftime("%Y%m%d_%H%M%S"))
        # Just created, so the offset should be within 2 seconds of 0.
        assert 0.0 <= offset < 2.0

    def test_offset_increases_with_time(self, tmp_path):
        """P0: offset must grow, not stay zero."""
        from datetime import datetime, timedelta

        # Session directory named as if it started 10 seconds ago.
        started = datetime.now() - timedelta(seconds=10)
        offset = self._offset_for(tmp_path, started.strftime("%Y%m%d_%H%M%S"))
        assert offset >= 9.0  # at least 9s (allow 1s tolerance)

    def test_offset_falls_back_gracefully_on_bad_name(self, tmp_path):
        """P0 fragility: bad session dir name must not crash."""
        # Must return without raising.
        offset = self._offset_for(tmp_path, "not_a_timestamp")
        assert offset >= 0.0
# -- P0: flush frame deduplication --
class TestFlushFrameDeduplication:
    """P0 regression: flush frames that closely follow a scene frame are dropped.

    NOTE(review): the dedup rule below is a local copy of what the comments
    attribute to the recorder's ``_read_stdout`` (``pts_time - last_pts < 0.1``
    means skip); it is not imported from the recorder — confirm it stays in
    sync with the real implementation.
    """

    # Minimum PTS gap in seconds; frames closer than this to the last accepted
    # frame are treated as flush frames.
    FLUSH_WINDOW = 0.1

    # JPEG-shaped payload: SOI + APP0 markers, zero padding, EOI marker.
    FAKE_JPEG = b"\xff\xd8\xff\xe0" + b"\x00" * 100 + b"\xff\xd9"

    def _feed(self, processor, pts_times):
        """Run *pts_times* through the dedup rule and return the frames delivered.

        Each accepted timestamp is forwarded to ``processor.on_raw_frame``;
        rejected ones are dropped before reaching the processor. The returned
        list holds every frame passed to the ``on_new_frames`` callback.
        """
        received = []
        processor.set_on_new_frames(lambda frames: received.extend(frames))
        last_pts = -1.0
        for pts_time in pts_times:
            if pts_time - last_pts < self.FLUSH_WINDOW:
                continue  # flush frame, skip
            last_pts = pts_time
            processor.on_raw_frame(self.FAKE_JPEG, global_ts=pts_time)
        return received

    def test_frames_within_100ms_are_skipped(self, processor):
        """P0 regression: flush frames within flush_window of scene frame must be dropped."""
        received = self._feed(
            processor,
            [
                5.0,   # scene frame — accepted
                5.03,  # flush frame 1 — < 100ms, skipped
                5.06,  # flush frame 2 — < 100ms, skipped
                8.0,   # next scene frame — accepted
            ],
        )
        assert len(received) == 2
        assert received[0]["timestamp"] == 5.0
        assert received[1]["timestamp"] == 8.0

    def test_frames_beyond_100ms_are_accepted(self, processor):
        """Frames separated by > 100ms are distinct scenes, not flush frames."""
        # Each gap is 150 ms, so every frame counts as a separate scene.
        received = self._feed(processor, [5.0, 5.15, 5.30])
        assert len(received) == 3

View File

@@ -10,17 +10,19 @@ from cht.stream.tracker import RecordingTracker
class TestRecordingTracker:
def test_initial_duration_is_zero(self, tmp_path):
tracker = RecordingTracker(tmp_path / "rec.ts")
def test_initial_duration_is_zero(self):
tracker = RecordingTracker(get_segments=lambda: [])
assert tracker.duration == 0.0
def test_callback_called_on_update(self, tmp_path):
rec = tmp_path / "rec.ts"
rec.write_bytes(b"\x00" * 100_000)
seg = tmp_path / "rec.mp4"
seg.write_bytes(b"\x00" * 100_000)
cb = MagicMock()
tracker = RecordingTracker(rec, on_duration_update=cb)
tracker = RecordingTracker(
get_segments=lambda: [seg],
on_duration_update=cb,
)
with patch.object(tracker, "_probe_duration", return_value=10.0):
tracker.start()
time.sleep(3)
@@ -29,10 +31,44 @@ class TestRecordingTracker:
cb.assert_called()
assert cb.call_args[0][0] > 0
def test_no_callback_if_file_missing(self, tmp_path):
def test_no_callback_if_no_segments(self):
cb = MagicMock()
tracker = RecordingTracker(tmp_path / "nonexistent.ts", on_duration_update=cb)
tracker = RecordingTracker(get_segments=lambda: [], on_duration_update=cb)
tracker.start()
time.sleep(3)
tracker.stop()
cb.assert_not_called()
def test_no_callback_if_file_missing(self, tmp_path):
cb = MagicMock()
tracker = RecordingTracker(
get_segments=lambda: [tmp_path / "nonexistent.mp4"],
on_duration_update=cb,
)
tracker.start()
time.sleep(3)
tracker.stop()
cb.assert_not_called()
def test_duration_only_increases(self, tmp_path):
    """Reported durations must never decrease, even if a probe returns a smaller value."""
    from itertools import chain, repeat

    seg = tmp_path / "rec.mp4"
    seg.write_bytes(b"\x00" * 100_000)

    durations = []

    def on_update(d):
        durations.append(d)

    # 3.0 is a regression — should be ignored. The last value repeats forever
    # so a fourth probe within the sleep window cannot exhaust the iterator
    # and raise StopIteration inside the tracker.
    probe_values = chain([5.0, 3.0, 7.0], repeat(7.0))
    tracker = RecordingTracker(
        get_segments=lambda: [seg],
        on_duration_update=on_update,
    )
    with patch.object(tracker, "_probe_duration", side_effect=probe_values):
        tracker.start()
        time.sleep(7)
        tracker.stop()

    # Without this check the loop below passes vacuously when no update fired.
    assert durations, "tracker never reported a duration"
    # Duration should never go backwards
    for earlier, later in zip(durations, durations[1:]):
        assert later >= earlier, "Duration regressed"