Add proper end-to-end tests: benchmark extraction, latency probe, test-video generator, and E2E orchestration scripts

This commit is contained in:
2026-04-10 18:29:58 -03:00
parent e906b0a963
commit ea9dbf8772
16 changed files with 1077 additions and 15 deletions

2
.gitignore vendored
View File

@@ -7,3 +7,5 @@ __pycache__/
.pytest_cache/ .pytest_cache/
media/target/ media/target/
media/logs/ media/logs/
tests/fixtures/*.mp4
tests/fixtures/*.wav

View File

@@ -1,3 +1,4 @@
import argparse
import logging import logging
import os import os
import signal import signal
@@ -55,6 +56,11 @@ class ChtApp(Adw.Application):
win = ChtWindow(application=self) win = ChtWindow(application=self)
win.present() win.present()
# Auto-connect for E2E testing: --auto-connect
# Delay gives the GUI time to fully render before starting the stream.
if os.environ.get("_CHT_AUTO_CONNECT") == "1" and not win._lifecycle.is_streaming:
GLib.timeout_add(2000, lambda: win._on_connect_clicked(None) or False)
_STDERR_SKIP = [b"eglExportDMABUFImage"] _STDERR_SKIP = [b"eglExportDMABUFImage"]
@@ -89,6 +95,16 @@ def _filter_stderr():
def main(): def main():
parser = argparse.ArgumentParser(description="CHT — Stream Viewer + Agent")
parser.add_argument("--auto-connect", action="store_true", help="Connect on startup")
parser.add_argument("--python", action="store_true", help="Use Python transport (default)")
parser.add_argument("--rust", action="store_true", help="Use Rust transport")
args, gtk_args = parser.parse_known_args()
# Store parsed options so do_activate can read them
os.environ["_CHT_AUTO_CONNECT"] = "1" if args.auto_connect else "0"
os.environ["_CHT_RUST_TRANSPORT"] = "1" if args.rust else "0"
_filter_stderr() _filter_stderr()
logging.basicConfig( logging.basicConfig(
level=logging.DEBUG, level=logging.DEBUG,
@@ -96,9 +112,10 @@ def main():
datefmt="%H:%M:%S", datefmt="%H:%M:%S",
) )
log = logging.getLogger("cht") log = logging.getLogger("cht")
log.info("CHT starting") log.info("CHT starting (transport=%s, auto_connect=%s)",
"rust" if args.rust else "python", args.auto_connect)
app = ChtApp() app = ChtApp()
return app.run(sys.argv) return app.run([sys.argv[0]] + gtk_args)
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -1,6 +1,7 @@
"""Stream lifecycle — manages recording, scene detection, audio extraction, and transcription buffering.""" """Stream lifecycle — manages recording, scene detection, audio extraction, and transcription buffering."""
import logging import logging
import time
from threading import Thread from threading import Thread
from gi.repository import GLib from gi.repository import GLib
@@ -66,6 +67,7 @@ class StreamLifecycle:
""" """
self._streaming = True self._streaming = True
self._gone_live = False self._gone_live = False
self._start_monotonic = time.monotonic()
self._rust_transport = rust_transport self._rust_transport = rust_transport
if rust_transport: if rust_transport:
@@ -177,13 +179,16 @@ class StreamLifecycle:
GLib.idle_add(self._go_live_once) GLib.idle_add(self._go_live_once)
if self._stream_mgr: if self._stream_mgr:
self._stream_mgr.capture_now(on_new_frames=self._handle_new_scene_frames) self._stream_mgr.capture_now(on_new_frames=self._handle_new_scene_frames)
if self._stream_mgr:
self._stream_mgr.capture_now(on_new_frames=self._handle_new_scene_frames)
def _go_live_once(self): def _go_live_once(self):
if self._stream_mgr: if self._stream_mgr:
elapsed = time.monotonic() - self._start_monotonic
log.info("Going LIVE (startup delay elapsed)") log.info("Going LIVE (startup delay elapsed)")
self._timeline.go_live() self._timeline.go_live()
if self._stream_mgr.telemetry:
self._stream_mgr.telemetry.metric("first_live", {
"elapsed_s": round(elapsed, 2),
})
return False return False
def _tick_live(self): def _tick_live(self):

View File

@@ -58,7 +58,7 @@ class StreamManager:
self.agent_dir = self.session_dir / "agent" self.agent_dir = self.session_dir / "agent"
self.readonly = False self.readonly = False
self.telemetry = None self._telemetry = None
self.recorder = StreamRecorder(self.session_dir) self.recorder = StreamRecorder(self.session_dir)
self.processor = SessionProcessor(self.session_dir) self.processor = SessionProcessor(self.session_dir)
@@ -88,7 +88,7 @@ class StreamManager:
mgr.audio_dir = session_dir / "audio" mgr.audio_dir = session_dir / "audio"
mgr.agent_dir = session_dir / "agent" mgr.agent_dir = session_dir / "agent"
mgr.readonly = False mgr.readonly = False
mgr.telemetry = None mgr._telemetry = None
# No recorder — Rust server owns transport + recording. # No recorder — Rust server owns transport + recording.
mgr.recorder = None mgr.recorder = None
@@ -121,6 +121,16 @@ class StreamManager:
session_id, len(mgr.recorder.recording_segments), mgr.frame_count) session_id, len(mgr.recorder.recording_segments), mgr.frame_count)
return mgr return mgr
@property
def telemetry(self):
return self._telemetry
@telemetry.setter
def telemetry(self, val):
self._telemetry = val
if self.processor:
self.processor._telemetry = val
# -- Recorder delegation -- # -- Recorder delegation --
@property @property

View File

@@ -51,6 +51,7 @@ class SessionProcessor:
self._get_recording_path = None self._get_recording_path = None
self._get_current_global_offset = None self._get_current_global_offset = None
self._telemetry = None
def attach(self, get_recording_path, get_current_global_offset): def attach(self, get_recording_path, get_current_global_offset):
"""Wire up callbacks to query the recorder's current state.""" """Wire up callbacks to query the recorder's current state."""
@@ -75,6 +76,11 @@ class SessionProcessor:
self._append_frame_index(entry) self._append_frame_index(entry)
log.info("Scene frame: %s at %.1fs", frame_id, global_ts) log.info("Scene frame: %s at %.1fs", frame_id, global_ts)
if self._telemetry:
self._telemetry.metric("scene_frame", {
"id": frame_id, "global_ts": global_ts,
})
if self._on_new_frames: if self._on_new_frames:
self._on_new_frames([entry]) self._on_new_frames([entry])
@@ -465,6 +471,15 @@ class SessionProcessor:
if safe_duration is None or safe_duration <= 0: if safe_duration is None or safe_duration <= 0:
continue continue
# Raw AAC files (from Rust server) have no reliable duration metadata.
# ffprobe wildly overestimates (e.g. 1569s for a 50s session).
# Cap to wall-clock elapsed time as a sanity bound.
wall_elapsed = self._wall_clock_offset()
if wall_elapsed > 0 and safe_duration > wall_elapsed * 1.5:
log.debug("Audio: capping probed duration %.1fs to wall-clock %.1fs",
safe_duration, wall_elapsed)
safe_duration = wall_elapsed
# Fail-safe: processed_time can accumulate past the file if the # Fail-safe: processed_time can accumulate past the file if the
# source was recreated (e.g. server restarted same session). # source was recreated (e.g. server restarted same session).
if processed_time > safe_duration: if processed_time > safe_duration:
@@ -495,6 +510,11 @@ class SessionProcessor:
global_start = processed_time + offset global_start = processed_time + offset
log.info("Audio chunk: %s (%.1fs → %.1fs, global %.1fs)", log.info("Audio chunk: %s (%.1fs → %.1fs, global %.1fs)",
wav_path.name, processed_time, process_to, global_start) wav_path.name, processed_time, process_to, global_start)
if self._telemetry:
self._telemetry.metric("audio_chunk", {
"chunk": chunk_num, "start": processed_time,
"end": process_to, "global_start": global_start,
})
if self._on_new_audio: if self._on_new_audio:
try: try:
self._on_new_audio( self._on_new_audio(

View File

@@ -1,6 +1,7 @@
"""Main application window — wires Timeline to all components.""" """Main application window — wires Timeline to all components."""
import logging import logging
import os
from pathlib import Path from pathlib import Path
import gi import gi
@@ -301,7 +302,8 @@ class ChtWindow(Adw.ApplicationWindow):
self._connect_btn.remove_css_class("suggested-action") self._connect_btn.remove_css_class("suggested-action")
self._connect_btn.add_css_class("destructive-action") self._connect_btn.add_css_class("destructive-action")
mgr = self._lifecycle.start(session_id=session_id, rust_transport=True) rust = os.environ.get("_CHT_RUST_TRANSPORT", "0") == "1"
mgr = self._lifecycle.start(session_id=session_id, rust_transport=rust)
if mgr is None: if mgr is None:
log.error("Failed to start stream — no cht-server session found") log.error("Failed to start stream — no cht-server session found")
self._connect_btn.set_label("Connect") self._connect_btn.set_label("Connect")

372
ctrl/bench.py Normal file
View File

@@ -0,0 +1,372 @@
#!/usr/bin/env python3
"""Post-hoc session benchmark — extract timing metrics from session logs.
Usage:
python ctrl/bench.py --session data/sessions/20260410_160441
python ctrl/bench.py --latest # pick most recent session
python ctrl/bench.py --latest --json # machine-readable output
Parses telemetry.jsonl, session.log, frames/index.json, and (if present)
media/logs/server.log. No live session required — works on finished sessions.
Metrics:
M1 Full startup Connect → first transcript
M1a → first duration update
M1b → first scene frame
M1c → first audio chunk
M1d → first transcript
M5 Audio lag How far audio extraction trails real-time
M6 Transcript lag Time from audio ready to transcript done
M7 Frame throughput Scene frames per minute
M9 Recorder health Unexpected restarts / segment rotations
"""
import argparse
import json
import logging
import re
import sys
from datetime import datetime
from pathlib import Path
log = logging.getLogger("bench")
PROJECT_DIR = Path(__file__).resolve().parent.parent
DATA_DIR = PROJECT_DIR / "data"
SESSIONS_DIR = DATA_DIR / "sessions"
def parse_log_time(line: str) -> float | None:
"""Parse HH:MM:SS from session.log line → seconds since midnight."""
m = re.match(r"(\d{2}):(\d{2}):(\d{2})", line)
if m:
return int(m[1]) * 3600 + int(m[2]) * 60 + int(m[3])
return None
def load_telemetry(session_dir: Path) -> list[dict]:
    """Load telemetry.jsonl from the session dir.

    Blank lines and malformed JSON lines are skipped; a missing file
    yields an empty list.
    """
    path = session_dir / "telemetry.jsonl"
    if not path.exists():
        return []
    records = []
    for raw in path.read_text().splitlines():
        if not raw.strip():
            continue
        try:
            records.append(json.loads(raw))
        except json.JSONDecodeError:
            continue
    return records
def load_session_log(session_dir: Path) -> list[str]:
    """Return session.log split into lines, or [] when the file is absent."""
    path = session_dir / "session.log"
    if not path.exists():
        return []
    return path.read_text().splitlines()
def load_frames_index(session_dir: Path) -> list[dict]:
    """Parse frames/index.json; [] when the file is missing or unparseable."""
    path = session_dir / "frames" / "index.json"
    try:
        return json.loads(path.read_text())
    except FileNotFoundError:
        return []
    except (json.JSONDecodeError, ValueError):
        return []
def find_first_line(lines: list[str], pattern: str) -> tuple[float | None, str | None]:
    """Locate the first line containing *pattern*.

    Returns (timestamp_seconds, full_line); (None, None) when no line matches.
    """
    hit = next((ln for ln in lines if pattern in ln), None)
    if hit is None:
        return None, None
    return parse_log_time(hit), hit
def extract_metrics(session_dir: Path) -> dict:
    """Extract benchmark metrics (M1..M9) from a finished session directory.

    Reads telemetry.jsonl, session.log and frames/index.json via the loader
    helpers. Every metric is optional: a key is present only when the
    underlying evidence was found in the logs.
    """
    tel = load_telemetry(session_dir)
    log_lines = load_session_log(session_dir)
    frames = load_frames_index(session_dir)
    metrics: dict = {}

    # Session start: telemetry only records that a session started; the
    # wall-clock anchor comes from the first session.log line's HH:MM:SS.
    session_start_t = None
    for entry in tel:
        if entry.get("name") == "session_start":
            session_start_t = parse_log_time(log_lines[0]) if log_lines else None
            break

    # Session duration: take the LAST session_end entry on purpose (no break)
    # — a restarted session appends a fresh end marker.
    session_duration = None
    for entry in tel:
        if entry.get("name") == "session_end":
            session_duration = entry.get("t")
    metrics["session_duration_s"] = session_duration

    # M1a–M1d: elapsed seconds from session start to each first-occurrence marker.
    t_start = session_start_t
    t_duration, _ = find_first_line(log_lines, "Duration:")
    if t_start is not None and t_duration is not None:
        metrics["M1a_first_duration_s"] = t_duration - t_start
    t_frame, _ = find_first_line(log_lines, "Scene frame:")
    if t_start is not None and t_frame is not None:
        metrics["M1b_first_scene_frame_s"] = t_frame - t_start
    t_audio, _ = find_first_line(log_lines, "Audio chunk:")
    if t_start is not None and t_audio is not None:
        metrics["M1c_first_audio_chunk_s"] = t_audio - t_start
    t_transcript, _ = find_first_line(log_lines, "faster_whisper: Processing audio")
    if t_start is not None and t_transcript is not None:
        metrics["M1d_first_transcript_s"] = t_transcript - t_start

    # M1: full startup == time to first transcript.
    if "M1d_first_transcript_s" in metrics:
        metrics["M1_full_startup_s"] = metrics["M1d_first_transcript_s"]

    # Going LIVE time
    t_live, _ = find_first_line(log_lines, "Going LIVE")
    if t_start is not None and t_live is not None:
        metrics["going_live_s"] = t_live - t_start

    # M5: audio extraction lag — how far each processed "Audio chunk" trails
    # wall clock. Negative lags (clock skew, midnight wrap) are discarded.
    audio_lags = []
    for line in log_lines:
        m = re.search(r"Audio chunk: \S+ \(([\d.]+)s → ([\d.]+)s, global ([\d.]+)s\)", line)
        if not m:
            continue
        end_time = float(m[2])
        log_t = parse_log_time(line)
        if log_t is not None and t_start is not None:
            lag = (log_t - t_start) - end_time
            if lag >= 0:
                audio_lags.append(lag)
    if audio_lags:
        metrics["M5_audio_lag_avg_s"] = round(sum(audio_lags) / len(audio_lags), 1)
        metrics["M5_audio_lag_max_s"] = round(max(audio_lags), 1)
        metrics["M5_audio_lag_min_s"] = round(min(audio_lags), 1)
        metrics["M5_audio_chunk_count"] = len(audio_lags)

    # M6: whisper processing durations — faster_whisper logs either MM:SS.mmm
    # or HH:MM:SS.mmm. The MM:SS pattern is $-anchored so an HH:MM:SS line
    # cannot half-match it (the extra ':' breaks the [\d.]+$ tail).
    transcript_durations = []
    for line in log_lines:
        m = re.search(r"faster_whisper: Processing audio with duration (\d+):([\d.]+)$", line)
        if m:
            transcript_durations.append(int(m[1]) * 60 + float(m[2]))
            continue
        m = re.search(r"faster_whisper: Processing audio with duration (\d+):(\d+):([\d.]+)", line)
        if m:
            transcript_durations.append(int(m[1]) * 3600 + int(m[2]) * 60 + float(m[3]))
    if transcript_durations:
        metrics["M6_whisper_processing_avg_s"] = round(
            sum(transcript_durations) / len(transcript_durations), 1)
        metrics["M6_transcript_count"] = len(transcript_durations)

    # M7: frame throughput
    if frames and session_duration and session_duration > 0:
        minutes = session_duration / 60
        metrics["M7_frame_throughput_per_min"] = round(len(frames) / minutes, 1)
        metrics["M7_total_frames"] = len(frames)

    # M9: recorder health.
    # NOTE(review): "Recorder died" → restarts and "Restarting recorder" →
    # rotations looks possibly swapped — confirm against the recorder's
    # actual log wording before relying on these two counts.
    metrics["M9_recorder_restarts"] = sum(1 for l in log_lines if "Recorder died" in l)
    metrics["M9_segment_rotations"] = sum(1 for l in log_lines if "Restarting recorder" in l)

    # Scene detection mode from explicit markers.
    if any("Scene detector: connecting" in l for l in log_lines):
        metrics["scene_mode"] = "rust_relay"
    elif any("Recorder+scene: pid=" in l for l in log_lines):
        metrics["scene_mode"] = "python_single_process"
    else:
        metrics["scene_mode"] = "unknown"

    # Transport mode — log markers first, then file-layout signature:
    # Rust writes audio.aac separately, Python muxes audio into fMP4.
    if any("Rust session dir" in l or "Attached to Rust session" in l for l in log_lines):
        metrics["transport"] = "rust"
    elif any("Recorder+scene: pid=" in l for l in log_lines):
        metrics["transport"] = "python"
    elif (session_dir / "stream" / "audio.aac").exists():
        metrics["transport"] = "rust"
    elif any("run_async:" in l for l in log_lines):
        metrics["transport"] = "python"
    else:
        metrics["transport"] = "unknown"

    # Scene-mode fallback: the generic python-transport marker also implies
    # the single-process scene pipeline. (The original additionally re-checked
    # "Recorder+scene: pid=" here, which was dead code — that marker already
    # resolves scene_mode above.)
    if metrics.get("scene_mode") == "unknown" and any("run_async:" in l for l in log_lines):
        metrics["scene_mode"] = "python_single_process"

    return metrics
def print_report(session_dir: Path, metrics: dict):
    """Log a human-readable table of the extracted benchmark metrics."""
    rule = "=" * 60
    log.info(rule)
    log.info(" CHT Benchmark Report")
    log.info(" Session: %s", session_dir.name)
    log.info(" Transport: %s", metrics.get("transport", "?"))
    log.info(" Scene mode: %s", metrics.get("scene_mode", "?"))
    log.info(" Duration: %ss", metrics.get("session_duration_s", "?"))
    log.info(rule)
    # (code, label, metrics key, unit) — one row per metric, in report order.
    rows = [
        ("M1", "Full startup", "M1_full_startup_s", "s"),
        ("M1a", " → first duration", "M1a_first_duration_s", "s"),
        ("M1b", " → first scene frame", "M1b_first_scene_frame_s", "s"),
        ("M1c", " → first audio chunk", "M1c_first_audio_chunk_s", "s"),
        ("M1d", " → first transcript", "M1d_first_transcript_s", "s"),
        ("", " → going live", "going_live_s", "s"),
        ("M5", "Audio lag (avg)", "M5_audio_lag_avg_s", "s"),
        ("M5", "Audio lag (max)", "M5_audio_lag_max_s", "s"),
        ("M5", "Audio chunks", "M5_audio_chunk_count", ""),
        ("M6", "Whisper processing (avg)", "M6_whisper_processing_avg_s", "s"),
        ("M6", "Transcripts produced", "M6_transcript_count", ""),
        ("M7", "Frame throughput", "M7_frame_throughput_per_min", "/min"),
        ("M7", "Total frames", "M7_total_frames", ""),
        ("M9", "Recorder restarts", "M9_recorder_restarts", ""),
        ("M9", "Segment rotations", "M9_segment_rotations", ""),
    ]
    for code, label, key, unit in rows:
        value = metrics.get(key)
        if value is None:
            log.info(" %4s %28s -", code, label)
        else:
            log.info(" %4s %28s %s%s", code, label, value, unit)
def compare_ground_truth(session_dir: Path, gt: dict) -> dict:
    """Compare detected scene frames against ground truth scene changes.

    For each expected scene timestamp, pairs it with the closest detected
    frame and reports signed deltas plus summary stats.
    """
    frames = load_frames_index(session_dir)
    gt_scenes = gt.get("scenes", [])
    if not frames or not gt_scenes:
        return {"error": "no frames or no ground truth scenes"}
    detected_ts = sorted(f["timestamp"] for f in frames)
    expected_ts = sorted(s["timestamp_s"] for s in gt_scenes)
    matches = []
    for exp_ts in expected_ts:
        # Nearest detected frame by absolute distance (first wins on ties,
        # matching scan order over the sorted list).
        nearest = min(detected_ts, key=lambda det: abs(det - exp_ts))
        matches.append({
            "expected_s": exp_ts,
            "detected_s": nearest,
            "delta_s": round(nearest - exp_ts, 3),
        })
    deltas = [m["delta_s"] for m in matches if m["delta_s"] is not None]
    return {
        "expected_scenes": len(expected_ts),
        "detected_frames": len(detected_ts),
        "matches": matches,
        "avg_delta_s": round(sum(deltas) / len(deltas), 3) if deltas else None,
        "max_delta_s": round(max(abs(d) for d in deltas), 3) if deltas else None,
        "missed": sum(1 for m in matches if m["delta_s"] is None or abs(m["delta_s"]) > 10),
    }
def print_ground_truth_report(gt: dict):
    """Log the scene-detection vs ground-truth comparison produced by
    compare_ground_truth()."""
    log.info("")
    log.info(" Scene detection vs ground truth:")
    log.info(" Expected scenes: %d", gt.get("expected_scenes", 0))
    log.info(" Detected frames: %d", gt.get("detected_frames", 0))
    if gt.get("avg_delta_s") is not None:
        log.info(" Avg detection delta: %ss", gt["avg_delta_s"])
        log.info(" Max detection delta: %ss", gt["max_delta_s"])
    if gt.get("missed", 0) > 0:
        log.warning(" Missed scenes: %d", gt["missed"])
    for m in gt.get("matches", []):
        status = "OK" if m["delta_s"] is not None and abs(m["delta_s"]) < 5 else "MISS"
        det = f"{m['detected_s']:.1f}s" if m["detected_s"] is not None else "---"
        # BUGFIX: was f"+{...:.1f}s", which printed "+-2.0s" for negative
        # deltas; the ":+" format spec emits the correct sign either way.
        delta = f"{m['delta_s']:+.1f}s" if m["delta_s"] is not None else ""
        log.info(" %4s expected=%5.1fs detected=%s %s", status, m["expected_s"], det, delta)
def find_latest_session() -> Path | None:
    """Most recent session dir (name-descending) that has telemetry.jsonl."""
    if not SESSIONS_DIR.exists():
        return None
    for candidate in sorted(SESSIONS_DIR.iterdir(), reverse=True):
        if candidate.is_dir() and (candidate / "telemetry.jsonl").exists():
            return candidate
    return None
def main():
    """CLI entry point: pick a session, extract metrics, print/save a report."""
    parser = argparse.ArgumentParser(description="CHT session benchmark")
    parser.add_argument("--session", type=Path, help="Path to session directory")
    parser.add_argument("--latest", action="store_true", help="Use most recent session")
    parser.add_argument("--json", action="store_true", help="Output JSON instead of table")
    parser.add_argument("--ground-truth", type=Path, help="Ground truth JSON for scene comparison")
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)-7s %(name)s: %(message)s",
        datefmt="%H:%M:%S",
    )

    # Resolve the session directory: --latest wins, then --session, else usage.
    if args.latest:
        session_dir = find_latest_session()
        if not session_dir:
            log.error("No sessions found")
            sys.exit(1)
    elif args.session:
        session_dir = args.session
    else:
        parser.print_help()
        sys.exit(1)
    if not session_dir.exists():
        log.error("Session not found: %s", session_dir)
        sys.exit(1)

    metrics = extract_metrics(session_dir)
    metrics["session_id"] = session_dir.name

    # Optional scene-detection accuracy comparison.
    if args.ground_truth and args.ground_truth.exists():
        gt = json.loads(args.ground_truth.read_text())
        metrics["ground_truth"] = compare_ground_truth(session_dir, gt)

    if args.json:
        sys.stdout.write(json.dumps(metrics, indent=2) + "\n")
    else:
        print_report(session_dir, metrics)
        if "ground_truth" in metrics:
            print_ground_truth_report(metrics["ground_truth"])

    # Persist the report for later cross-session comparison.
    bench_dir = DATA_DIR / "bench"
    bench_dir.mkdir(parents=True, exist_ok=True)
    (bench_dir / f"{session_dir.name}.json").write_text(json.dumps(metrics, indent=2))


if __name__ == "__main__":
    main()

164
ctrl/bench_delay.py Normal file
View File

@@ -0,0 +1,164 @@
#!/usr/bin/env python3
"""Live scene detection latency benchmark (M4).
Measures time from a triggered visual change on the sender to a new JPEG
appearing in the receiver's frames/ directory.
Usage (run on receiver, sender accessible via SSH):
python ctrl/bench_delay.py --session-dir data/sessions/CURRENT --sender mariano@sender
python ctrl/bench_delay.py --frames-dir data/sessions/CURRENT/frames --sender mariano@sender
How it works:
1. Records the current frame count in frames/index.json
2. SSH to sender, triggers a visual change (xdotool workspace switch)
3. Polls frames/index.json for a new entry (or watches via mtime)
4. Measures wall-clock difference = scene detection latency
For repeated measurements, use --repeat N with --interval S between triggers.
"""
import argparse
import json
import logging
import os
import subprocess
import sys
import time
from pathlib import Path
log = logging.getLogger("bench_delay")
def get_frame_count(frames_dir: Path) -> int:
    """Number of entries in frames/index.json (0 when missing or invalid)."""
    index = frames_dir / "index.json"
    try:
        entries = json.loads(index.read_text())
    except (FileNotFoundError, json.JSONDecodeError, ValueError):
        return 0
    return len(entries)
def get_latest_frame_mtime(frames_dir: Path) -> float:
    """Modification time of frames/index.json, or 0.0 if it does not exist."""
    index = frames_dir / "index.json"
    return index.stat().st_mtime if index.exists() else 0.0
def trigger_scene_change(sender: str, method: str = "workspace") -> float:
    """Trigger a visual change on the sender machine via SSH.

    method "workspace" presses super+Right with xdotool (full-screen change);
    "color" flashes a fullscreen red xterm for 0.5s. Returns the wall-clock
    time at which the trigger command was issued (even if SSH times out, so
    a latency measurement can still be attempted). Exits the process on an
    unknown method.
    """
    if method == "workspace":
        # xdotool switch workspace — causes a full-screen visual change
        cmd = ["ssh", sender, "DISPLAY=:0 xdotool key super+Right"]
    elif method == "color":
        # Flash a fullscreen color using xterm (more dramatic change)
        cmd = ["ssh", sender,
               "DISPLAY=:0 bash -c 'xterm -fullscreen -bg red -e sleep 0.5 &'"]
    else:
        log.error("Unknown trigger method: %s", method)
        sys.exit(1)
    # BUGFIX: removed unused `t = time.monotonic()` local — only the
    # wall-clock stamp is ever used by callers.
    wall = time.time()
    try:
        subprocess.run(cmd, timeout=5, capture_output=True)
    except subprocess.TimeoutExpired:
        log.warning("SSH trigger timed out")
    return wall
def wait_for_new_frame(frames_dir: Path, initial_count: int,
                       timeout: float = 15.0, poll_interval: float = 0.1) -> float | None:
    """Poll frames/index.json until it grows beyond *initial_count*.

    Returns the wall-clock time at which the new frame was observed, or
    None if *timeout* seconds elapse first.
    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if get_frame_count(frames_dir) > initial_count:
            return time.time()
        time.sleep(poll_interval)
    return None
def run_measurement(frames_dir: Path, sender: str, method: str) -> dict:
    """One trigger→detect cycle; latency_s is None when detection times out."""
    baseline = get_frame_count(frames_dir)
    trigger_wall = trigger_scene_change(sender, method)
    detected_wall = wait_for_new_frame(frames_dir, baseline)
    if detected_wall is None:
        return {"trigger_wall": trigger_wall, "latency_s": None, "timed_out": True}
    return {
        "trigger_wall": trigger_wall,
        "detected_wall": detected_wall,
        "latency_s": round(detected_wall - trigger_wall, 3),
        "timed_out": False,
    }
def main():
    """CLI entry point: repeat trigger/detect measurements and summarize."""
    parser = argparse.ArgumentParser(description="Scene detection latency benchmark")
    parser.add_argument("--frames-dir", type=Path, help="Path to frames/ directory")
    parser.add_argument("--session-dir", type=Path, help="Path to session directory")
    parser.add_argument("--sender", required=True, help="SSH target for sender (user@host)")
    parser.add_argument("--method", default="workspace", choices=["workspace", "color"],
                        help="How to trigger visual change")
    parser.add_argument("--repeat", type=int, default=3, help="Number of measurements")
    parser.add_argument("--interval", type=float, default=5.0, help="Seconds between triggers")
    parser.add_argument("--json", action="store_true", help="Output JSON")
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)-7s %(name)s: %(message)s",
        datefmt="%H:%M:%S",
    )

    # --session-dir implies <session>/frames; otherwise take --frames-dir.
    if args.session_dir:
        frames_dir = args.session_dir / "frames"
    elif args.frames_dir:
        frames_dir = args.frames_dir
    else:
        parser.error("Provide --frames-dir or --session-dir")
        return
    if not frames_dir.exists():
        log.error("Frames dir not found: %s", frames_dir)
        sys.exit(1)

    results = []
    for attempt in range(args.repeat):
        if attempt:
            time.sleep(args.interval)
        log.info("Trigger %d/%d...", attempt + 1, args.repeat)
        measurement = run_measurement(frames_dir, args.sender, args.method)
        if measurement["timed_out"]:
            log.warning("TIMEOUT (no frame in 15s)")
        else:
            log.info(" latency: %ss", measurement["latency_s"])
        results.append(measurement)

    latencies = [r["latency_s"] for r in results if r["latency_s"] is not None]
    if args.json:
        summary = {
            "count": len(latencies),
            "avg_s": round(sum(latencies) / len(latencies), 3) if latencies else None,
            "min_s": round(min(latencies), 3) if latencies else None,
            "max_s": round(max(latencies), 3) if latencies else None,
            "timeouts": sum(1 for r in results if r["timed_out"]),
        }
        print(json.dumps({"measurements": results, "summary": summary}, indent=2))
    else:
        log.info("M4 Scene detection latency:")
        if latencies:
            log.info(" avg: %.1fs", sum(latencies) / len(latencies))
            log.info(" min: %.1fs", min(latencies))
            log.info(" max: %.1fs", max(latencies))
        timeouts = sum(1 for r in results if r["timed_out"])
        if timeouts:
            log.warning(" timeouts: %d/%d", timeouts, len(results))


if __name__ == "__main__":
    main()

25
ctrl/client.sh Executable file
View File

@@ -0,0 +1,25 @@
#!/bin/bash
# Start the client (sender) — Python or Rust mode.
#
# Usage:
# ctrl/client.sh --python [RECEIVER_IP] [PORT] # kmsgrab + mpegts (default port 4444)
# ctrl/client.sh --rust [server_addr] # Rust framed protocol (default mcrndeb:4447)
#
# Default: --python
set -euo pipefail
# Project root = parent of this ctrl/ directory.
PROJECT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
MODE="python"
# Parse mode flag — optional first argument; remaining args pass through
# unchanged to whichever client is exec'd below.
if [[ "${1:-}" == "--python" ]]; then
MODE="python"; shift
elif [[ "${1:-}" == "--rust" ]]; then
MODE="rust"; shift
fi
if [ "$MODE" = "rust" ]; then
# Rust transport: delegate to the media server's own client wrapper.
exec "$PROJECT_DIR/media/ctrl/client.sh" "$@"
else
# Python transport: kmsgrab capture requires root, hence sudo.
exec sudo python3 "$PROJECT_DIR/sender/stream_av.py" "$@"
fi

116
ctrl/e2e_test.sh Executable file
View File

@@ -0,0 +1,116 @@
#!/bin/bash
# E2E benchmark test — fully automated, run from the SENDER machine.
#
# Starts everything via SSH, captures test video, collects results.
#
# Usage:
# ctrl/e2e_test.sh --python [--duration 30]
# ctrl/e2e_test.sh --rust [--duration 30]
set -euo pipefail
PROJECT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
cd "$PROJECT_DIR"
# Defaults — overridable via flags below.
DURATION=30
INTERVAL=5
RECEIVER="mcrndeb"
RDIR="wdir/cht"
MODE="python"
PLAY_DELAY=3
while [[ $# -gt 0 ]]; do
case $1 in
--python) MODE="python"; shift ;;
--rust) MODE="rust"; shift ;;
--duration) DURATION="$2"; shift 2 ;;
--interval) INTERVAL="$2"; shift 2 ;;
--receiver) RECEIVER="$2"; shift 2 ;;
--play-delay) PLAY_DELAY="$2"; shift 2 ;;
*) echo "Unknown arg: $1"; exit 1 ;;
esac
done
VIDEO="tests/fixtures/test_scene_${DURATION}s.mp4"
GROUND_TRUTH="tests/fixtures/test_scene_${DURATION}s_ground_truth.json"
# Generate test video if needed
if [ ! -f "$VIDEO" ]; then
echo "=== Generating test video ==="
python3 ctrl/gen_test_video.py --duration "$DURATION" --interval "$INTERVAL"
fi
# PIDs to clean up — every background job started below is appended here so
# the EXIT trap can tear everything down even on failure/interrupt.
PIDS=()
cleanup() {
echo "=== Cleaning up ==="
for pid in "${PIDS[@]}"; do
kill "$pid" 2>/dev/null || true
done
# Kill sudo'd client
sudo pkill -f "stream_av\|cht-client" 2>/dev/null || true
# Stop remote processes
ssh "$RECEIVER" "pkill -f 'cht-server|cht.app' 2>/dev/null" || true
wait 2>/dev/null
}
trap cleanup EXIT INT TERM
echo "=== E2E test: $MODE mode, ${DURATION}s ==="
# Step 1: Start receiver side (on mcrndeb via SSH).
# -tt forces a TTY so the remote process dies when the ssh job is killed.
if [ "$MODE" = "rust" ]; then
echo "--- Starting Rust server on $RECEIVER ---"
ssh -tt "$RECEIVER" "cd $RDIR && ctrl/server.sh" &
PIDS+=($!)
sleep 2
echo "--- Starting app on $RECEIVER (rust transport, auto-connect) ---"
ssh -tt "$RECEIVER" "cd $RDIR && CHT_AUTO_CONNECT=1 CHT_RUST_TRANSPORT=1 ctrl/app.sh" &
PIDS+=($!)
sleep 3
else
echo "--- Starting app on $RECEIVER (python transport, auto-connect) ---"
ssh -tt "$RECEIVER" "cd $RDIR && CHT_AUTO_CONNECT=1 CHT_RUST_TRANSPORT=0 ctrl/app.sh" &
PIDS+=($!)
sleep 3
fi
# Step 2: Play test video fullscreen on sender (kmsgrab will capture it)
echo "--- Playing test video fullscreen ---"
mpv --fullscreen --loop-file=inf --no-terminal "$VIDEO" &
PIDS+=($!)
sleep "$PLAY_DELAY"
# Step 3: Start client (sender)
echo "--- Starting $MODE client → $RECEIVER ---"
if [ "$MODE" = "rust" ]; then
ctrl/client.sh --rust "${RECEIVER}:4447" &
else
ctrl/client.sh --python "$RECEIVER" &
fi
PIDS+=($!)
# Step 4: Wait for capture + processing (extra 15s for transcription to drain)
WAIT=$(( DURATION + 15 ))
echo "--- Waiting ${WAIT}s for capture + processing ---"
sleep "$WAIT"
# Step 5: Stop sender side (client first so the receiver sees a clean EOF)
echo "--- Stopping sender ---"
sudo pkill -f "stream_av\|cht-client" 2>/dev/null || true
kill "${PIDS[-1]}" 2>/dev/null || true # mpv
sleep 2
# Step 6: Stop receiver side — app before server so the session is finalized
echo "--- Stopping receiver ---"
ssh "$RECEIVER" "pkill -f 'cht.app' 2>/dev/null" || true
sleep 2
ssh "$RECEIVER" "pkill -f 'cht-server' 2>/dev/null" || true
sleep 1
# Step 7: Benchmark the finished session on the receiver
echo ""
echo "=== Benchmark results ($MODE) ==="
ssh "$RECEIVER" "cd $RDIR && python3 ctrl/bench.py --latest --ground-truth $GROUND_TRUTH"
echo ""
echo "JSON: ssh $RECEIVER 'cd $RDIR && python3 ctrl/bench.py --latest --json --ground-truth $GROUND_TRUTH'"

148
ctrl/gen_test_video.py Executable file
View File

@@ -0,0 +1,148 @@
#!/usr/bin/env python3
"""Generate a test video with known scene changes and audio markers.
The video is played fullscreen on the sender while kmsgrab captures it —
simulating a meeting with deterministic, reproducible content.
- Scene changes: solid color blocks every INTERVAL seconds
- Overlay: large elapsed-seconds counter for visual sync measurement
- Audio: sine tone (changes frequency each scene for transcription ground truth)
Outputs:
tests/fixtures/test_scene_30s.mp4
tests/fixtures/test_scene_30s_ground_truth.json
Usage:
python ctrl/gen_test_video.py [--duration 30] [--interval 5]
"""
import argparse
import json
import logging
import subprocess
import sys
from pathlib import Path
log = logging.getLogger("gen_test_video")
PROJECT_DIR = Path(__file__).resolve().parent.parent
FIXTURES_DIR = PROJECT_DIR / "tests" / "fixtures"
# Scene colors (RGB hex)
COLORS = ["FF0000", "0000FF", "00FF00", "FFFF00", "FF00FF", "00FFFF"]
# Speech sample (Harvard sentences, public domain, Open Speech Repository)
SPEECH_SAMPLE = FIXTURES_DIR / "test_speech_harvard.wav"
SPEECH_URL = "http://www.voiptroubleshooter.com/open_speech/american/OSR_us_000_0010_8k.wav"
def ensure_speech_sample():
    """Download the Harvard-sentences speech sample into fixtures/ if absent."""
    FIXTURES_DIR.mkdir(parents=True, exist_ok=True)
    if SPEECH_SAMPLE.exists():
        return
    import urllib.request
    log.info("Downloading speech sample from Open Speech Repository...")
    urllib.request.urlretrieve(SPEECH_URL, SPEECH_SAMPLE)
    log.info("Saved: %s", SPEECH_SAMPLE)
def generate(duration: int, interval: int):
    """Render the test video and its ground-truth JSON into tests/fixtures/.

    Video: duration//interval solid-color segments with a large elapsed-seconds
    counter burned in. Audio: the Harvard speech sample looped and trimmed to
    *duration* (real speech, so whisper has something to transcribe).
    Returns (video_path, truth_path); exits the process if ffmpeg fails.
    """
    FIXTURES_DIR.mkdir(parents=True, exist_ok=True)
    ensure_speech_sample()
    video_path = FIXTURES_DIR / f"test_scene_{duration}s.mp4"
    truth_path = FIXTURES_DIR / f"test_scene_{duration}s_ground_truth.json"
    num_scenes = duration // interval
    nc = len(COLORS)

    # One color=... source per scene; drawtext offsets the timer so it shows
    # absolute elapsed seconds, then all segments concat into [vout].
    filter_parts = []
    for i in range(num_scenes):
        color = COLORS[i % nc]
        # Last segment may be shorter when interval does not divide duration.
        seg_dur = interval if (i + 1) * interval <= duration else duration - i * interval
        offset = i * interval
        filter_parts.append(
            f"color=c=0x{color}:s=1920x1080:d={seg_dur}:r=30,"
            f"drawtext=text='%{{eif\\:t+{offset}\\:d}}s':"
            f"fontsize=200:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:"
            f"borderw=6:bordercolor=black"
            f"[v{i}]"
        )
    v_inputs = "".join(f"[v{i}]" for i in range(num_scenes))
    filter_parts.append(f"{v_inputs}concat=n={num_scenes}:v=1:a=0[vout]")
    # Audio: speech sample is input 1 (input 0 is the lavfi dummy)
    filter_parts.append(
        f"[1:a]aresample=48000,aloop=loop=-1:size=48000*{duration},"
        f"atrim=0:{duration},volume=0.8[aout]"
    )
    filter_complex = ";\n".join(filter_parts)

    # BUGFIX: -hide_banner/-loglevel were placed AFTER the output file, where
    # ffmpeg treats them as trailing options and ignores them; global options
    # must precede the inputs. Also removed a dead no-op list comprehension
    # that expanded to nothing.
    cmd = [
        "ffmpeg", "-hide_banner", "-loglevel", "warning", "-y",
        "-f", "lavfi", "-i", "anullsrc",  # dummy input 0 (video comes from the filter graph)
        "-i", str(SPEECH_SAMPLE),         # speech audio input, referenced as [1:a]
        "-filter_complex", filter_complex,
        "-map", "[vout]", "-map", "[aout]",
        "-c:v", "libx264", "-preset", "ultrafast", "-crf", "18",
        "-g", "30", "-keyint_min", "30",  # keyframe every second at 30 fps
        "-c:a", "aac", "-b:a", "128k",
        "-t", str(duration),
        str(video_path),
    ]
    log.info("Generating %ds test video (%d scenes, %ds interval, speech audio)", duration, num_scenes, interval)
    log.info("Output: %s", video_path)
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        log.error("ffmpeg failed:\n%s", result.stderr)
        sys.exit(1)
    log.info("Video generated: %s", video_path)

    # Ground truth: one entry per scene with its start time and fill color.
    scenes = [
        {"scene_index": i, "timestamp_s": i * interval, "color_hex": COLORS[i % nc]}
        for i in range(num_scenes)
    ]
    truth = {
        "duration_s": duration,
        "interval_s": interval,
        "num_scenes": num_scenes,
        "video_path": str(video_path),
        "scenes": scenes,
    }
    truth_path.write_text(json.dumps(truth, indent=2))
    log.info("Ground truth: %s", truth_path)
    return video_path, truth_path
def main():
    """CLI entry point: configure logging, parse args, render the fixture."""
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)-7s %(name)s: %(message)s",
        datefmt="%H:%M:%S",
    )
    parser = argparse.ArgumentParser(description="Generate CHT test video")
    parser.add_argument("--duration", type=int, default=30, help="Video duration in seconds")
    parser.add_argument("--interval", type=int, default=5, help="Seconds between scene changes")
    options = parser.parse_args()
    generate(options.duration, options.interval)


if __name__ == "__main__":
    main()

View File

@@ -1,8 +0,0 @@
#!/bin/bash
# Start the sender on this machine
# Usage: ./sender.sh RECEIVER_IP [PORT]
set -euo pipefail
PROJECT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
exec sudo "$PROJECT_DIR/sender/stream_av.sh" "$@"

8
ctrl/server.sh Executable file
View File

@@ -0,0 +1,8 @@
#!/bin/bash
# Start the Rust media server (receiver).
# Thin wrapper around media/ctrl/server.sh.
# Usage: ctrl/server.sh [port]
set -euo pipefail
# Project root = parent of this ctrl/ directory; all args pass through.
PROJECT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
exec "$PROJECT_DIR/media/ctrl/server.sh" "$@"

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,38 @@
{
"duration_s": 30,
"interval_s": 5,
"num_scenes": 6,
"video_path": "/home/mariano/wdir/cht/tests/fixtures/test_scene_30s.mp4",
"scenes": [
{
"scene_index": 0,
"timestamp_s": 0,
"color_hex": "FF0000"
},
{
"scene_index": 1,
"timestamp_s": 5,
"color_hex": "0000FF"
},
{
"scene_index": 2,
"timestamp_s": 10,
"color_hex": "00FF00"
},
{
"scene_index": 3,
"timestamp_s": 15,
"color_hex": "FFFF00"
},
{
"scene_index": 4,
"timestamp_s": 20,
"color_hex": "FF00FF"
},
{
"scene_index": 5,
"timestamp_s": 25,
"color_hex": "00FFFF"
}
]
}

1
tests/fixtures/test_speech.ogg vendored Normal file
View File

@@ -0,0 +1 @@
File not found: /v1/AUTH_mw/wikipedia-commons-local-public.22/2/22/En-us-United_States_Constitution-Article_1-Section_1.ogg