proper tests
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -7,3 +7,5 @@ __pycache__/
|
|||||||
.pytest_cache/
|
.pytest_cache/
|
||||||
media/target/
|
media/target/
|
||||||
media/logs/
|
media/logs/
|
||||||
|
tests/fixtures/*.mp4
|
||||||
|
tests/fixtures/*.wav
|
||||||
|
|||||||
21
cht/app.py
21
cht/app.py
@@ -1,3 +1,4 @@
|
|||||||
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import signal
|
import signal
|
||||||
@@ -55,6 +56,11 @@ class ChtApp(Adw.Application):
|
|||||||
win = ChtWindow(application=self)
|
win = ChtWindow(application=self)
|
||||||
win.present()
|
win.present()
|
||||||
|
|
||||||
|
# Auto-connect for E2E testing: --auto-connect
|
||||||
|
# Delay gives the GUI time to fully render before starting the stream.
|
||||||
|
if os.environ.get("_CHT_AUTO_CONNECT") == "1" and not win._lifecycle.is_streaming:
|
||||||
|
GLib.timeout_add(2000, lambda: win._on_connect_clicked(None) or False)
|
||||||
|
|
||||||
|
|
||||||
_STDERR_SKIP = [b"eglExportDMABUFImage"]
|
_STDERR_SKIP = [b"eglExportDMABUFImage"]
|
||||||
|
|
||||||
@@ -89,6 +95,16 @@ def _filter_stderr():
|
|||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
parser = argparse.ArgumentParser(description="CHT — Stream Viewer + Agent")
|
||||||
|
parser.add_argument("--auto-connect", action="store_true", help="Connect on startup")
|
||||||
|
parser.add_argument("--python", action="store_true", help="Use Python transport (default)")
|
||||||
|
parser.add_argument("--rust", action="store_true", help="Use Rust transport")
|
||||||
|
args, gtk_args = parser.parse_known_args()
|
||||||
|
|
||||||
|
# Store parsed options so do_activate can read them
|
||||||
|
os.environ["_CHT_AUTO_CONNECT"] = "1" if args.auto_connect else "0"
|
||||||
|
os.environ["_CHT_RUST_TRANSPORT"] = "1" if args.rust else "0"
|
||||||
|
|
||||||
_filter_stderr()
|
_filter_stderr()
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
level=logging.DEBUG,
|
level=logging.DEBUG,
|
||||||
@@ -96,9 +112,10 @@ def main():
|
|||||||
datefmt="%H:%M:%S",
|
datefmt="%H:%M:%S",
|
||||||
)
|
)
|
||||||
log = logging.getLogger("cht")
|
log = logging.getLogger("cht")
|
||||||
log.info("CHT starting")
|
log.info("CHT starting (transport=%s, auto_connect=%s)",
|
||||||
|
"rust" if args.rust else "python", args.auto_connect)
|
||||||
app = ChtApp()
|
app = ChtApp()
|
||||||
return app.run(sys.argv)
|
return app.run([sys.argv[0]] + gtk_args)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
"""Stream lifecycle — manages recording, scene detection, audio extraction, and transcription buffering."""
|
"""Stream lifecycle — manages recording, scene detection, audio extraction, and transcription buffering."""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import time
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
|
|
||||||
from gi.repository import GLib
|
from gi.repository import GLib
|
||||||
@@ -66,6 +67,7 @@ class StreamLifecycle:
|
|||||||
"""
|
"""
|
||||||
self._streaming = True
|
self._streaming = True
|
||||||
self._gone_live = False
|
self._gone_live = False
|
||||||
|
self._start_monotonic = time.monotonic()
|
||||||
self._rust_transport = rust_transport
|
self._rust_transport = rust_transport
|
||||||
|
|
||||||
if rust_transport:
|
if rust_transport:
|
||||||
@@ -177,13 +179,16 @@ class StreamLifecycle:
|
|||||||
GLib.idle_add(self._go_live_once)
|
GLib.idle_add(self._go_live_once)
|
||||||
if self._stream_mgr:
|
if self._stream_mgr:
|
||||||
self._stream_mgr.capture_now(on_new_frames=self._handle_new_scene_frames)
|
self._stream_mgr.capture_now(on_new_frames=self._handle_new_scene_frames)
|
||||||
if self._stream_mgr:
|
|
||||||
self._stream_mgr.capture_now(on_new_frames=self._handle_new_scene_frames)
|
|
||||||
|
|
||||||
def _go_live_once(self):
|
def _go_live_once(self):
|
||||||
if self._stream_mgr:
|
if self._stream_mgr:
|
||||||
|
elapsed = time.monotonic() - self._start_monotonic
|
||||||
log.info("Going LIVE (startup delay elapsed)")
|
log.info("Going LIVE (startup delay elapsed)")
|
||||||
self._timeline.go_live()
|
self._timeline.go_live()
|
||||||
|
if self._stream_mgr.telemetry:
|
||||||
|
self._stream_mgr.telemetry.metric("first_live", {
|
||||||
|
"elapsed_s": round(elapsed, 2),
|
||||||
|
})
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _tick_live(self):
|
def _tick_live(self):
|
||||||
|
|||||||
@@ -58,7 +58,7 @@ class StreamManager:
|
|||||||
self.agent_dir = self.session_dir / "agent"
|
self.agent_dir = self.session_dir / "agent"
|
||||||
|
|
||||||
self.readonly = False
|
self.readonly = False
|
||||||
self.telemetry = None
|
self._telemetry = None
|
||||||
|
|
||||||
self.recorder = StreamRecorder(self.session_dir)
|
self.recorder = StreamRecorder(self.session_dir)
|
||||||
self.processor = SessionProcessor(self.session_dir)
|
self.processor = SessionProcessor(self.session_dir)
|
||||||
@@ -88,7 +88,7 @@ class StreamManager:
|
|||||||
mgr.audio_dir = session_dir / "audio"
|
mgr.audio_dir = session_dir / "audio"
|
||||||
mgr.agent_dir = session_dir / "agent"
|
mgr.agent_dir = session_dir / "agent"
|
||||||
mgr.readonly = False
|
mgr.readonly = False
|
||||||
mgr.telemetry = None
|
mgr._telemetry = None
|
||||||
|
|
||||||
# No recorder — Rust server owns transport + recording.
|
# No recorder — Rust server owns transport + recording.
|
||||||
mgr.recorder = None
|
mgr.recorder = None
|
||||||
@@ -121,6 +121,16 @@ class StreamManager:
|
|||||||
session_id, len(mgr.recorder.recording_segments), mgr.frame_count)
|
session_id, len(mgr.recorder.recording_segments), mgr.frame_count)
|
||||||
return mgr
|
return mgr
|
||||||
|
|
||||||
|
@property
|
||||||
|
def telemetry(self):
|
||||||
|
return self._telemetry
|
||||||
|
|
||||||
|
@telemetry.setter
|
||||||
|
def telemetry(self, val):
|
||||||
|
self._telemetry = val
|
||||||
|
if self.processor:
|
||||||
|
self.processor._telemetry = val
|
||||||
|
|
||||||
# -- Recorder delegation --
|
# -- Recorder delegation --
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|||||||
@@ -51,6 +51,7 @@ class SessionProcessor:
|
|||||||
|
|
||||||
self._get_recording_path = None
|
self._get_recording_path = None
|
||||||
self._get_current_global_offset = None
|
self._get_current_global_offset = None
|
||||||
|
self._telemetry = None
|
||||||
|
|
||||||
def attach(self, get_recording_path, get_current_global_offset):
|
def attach(self, get_recording_path, get_current_global_offset):
|
||||||
"""Wire up callbacks to query the recorder's current state."""
|
"""Wire up callbacks to query the recorder's current state."""
|
||||||
@@ -75,6 +76,11 @@ class SessionProcessor:
|
|||||||
self._append_frame_index(entry)
|
self._append_frame_index(entry)
|
||||||
log.info("Scene frame: %s at %.1fs", frame_id, global_ts)
|
log.info("Scene frame: %s at %.1fs", frame_id, global_ts)
|
||||||
|
|
||||||
|
if self._telemetry:
|
||||||
|
self._telemetry.metric("scene_frame", {
|
||||||
|
"id": frame_id, "global_ts": global_ts,
|
||||||
|
})
|
||||||
|
|
||||||
if self._on_new_frames:
|
if self._on_new_frames:
|
||||||
self._on_new_frames([entry])
|
self._on_new_frames([entry])
|
||||||
|
|
||||||
@@ -465,6 +471,15 @@ class SessionProcessor:
|
|||||||
if safe_duration is None or safe_duration <= 0:
|
if safe_duration is None or safe_duration <= 0:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Raw AAC files (from Rust server) have no reliable duration metadata.
|
||||||
|
# ffprobe wildly overestimates (e.g. 1569s for a 50s session).
|
||||||
|
# Cap to wall-clock elapsed time as a sanity bound.
|
||||||
|
wall_elapsed = self._wall_clock_offset()
|
||||||
|
if wall_elapsed > 0 and safe_duration > wall_elapsed * 1.5:
|
||||||
|
log.debug("Audio: capping probed duration %.1fs to wall-clock %.1fs",
|
||||||
|
safe_duration, wall_elapsed)
|
||||||
|
safe_duration = wall_elapsed
|
||||||
|
|
||||||
# Fail-safe: processed_time can accumulate past the file if the
|
# Fail-safe: processed_time can accumulate past the file if the
|
||||||
# source was recreated (e.g. server restarted same session).
|
# source was recreated (e.g. server restarted same session).
|
||||||
if processed_time > safe_duration:
|
if processed_time > safe_duration:
|
||||||
@@ -495,6 +510,11 @@ class SessionProcessor:
|
|||||||
global_start = processed_time + offset
|
global_start = processed_time + offset
|
||||||
log.info("Audio chunk: %s (%.1fs → %.1fs, global %.1fs)",
|
log.info("Audio chunk: %s (%.1fs → %.1fs, global %.1fs)",
|
||||||
wav_path.name, processed_time, process_to, global_start)
|
wav_path.name, processed_time, process_to, global_start)
|
||||||
|
if self._telemetry:
|
||||||
|
self._telemetry.metric("audio_chunk", {
|
||||||
|
"chunk": chunk_num, "start": processed_time,
|
||||||
|
"end": process_to, "global_start": global_start,
|
||||||
|
})
|
||||||
if self._on_new_audio:
|
if self._on_new_audio:
|
||||||
try:
|
try:
|
||||||
self._on_new_audio(
|
self._on_new_audio(
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
"""Main application window — wires Timeline to all components."""
|
"""Main application window — wires Timeline to all components."""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import gi
|
import gi
|
||||||
@@ -301,7 +302,8 @@ class ChtWindow(Adw.ApplicationWindow):
|
|||||||
self._connect_btn.remove_css_class("suggested-action")
|
self._connect_btn.remove_css_class("suggested-action")
|
||||||
self._connect_btn.add_css_class("destructive-action")
|
self._connect_btn.add_css_class("destructive-action")
|
||||||
|
|
||||||
mgr = self._lifecycle.start(session_id=session_id, rust_transport=True)
|
rust = os.environ.get("_CHT_RUST_TRANSPORT", "0") == "1"
|
||||||
|
mgr = self._lifecycle.start(session_id=session_id, rust_transport=rust)
|
||||||
if mgr is None:
|
if mgr is None:
|
||||||
log.error("Failed to start stream — no cht-server session found")
|
log.error("Failed to start stream — no cht-server session found")
|
||||||
self._connect_btn.set_label("Connect")
|
self._connect_btn.set_label("Connect")
|
||||||
|
|||||||
372
ctrl/bench.py
Normal file
372
ctrl/bench.py
Normal file
@@ -0,0 +1,372 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Post-hoc session benchmark — extract timing metrics from session logs.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python ctrl/bench.py --session data/sessions/20260410_160441
|
||||||
|
python ctrl/bench.py --latest # pick most recent session
|
||||||
|
python ctrl/bench.py --latest --json # machine-readable output
|
||||||
|
|
||||||
|
Parses telemetry.jsonl, session.log, frames/index.json, and (if present)
|
||||||
|
media/logs/server.log. No live session required — works on finished sessions.
|
||||||
|
|
||||||
|
Metrics:
|
||||||
|
M1 Full startup Connect → first transcript
|
||||||
|
M1a → first duration update
|
||||||
|
M1b → first scene frame
|
||||||
|
M1c → first audio chunk
|
||||||
|
M1d → first transcript
|
||||||
|
M5 Audio lag How far audio extraction trails real-time
|
||||||
|
M6 Transcript lag Time from audio ready to transcript done
|
||||||
|
M7 Frame throughput Scene frames per minute
|
||||||
|
M9 Recorder health Unexpected restarts / segment rotations
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
log = logging.getLogger("bench")
|
||||||
|
|
||||||
|
PROJECT_DIR = Path(__file__).resolve().parent.parent
|
||||||
|
DATA_DIR = PROJECT_DIR / "data"
|
||||||
|
SESSIONS_DIR = DATA_DIR / "sessions"
|
||||||
|
|
||||||
|
|
||||||
|
def parse_log_time(line: str) -> float | None:
|
||||||
|
"""Parse HH:MM:SS from session.log line → seconds since midnight."""
|
||||||
|
m = re.match(r"(\d{2}):(\d{2}):(\d{2})", line)
|
||||||
|
if m:
|
||||||
|
return int(m[1]) * 3600 + int(m[2]) * 60 + int(m[3])
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def load_telemetry(session_dir: Path) -> list[dict]:
|
||||||
|
path = session_dir / "telemetry.jsonl"
|
||||||
|
if not path.exists():
|
||||||
|
return []
|
||||||
|
entries = []
|
||||||
|
for line in path.read_text().splitlines():
|
||||||
|
if line.strip():
|
||||||
|
try:
|
||||||
|
entries.append(json.loads(line))
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
pass
|
||||||
|
return entries
|
||||||
|
|
||||||
|
|
||||||
|
def load_session_log(session_dir: Path) -> list[str]:
|
||||||
|
path = session_dir / "session.log"
|
||||||
|
return path.read_text().splitlines() if path.exists() else []
|
||||||
|
|
||||||
|
|
||||||
|
def load_frames_index(session_dir: Path) -> list[dict]:
|
||||||
|
path = session_dir / "frames" / "index.json"
|
||||||
|
if not path.exists():
|
||||||
|
return []
|
||||||
|
try:
|
||||||
|
return json.loads(path.read_text())
|
||||||
|
except (json.JSONDecodeError, ValueError):
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def find_first_line(lines: list[str], pattern: str) -> tuple[float | None, str | None]:
|
||||||
|
"""Find first line matching pattern. Returns (time_seconds, full_line)."""
|
||||||
|
for line in lines:
|
||||||
|
if pattern in line:
|
||||||
|
return parse_log_time(line), line
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_metrics(session_dir: Path) -> dict:
|
||||||
|
tel = load_telemetry(session_dir)
|
||||||
|
log_lines = load_session_log(session_dir)
|
||||||
|
frames = load_frames_index(session_dir)
|
||||||
|
|
||||||
|
metrics = {}
|
||||||
|
|
||||||
|
# Session start time (from telemetry t=0 wall clock, or first log line)
|
||||||
|
session_start_t = None
|
||||||
|
for entry in tel:
|
||||||
|
if entry.get("name") == "session_start":
|
||||||
|
session_start_t = parse_log_time(log_lines[0]) if log_lines else None
|
||||||
|
break
|
||||||
|
|
||||||
|
# Session end
|
||||||
|
session_duration = None
|
||||||
|
for entry in tel:
|
||||||
|
if entry.get("name") == "session_end":
|
||||||
|
session_duration = entry.get("t")
|
||||||
|
metrics["session_duration_s"] = session_duration
|
||||||
|
|
||||||
|
# M1a: start → first duration update
|
||||||
|
t_start = session_start_t
|
||||||
|
t_duration, _ = find_first_line(log_lines, "Duration:")
|
||||||
|
if t_start is not None and t_duration is not None:
|
||||||
|
metrics["M1a_first_duration_s"] = t_duration - t_start
|
||||||
|
|
||||||
|
# M1b: start → first scene frame
|
||||||
|
t_frame, _ = find_first_line(log_lines, "Scene frame:")
|
||||||
|
if t_start is not None and t_frame is not None:
|
||||||
|
metrics["M1b_first_scene_frame_s"] = t_frame - t_start
|
||||||
|
|
||||||
|
# M1c: start → first audio chunk
|
||||||
|
t_audio, _ = find_first_line(log_lines, "Audio chunk:")
|
||||||
|
if t_start is not None and t_audio is not None:
|
||||||
|
metrics["M1c_first_audio_chunk_s"] = t_audio - t_start
|
||||||
|
|
||||||
|
# M1d: start → first transcript (whisper processing)
|
||||||
|
t_transcript, _ = find_first_line(log_lines, "faster_whisper: Processing audio")
|
||||||
|
if t_start is not None and t_transcript is not None:
|
||||||
|
metrics["M1d_first_transcript_s"] = t_transcript - t_start
|
||||||
|
|
||||||
|
# M1: full startup = start → first transcript
|
||||||
|
if "M1d_first_transcript_s" in metrics:
|
||||||
|
metrics["M1_full_startup_s"] = metrics["M1d_first_transcript_s"]
|
||||||
|
|
||||||
|
# Going LIVE time
|
||||||
|
t_live, _ = find_first_line(log_lines, "Going LIVE")
|
||||||
|
if t_start is not None and t_live is not None:
|
||||||
|
metrics["going_live_s"] = t_live - t_start
|
||||||
|
|
||||||
|
# M5: Audio extraction lag
|
||||||
|
# Parse "Audio chunk: chunk_N (Xs → Ys, global Zs)" lines
|
||||||
|
audio_lags = []
|
||||||
|
for line in log_lines:
|
||||||
|
m = re.search(r"Audio chunk: \S+ \(([\d.]+)s → ([\d.]+)s, global ([\d.]+)s\)", line)
|
||||||
|
if m:
|
||||||
|
end_time = float(m[2])
|
||||||
|
log_t = parse_log_time(line)
|
||||||
|
if log_t is not None and t_start is not None:
|
||||||
|
wall_elapsed = log_t - t_start
|
||||||
|
lag = wall_elapsed - end_time
|
||||||
|
if lag >= 0:
|
||||||
|
audio_lags.append(lag)
|
||||||
|
if audio_lags:
|
||||||
|
metrics["M5_audio_lag_avg_s"] = round(sum(audio_lags) / len(audio_lags), 1)
|
||||||
|
metrics["M5_audio_lag_max_s"] = round(max(audio_lags), 1)
|
||||||
|
metrics["M5_audio_lag_min_s"] = round(min(audio_lags), 1)
|
||||||
|
metrics["M5_audio_chunk_count"] = len(audio_lags)
|
||||||
|
|
||||||
|
# M6: Transcription lag
|
||||||
|
# Parse faster_whisper "Processing audio with duration MM:SS.mmm" or "HH:MM:SS.mmm"
|
||||||
|
transcript_durations = []
|
||||||
|
for line in log_lines:
|
||||||
|
# MM:SS.mmm format (e.g., 00:06.145)
|
||||||
|
m = re.search(r"faster_whisper: Processing audio with duration (\d+):([\d.]+)$", line)
|
||||||
|
if m:
|
||||||
|
dur = int(m[1]) * 60 + float(m[2])
|
||||||
|
transcript_durations.append(dur)
|
||||||
|
continue
|
||||||
|
# HH:MM:SS.mmm format
|
||||||
|
m = re.search(r"faster_whisper: Processing audio with duration (\d+):(\d+):([\d.]+)", line)
|
||||||
|
if m:
|
||||||
|
dur = int(m[1]) * 3600 + int(m[2]) * 60 + float(m[3])
|
||||||
|
transcript_durations.append(dur)
|
||||||
|
if transcript_durations:
|
||||||
|
metrics["M6_whisper_processing_avg_s"] = round(sum(transcript_durations) / len(transcript_durations), 1)
|
||||||
|
metrics["M6_transcript_count"] = len(transcript_durations)
|
||||||
|
|
||||||
|
# M7: Frame throughput
|
||||||
|
if frames and session_duration and session_duration > 0:
|
||||||
|
minutes = session_duration / 60
|
||||||
|
metrics["M7_frame_throughput_per_min"] = round(len(frames) / minutes, 1)
|
||||||
|
metrics["M7_total_frames"] = len(frames)
|
||||||
|
|
||||||
|
# M9: Recorder health
|
||||||
|
restarts = sum(1 for l in log_lines if "Recorder died" in l)
|
||||||
|
segments = sum(1 for l in log_lines if "Restarting recorder" in l)
|
||||||
|
metrics["M9_recorder_restarts"] = restarts
|
||||||
|
metrics["M9_segment_rotations"] = segments
|
||||||
|
|
||||||
|
# Scene detection mode
|
||||||
|
if any("Scene detector: connecting" in l for l in log_lines):
|
||||||
|
metrics["scene_mode"] = "rust_relay"
|
||||||
|
elif any("Recorder+scene: pid=" in l for l in log_lines):
|
||||||
|
metrics["scene_mode"] = "python_single_process"
|
||||||
|
else:
|
||||||
|
metrics["scene_mode"] = "unknown"
|
||||||
|
|
||||||
|
# Transport mode — check for Rust-specific markers
|
||||||
|
if any("Rust session dir" in l or "Attached to Rust session" in l for l in log_lines):
|
||||||
|
metrics["transport"] = "rust"
|
||||||
|
elif any("Recorder+scene: pid=" in l for l in log_lines):
|
||||||
|
metrics["transport"] = "python"
|
||||||
|
else:
|
||||||
|
# Check file signatures: Rust writes audio.aac separately, Python muxes into fMP4
|
||||||
|
aac = session_dir / "stream" / "audio.aac"
|
||||||
|
if aac.exists():
|
||||||
|
metrics["transport"] = "rust"
|
||||||
|
elif any("run_async:" in l for l in log_lines):
|
||||||
|
metrics["transport"] = "python"
|
||||||
|
else:
|
||||||
|
metrics["transport"] = "unknown"
|
||||||
|
|
||||||
|
# Scene mode from log markers
|
||||||
|
if metrics.get("scene_mode") == "unknown":
|
||||||
|
if any("Recorder+scene: pid=" in l for l in log_lines):
|
||||||
|
metrics["scene_mode"] = "python_single_process"
|
||||||
|
elif any("run_async:" in l for l in log_lines):
|
||||||
|
metrics["scene_mode"] = "python_single_process"
|
||||||
|
|
||||||
|
return metrics
|
||||||
|
|
||||||
|
|
||||||
|
def print_report(session_dir: Path, metrics: dict):
|
||||||
|
log.info("=" * 60)
|
||||||
|
log.info(" CHT Benchmark Report")
|
||||||
|
log.info(" Session: %s", session_dir.name)
|
||||||
|
log.info(" Transport: %s", metrics.get("transport", "?"))
|
||||||
|
log.info(" Scene mode: %s", metrics.get("scene_mode", "?"))
|
||||||
|
log.info(" Duration: %ss", metrics.get("session_duration_s", "?"))
|
||||||
|
log.info("=" * 60)
|
||||||
|
|
||||||
|
rows = [
|
||||||
|
("M1", "Full startup", "M1_full_startup_s", "s"),
|
||||||
|
("M1a", " → first duration", "M1a_first_duration_s", "s"),
|
||||||
|
("M1b", " → first scene frame", "M1b_first_scene_frame_s", "s"),
|
||||||
|
("M1c", " → first audio chunk", "M1c_first_audio_chunk_s", "s"),
|
||||||
|
("M1d", " → first transcript", "M1d_first_transcript_s", "s"),
|
||||||
|
("", " → going live", "going_live_s", "s"),
|
||||||
|
("M5", "Audio lag (avg)", "M5_audio_lag_avg_s", "s"),
|
||||||
|
("M5", "Audio lag (max)", "M5_audio_lag_max_s", "s"),
|
||||||
|
("M5", "Audio chunks", "M5_audio_chunk_count", ""),
|
||||||
|
("M6", "Whisper processing (avg)", "M6_whisper_processing_avg_s", "s"),
|
||||||
|
("M6", "Transcripts produced", "M6_transcript_count", ""),
|
||||||
|
("M7", "Frame throughput", "M7_frame_throughput_per_min", "/min"),
|
||||||
|
("M7", "Total frames", "M7_total_frames", ""),
|
||||||
|
("M9", "Recorder restarts", "M9_recorder_restarts", ""),
|
||||||
|
("M9", "Segment rotations", "M9_segment_rotations", ""),
|
||||||
|
]
|
||||||
|
|
||||||
|
for code, label, key, unit in rows:
|
||||||
|
val = metrics.get(key)
|
||||||
|
if val is not None:
|
||||||
|
log.info(" %4s %28s %s%s", code, label, val, unit)
|
||||||
|
else:
|
||||||
|
log.info(" %4s %28s -", code, label)
|
||||||
|
|
||||||
|
|
||||||
|
def compare_ground_truth(session_dir: Path, gt: dict) -> dict:
|
||||||
|
"""Compare detected scene frames against ground truth scene changes."""
|
||||||
|
frames = load_frames_index(session_dir)
|
||||||
|
gt_scenes = gt.get("scenes", [])
|
||||||
|
|
||||||
|
if not frames or not gt_scenes:
|
||||||
|
return {"error": "no frames or no ground truth scenes"}
|
||||||
|
|
||||||
|
detected_ts = sorted(f["timestamp"] for f in frames)
|
||||||
|
expected_ts = sorted(s["timestamp_s"] for s in gt_scenes)
|
||||||
|
|
||||||
|
# For each expected scene change, find the closest detected frame
|
||||||
|
matches = []
|
||||||
|
for exp_ts in expected_ts:
|
||||||
|
best = None
|
||||||
|
best_delta = float("inf")
|
||||||
|
for det_ts in detected_ts:
|
||||||
|
delta = det_ts - exp_ts
|
||||||
|
if abs(delta) < abs(best_delta):
|
||||||
|
best_delta = delta
|
||||||
|
best = det_ts
|
||||||
|
matches.append({
|
||||||
|
"expected_s": exp_ts,
|
||||||
|
"detected_s": best,
|
||||||
|
"delta_s": round(best_delta, 3) if best is not None else None,
|
||||||
|
})
|
||||||
|
|
||||||
|
deltas = [m["delta_s"] for m in matches if m["delta_s"] is not None]
|
||||||
|
|
||||||
|
return {
|
||||||
|
"expected_scenes": len(expected_ts),
|
||||||
|
"detected_frames": len(detected_ts),
|
||||||
|
"matches": matches,
|
||||||
|
"avg_delta_s": round(sum(deltas) / len(deltas), 3) if deltas else None,
|
||||||
|
"max_delta_s": round(max(abs(d) for d in deltas), 3) if deltas else None,
|
||||||
|
"missed": sum(1 for m in matches if m["delta_s"] is None or abs(m["delta_s"]) > 10),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def print_ground_truth_report(gt: dict):
|
||||||
|
log.info("")
|
||||||
|
log.info(" Scene detection vs ground truth:")
|
||||||
|
log.info(" Expected scenes: %d", gt.get("expected_scenes", 0))
|
||||||
|
log.info(" Detected frames: %d", gt.get("detected_frames", 0))
|
||||||
|
if gt.get("avg_delta_s") is not None:
|
||||||
|
log.info(" Avg detection delta: %ss", gt["avg_delta_s"])
|
||||||
|
log.info(" Max detection delta: %ss", gt["max_delta_s"])
|
||||||
|
if gt.get("missed", 0) > 0:
|
||||||
|
log.warning(" Missed scenes: %d", gt["missed"])
|
||||||
|
for m in gt.get("matches", []):
|
||||||
|
status = "OK" if m["delta_s"] is not None and abs(m["delta_s"]) < 5 else "MISS"
|
||||||
|
det = f"{m['detected_s']:.1f}s" if m["detected_s"] is not None else "---"
|
||||||
|
delta = f"+{m['delta_s']:.1f}s" if m["delta_s"] is not None else ""
|
||||||
|
log.info(" %4s expected=%5.1fs detected=%s %s", status, m["expected_s"], det, delta)
|
||||||
|
|
||||||
|
|
||||||
|
def find_latest_session() -> Path | None:
|
||||||
|
if not SESSIONS_DIR.exists():
|
||||||
|
return None
|
||||||
|
dirs = sorted(SESSIONS_DIR.iterdir(), reverse=True)
|
||||||
|
for d in dirs:
|
||||||
|
if d.is_dir() and (d / "telemetry.jsonl").exists():
|
||||||
|
return d
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
parser = argparse.ArgumentParser(description="CHT session benchmark")
|
||||||
|
parser.add_argument("--session", type=Path, help="Path to session directory")
|
||||||
|
parser.add_argument("--latest", action="store_true", help="Use most recent session")
|
||||||
|
parser.add_argument("--json", action="store_true", help="Output JSON instead of table")
|
||||||
|
parser.add_argument("--ground-truth", type=Path, help="Ground truth JSON for scene comparison")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.INFO,
|
||||||
|
format="%(asctime)s %(levelname)-7s %(name)s: %(message)s",
|
||||||
|
datefmt="%H:%M:%S",
|
||||||
|
)
|
||||||
|
|
||||||
|
if args.latest:
|
||||||
|
session_dir = find_latest_session()
|
||||||
|
if not session_dir:
|
||||||
|
log.error("No sessions found")
|
||||||
|
sys.exit(1)
|
||||||
|
elif args.session:
|
||||||
|
session_dir = args.session
|
||||||
|
else:
|
||||||
|
parser.print_help()
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
if not session_dir.exists():
|
||||||
|
log.error("Session not found: %s", session_dir)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
metrics = extract_metrics(session_dir)
|
||||||
|
metrics["session_id"] = session_dir.name
|
||||||
|
|
||||||
|
# Ground truth comparison
|
||||||
|
if args.ground_truth and args.ground_truth.exists():
|
||||||
|
gt = json.loads(args.ground_truth.read_text())
|
||||||
|
comparison = compare_ground_truth(session_dir, gt)
|
||||||
|
metrics["ground_truth"] = comparison
|
||||||
|
|
||||||
|
if args.json:
|
||||||
|
sys.stdout.write(json.dumps(metrics, indent=2) + "\n")
|
||||||
|
else:
|
||||||
|
print_report(session_dir, metrics)
|
||||||
|
if "ground_truth" in metrics:
|
||||||
|
print_ground_truth_report(metrics["ground_truth"])
|
||||||
|
|
||||||
|
# Save report
|
||||||
|
bench_dir = DATA_DIR / "bench"
|
||||||
|
bench_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
report_path = bench_dir / f"{session_dir.name}.json"
|
||||||
|
report_path.write_text(json.dumps(metrics, indent=2))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
164
ctrl/bench_delay.py
Normal file
164
ctrl/bench_delay.py
Normal file
@@ -0,0 +1,164 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Live scene detection latency benchmark (M4).
|
||||||
|
|
||||||
|
Measures time from a triggered visual change on the sender to a new JPEG
|
||||||
|
appearing in the receiver's frames/ directory.
|
||||||
|
|
||||||
|
Usage (run on receiver, sender accessible via SSH):
|
||||||
|
python ctrl/bench_delay.py --session-dir data/sessions/CURRENT --sender mariano@sender
|
||||||
|
python ctrl/bench_delay.py --frames-dir data/sessions/CURRENT/frames --sender mariano@sender
|
||||||
|
|
||||||
|
How it works:
|
||||||
|
1. Records the current frame count in frames/index.json
|
||||||
|
2. SSH to sender, triggers a visual change (xdotool workspace switch)
|
||||||
|
3. Polls frames/index.json for a new entry (or watches via mtime)
|
||||||
|
4. Measures wall-clock difference = scene detection latency
|
||||||
|
|
||||||
|
For repeated measurements, use --repeat N with --interval S between triggers.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
log = logging.getLogger("bench_delay")
|
||||||
|
|
||||||
|
|
||||||
|
def get_frame_count(frames_dir: Path) -> int:
|
||||||
|
index = frames_dir / "index.json"
|
||||||
|
if not index.exists():
|
||||||
|
return 0
|
||||||
|
try:
|
||||||
|
return len(json.loads(index.read_text()))
|
||||||
|
except (json.JSONDecodeError, ValueError):
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def get_latest_frame_mtime(frames_dir: Path) -> float:
|
||||||
|
index = frames_dir / "index.json"
|
||||||
|
if not index.exists():
|
||||||
|
return 0.0
|
||||||
|
return index.stat().st_mtime
|
||||||
|
|
||||||
|
|
||||||
|
def trigger_scene_change(sender: str, method: str = "workspace") -> float:
|
||||||
|
"""Trigger a visual change on the sender. Returns wall-clock time of trigger."""
|
||||||
|
if method == "workspace":
|
||||||
|
# xdotool switch workspace — causes a full-screen visual change
|
||||||
|
cmd = ["ssh", sender, "DISPLAY=:0 xdotool key super+Right"]
|
||||||
|
elif method == "color":
|
||||||
|
# Flash a fullscreen color using xterm (more dramatic change)
|
||||||
|
cmd = ["ssh", sender,
|
||||||
|
"DISPLAY=:0 bash -c 'xterm -fullscreen -bg red -e sleep 0.5 &'"]
|
||||||
|
else:
|
||||||
|
log.error("Unknown trigger method: %s", method)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
t = time.monotonic()
|
||||||
|
wall = time.time()
|
||||||
|
try:
|
||||||
|
subprocess.run(cmd, timeout=5, capture_output=True)
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
log.warning("SSH trigger timed out")
|
||||||
|
return wall
|
||||||
|
|
||||||
|
|
||||||
|
def wait_for_new_frame(frames_dir: Path, initial_count: int,
|
||||||
|
timeout: float = 15.0, poll_interval: float = 0.1) -> float | None:
|
||||||
|
"""Wait for a new frame to appear. Returns wall-clock time when detected, or None."""
|
||||||
|
deadline = time.monotonic() + timeout
|
||||||
|
while time.monotonic() < deadline:
|
||||||
|
count = get_frame_count(frames_dir)
|
||||||
|
if count > initial_count:
|
||||||
|
return time.time()
|
||||||
|
time.sleep(poll_interval)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def run_measurement(frames_dir: Path, sender: str, method: str) -> dict:
|
||||||
|
initial_count = get_frame_count(frames_dir)
|
||||||
|
trigger_wall = trigger_scene_change(sender, method)
|
||||||
|
detected_wall = wait_for_new_frame(frames_dir, initial_count)
|
||||||
|
|
||||||
|
if detected_wall is None:
|
||||||
|
return {"trigger_wall": trigger_wall, "latency_s": None, "timed_out": True}
|
||||||
|
|
||||||
|
latency = detected_wall - trigger_wall
|
||||||
|
return {
|
||||||
|
"trigger_wall": trigger_wall,
|
||||||
|
"detected_wall": detected_wall,
|
||||||
|
"latency_s": round(latency, 3),
|
||||||
|
"timed_out": False,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: run the scene-detection latency benchmark.

    Triggers ``--repeat`` visual changes on the sender (over SSH) and measures
    how long each takes to surface as a new frame in the receiver's frames
    directory. Prints a human-readable summary, or machine-readable JSON with
    ``--json``.
    """
    parser = argparse.ArgumentParser(description="Scene detection latency benchmark")
    parser.add_argument("--frames-dir", type=Path, help="Path to frames/ directory")
    parser.add_argument("--session-dir", type=Path, help="Path to session directory")
    parser.add_argument("--sender", required=True, help="SSH target for sender (user@host)")
    parser.add_argument("--method", default="workspace", choices=["workspace", "color"],
                        help="How to trigger visual change")
    parser.add_argument("--repeat", type=int, default=3, help="Number of measurements")
    parser.add_argument("--interval", type=float, default=5.0, help="Seconds between triggers")
    parser.add_argument("--json", action="store_true", help="Output JSON")
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)-7s %(name)s: %(message)s",
        datefmt="%H:%M:%S",
    )

    # --session-dir takes precedence; exactly one of the two must be given.
    if args.session_dir:
        frames_dir = args.session_dir / "frames"
    elif args.frames_dir:
        frames_dir = args.frames_dir
    else:
        # parser.error() raises SystemExit, so no `return` is needed after it
        # (the original had an unreachable one).
        parser.error("Provide --frames-dir or --session-dir")

    if not frames_dir.exists():
        log.error("Frames dir not found: %s", frames_dir)
        sys.exit(1)

    results = []
    for i in range(args.repeat):
        if i > 0:
            time.sleep(args.interval)  # settle time between consecutive triggers
        log.info("Trigger %d/%d...", i + 1, args.repeat)
        r = run_measurement(frames_dir, args.sender, args.method)
        if r["timed_out"]:
            log.warning("TIMEOUT (no frame in 15s)")
        else:
            log.info(" latency: %ss", r["latency_s"])
        results.append(r)

    latencies = [r["latency_s"] for r in results if r["latency_s"] is not None]

    if args.json:
        print(json.dumps({"measurements": results, "summary": {
            "count": len(latencies),
            "avg_s": round(sum(latencies) / len(latencies), 3) if latencies else None,
            "min_s": round(min(latencies), 3) if latencies else None,
            "max_s": round(max(latencies), 3) if latencies else None,
            "timeouts": sum(1 for r in results if r["timed_out"]),
        }}, indent=2))
    else:
        log.info("M4 Scene detection latency:")
        if latencies:
            log.info(" avg: %.1fs", sum(latencies) / len(latencies))
            log.info(" min: %.1fs", min(latencies))
            log.info(" max: %.1fs", max(latencies))
        timeouts = sum(1 for r in results if r["timed_out"])
        if timeouts:
            log.warning(" timeouts: %d/%d", timeouts, len(results))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Allow running the benchmark directly as a script.
    main()
|
||||||
25
ctrl/client.sh
Executable file
25
ctrl/client.sh
Executable file
@@ -0,0 +1,25 @@
|
|||||||
|
#!/bin/bash
# Start the client (sender) — Python or Rust mode.
#
# Usage:
#   ctrl/client.sh --python [RECEIVER_IP] [PORT]   # kmsgrab + mpegts (default port 4444)
#   ctrl/client.sh --rust [server_addr]            # Rust framed protocol (default mcrndeb:4447)
#
# Default: --python
set -euo pipefail

PROJECT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
MODE="python"

# Consume an optional leading mode flag; anything else is passed through.
case "${1:-}" in
    --python) MODE="python"; shift ;;
    --rust)   MODE="rust";   shift ;;
esac

if [ "$MODE" = "rust" ]; then
    exec "$PROJECT_DIR/media/ctrl/client.sh" "$@"
else
    exec sudo python3 "$PROJECT_DIR/sender/stream_av.py" "$@"
fi
|
||||||
116
ctrl/e2e_test.sh
Executable file
116
ctrl/e2e_test.sh
Executable file
@@ -0,0 +1,116 @@
|
|||||||
|
#!/bin/bash
# E2E benchmark test — fully automated, run from the SENDER machine.
#
# Starts everything via SSH, captures test video, collects results.
#
# Usage:
#   ctrl/e2e_test.sh --python [--duration 30]
#   ctrl/e2e_test.sh --rust [--duration 30]
set -euo pipefail

PROJECT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
cd "$PROJECT_DIR"

DURATION=30
INTERVAL=5
RECEIVER="mcrndeb"
RDIR="wdir/cht"
MODE="python"
PLAY_DELAY=3

while [[ $# -gt 0 ]]; do
    case $1 in
        --python) MODE="python"; shift ;;
        --rust) MODE="rust"; shift ;;
        --duration) DURATION="$2"; shift 2 ;;
        --interval) INTERVAL="$2"; shift 2 ;;
        --receiver) RECEIVER="$2"; shift 2 ;;
        --play-delay) PLAY_DELAY="$2"; shift 2 ;;
        *) echo "Unknown arg: $1"; exit 1 ;;
    esac
done

VIDEO="tests/fixtures/test_scene_${DURATION}s.mp4"
GROUND_TRUTH="tests/fixtures/test_scene_${DURATION}s_ground_truth.json"

# Generate test video if needed
if [ ! -f "$VIDEO" ]; then
    echo "=== Generating test video ==="
    python3 ctrl/gen_test_video.py --duration "$DURATION" --interval "$INTERVAL"
fi

# PIDs to clean up
PIDS=()
cleanup() {
    echo "=== Cleaning up ==="
    for pid in "${PIDS[@]}"; do
        kill "$pid" 2>/dev/null || true
    done
    # Kill sudo'd client
    sudo pkill -f "stream_av\|cht-client" 2>/dev/null || true
    # Stop remote processes
    ssh "$RECEIVER" "pkill -f 'cht-server|cht.app' 2>/dev/null" || true
    wait 2>/dev/null
}
trap cleanup EXIT INT TERM

echo "=== E2E test: $MODE mode, ${DURATION}s ==="

# Step 1: Start receiver side (on mcrndeb via SSH)
if [ "$MODE" = "rust" ]; then
    echo "--- Starting Rust server on $RECEIVER ---"
    ssh -tt "$RECEIVER" "cd $RDIR && ctrl/server.sh" &
    PIDS+=($!)
    sleep 2

    echo "--- Starting app on $RECEIVER (rust transport, auto-connect) ---"
    ssh -tt "$RECEIVER" "cd $RDIR && CHT_AUTO_CONNECT=1 CHT_RUST_TRANSPORT=1 ctrl/app.sh" &
    PIDS+=($!)
    sleep 3
else
    echo "--- Starting app on $RECEIVER (python transport, auto-connect) ---"
    ssh -tt "$RECEIVER" "cd $RDIR && CHT_AUTO_CONNECT=1 CHT_RUST_TRANSPORT=0 ctrl/app.sh" &
    PIDS+=($!)
    sleep 3
fi

# Step 2: Play test video fullscreen on sender
echo "--- Playing test video fullscreen ---"
mpv --fullscreen --loop-file=inf --no-terminal "$VIDEO" &
# Remember mpv's PID separately: once the client is launched in step 3,
# ${PIDS[-1]} no longer points at mpv (the original killed the client here
# by mistake and left mpv running until the EXIT trap).
MPV_PID=$!
PIDS+=($MPV_PID)
sleep "$PLAY_DELAY"

# Step 3: Start client (sender)
echo "--- Starting $MODE client → $RECEIVER ---"
if [ "$MODE" = "rust" ]; then
    ctrl/client.sh --rust "${RECEIVER}:4447" &
else
    ctrl/client.sh --python "$RECEIVER" &
fi
PIDS+=($!)

# Step 4: Wait for capture + processing
WAIT=$(( DURATION + 15 ))
echo "--- Waiting ${WAIT}s for capture + processing ---"
sleep "$WAIT"

# Step 5: Stop sender side
echo "--- Stopping sender ---"
sudo pkill -f "stream_av\|cht-client" 2>/dev/null || true
kill "$MPV_PID" 2>/dev/null || true  # mpv
sleep 2

# Step 6: Stop receiver side
echo "--- Stopping receiver ---"
ssh "$RECEIVER" "pkill -f 'cht.app' 2>/dev/null" || true
sleep 2
ssh "$RECEIVER" "pkill -f 'cht-server' 2>/dev/null" || true
sleep 1

# Step 7: Benchmark
echo ""
echo "=== Benchmark results ($MODE) ==="
ssh "$RECEIVER" "cd $RDIR && python3 ctrl/bench.py --latest --ground-truth $GROUND_TRUTH"

echo ""
echo "JSON: ssh $RECEIVER 'cd $RDIR && python3 ctrl/bench.py --latest --json --ground-truth $GROUND_TRUTH'"
|
||||||
148
ctrl/gen_test_video.py
Executable file
148
ctrl/gen_test_video.py
Executable file
@@ -0,0 +1,148 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Generate a test video with known scene changes and audio markers.
|
||||||
|
|
||||||
|
The video is played fullscreen on the sender while kmsgrab captures it —
|
||||||
|
simulating a meeting with deterministic, reproducible content.
|
||||||
|
|
||||||
|
- Scene changes: solid color blocks every INTERVAL seconds
|
||||||
|
- Overlay: large elapsed-seconds counter for visual sync measurement
|
||||||
|
- Audio: sine tone (changes frequency each scene for transcription ground truth)
|
||||||
|
|
||||||
|
Outputs:
|
||||||
|
tests/fixtures/test_scene_30s.mp4
|
||||||
|
tests/fixtures/test_scene_30s_ground_truth.json
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python ctrl/gen_test_video.py [--duration 30] [--interval 5]
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
log = logging.getLogger("gen_test_video")

# Repo root, derived from this file's location (ctrl/gen_test_video.py -> project root).
PROJECT_DIR = Path(__file__).resolve().parent.parent
# All generated fixtures (video, ground-truth JSON, downloaded audio) live here.
FIXTURES_DIR = PROJECT_DIR / "tests" / "fixtures"

# Scene colors (RGB hex); scenes cycle through this palette in order.
COLORS = ["FF0000", "0000FF", "00FF00", "FFFF00", "FF00FF", "00FFFF"]

# Speech sample (Harvard sentences, public domain, Open Speech Repository)
SPEECH_SAMPLE = FIXTURES_DIR / "test_speech_harvard.wav"
SPEECH_URL = "http://www.voiptroubleshooter.com/open_speech/american/OSR_us_000_0010_8k.wav"
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_speech_sample():
    """Download the speech sample into the fixtures dir if not already present.

    Downloads to a temporary ``.part`` file first and renames it into place,
    so an interrupted download can never leave a truncated file that a later
    run would mistake for a complete sample (the original wrote straight to
    the final path).
    """
    FIXTURES_DIR.mkdir(parents=True, exist_ok=True)
    if SPEECH_SAMPLE.exists():
        return
    log.info("Downloading speech sample from Open Speech Repository...")
    import urllib.request
    tmp = SPEECH_SAMPLE.with_suffix(SPEECH_SAMPLE.suffix + ".part")
    try:
        urllib.request.urlretrieve(SPEECH_URL, tmp)
        tmp.replace(SPEECH_SAMPLE)  # atomic rename: publish only complete files
    finally:
        tmp.unlink(missing_ok=True)  # drop leftovers from a failed download
    log.info("Saved: %s", SPEECH_SAMPLE)
|
||||||
|
|
||||||
|
|
||||||
|
def generate(duration: int, interval: int):
    """Render the test video and its ground-truth JSON into the fixtures dir.

    Builds an ffmpeg filter graph of solid-color segments (one per scene) with
    a large elapsed-seconds counter overlaid, concatenates them, and lays the
    looped speech sample underneath as the audio track.

    Args:
        duration: total video length in seconds.
        interval: seconds per scene (a scene change every *interval* seconds).

    Returns:
        ``(video_path, truth_path)`` for the generated files.

    Raises:
        ValueError: if *duration* < *interval* (no full scene would fit).
        SystemExit: if ffmpeg fails.
    """
    FIXTURES_DIR.mkdir(parents=True, exist_ok=True)
    ensure_speech_sample()

    video_path = FIXTURES_DIR / f"test_scene_{duration}s.mp4"
    truth_path = FIXTURES_DIR / f"test_scene_{duration}s_ground_truth.json"

    num_scenes = duration // interval
    if num_scenes < 1:
        # Without this, concat=n=0 below fails with an opaque ffmpeg error.
        raise ValueError(f"duration ({duration}) must be >= interval ({interval})")
    nc = len(COLORS)

    # Video: colored segments with timer overlay, concatenated.
    # Audio: speech sample looped to fill duration (real speech for whisper testing).
    filter_parts = []
    for i in range(num_scenes):
        color = COLORS[i % nc]
        # Last segment may be shorter when duration isn't a multiple of interval.
        seg_dur = interval if (i + 1) * interval <= duration else duration - i * interval
        offset = i * interval
        filter_parts.append(
            f"color=c=0x{color}:s=1920x1080:d={seg_dur}:r=30,"
            f"drawtext=text='%{{eif\\:t+{offset}\\:d}}s':"
            f"fontsize=200:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:"
            f"borderw=6:bordercolor=black"
            f"[v{i}]"
        )

    v_inputs = "".join(f"[v{i}]" for i in range(num_scenes))
    filter_parts.append(f"{v_inputs}concat=n={num_scenes}:v=1:a=0[vout]")
    # Audio: speech sample is input 1 (input 0 is the lavfi dummy)
    filter_parts.append(
        f"[1:a]aresample=48000,aloop=loop=-1:size=48000*{duration},"
        f"atrim=0:{duration},volume=0.8[aout]"
    )

    filter_complex = ";\n".join(filter_parts)

    cmd = [
        "ffmpeg", "-y",
        # Global options must precede the output file: the original appended
        # -hide_banner/-loglevel *after* it, and ffmpeg ignores trailing options.
        "-hide_banner", "-loglevel", "warning",
        "-f", "lavfi", "-i", "anullsrc",  # dummy input 0 (video comes from the filter graph)
        "-i", str(SPEECH_SAMPLE),  # speech audio input (input 1)
        "-filter_complex", filter_complex,
        "-map", "[vout]", "-map", "[aout]",
        "-c:v", "libx264", "-preset", "ultrafast", "-crf", "18",
        "-g", "30", "-keyint_min", "30",  # 1 s GOP at 30 fps for quick stream joins
        "-c:a", "aac", "-b:a", "128k",
        "-t", str(duration),
        str(video_path),
    ]

    log.info("Generating %ds test video (%d scenes, %ds interval, speech audio)", duration, num_scenes, interval)
    log.info("Output: %s", video_path)

    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        log.error("ffmpeg failed:\n%s", result.stderr)
        sys.exit(1)

    log.info("Video generated: %s", video_path)

    # Ground truth: one entry per scene with its start time and color.
    scenes = [
        {
            "scene_index": i,
            "timestamp_s": i * interval,
            "color_hex": COLORS[i % nc],
        }
        for i in range(num_scenes)
    ]

    truth = {
        "duration_s": duration,
        "interval_s": interval,
        "num_scenes": num_scenes,
        "video_path": str(video_path),
        "scenes": scenes,
    }
    truth_path.write_text(json.dumps(truth, indent=2))
    log.info("Ground truth: %s", truth_path)

    return video_path, truth_path
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: parse arguments and generate the fixture video."""
    parser = argparse.ArgumentParser(description="Generate CHT test video")
    parser.add_argument("--duration", type=int, default=30, help="Video duration in seconds")
    parser.add_argument("--interval", type=int, default=5, help="Seconds between scene changes")
    opts = parser.parse_args()

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)-7s %(name)s: %(message)s",
        datefmt="%H:%M:%S",
    )

    generate(opts.duration, opts.interval)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Allow running the generator directly as a script.
    main()
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
# Start the sender on this machine
|
|
||||||
# Usage: ./sender.sh RECEIVER_IP [PORT]
|
|
||||||
set -euo pipefail
|
|
||||||
|
|
||||||
PROJECT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
|
|
||||||
|
|
||||||
exec sudo "$PROJECT_DIR/sender/stream_av.sh" "$@"
|
|
||||||
8
ctrl/server.sh
Executable file
8
ctrl/server.sh
Executable file
@@ -0,0 +1,8 @@
|
|||||||
|
#!/bin/bash
# Start the Rust media server (receiver).
# Delegates straight to media/ctrl/server.sh with all arguments.
# Usage: ctrl/server.sh [port]
set -euo pipefail

repo_root="$(cd -- "$(dirname -- "$0")/.." && pwd)"
exec "$repo_root/media/ctrl/server.sh" "$@"
|
||||||
142
logs/client.log
142
logs/client.log
File diff suppressed because one or more lines are too long
38
tests/fixtures/test_scene_30s_ground_truth.json
vendored
Normal file
38
tests/fixtures/test_scene_30s_ground_truth.json
vendored
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
{
|
||||||
|
"duration_s": 30,
|
||||||
|
"interval_s": 5,
|
||||||
|
"num_scenes": 6,
|
||||||
|
"video_path": "/home/mariano/wdir/cht/tests/fixtures/test_scene_30s.mp4",
|
||||||
|
"scenes": [
|
||||||
|
{
|
||||||
|
"scene_index": 0,
|
||||||
|
"timestamp_s": 0,
|
||||||
|
"color_hex": "FF0000"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"scene_index": 1,
|
||||||
|
"timestamp_s": 5,
|
||||||
|
"color_hex": "0000FF"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"scene_index": 2,
|
||||||
|
"timestamp_s": 10,
|
||||||
|
"color_hex": "00FF00"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"scene_index": 3,
|
||||||
|
"timestamp_s": 15,
|
||||||
|
"color_hex": "FFFF00"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"scene_index": 4,
|
||||||
|
"timestamp_s": 20,
|
||||||
|
"color_hex": "FF00FF"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"scene_index": 5,
|
||||||
|
"timestamp_s": 25,
|
||||||
|
"color_hex": "00FFFF"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
1
tests/fixtures/test_speech.ogg
vendored
Normal file
1
tests/fixtures/test_speech.ogg
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
File not found: /v1/AUTH_mw/wikipedia-commons-local-public.22/2/22/En-us-United_States_Constitution-Article_1-Section_1.ogg
|
||||||
Reference in New Issue
Block a user