proper tests

This commit is contained in:
2026-04-10 18:29:58 -03:00
parent e906b0a963
commit ea9dbf8772
16 changed files with 1077 additions and 15 deletions

148
ctrl/gen_test_video.py Executable file
View File

@@ -0,0 +1,148 @@
#!/usr/bin/env python3
"""Generate a test video with known scene changes and audio markers.
The video is played fullscreen on the sender while kmsgrab captures it —
simulating a meeting with deterministic, reproducible content.
- Scene changes: solid color blocks every INTERVAL seconds
- Overlay: large elapsed-seconds counter for visual sync measurement
- Audio: sine tone (changes frequency each scene for transcription ground truth)
Outputs:
tests/fixtures/test_scene_30s.mp4
tests/fixtures/test_scene_30s_ground_truth.json
Usage:
python ctrl/gen_test_video.py [--duration 30] [--interval 5]
"""
import argparse
import json
import logging
import subprocess
import sys
from pathlib import Path
# Module-level logger; handlers/level are configured in main().
log = logging.getLogger("gen_test_video")
# Repository root: this file lives in <project>/ctrl/, so go up two levels.
PROJECT_DIR = Path(__file__).resolve().parent.parent
FIXTURES_DIR = PROJECT_DIR / "tests" / "fixtures"
# Scene colors (RGB hex), cycled in order for successive scenes
COLORS = ["FF0000", "0000FF", "00FF00", "FFFF00", "FF00FF", "00FFFF"]
# Speech sample (Harvard sentences, public domain, Open Speech Repository)
SPEECH_SAMPLE = FIXTURES_DIR / "test_speech_harvard.wav"
SPEECH_URL = "http://www.voiptroubleshooter.com/open_speech/american/OSR_us_000_0010_8k.wav"
def ensure_speech_sample():
    """Download the speech sample if it is not already present.

    Downloads to a temporary ``.part`` file and atomically renames it into
    place: an interrupted download must not leave a truncated WAV at
    SPEECH_SAMPLE, because the ``exists()`` check below would then skip the
    re-download on every future run and feed ffmpeg a corrupt fixture.
    """
    FIXTURES_DIR.mkdir(parents=True, exist_ok=True)
    if SPEECH_SAMPLE.exists():
        return
    log.info("Downloading speech sample from Open Speech Repository...")
    import urllib.request
    tmp_path = SPEECH_SAMPLE.with_suffix(".wav.part")
    try:
        urllib.request.urlretrieve(SPEECH_URL, tmp_path)
        # Path.replace is atomic on POSIX: readers see either no file or a
        # complete file, never a partial one.
        tmp_path.replace(SPEECH_SAMPLE)
    except BaseException:
        # Clean up the partial download; re-raise so the caller sees the error.
        tmp_path.unlink(missing_ok=True)
        raise
    log.info("Saved: %s", SPEECH_SAMPLE)
def generate(duration: int, interval: int):
    """Generate the test video and its matching ground-truth JSON.

    The video is a sequence of solid-color scenes with a large elapsed-seconds
    overlay; the audio is a looped speech sample (real speech so whisper
    transcription can be checked against known content).

    Args:
        duration: Total video length in seconds.
        interval: Seconds between scene (color) changes.

    Returns:
        Tuple of (video_path, truth_path).

    Raises:
        ValueError: If duration or interval is not positive.
        SystemExit: If ffmpeg fails (exit code 1, stderr logged).
    """
    if duration <= 0 or interval <= 0:
        # A zero interval would divide by zero below; zero scenes would build
        # an invalid concat=n=0 filter graph.
        raise ValueError("duration and interval must be positive")
    FIXTURES_DIR.mkdir(parents=True, exist_ok=True)
    ensure_speech_sample()
    video_path = FIXTURES_DIR / f"test_scene_{duration}s.mp4"
    truth_path = FIXTURES_DIR / f"test_scene_{duration}s_ground_truth.json"
    num_scenes = duration // interval
    nc = len(COLORS)
    # Video: colored segments with timer overlay, concatenated.
    # Audio: speech sample looped to fill duration (real speech for whisper testing).
    filter_parts = []
    for i in range(num_scenes):
        color = COLORS[i % nc]
        # Final scene may be shorter when interval does not divide duration.
        seg_dur = interval if (i + 1) * interval <= duration else duration - i * interval
        offset = i * interval
        filter_parts.append(
            f"color=c=0x{color}:s=1920x1080:d={seg_dur}:r=30,"
            f"drawtext=text='%{{eif\\:t+{offset}\\:d}}s':"
            f"fontsize=200:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:"
            f"borderw=6:bordercolor=black"
            f"[v{i}]"
        )
    v_inputs = "".join(f"[v{i}]" for i in range(num_scenes))
    filter_parts.append(f"{v_inputs}concat=n={num_scenes}:v=1:a=0[vout]")
    # Audio: loop the speech sample (input 0 — the only file input; the video
    # segments are synthesized inside the filter graph) to cover the duration.
    filter_parts.append(
        f"[0:a]aresample=48000,aloop=loop=-1:size=48000*{duration},"
        f"atrim=0:{duration},volume=0.8[aout]"
    )
    filter_complex = ";\n".join(filter_parts)
    cmd = [
        "ffmpeg", "-y",
        # Global options must precede the output file — ffmpeg ignores
        # trailing options placed after the last output.
        "-hide_banner", "-loglevel", "warning",
        "-i", str(SPEECH_SAMPLE),  # speech audio input
        "-filter_complex", filter_complex,
        "-map", "[vout]", "-map", "[aout]",
        "-c:v", "libx264", "-preset", "ultrafast", "-crf", "18",
        "-g", "30", "-keyint_min", "30",  # keyframe every second at 30 fps
        "-c:a", "aac", "-b:a", "128k",
        "-t", str(duration),
        str(video_path),
    ]
    log.info("Generating %ds test video (%d scenes, %ds interval, speech audio)", duration, num_scenes, interval)
    log.info("Output: %s", video_path)
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        log.error("ffmpeg failed:\n%s", result.stderr)
        sys.exit(1)
    log.info("Video generated: %s", video_path)
    # Write ground truth: one record per scene so tests can assert detected
    # scene-change timestamps and colors against known values.
    scenes = [
        {
            "scene_index": i,
            "timestamp_s": i * interval,
            "color_hex": COLORS[i % nc],
        }
        for i in range(num_scenes)
    ]
    truth = {
        "duration_s": duration,
        "interval_s": interval,
        "num_scenes": num_scenes,
        "video_path": str(video_path),
        "scenes": scenes,
    }
    truth_path.write_text(json.dumps(truth, indent=2))
    log.info("Ground truth: %s", truth_path)
    return video_path, truth_path
def main():
    """Command-line entry point: configure logging, parse flags, generate."""
    logging.basicConfig(
        format="%(asctime)s %(levelname)-7s %(name)s: %(message)s",
        datefmt="%H:%M:%S",
        level=logging.INFO,
    )
    cli = argparse.ArgumentParser(description="Generate CHT test video")
    cli.add_argument("--duration", type=int, default=30, help="Video duration in seconds")
    cli.add_argument("--interval", type=int, default=5, help="Seconds between scene changes")
    opts = cli.parse_args()
    generate(opts.duration, opts.interval)
if __name__ == "__main__":
    main()