proper tests
This commit is contained in:
148
ctrl/gen_test_video.py
Executable file
148
ctrl/gen_test_video.py
Executable file
@@ -0,0 +1,148 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Generate a test video with known scene changes and audio markers.
|
||||
|
||||
The video is played fullscreen on the sender while kmsgrab captures it —
|
||||
simulating a meeting with deterministic, reproducible content.
|
||||
|
||||
- Scene changes: solid color blocks every INTERVAL seconds
|
||||
- Overlay: large elapsed-seconds counter for visual sync measurement
|
||||
- Audio: sine tone (changes frequency each scene for transcription ground truth)
|
||||
|
||||
Outputs:
|
||||
tests/fixtures/test_scene_30s.mp4
|
||||
tests/fixtures/test_scene_30s_ground_truth.json
|
||||
|
||||
Usage:
|
||||
python ctrl/gen_test_video.py [--duration 30] [--interval 5]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Module-level logger; handlers/level are configured in main().
log = logging.getLogger("gen_test_video")

# Repo root: this file lives in ctrl/, so two parents up from here.
PROJECT_DIR = Path(__file__).resolve().parent.parent
FIXTURES_DIR = PROJECT_DIR / "tests" / "fixtures"

# Scene colors (RGB hex); cycled when there are more scenes than colors.
COLORS = ["FF0000", "0000FF", "00FF00", "FFFF00", "FF00FF", "00FFFF"]

# Speech sample (Harvard sentences, public domain, Open Speech Repository).
# Downloaded on demand by ensure_speech_sample().
SPEECH_SAMPLE = FIXTURES_DIR / "test_speech_harvard.wav"
SPEECH_URL = "http://www.voiptroubleshooter.com/open_speech/american/OSR_us_000_0010_8k.wav"
|
||||
|
||||
|
||||
def ensure_speech_sample():
    """Download the speech sample if it is not already present.

    The transfer is written to a temporary ``.part`` file and renamed into
    place only on success, so an interrupted download can never leave a
    truncated file that a later ``exists()`` check would mistake for a
    valid sample. A leftover partial file is removed on failure.
    """
    FIXTURES_DIR.mkdir(parents=True, exist_ok=True)
    if SPEECH_SAMPLE.exists():
        return
    log.info("Downloading speech sample from Open Speech Repository...")
    import urllib.request

    tmp_path = SPEECH_SAMPLE.with_suffix(".wav.part")
    try:
        # urlretrieve has no timeout parameter; use urlopen with one so a
        # stalled server cannot hang the generator indefinitely.
        with urllib.request.urlopen(SPEECH_URL, timeout=60) as resp:
            tmp_path.write_bytes(resp.read())
        tmp_path.replace(SPEECH_SAMPLE)  # atomic rename on POSIX
    finally:
        # On failure, drop the partial file so the next run retries cleanly.
        if tmp_path.exists() and not SPEECH_SAMPLE.exists():
            tmp_path.unlink()
    log.info("Saved: %s", SPEECH_SAMPLE)
|
||||
|
||||
|
||||
def generate(duration: int, interval: int):
    """Generate the test video and its ground-truth JSON.

    Args:
        duration: Total video length in seconds.
        interval: Seconds between scene (color) changes.

    Returns:
        ``(video_path, truth_path)`` as :class:`pathlib.Path` objects.

    Exits the process with status 1 if ffmpeg fails.
    """
    FIXTURES_DIR.mkdir(parents=True, exist_ok=True)
    ensure_speech_sample()

    video_path = FIXTURES_DIR / f"test_scene_{duration}s.mp4"
    truth_path = FIXTURES_DIR / f"test_scene_{duration}s_ground_truth.json"

    # Ceiling division: a trailing partial scene covers the remainder when
    # duration is not an exact multiple of interval. (Floor division left
    # the tail of the video ungenerated and made the seg_dur branch below
    # unreachable.)
    num_scenes = -(-duration // interval)
    nc = len(COLORS)

    # Video: colored segments with timer overlay, concatenated.
    # Audio: speech sample looped to fill duration (real speech for whisper testing).
    filter_parts = []
    for i in range(num_scenes):
        color = COLORS[i % nc]
        # Last scene may be shorter than a full interval.
        seg_dur = interval if (i + 1) * interval <= duration else duration - i * interval
        offset = i * interval
        filter_parts.append(
            f"color=c=0x{color}:s=1920x1080:d={seg_dur}:r=30,"
            f"drawtext=text='%{{eif\\:t+{offset}\\:d}}s':"
            f"fontsize=200:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:"
            f"borderw=6:bordercolor=black"
            f"[v{i}]"
        )

    v_inputs = "".join(f"[v{i}]" for i in range(num_scenes))
    filter_parts.append(f"{v_inputs}concat=n={num_scenes}:v=1:a=0[vout]")
    # Audio: the speech sample is input 0. The video segments come from
    # lavfi color sources created inside the filter graph, so no dummy
    # video input is needed.
    filter_parts.append(
        f"[0:a]aresample=48000,aloop=loop=-1:size=48000*{duration},"
        f"atrim=0:{duration},volume=0.8[aout]"
    )

    filter_complex = ";\n".join(filter_parts)

    cmd = [
        "ffmpeg", "-y",
        # Global options must precede the output file; ffmpeg ignores
        # trailing options placed after the last output.
        "-hide_banner", "-loglevel", "warning",
        "-i", str(SPEECH_SAMPLE),  # speech audio input
        "-filter_complex", filter_complex,
        "-map", "[vout]", "-map", "[aout]",
        "-c:v", "libx264", "-preset", "ultrafast", "-crf", "18",
        # Fixed GOP: one keyframe per second at 30 fps for easy seeking.
        "-g", "30", "-keyint_min", "30",
        "-c:a", "aac", "-b:a", "128k",
        "-t", str(duration),
        str(video_path),
    ]

    log.info(
        "Generating %ds test video (%d scenes, %ds interval, speech audio)",
        duration, num_scenes, interval,
    )
    log.info("Output: %s", video_path)

    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        log.error("ffmpeg failed:\n%s", result.stderr)
        sys.exit(1)

    log.info("Video generated: %s", video_path)

    # Ground truth: one entry per scene with its start time and color.
    scenes = [
        {
            "scene_index": i,
            "timestamp_s": i * interval,
            "color_hex": COLORS[i % nc],
        }
        for i in range(num_scenes)
    ]

    truth = {
        "duration_s": duration,
        "interval_s": interval,
        "num_scenes": num_scenes,
        "video_path": str(video_path),
        "scenes": scenes,
    }
    truth_path.write_text(json.dumps(truth, indent=2))
    log.info("Ground truth: %s", truth_path)

    return video_path, truth_path
|
||||
|
||||
|
||||
def main():
    """CLI entry point: configure logging, parse args, generate the video."""
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)-7s %(name)s: %(message)s",
        datefmt="%H:%M:%S",
    )

    arg_parser = argparse.ArgumentParser(description="Generate CHT test video")
    arg_parser.add_argument(
        "--duration", type=int, default=30, help="Video duration in seconds"
    )
    arg_parser.add_argument(
        "--interval", type=int, default=5, help="Seconds between scene changes"
    )
    opts = arg_parser.parse_args()

    generate(opts.duration, opts.interval)
|
||||
|
||||
|
||||
# Script entry point: only run when executed directly, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user