#!/usr/bin/env python3
"""Generate a test video with known scene changes and audio markers.

The video is played fullscreen on the sender while kmsgrab captures it —
simulating a meeting with deterministic, reproducible content.

- Scene changes: solid color blocks every INTERVAL seconds
- Overlay: large elapsed-seconds counter for visual sync measurement
- Audio: looped speech sample (real speech for transcription ground truth)

Outputs:
    tests/fixtures/test_scene_30s.mp4
    tests/fixtures/test_scene_30s_ground_truth.json

Usage:
    python ctrl/gen_test_video.py [--duration 30] [--interval 5]
"""

import argparse
import json
import logging
import subprocess
import sys
from pathlib import Path

log = logging.getLogger("gen_test_video")

PROJECT_DIR = Path(__file__).resolve().parent.parent
FIXTURES_DIR = PROJECT_DIR / "tests" / "fixtures"

# Scene colors (RGB hex)
COLORS = ["FF0000", "0000FF", "00FF00", "FFFF00", "FF00FF", "00FFFF"]

# Speech sample (Harvard sentences, public domain, Open Speech Repository)
SPEECH_SAMPLE = FIXTURES_DIR / "test_speech_harvard.wav"
SPEECH_URL = "http://www.voiptroubleshooter.com/open_speech/american/OSR_us_000_0010_8k.wav"


def ensure_speech_sample():
    """Download the speech sample into FIXTURES_DIR if not already present."""
    FIXTURES_DIR.mkdir(parents=True, exist_ok=True)
    if SPEECH_SAMPLE.exists():
        return
    log.info("Downloading speech sample from Open Speech Repository...")
    import urllib.request  # lazy import: only needed on first run
    urllib.request.urlretrieve(SPEECH_URL, SPEECH_SAMPLE)
    log.info("Saved: %s", SPEECH_SAMPLE)


def build_filter_complex(duration: int, interval: int, num_scenes: int) -> str:
    """Build the ffmpeg filter_complex string for the test video.

    Produces one solid-color 1920x1080@30fps segment per scene, each with a
    large elapsed-seconds drawtext overlay, concatenated into [vout]; the
    speech input (stream [0:a]) is resampled and looped to fill the full
    duration as [aout].

    Args:
        duration: total video length in seconds.
        interval: seconds per scene.
        num_scenes: number of scenes (duration // interval).

    Returns:
        The complete filter_complex string (segments joined with ";\\n").
    """
    parts = []
    nc = len(COLORS)
    for i in range(num_scenes):
        color = COLORS[i % nc]
        # Last scene may be shorter if duration is not a multiple of interval.
        seg_dur = interval if (i + 1) * interval <= duration else duration - i * interval
        offset = i * interval
        parts.append(
            f"color=c=0x{color}:s=1920x1080:d={seg_dur}:r=30,"
            # Each segment's timestamp t restarts at 0, so add the scene's
            # absolute offset; eif\:...\:d truncates to an integer display.
            f"drawtext=text='%{{eif\\:t+{offset}\\:d}}s':"
            f"fontsize=200:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:"
            f"borderw=6:bordercolor=black"
            f"[v{i}]"
        )
    v_inputs = "".join(f"[v{i}]" for i in range(num_scenes))
    parts.append(f"{v_inputs}concat=n={num_scenes}:v=1:a=0[vout]")
    # Audio: resample 8 kHz source to 48 kHz, loop forever, trim to the exact
    # duration, and attenuate slightly. Size is computed here rather than
    # leaving "48000*N" for ffmpeg's option parser to evaluate.
    parts.append(
        f"[0:a]aresample=48000,aloop=loop=-1:size={48000 * duration},"
        f"atrim=0:{duration},volume=0.8[aout]"
    )
    return ";\n".join(parts)


def build_ground_truth(duration: int, interval: int, num_scenes: int,
                       video_path) -> dict:
    """Return the ground-truth metadata dict describing each scene."""
    scenes = [
        {
            "scene_index": i,
            "timestamp_s": i * interval,
            "color_hex": COLORS[i % len(COLORS)],
        }
        for i in range(num_scenes)
    ]
    return {
        "duration_s": duration,
        "interval_s": interval,
        "num_scenes": num_scenes,
        "video_path": str(video_path),
        "scenes": scenes,
    }


def generate(duration: int, interval: int):
    """Generate the test video and its ground-truth JSON.

    Args:
        duration: total video length in seconds.
        interval: seconds between scene changes.

    Returns:
        (video_path, truth_path) as Path objects.

    Raises:
        ValueError: if duration/interval cannot produce at least one scene.

    Exits the process with status 1 if ffmpeg fails.
    """
    if duration <= 0 or interval <= 0 or interval > duration:
        raise ValueError(
            f"need at least one scene: duration={duration}s interval={interval}s"
        )
    FIXTURES_DIR.mkdir(parents=True, exist_ok=True)
    ensure_speech_sample()
    video_path = FIXTURES_DIR / f"test_scene_{duration}s.mp4"
    truth_path = FIXTURES_DIR / f"test_scene_{duration}s_ground_truth.json"
    num_scenes = duration // interval

    filter_complex = build_filter_complex(duration, interval, num_scenes)

    # NOTE: global options (-hide_banner, -loglevel) must come before the
    # inputs — ffmpeg ignores options trailing the last output file.
    # The video comes entirely from lavfi sources inside the filtergraph,
    # so the speech file is the only real input (stream [0:a]).
    cmd = [
        "ffmpeg", "-hide_banner", "-loglevel", "warning", "-y",
        "-i", str(SPEECH_SAMPLE),           # input 0: speech audio
        "-filter_complex", filter_complex,
        "-map", "[vout]", "-map", "[aout]",
        "-c:v", "libx264", "-preset", "ultrafast", "-crf", "18",
        "-g", "30", "-keyint_min", "30",    # keyframe every second at 30 fps
        "-c:a", "aac", "-b:a", "128k",
        "-t", str(duration),
        str(video_path),
    ]
    log.info("Generating %ds test video (%d scenes, %ds interval, speech audio)",
             duration, num_scenes, interval)
    log.info("Output: %s", video_path)
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        log.error("ffmpeg failed:\n%s", result.stderr)
        sys.exit(1)
    log.info("Video generated: %s", video_path)

    truth = build_ground_truth(duration, interval, num_scenes, video_path)
    truth_path.write_text(json.dumps(truth, indent=2))
    log.info("Ground truth: %s", truth_path)
    return video_path, truth_path


def main():
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)-7s %(name)s: %(message)s",
        datefmt="%H:%M:%S",
    )
    parser = argparse.ArgumentParser(description="Generate CHT test video")
    parser.add_argument("--duration", type=int, default=30,
                        help="Video duration in seconds")
    parser.add_argument("--interval", type=int, default=5,
                        help="Seconds between scene changes")
    args = parser.parse_args()
    generate(args.duration, args.interval)


if __name__ == "__main__":
    main()