#!/usr/bin/env python3 """ Process meeting recordings to extract audio + screen content. Combines Whisper transcripts with OCR from screen shares. """ import argparse from pathlib import Path import sys import json import logging import subprocess import shutil from meetus.frame_extractor import FrameExtractor from meetus.ocr_processor import OCRProcessor from meetus.transcript_merger import TranscriptMerger logger = logging.getLogger(__name__) def setup_logging(verbose: bool = False): """ Configure logging for the application. Args: verbose: If True, set DEBUG level, otherwise INFO """ level = logging.DEBUG if verbose else logging.INFO # Configure root logger logging.basicConfig( level=level, format='%(asctime)s - %(levelname)s - %(message)s', datefmt='%H:%M:%S' ) # Suppress verbose output from libraries logging.getLogger('PIL').setLevel(logging.WARNING) logging.getLogger('easyocr').setLevel(logging.WARNING) logging.getLogger('paddleocr').setLevel(logging.WARNING) def run_whisper(video_path: Path, model: str = "base", output_dir: str = "output") -> Path: """ Run Whisper transcription on video file. Args: video_path: Path to video file model: Whisper model to use (tiny, base, small, medium, large) output_dir: Directory to save output Returns: Path to generated JSON transcript """ # Check if whisper is installed if not shutil.which("whisper"): logger.error("Whisper is not installed. Install it with: pip install openai-whisper") sys.exit(1) logger.info(f"Running Whisper transcription (model: {model})...") logger.info("This may take a few minutes depending on video length...") # Run whisper command cmd = [ "whisper", str(video_path), "--model", model, "--output_format", "json", "--output_dir", output_dir ] try: result = subprocess.run( cmd, check=True, capture_output=True, text=True ) # Whisper outputs to /.json transcript_path = Path(output_dir) / f"{video_path.stem}.json" if transcript_path.exists(): logger.info(f"✓ Whisper transcription completed: {transcript_path}") return transcript_path else: logger.error("Whisper completed but transcript file not found") sys.exit(1) except subprocess.CalledProcessError as e: logger.error(f"Whisper failed: {e.stderr}") sys.exit(1) def main(): parser = argparse.ArgumentParser( description="Extract screen content from meeting recordings and merge with transcripts", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: # Run Whisper + full processing in one command python process_meeting.py samples/meeting.mkv --run-whisper # Process video with existing Whisper transcript python process_meeting.py samples/meeting.mkv --transcript output/meeting.json # Use scene detection instead of interval python process_meeting.py samples/meeting.mkv --run-whisper --scene-detection # Use different Whisper model and OCR engine python process_meeting.py samples/meeting.mkv --run-whisper --whisper-model small --ocr-engine easyocr # Extract frames only (no transcript) python process_meeting.py samples/meeting.mkv --extract-only """ ) parser.add_argument( 'video', help='Path to video file' ) parser.add_argument( '--transcript', '-t', help='Path to Whisper transcript (JSON or TXT)', default=None ) parser.add_argument( '--run-whisper', action='store_true', help='Run Whisper transcription before processing' ) parser.add_argument( '--whisper-model', choices=['tiny', 'base', 'small', 'medium', 'large'], help='Whisper model to use (default: base)', default='base' ) parser.add_argument( '--output', '-o', help='Output file for enhanced transcript (default: output/