#!/usr/bin/env python3
"""
Process meeting recordings to extract audio + screen content.

Combines Whisper transcripts with vision analysis or OCR from screen shares.
"""
import argparse
import logging
import sys

from meetus.workflow import WorkflowConfig, ProcessingWorkflow


def setup_logging(verbose: bool = False) -> None:
    """Configure logging for the application.

    Args:
        verbose: When True, log at DEBUG level; otherwise INFO.
    """
    level = logging.DEBUG if verbose else logging.INFO
    logging.basicConfig(
        level=level,
        format='%(asctime)s - %(levelname)s - %(message)s',
        datefmt='%H:%M:%S'
    )
    # Suppress verbose output from libraries
    logging.getLogger('PIL').setLevel(logging.WARNING)
    logging.getLogger('easyocr').setLevel(logging.WARNING)
    logging.getLogger('paddleocr').setLevel(logging.WARNING)


def main() -> int:
    """Parse CLI arguments, run the processing workflow, and report results.

    Returns:
        Process exit code: 0 on success, 1 on failure, 130 on interrupt.
    """
    parser = argparse.ArgumentParser(
        description="Extract screen content from meeting recordings and merge with transcripts",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Run Whisper + vision analysis (recommended for code/dashboards)
  python process_meeting.py samples/meeting.mkv --run-whisper --use-vision

  # Use vision with specific context hint
  python process_meeting.py samples/meeting.mkv --run-whisper --use-vision --vision-context code

  # Traditional OCR approach
  python process_meeting.py samples/meeting.mkv --run-whisper

  # Re-run analysis using cached frames and transcript
  python process_meeting.py samples/meeting.mkv --use-vision

  # Force reprocessing (ignore cache)
  python process_meeting.py samples/meeting.mkv --run-whisper --use-vision --no-cache

  # Use scene detection for fewer frames
  python process_meeting.py samples/meeting.mkv --run-whisper --use-vision --scene-detection
"""
    )

    # Required arguments
    parser.add_argument(
        'video',
        help='Path to video file'
    )

    # Whisper options
    parser.add_argument(
        '--transcript', '-t',
        help='Path to Whisper transcript (JSON or TXT)',
        default=None
    )
    parser.add_argument(
        '--run-whisper',
        action='store_true',
        help='Run Whisper transcription before processing'
    )
    parser.add_argument(
        '--whisper-model',
        choices=['tiny', 'base', 'small', 'medium', 'large'],
        help='Whisper model to use (default: base)',
        default='base'
    )

    # Output options
    parser.add_argument(
        '--output', '-o',
        help='Output file for enhanced transcript (default: auto-generated in output directory)',
        default=None
    )
    parser.add_argument(
        '--output-dir',
        help='Base directory for outputs (default: output/)',
        default='output'
    )

    # Frame extraction options
    parser.add_argument(
        '--interval',
        type=int,
        help='Extract frame every N seconds (default: 5)',
        default=5
    )
    parser.add_argument(
        '--scene-detection',
        action='store_true',
        help='Use scene detection instead of interval extraction'
    )

    # Analysis options
    parser.add_argument(
        '--ocr-engine',
        choices=['tesseract', 'easyocr', 'paddleocr'],
        help='OCR engine to use (default: tesseract)',
        default='tesseract'
    )
    parser.add_argument(
        '--use-vision',
        action='store_true',
        help='Use local vision model (Ollama) instead of OCR for better context understanding'
    )
    parser.add_argument(
        '--vision-model',
        help='Vision model to use with Ollama (default: llava:13b)',
        default='llava:13b'
    )
    parser.add_argument(
        '--vision-context',
        choices=['meeting', 'dashboard', 'code', 'console'],
        help='Context hint for vision analysis (default: meeting)',
        default='meeting'
    )

    # Processing options
    parser.add_argument(
        '--no-cache',
        action='store_true',
        help='Disable caching - reprocess everything even if outputs exist'
    )
    parser.add_argument(
        '--no-deduplicate',
        action='store_true',
        help='Disable text deduplication'
    )
    parser.add_argument(
        '--extract-only',
        action='store_true',
        help='Only extract frames and analyze, skip transcript merging'
    )
    parser.add_argument(
        '--format',
        choices=['detailed', 'compact'],
        help='Output format style (default: detailed)',
        default='detailed'
    )

    # Logging
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Enable verbose logging (DEBUG level)'
    )

    args = parser.parse_args()

    # Setup logging
    setup_logging(args.verbose)

    try:
        # Create workflow configuration
        # NOTE(review): passes every parsed arg (including `verbose`) straight
        # through to WorkflowConfig — assumes its signature accepts them all.
        config = WorkflowConfig(**vars(args))

        # Run processing workflow
        workflow = ProcessingWorkflow(config)
        result = workflow.run()

        # Print final summary
        print("\n" + "=" * 80)
        print("✓ SUCCESS!")
        print("=" * 80)
        print(f"Output directory: {result['output_dir']}")
        if result.get('enhanced_transcript'):
            # Plain string: the original used an f-string with no placeholders.
            print("Enhanced transcript ready for AI summarization!")
        print("=" * 80)
        return 0

    except FileNotFoundError as e:
        # Lazy %-formatting: message is only interpolated if the record is emitted.
        logging.error("File not found: %s", e)
        return 1
    except RuntimeError as e:
        logging.error("Processing failed: %s", e)
        return 1
    except KeyboardInterrupt:
        logging.warning("\nProcessing interrupted by user")
        return 130
    except Exception as e:
        # Top-level boundary: log full traceback for unexpected failures.
        logging.exception("Unexpected error: %s", e)
        return 1


if __name__ == '__main__':
    sys.exit(main())