Files
mitus/process_meeting.py
Mariano Gabriel cd7b0aed07 refactor
2025-10-20 00:03:41 -03:00

198 lines
5.6 KiB
Python

#!/usr/bin/env python3
"""
Process meeting recordings to extract audio + screen content.
Combines Whisper transcripts with vision analysis or OCR from screen shares.
"""
import argparse
import sys
import logging
from meetus.workflow import WorkflowConfig, ProcessingWorkflow
def setup_logging(verbose: bool = False):
    """Configure application-wide logging.

    Args:
        verbose: When True, log at DEBUG level; otherwise INFO.
    """
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        datefmt='%H:%M:%S'
    )
    # These third-party libraries are chatty below WARNING; quiet them down.
    for noisy in ('PIL', 'easyocr', 'paddleocr'):
        logging.getLogger(noisy).setLevel(logging.WARNING)
def main():
    """Parse CLI arguments, run the processing workflow, and report the result.

    Returns:
        Process exit code: 0 on success, 1 on error, 130 on user interrupt.
    """
    parser = argparse.ArgumentParser(
        description="Extract screen content from meeting recordings and merge with transcripts",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Run Whisper + vision analysis (recommended for code/dashboards)
  python process_meeting.py samples/meeting.mkv --run-whisper --use-vision

  # Use vision with specific context hint
  python process_meeting.py samples/meeting.mkv --run-whisper --use-vision --vision-context code

  # Traditional OCR approach
  python process_meeting.py samples/meeting.mkv --run-whisper

  # Re-run analysis using cached frames and transcript
  python process_meeting.py samples/meeting.mkv --use-vision

  # Force reprocessing (ignore cache)
  python process_meeting.py samples/meeting.mkv --run-whisper --use-vision --no-cache

  # Use scene detection for fewer frames
  python process_meeting.py samples/meeting.mkv --run-whisper --use-vision --scene-detection
"""
    )

    # Required arguments
    parser.add_argument(
        'video',
        help='Path to video file'
    )

    # Whisper options
    parser.add_argument(
        '--transcript', '-t',
        help='Path to Whisper transcript (JSON or TXT)',
        default=None
    )
    parser.add_argument(
        '--run-whisper',
        action='store_true',
        help='Run Whisper transcription before processing'
    )
    parser.add_argument(
        '--whisper-model',
        choices=['tiny', 'base', 'small', 'medium', 'large'],
        help='Whisper model to use (default: base)',
        default='base'
    )

    # Output options
    parser.add_argument(
        '--output', '-o',
        help='Output file for enhanced transcript (default: auto-generated in output directory)',
        default=None
    )
    parser.add_argument(
        '--output-dir',
        help='Base directory for outputs (default: output/)',
        default='output'
    )

    # Frame extraction options
    parser.add_argument(
        '--interval',
        type=int,
        help='Extract frame every N seconds (default: 5)',
        default=5
    )
    parser.add_argument(
        '--scene-detection',
        action='store_true',
        help='Use scene detection instead of interval extraction'
    )

    # Analysis options
    parser.add_argument(
        '--ocr-engine',
        choices=['tesseract', 'easyocr', 'paddleocr'],
        help='OCR engine to use (default: tesseract)',
        default='tesseract'
    )
    parser.add_argument(
        '--use-vision',
        action='store_true',
        help='Use local vision model (Ollama) instead of OCR for better context understanding'
    )
    parser.add_argument(
        '--vision-model',
        help='Vision model to use with Ollama (default: llava:13b)',
        default='llava:13b'
    )
    parser.add_argument(
        '--vision-context',
        choices=['meeting', 'dashboard', 'code', 'console'],
        help='Context hint for vision analysis (default: meeting)',
        default='meeting'
    )

    # Processing options
    parser.add_argument(
        '--no-cache',
        action='store_true',
        help='Disable caching - reprocess everything even if outputs exist'
    )
    parser.add_argument(
        '--no-deduplicate',
        action='store_true',
        help='Disable text deduplication'
    )
    parser.add_argument(
        '--extract-only',
        action='store_true',
        help='Only extract frames and analyze, skip transcript merging'
    )
    parser.add_argument(
        '--format',
        choices=['detailed', 'compact'],
        help='Output format style (default: detailed)',
        default='detailed'
    )

    # Logging
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Enable verbose logging (DEBUG level)'
    )

    args = parser.parse_args()
    setup_logging(args.verbose)

    try:
        # All parsed options map 1:1 onto WorkflowConfig fields.
        # NOTE(review): this relies on WorkflowConfig accepting every argparse
        # attribute (including 'verbose') — confirm against meetus.workflow.
        config = WorkflowConfig(**vars(args))

        workflow = ProcessingWorkflow(config)
        result = workflow.run()

        # Print final summary
        print("\n" + "=" * 80)
        print("✓ SUCCESS!")
        print("=" * 80)
        print(f"Output directory: {result['output_dir']}")
        if result.get('enhanced_transcript'):
            # Fixed: was an f-string with no placeholders.
            print("Enhanced transcript ready for AI summarization!")
        print("=" * 80)
        return 0
    except FileNotFoundError as e:
        # Lazy %-style args so formatting only happens if the record is emitted.
        logging.error("File not found: %s", e)
        return 1
    except RuntimeError as e:
        logging.error("Processing failed: %s", e)
        return 1
    except KeyboardInterrupt:
        logging.warning("\nProcessing interrupted by user")
        return 130
    except Exception as e:
        # logging.exception also records the traceback.
        logging.exception("Unexpected error: %s", e)
        return 1
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == '__main__':
    sys.exit(main())