init commit
This commit is contained in:
229
process_meeting.py
Normal file
229
process_meeting.py
Normal file
@@ -0,0 +1,229 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Process meeting recordings to extract audio + screen content.
|
||||
Combines Whisper transcripts with OCR from screen shares.
|
||||
"""
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import json
|
||||
import logging
|
||||
|
||||
from meetus.frame_extractor import FrameExtractor
|
||||
from meetus.ocr_processor import OCRProcessor
|
||||
from meetus.transcript_merger import TranscriptMerger
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def setup_logging(verbose: bool = False):
    """Set up application-wide logging.

    Args:
        verbose: When True use DEBUG level, otherwise INFO.
    """
    # Configure the root logger once for the whole run.
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        datefmt='%H:%M:%S',
    )

    # Third-party libraries are chatty below WARNING; keep them quiet.
    for noisy in ('PIL', 'easyocr', 'paddleocr'):
        logging.getLogger(noisy).setLevel(logging.WARNING)
||||
|
||||
|
||||
def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line argument parser for the meeting processor."""
    parser = argparse.ArgumentParser(
        description="Extract screen content from meeting recordings and merge with transcripts",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Process video and extract frames only
  python process_meeting.py samples/meeting.mkv --extract-only

  # Process video with Whisper transcript
  python process_meeting.py samples/meeting.mkv --transcript meeting.json

  # Use scene detection instead of interval
  python process_meeting.py samples/meeting.mkv --scene-detection

  # Use different OCR engine
  python process_meeting.py samples/meeting.mkv --ocr-engine easyocr
"""
    )

    parser.add_argument(
        'video',
        help='Path to video file'
    )
    parser.add_argument(
        '--transcript', '-t',
        help='Path to Whisper transcript (JSON or TXT)',
        default=None
    )
    parser.add_argument(
        '--output', '-o',
        help='Output file for enhanced transcript (default: <video>_enhanced.txt)',
        default=None
    )
    parser.add_argument(
        '--frames-dir',
        help='Directory to save extracted frames (default: frames/)',
        default='frames'
    )
    parser.add_argument(
        '--interval',
        type=int,
        help='Extract frame every N seconds (default: 5)',
        default=5
    )
    parser.add_argument(
        '--scene-detection',
        action='store_true',
        help='Use scene detection instead of interval extraction'
    )
    parser.add_argument(
        '--ocr-engine',
        choices=['tesseract', 'easyocr', 'paddleocr'],
        help='OCR engine to use (default: tesseract)',
        default='tesseract'
    )
    parser.add_argument(
        '--no-deduplicate',
        action='store_true',
        help='Disable text deduplication'
    )
    parser.add_argument(
        '--extract-only',
        action='store_true',
        help='Only extract frames and OCR, skip transcript merging'
    )
    parser.add_argument(
        '--format',
        choices=['detailed', 'compact'],
        help='Output format style (default: detailed)',
        default='detailed'
    )
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Enable verbose logging (DEBUG level)'
    )
    return parser


def _extract_frames(args, video_path: Path):
    """Step 1: extract frames from the video; exit(1) if none were produced.

    Returns the frames-info collection produced by FrameExtractor.
    """
    logger.info("Step 1: Extracting frames from video...")
    extractor = FrameExtractor(str(video_path), args.frames_dir)

    if args.scene_detection:
        frames_info = extractor.extract_scene_changes()
    else:
        frames_info = extractor.extract_by_interval(args.interval)

    if not frames_info:
        logger.error("No frames extracted")
        sys.exit(1)

    logger.info(f"✓ Extracted {len(frames_info)} frames")
    return frames_info


def _run_ocr(args, frames_info):
    """Step 2: run OCR on the extracted frames.

    Exits with an install hint when the selected OCR engine is not
    importable; returns the list of screen-text segments otherwise.
    """
    logger.info("Step 2: Running OCR on extracted frames...")
    try:
        ocr = OCRProcessor(engine=args.ocr_engine)
        screen_segments = ocr.process_frames(
            frames_info,
            deduplicate=not args.no_deduplicate
        )
        logger.info(f"✓ Processed {len(screen_segments)} frames with text content")
        return screen_segments
    except ImportError as e:
        logger.error(f"{e}")
        logger.error(f"To install {args.ocr_engine}:")
        logger.error(f"  pip install {args.ocr_engine}")
        sys.exit(1)


def _load_audio_segments(args, merger):
    """Step 3 (input): load the Whisper transcript if one was supplied.

    Falls back to an empty list (screen content only) when no transcript
    was given or the file does not exist.
    """
    if not args.transcript:
        logger.info("No transcript provided, using screen content only...")
        return []

    logger.info("Step 3: Merging with Whisper transcript...")
    transcript_path = Path(args.transcript)

    if not transcript_path.exists():
        # Missing transcript is non-fatal: proceed with OCR content alone.
        logger.warning(f"Transcript not found: {args.transcript}")
        logger.info("Proceeding with screen content only...")
        return []

    audio_segments = merger.load_whisper_transcript(str(transcript_path))
    logger.info(f"✓ Loaded {len(audio_segments)} audio segments")
    return audio_segments


def main():
    """Entry point: extract frames, OCR them, and optionally merge the
    screen text with a Whisper transcript into an enhanced transcript.

    Exits with status 1 on a missing video, failed frame extraction, or
    a missing OCR engine dependency.
    """
    args = _build_parser().parse_args()

    # Setup logging
    setup_logging(args.verbose)

    # Validate video path
    video_path = Path(args.video)
    if not video_path.exists():
        logger.error(f"Video file not found: {args.video}")
        sys.exit(1)

    # Set default output path (NOTE: stem drops the video's directory, so
    # the default output lands in the current working directory).
    if args.output is None:
        args.output = video_path.stem + '_enhanced.txt'

    logger.info("=" * 80)
    logger.info("MEETING PROCESSOR")
    logger.info("=" * 80)
    logger.info(f"Video: {video_path.name}")
    logger.info(f"OCR Engine: {args.ocr_engine}")
    logger.info(f"Frame extraction: {'Scene detection' if args.scene_detection else f'Every {args.interval}s'}")
    logger.info("=" * 80)

    frames_info = _extract_frames(args, video_path)
    screen_segments = _run_ocr(args, frames_info)

    # Save OCR results as JSON next to the enhanced transcript.
    ocr_output = video_path.stem + '_ocr.json'
    with open(ocr_output, 'w', encoding='utf-8') as f:
        json.dump(screen_segments, f, indent=2, ensure_ascii=False)
    logger.info(f"✓ Saved OCR results to: {ocr_output}")

    if args.extract_only:
        logger.info("Done! (extract-only mode)")
        return

    # Step 3: Merge with transcript (if provided)
    merger = TranscriptMerger()
    audio_segments = _load_audio_segments(args, merger)

    # Merge and format
    merged = merger.merge_transcripts(audio_segments, screen_segments)
    formatted = merger.format_for_claude(merged, format_style=args.format)

    # Save output
    merger.save_transcript(formatted, args.output)

    logger.info("=" * 80)
    logger.info("✓ PROCESSING COMPLETE!")
    logger.info("=" * 80)
    logger.info(f"Enhanced transcript: {args.output}")
    logger.info(f"OCR data: {ocr_output}")
    logger.info(f"Frames: {args.frames_dir}/")
    logger.info("")
    logger.info("You can now use the enhanced transcript with Claude for summarization!")
|
||||
# Standard script entry-point guard: run main() only when executed
# directly, not when imported as a module.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user