add whisper to main command, ignore output files
This commit is contained in:
@@ -8,6 +8,8 @@ from pathlib import Path
|
||||
import sys
|
||||
import json
|
||||
import logging
|
||||
import subprocess
|
||||
import shutil
|
||||
|
||||
from meetus.frame_extractor import FrameExtractor
|
||||
from meetus.ocr_processor import OCRProcessor
|
||||
@@ -38,23 +40,78 @@ def setup_logging(verbose: bool = False):
|
||||
logging.getLogger('paddleocr').setLevel(logging.WARNING)
|
||||
|
||||
|
||||
def run_whisper(video_path: Path, model: str = "base", output_dir: str = "output") -> Path:
    """
    Run Whisper transcription on a video file via the `whisper` CLI.

    Args:
        video_path: Path to the video file to transcribe.
        model: Whisper model to use (tiny, base, small, medium, large).
        output_dir: Directory where Whisper writes its output files.

    Returns:
        Path to the generated JSON transcript
        (``<output_dir>/<video_stem>.json``).

    Exits:
        Calls ``sys.exit(1)`` if whisper is not installed, the subprocess
        fails, or the expected transcript file is not produced.
    """
    # Check if whisper is installed before attempting to run it.
    if not shutil.which("whisper"):
        logger.error("Whisper is not installed. Install it with: pip install openai-whisper")
        sys.exit(1)

    logger.info(f"Running Whisper transcription (model: {model})...")
    logger.info("This may take a few minutes depending on video length...")

    # Build the whisper CLI invocation (argument list form: no shell involved).
    cmd = [
        "whisper",
        str(video_path),
        "--model", model,
        "--output_format", "json",
        "--output_dir", output_dir,
    ]

    try:
        # check=True raises CalledProcessError on a non-zero exit status;
        # capture output so stderr can be surfaced in the error log below.
        subprocess.run(
            cmd,
            check=True,
            capture_output=True,
            text=True,
        )

        # Whisper names its transcript after the input file's stem:
        # <output_dir>/<video_stem>.json
        transcript_path = Path(output_dir) / f"{video_path.stem}.json"

        if transcript_path.exists():
            logger.info(f"✓ Whisper transcription completed: {transcript_path}")
            return transcript_path
        else:
            logger.error("Whisper completed but transcript file not found")
            sys.exit(1)

    except subprocess.CalledProcessError as e:
        logger.error(f"Whisper failed: {e.stderr}")
        sys.exit(1)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Extract screen content from meeting recordings and merge with transcripts",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
# Process video and extract frames only
|
||||
python process_meeting.py samples/meeting.mkv --extract-only
|
||||
# Run Whisper + full processing in one command
|
||||
python process_meeting.py samples/meeting.mkv --run-whisper
|
||||
|
||||
# Process video with Whisper transcript
|
||||
python process_meeting.py samples/meeting.mkv --transcript meeting.json
|
||||
# Process video with existing Whisper transcript
|
||||
python process_meeting.py samples/meeting.mkv --transcript output/meeting.json
|
||||
|
||||
# Use scene detection instead of interval
|
||||
python process_meeting.py samples/meeting.mkv --scene-detection
|
||||
python process_meeting.py samples/meeting.mkv --run-whisper --scene-detection
|
||||
|
||||
# Use different OCR engine
|
||||
python process_meeting.py samples/meeting.mkv --ocr-engine easyocr
|
||||
# Use different Whisper model and OCR engine
|
||||
python process_meeting.py samples/meeting.mkv --run-whisper --whisper-model small --ocr-engine easyocr
|
||||
|
||||
# Extract frames only (no transcript)
|
||||
python process_meeting.py samples/meeting.mkv --extract-only
|
||||
"""
|
||||
)
|
||||
|
||||
@@ -69,12 +126,31 @@ Examples:
|
||||
default=None
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--run-whisper',
|
||||
action='store_true',
|
||||
help='Run Whisper transcription before processing'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--whisper-model',
|
||||
choices=['tiny', 'base', 'small', 'medium', 'large'],
|
||||
help='Whisper model to use (default: base)',
|
||||
default='base'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--output', '-o',
|
||||
help='Output file for enhanced transcript (default: <video>_enhanced.txt)',
|
||||
help='Output file for enhanced transcript (default: output/<video>_enhanced.txt)',
|
||||
default=None
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--output-dir',
|
||||
help='Directory for output files (default: output/)',
|
||||
default='output'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--frames-dir',
|
||||
help='Directory to save extracted frames (default: frames/)',
|
||||
@@ -137,9 +213,22 @@ Examples:
|
||||
logger.error(f"Video file not found: {args.video}")
|
||||
sys.exit(1)
|
||||
|
||||
# Create output directory
|
||||
output_dir = Path(args.output_dir)
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Set default output path
|
||||
if args.output is None:
|
||||
args.output = video_path.stem + '_enhanced.txt'
|
||||
args.output = str(output_dir / f"{video_path.stem}_enhanced.txt")
|
||||
|
||||
# Run Whisper if requested
|
||||
if args.run_whisper:
|
||||
logger.info("=" * 80)
|
||||
logger.info("STEP 0: Running Whisper Transcription")
|
||||
logger.info("=" * 80)
|
||||
transcript_path = run_whisper(video_path, args.whisper_model, str(output_dir))
|
||||
args.transcript = str(transcript_path)
|
||||
logger.info("")
|
||||
|
||||
logger.info("=" * 80)
|
||||
logger.info("MEETING PROCESSOR")
|
||||
@@ -147,6 +236,8 @@ Examples:
|
||||
logger.info(f"Video: {video_path.name}")
|
||||
logger.info(f"OCR Engine: {args.ocr_engine}")
|
||||
logger.info(f"Frame extraction: {'Scene detection' if args.scene_detection else f'Every {args.interval}s'}")
|
||||
if args.transcript:
|
||||
logger.info(f"Transcript: {args.transcript}")
|
||||
logger.info("=" * 80)
|
||||
|
||||
# Step 1: Extract frames
|
||||
@@ -181,7 +272,7 @@ Examples:
|
||||
sys.exit(1)
|
||||
|
||||
# Save OCR results as JSON
|
||||
ocr_output = video_path.stem + '_ocr.json'
|
||||
ocr_output = output_dir / f"{video_path.stem}_ocr.json"
|
||||
with open(ocr_output, 'w', encoding='utf-8') as f:
|
||||
json.dump(screen_segments, f, indent=2, ensure_ascii=False)
|
||||
logger.info(f"✓ Saved OCR results to: {ocr_output}")
|
||||
|
||||
Reference in New Issue
Block a user