add whisperx support
This commit is contained in:
@@ -4,6 +4,7 @@ Coordinates frame extraction, analysis, and transcript merging.
|
||||
"""
|
||||
from pathlib import Path
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import shutil
|
||||
from typing import Dict, Any, Optional
|
||||
@@ -32,6 +33,7 @@ class WorkflowConfig:
|
||||
# Whisper options
|
||||
self.run_whisper = kwargs.get('run_whisper', False)
|
||||
self.whisper_model = kwargs.get('whisper_model', 'medium')
|
||||
self.diarize = kwargs.get('diarize', False)
|
||||
|
||||
# Frame extraction
|
||||
self.scene_detection = kwargs.get('scene_detection', False)
|
||||
@@ -176,18 +178,27 @@ class ProcessingWorkflow:
|
||||
if cached:
|
||||
return str(cached)
|
||||
|
||||
# If no cache and not running whisper, use provided transcript path (if any)
|
||||
if not self.config.run_whisper:
|
||||
# If no cache and not running whisper/diarize, use provided transcript path (if any)
|
||||
if not self.config.run_whisper and not self.config.diarize:
|
||||
return self.config.transcript_path
|
||||
|
||||
logger.info("=" * 80)
|
||||
logger.info("STEP 0: Running Whisper Transcription")
|
||||
logger.info("=" * 80)
|
||||
|
||||
# Check if whisperx is installed
|
||||
if not shutil.which("whisperx"):
|
||||
logger.error("WhisperX is not installed. Install it with: pip install whisperx")
|
||||
raise RuntimeError("WhisperX not installed")
|
||||
# Determine which transcription tool to use
|
||||
use_diarize = getattr(self.config, 'diarize', False)
|
||||
|
||||
if use_diarize:
|
||||
if not shutil.which("whisperx"):
|
||||
logger.error("WhisperX is not installed. Install it with: pip install whisperx")
|
||||
raise RuntimeError("WhisperX not installed (required for --diarize)")
|
||||
transcribe_cmd = "whisperx"
|
||||
else:
|
||||
if not shutil.which("whisper"):
|
||||
logger.error("Whisper is not installed. Install it with: pip install openai-whisper")
|
||||
raise RuntimeError("Whisper not installed")
|
||||
transcribe_cmd = "whisper"
|
||||
|
||||
# Unload Ollama model to free GPU memory for Whisper (if using vision)
|
||||
if self.config.use_vision:
|
||||
@@ -199,21 +210,34 @@ class ProcessingWorkflow:
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not unload Ollama model: {e}")
|
||||
|
||||
logger.info(f"Running WhisperX transcription with diarization (model: {self.config.whisper_model})...")
|
||||
if use_diarize:
|
||||
logger.info(f"Running WhisperX transcription with diarization (model: {self.config.whisper_model})...")
|
||||
else:
|
||||
logger.info(f"Running Whisper transcription (model: {self.config.whisper_model})...")
|
||||
logger.info("This may take a few minutes depending on video length...")
|
||||
|
||||
# Run whisperx command with diarization
|
||||
# Build command
|
||||
cmd = [
|
||||
"whisperx",
|
||||
transcribe_cmd,
|
||||
str(self.config.video_path),
|
||||
"--model", self.config.whisper_model,
|
||||
"--output_format", "json",
|
||||
"--output_dir", str(self.output_mgr.output_dir),
|
||||
"--diarize",
|
||||
]
|
||||
if use_diarize:
|
||||
cmd.append("--diarize")
|
||||
|
||||
try:
|
||||
subprocess.run(cmd, check=True, capture_output=True, text=True)
|
||||
# Set up environment with cuDNN library path for whisperx
|
||||
env = os.environ.copy()
|
||||
if use_diarize:
|
||||
import site
|
||||
site_packages = site.getsitepackages()[0]
|
||||
cudnn_path = Path(site_packages) / "nvidia" / "cudnn" / "lib"
|
||||
if cudnn_path.exists():
|
||||
env["LD_LIBRARY_PATH"] = str(cudnn_path) + ":" + env.get("LD_LIBRARY_PATH", "")
|
||||
|
||||
subprocess.run(cmd, check=True, capture_output=True, text=True, env=env)
|
||||
|
||||
transcript_path = self.output_mgr.get_path(f"{self.config.video_path.stem}.json")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user