Make WhisperX diarization optional (--diarize); default to plain Whisper

2025-12-03 06:48:45 -03:00
parent 7b919beda6
commit 7d7ec15ff7
4 changed files with 87 additions and 16 deletions
--- a/meetus/workflow.py
+++ b/meetus/workflow.py
@@ -4,6 +4,7 @@ Coordinates frame extraction, analysis, and transcript merging.
 """
 from pathlib import Path
 import logging
+import os
 import subprocess
 import shutil
 from typing import Dict, Any, Optional
@@ -32,6 +33,7 @@ class WorkflowConfig:
        # Whisper options
        self.run_whisper = kwargs.get('run_whisper', False)
        self.whisper_model = kwargs.get('whisper_model', 'medium')
+        self.diarize = kwargs.get('diarize', False)

        # Frame extraction
        self.scene_detection = kwargs.get('scene_detection', False)
@@ -176,18 +178,27 @@ class ProcessingWorkflow:
        if cached:
            return str(cached)

-        # If no cache and not running whisper, use provided transcript path (if any)
-        if not self.config.run_whisper:
+        # If no cache and not running whisper/diarize, use provided transcript path (if any)
+        if not self.config.run_whisper and not self.config.diarize:
            return self.config.transcript_path

        logger.info("=" * 80)
        logger.info("STEP 0: Running Whisper Transcription")
        logger.info("=" * 80)

-        # Check if whisperx is installed
-        if not shutil.which("whisperx"):
-            logger.error("WhisperX is not installed. Install it with: pip install whisperx")
-            raise RuntimeError("WhisperX not installed")
+        # Determine which transcription tool to use
+        use_diarize = getattr(self.config, 'diarize', False)
+
+        if use_diarize:
+            if not shutil.which("whisperx"):
+                logger.error("WhisperX is not installed. Install it with: pip install whisperx")
+                raise RuntimeError("WhisperX not installed (required for --diarize)")
+            transcribe_cmd = "whisperx"
+        else:
+            if not shutil.which("whisper"):
+                logger.error("Whisper is not installed. Install it with: pip install openai-whisper")
+                raise RuntimeError("Whisper not installed")
+            transcribe_cmd = "whisper"

        # Unload Ollama model to free GPU memory for Whisper (if using vision)
        if self.config.use_vision:
@@ -199,21 +210,34 @@ class ProcessingWorkflow:
            except Exception as e:
                logger.warning(f"Could not unload Ollama model: {e}")

-        logger.info(f"Running WhisperX transcription with diarization (model: {self.config.whisper_model})...")
+        if use_diarize:
+            logger.info(f"Running WhisperX transcription with diarization (model: {self.config.whisper_model})...")
+        else:
+            logger.info(f"Running Whisper transcription (model: {self.config.whisper_model})...")
        logger.info("This may take a few minutes depending on video length...")

-        # Run whisperx command with diarization
+        # Build command
        cmd = [
-            "whisperx",
+            transcribe_cmd,
            str(self.config.video_path),
            "--model", self.config.whisper_model,
            "--output_format", "json",
            "--output_dir", str(self.output_mgr.output_dir),
-            "--diarize",
        ]
+        if use_diarize:
+            cmd.append("--diarize")

        try:
-            subprocess.run(cmd, check=True, capture_output=True, text=True)
+            # Set up environment with cuDNN library path for whisperx
+            env = os.environ.copy()
+            if use_diarize:
+                import site
+                site_packages = site.getsitepackages()[0]
+                cudnn_path = Path(site_packages) / "nvidia" / "cudnn" / "lib"
+                if cudnn_path.exists():
+                    env["LD_LIBRARY_PATH"] = str(cudnn_path) + ":" + env.get("LD_LIBRARY_PATH", "")
+
+            subprocess.run(cmd, check=True, capture_output=True, text=True, env=env)

            transcript_path = self.output_mgr.get_path(f"{self.config.video_path.stem}.json")