From eb8b1f4f11d11e63d70408ab23dfd6700cc34c74 Mon Sep 17 00:00:00 2001
From: Mariano Gabriel <pensalo@gmail.com>
Date: Thu, 4 Dec 2025 20:24:52 -0300
Subject: [PATCH] README: recommend --diarize workflow, drop --embed-quality docs

---
 README.md | 76 ++++++++++++++++++-------------------------------------
 1 file changed, 24 insertions(+), 52 deletions(-)

diff --git a/README.md b/README.md
index c1570c9..6a57483 100644
--- a/README.md
+++ b/README.md
@@ -46,25 +46,19 @@ For speaker diarization, you'll need a HuggingFace token with access to pyannote
 
 ## Quick Start
 
-### Recommended: Embed Frames with Scene Detection
+### Recommended Usage
 
 ```bash
-python process_meeting.py samples/meeting.mkv --run-whisper --embed-images --scene-detection
+python process_meeting.py samples/meeting.mkv --embed-images --scene-detection --scene-threshold 10 --diarize
 ```
 
 This will:
-1. Run Whisper transcription (audio → text)
-2. Extract frames at scene changes (smarter than fixed intervals)
-3. Embed frame references in the transcript for LLM analysis
+1. Run WhisperX transcription with speaker diarization
+2. Extract frames at scene changes (`--scene-threshold 10` is slightly more sensitive than the default of 15; lower values capture more frames)
+3. Create an enhanced transcript with frame file references
 4. Save everything to `output/` folder
 
-### With Speaker Diarization (WhisperX)
-
-```bash
-python process_meeting.py samples/meeting.mkv --run-whisper --diarize --embed-images --scene-detection
-```
-
-This uses WhisperX to identify different speakers in the transcript.
+The `--embed-images` flag adds frame paths to the transcript (e.g., `Frame: frames/video_00257.jpg`). This keeps the transcript small, while the frames themselves stay in the `frames/` folder for LLM access.
 
 ### Re-run with Cached Results
 
@@ -76,48 +70,38 @@ python process_meeting.py samples/meeting.mkv --embed-images
 # Skip only specific cached items
 python process_meeting.py samples/meeting.mkv --embed-images --skip-cache-frames
 python process_meeting.py samples/meeting.mkv --embed-images --skip-cache-whisper
-python process_meeting.py samples/meeting.mkv --embed-images --skip-cache-analysis
 
 # Force complete reprocessing
-python process_meeting.py samples/meeting.mkv --run-whisper --embed-images --no-cache
+python process_meeting.py samples/meeting.mkv --embed-images --scene-detection --diarize --no-cache
 ```
 
 ## Usage Examples
 
 ### Scene Detection Options
 ```bash
-# Default scene detection (threshold: 15)
-python process_meeting.py samples/meeting.mkv --run-whisper --embed-images --scene-detection
+# Default threshold (15)
+python process_meeting.py samples/meeting.mkv --embed-images --scene-detection --diarize
 
-# More sensitive (more frames captured, threshold: 5)
-python process_meeting.py samples/meeting.mkv --run-whisper --embed-images --scene-detection --scene-threshold 5
+# More sensitive (more frames, threshold: 5)
+python process_meeting.py samples/meeting.mkv --embed-images --scene-detection --scene-threshold 5 --diarize
 
 # Less sensitive (fewer frames, threshold: 30)
-python process_meeting.py samples/meeting.mkv --run-whisper --embed-images --scene-detection --scene-threshold 30
+python process_meeting.py samples/meeting.mkv --embed-images --scene-detection --scene-threshold 30 --diarize
 ```
 
 ### Fixed Interval Extraction (alternative to scene detection)
 ```bash
 # Every 10 seconds
-python process_meeting.py samples/meeting.mkv --run-whisper --embed-images --interval 10
+python process_meeting.py samples/meeting.mkv --embed-images --interval 10 --diarize
 
 # Every 3 seconds (more detailed)
-python process_meeting.py samples/meeting.mkv --run-whisper --embed-images --interval 3
-```
-
-### Frame Quality Options
-```bash
-# Default quality (80)
-python process_meeting.py samples/meeting.mkv --run-whisper --embed-images --scene-detection
-
-# Lower quality for smaller files (60)
-python process_meeting.py samples/meeting.mkv --run-whisper --embed-images --scene-detection --embed-quality 60
+python process_meeting.py samples/meeting.mkv --embed-images --interval 3 --diarize
 ```
 
 ### Caching Examples
 ```bash
 # First run - processes everything
-python process_meeting.py samples/meeting.mkv --run-whisper --embed-images --scene-detection
+python process_meeting.py samples/meeting.mkv --embed-images --scene-detection --scene-threshold 10 --diarize
 
 # Iterate on scene threshold (reuse whisper transcript)
 python process_meeting.py samples/meeting.mkv --embed-images --scene-detection --scene-threshold 5 --skip-cache-frames --skip-cache-analysis
@@ -126,17 +110,17 @@ python process_meeting.py samples/meeting.mkv --embed-images --scene-detection -
 python process_meeting.py samples/meeting.mkv --embed-images --skip-cache-whisper
 
 # Force complete reprocessing
-python process_meeting.py samples/meeting.mkv --run-whisper --embed-images --no-cache
+python process_meeting.py samples/meeting.mkv --embed-images --scene-detection --diarize --no-cache
 ```
 
 ### Custom output location
 ```bash
-python process_meeting.py samples/meeting.mkv --run-whisper --embed-images --output-dir my_outputs/
+python process_meeting.py samples/meeting.mkv --embed-images --scene-detection --diarize --output-dir my_outputs/
 ```
 
 ### Enable verbose logging
 ```bash
-python process_meeting.py samples/meeting.mkv --run-whisper --embed-images --scene-detection --verbose
+python process_meeting.py samples/meeting.mkv --embed-images --scene-detection --diarize --verbose
 ```
 
 ## Output Files
@@ -175,24 +159,17 @@ This allows you to iterate on scene detection thresholds without re-running Whis
 ### Complete Workflow (One Command!)
 
 ```bash
-# Process everything in one step with scene detection
-python process_meeting.py samples/meeting.mkv --run-whisper --embed-images --scene-detection
-
-# With speaker diarization
-python process_meeting.py samples/meeting.mkv --run-whisper --diarize --embed-images --scene-detection
+python process_meeting.py samples/meeting.mkv --embed-images --scene-detection --scene-threshold 10 --diarize
 ```
 
 ### Typical Iterative Workflow
 
 ```bash
 # First run - full processing
-python process_meeting.py samples/meeting.mkv --run-whisper --embed-images --scene-detection
+python process_meeting.py samples/meeting.mkv --embed-images --scene-detection --scene-threshold 10 --diarize
 
 # Adjust scene threshold (keeps cached whisper transcript)
-python process_meeting.py samples/meeting.mkv --embed-images --scene-detection --scene-threshold 10 --skip-cache-frames --skip-cache-analysis
-
-# Try different frame quality
-python process_meeting.py samples/meeting.mkv --embed-images --embed-quality 60 --skip-cache-frames --skip-cache-analysis
+python process_meeting.py samples/meeting.mkv --embed-images --scene-detection --scene-threshold 5 --skip-cache-frames --skip-cache-analysis
 ```
 
 ### Example Prompt for Claude
@@ -223,11 +200,8 @@ usage: process_meeting.py [-h] [--transcript TRANSCRIPT] [--run-whisper]
 
 Main Options:
   video                   Path to video file
-  --run-whisper           Run Whisper transcription before processing
-  --whisper-model         Whisper model: tiny, base, small, medium, large (default: medium)
   --diarize               Use WhisperX with speaker diarization
-  --embed-images          Embed frame references for LLM analysis (recommended)
-  --embed-quality         JPEG quality for frames (default: 80)
+  --embed-images          Add frame file references to transcript (recommended)
 
 Frame Extraction:
   --scene-detection       Use FFmpeg scene detection (recommended)
@@ -241,6 +215,8 @@ Caching:
   --skip-cache-analysis   Re-run analysis only
 
 Other:
+  --run-whisper           Run standard Whisper transcription (no speaker diarization)
+  --whisper-model         Whisper model: tiny, base, small, medium, large (default: medium)
   --transcript, -t        Path to existing Whisper transcript (JSON or TXT)
   --output, -o            Output file for enhanced transcript
   --output-dir            Directory for output files (default: output/)
@@ -262,10 +238,6 @@ Other:
 - **Whisper** (`--run-whisper`): Standard transcription, fast
 - **WhisperX** (`--run-whisper --diarize`): Adds speaker identification, requires HuggingFace token
 
-### Frame Quality
-- Default quality (80) works well for most cases
-- Use `--embed-quality 60` for smaller files if storage is a concern
-
 ### Deduplication
 - Enabled by default - removes similar consecutive frames
 - Disable with `--no-deduplicate` if slides/screens change subtly