Add image embedding (--embed-images, --embed-quality) and hybrid OCR (--use-hybrid) options
This commit is contained in:
@@ -32,23 +32,20 @@ def main():
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
# Run Whisper + vision analysis (recommended for code/dashboards)
|
||||
python process_meeting.py samples/meeting.mkv --run-whisper --use-vision
|
||||
# Embed images for LLM analysis (recommended - let LLM analyze actual frames)
|
||||
python process_meeting.py samples/meeting.mkv --run-whisper --embed-images --scene-detection
|
||||
|
||||
# Use vision with specific context hint
|
||||
python process_meeting.py samples/meeting.mkv --run-whisper --use-vision --vision-context code
|
||||
# Embed with custom quality (lower = smaller file size)
|
||||
python process_meeting.py samples/meeting.mkv --run-whisper --embed-images --embed-quality 60 --scene-detection
|
||||
|
||||
# Traditional OCR approach
|
||||
python process_meeting.py samples/meeting.mkv --run-whisper
|
||||
# Hybrid approach: OpenCV + OCR (extracts text, no images)
|
||||
python process_meeting.py samples/meeting.mkv --run-whisper --use-hybrid --scene-detection
|
||||
|
||||
# Re-run analysis using cached frames and transcript
|
||||
python process_meeting.py samples/meeting.mkv --use-vision
|
||||
# Hybrid + LLM cleanup (best for code formatting)
|
||||
python process_meeting.py samples/meeting.mkv --run-whisper --use-hybrid --hybrid-llm-cleanup --scene-detection
|
||||
|
||||
# Force reprocessing (ignore cache)
|
||||
python process_meeting.py samples/meeting.mkv --run-whisper --use-vision --no-cache
|
||||
|
||||
# Use scene detection for fewer frames
|
||||
python process_meeting.py samples/meeting.mkv --run-whisper --use-vision --scene-detection
|
||||
# Iterate on scene threshold (reuse whisper transcript)
|
||||
python process_meeting.py samples/meeting.mkv --embed-images --scene-detection --scene-threshold 5 --skip-cache-frames --skip-cache-analysis
|
||||
"""
|
||||
)
|
||||
|
||||
@@ -119,6 +116,21 @@ Examples:
|
||||
action='store_true',
|
||||
help='Use local vision model (Ollama) instead of OCR for better context understanding'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--use-hybrid',
|
||||
action='store_true',
|
||||
help='Use hybrid approach: OpenCV text detection + OCR (more accurate than vision models)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--hybrid-llm-cleanup',
|
||||
action='store_true',
|
||||
help='Use LLM to clean up OCR output and preserve code formatting (requires --use-hybrid)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--hybrid-llm-model',
|
||||
help='LLM model for cleanup (default: llama3.2:3b)',
|
||||
default='llama3.2:3b'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--vision-model',
|
||||
help='Vision model to use with Ollama (default: llava:13b)',
|
||||
@@ -168,6 +180,17 @@ Examples:
|
||||
help='Output format style (default: detailed)',
|
||||
default='detailed'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--embed-images',
|
||||
action='store_true',
|
||||
help='Embed frame images (as base64) in enhanced transcript for LLM analysis'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--embed-quality',
|
||||
type=int,
|
||||
help='JPEG quality for embedded images (default: 80, lower = smaller file)',
|
||||
default=80
|
||||
)
|
||||
|
||||
# Logging
|
||||
parser.add_argument(
|
||||
|
||||
Reference in New Issue
Block a user