Improve scene detection quality and add per-stage cache skipping

This commit is contained in:
Mariano Gabriel
2025-10-28 05:52:31 -03:00
parent c871af2def
commit b1e1daf278
6 changed files with 169 additions and 30 deletions

View File

@@ -12,7 +12,9 @@ logger = logging.getLogger(__name__)
class CacheManager:
"""Manage caching of intermediate processing results."""
def __init__(self, output_dir: Path, frames_dir: Path, video_name: str, use_cache: bool = True):
def __init__(self, output_dir: Path, frames_dir: Path, video_name: str, use_cache: bool = True,
skip_cache_frames: bool = False, skip_cache_whisper: bool = False,
skip_cache_analysis: bool = False):
"""
Initialize cache manager.
@@ -20,12 +22,18 @@ class CacheManager:
output_dir: Output directory for cached files
frames_dir: Directory for cached frames
video_name: Name of the video (stem)
use_cache: Whether to use caching
use_cache: Whether to use caching globally
skip_cache_frames: Skip cached frames specifically
skip_cache_whisper: Skip cached whisper specifically
skip_cache_analysis: Skip cached analysis specifically
"""
self.output_dir = output_dir
self.frames_dir = frames_dir
self.video_name = video_name
self.use_cache = use_cache
self.skip_cache_frames = skip_cache_frames
self.skip_cache_whisper = skip_cache_whisper
self.skip_cache_analysis = skip_cache_analysis
def get_whisper_cache(self) -> Optional[Path]:
"""
@@ -34,7 +42,7 @@ class CacheManager:
Returns:
Path to cached transcript or None
"""
if not self.use_cache:
if not self.use_cache or self.skip_cache_whisper:
return None
cache_path = self.output_dir / f"{self.video_name}.json"
@@ -51,7 +59,7 @@ class CacheManager:
Returns:
List of (frame_path, timestamp) tuples or None
"""
if not self.use_cache or not self.frames_dir.exists():
if not self.use_cache or self.skip_cache_frames or not self.frames_dir.exists():
return None
existing_frames = list(self.frames_dir.glob("frame_*.jpg"))
@@ -84,7 +92,7 @@ class CacheManager:
Returns:
List of analysis results or None
"""
if not self.use_cache:
if not self.use_cache or self.skip_cache_analysis:
return None
cache_path = self.output_dir / f"{self.video_name}_{analysis_type}.json"

View File

@@ -6,9 +6,9 @@ import cv2
import os
from pathlib import Path
from typing import List, Tuple, Optional
import subprocess
import json
import logging
import re
logger = logging.getLogger(__name__)
@@ -56,7 +56,8 @@ class FrameExtractor:
frame_filename = f"frame_{saved_count:05d}_{timestamp:.2f}s.jpg"
frame_path = self.output_dir / frame_filename
cv2.imwrite(str(frame_path), frame)
# Use high quality for text readability (95 = high quality JPEG)
cv2.imwrite(str(frame_path), frame, [cv2.IMWRITE_JPEG_QUALITY, 95])
frames_info.append((str(frame_path), timestamp))
saved_count += 1
@@ -66,41 +67,51 @@ class FrameExtractor:
logger.info(f"Extracted {saved_count} frames at {interval_seconds}s intervals")
return frames_info
def extract_scene_changes(self, threshold: float = 30.0) -> List[Tuple[str, float]]:
def extract_scene_changes(self, threshold: float = 15.0) -> List[Tuple[str, float]]:
"""
Extract frames only on scene changes using FFmpeg.
More efficient than interval-based extraction.
Args:
threshold: Scene change detection threshold (0-100, lower = more sensitive)
Default: 15.0 (good for clean UIs like Zed)
Higher values (20-30) for busy UIs like VS Code
Lower values (5-10) for very subtle changes
Returns:
List of (frame_path, timestamp) tuples
"""
try:
import ffmpeg
except ImportError:
raise ImportError("ffmpeg-python not installed. Run: pip install ffmpeg-python")
video_name = Path(self.video_path).stem
output_pattern = self.output_dir / f"{video_name}_%05d.jpg"
# Use FFmpeg's scene detection filter
cmd = [
'ffmpeg',
'-i', self.video_path,
'-vf', f'select=gt(scene\\,{threshold/100}),showinfo',
'-vsync', 'vfr',
'-frame_pts', '1',
str(output_pattern),
'-loglevel', 'info'
]
try:
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
# Use FFmpeg's scene detection filter with high quality output
stream = ffmpeg.input(self.video_path)
stream = ffmpeg.filter(stream, 'select', f'gt(scene,{threshold/100})')
stream = ffmpeg.filter(stream, 'showinfo')
stream = ffmpeg.output(
stream,
str(output_pattern),
vsync='vfr',
frame_pts=1,
**{'q:v': '2'} # High quality JPEG
)
# Run with stderr capture to get showinfo output
_, stderr = ffmpeg.run(stream, capture_stderr=True, overwrite_output=True)
stderr = stderr.decode('utf-8')
# Parse FFmpeg output to get frame timestamps from showinfo filter
import re
frames_info = []
# Extract timestamps from stderr (showinfo outputs there)
timestamp_pattern = r'pts_time:([\d.]+)'
timestamps = re.findall(timestamp_pattern, result.stderr)
timestamps = re.findall(timestamp_pattern, stderr)
# Match frames to timestamps
frame_files = sorted(self.output_dir.glob(f"{video_name}_*.jpg"))
@@ -113,11 +124,15 @@ class FrameExtractor:
logger.info(f"Extracted {len(frames_info)} frames at scene changes")
return frames_info
except subprocess.CalledProcessError as e:
logger.error(f"FFmpeg error: {e.stderr}")
except ffmpeg.Error as e:
logger.error(f"FFmpeg error: {e.stderr.decode() if e.stderr else str(e)}")
# Fallback to interval extraction
logger.warning("Falling back to interval extraction...")
return self.extract_by_interval()
except Exception as e:
logger.error(f"Unexpected error during scene extraction: {e}")
logger.warning("Falling back to interval extraction...")
return self.extract_by_interval()
def get_video_duration(self) -> float:
"""Get video duration in seconds."""

View File

@@ -31,10 +31,11 @@ class WorkflowConfig:
# Whisper options
self.run_whisper = kwargs.get('run_whisper', False)
self.whisper_model = kwargs.get('whisper_model', 'base')
self.whisper_model = kwargs.get('whisper_model', 'medium')
# Frame extraction
self.scene_detection = kwargs.get('scene_detection', False)
self.scene_threshold = kwargs.get('scene_threshold', 15.0)
self.interval = kwargs.get('interval', 5)
# Analysis options
@@ -46,6 +47,9 @@ class WorkflowConfig:
# Processing options
self.no_deduplicate = kwargs.get('no_deduplicate', False)
self.no_cache = kwargs.get('no_cache', False)
self.skip_cache_frames = kwargs.get('skip_cache_frames', False)
self.skip_cache_whisper = kwargs.get('skip_cache_whisper', False)
self.skip_cache_analysis = kwargs.get('skip_cache_analysis', False)
self.extract_only = kwargs.get('extract_only', False)
self.format = kwargs.get('format', 'detailed')
@@ -58,7 +62,8 @@ class WorkflowConfig:
},
"frame_extraction": {
"method": "scene_detection" if self.scene_detection else "interval",
"interval_seconds": self.interval if not self.scene_detection else None
"interval_seconds": self.interval if not self.scene_detection else None,
"scene_threshold": self.scene_threshold if self.scene_detection else None
},
"analysis": {
"method": "vision" if self.use_vision else "ocr",
@@ -91,7 +96,10 @@ class ProcessingWorkflow:
self.output_mgr.output_dir,
self.output_mgr.frames_dir,
config.video_path.stem,
use_cache=not config.no_cache
use_cache=not config.no_cache,
skip_cache_frames=config.skip_cache_frames,
skip_cache_whisper=config.skip_cache_whisper,
skip_cache_analysis=config.skip_cache_analysis
)
def run(self) -> Dict[str, Any]:
@@ -206,11 +214,17 @@ class ProcessingWorkflow:
if cached_frames:
return cached_frames
# Clean up old frames if regenerating
if self.config.skip_cache_frames and self.output_mgr.frames_dir.exists():
logger.info("Cleaning up old frames...")
for old_frame in self.output_mgr.frames_dir.glob("*.jpg"):
old_frame.unlink()
# Extract frames
extractor = FrameExtractor(str(self.config.video_path), str(self.output_mgr.frames_dir))
if self.config.scene_detection:
frames_info = extractor.extract_scene_changes()
frames_info = extractor.extract_scene_changes(threshold=self.config.scene_threshold)
else:
frames_info = extractor.extract_by_interval(self.config.interval)