scene detection quality and caching

This commit is contained in:
Mariano Gabriel
2025-10-28 05:52:31 -03:00
parent c871af2def
commit b1e1daf278
6 changed files with 169 additions and 30 deletions

View File

@@ -31,10 +31,11 @@ class WorkflowConfig:
# Whisper options
self.run_whisper = kwargs.get('run_whisper', False)
-self.whisper_model = kwargs.get('whisper_model', 'base')
+self.whisper_model = kwargs.get('whisper_model', 'medium')
# Frame extraction
self.scene_detection = kwargs.get('scene_detection', False)
+self.scene_threshold = kwargs.get('scene_threshold', 15.0)
self.interval = kwargs.get('interval', 5)
# Analysis options
@@ -46,6 +47,9 @@ class WorkflowConfig:
# Processing options
self.no_deduplicate = kwargs.get('no_deduplicate', False)
self.no_cache = kwargs.get('no_cache', False)
+self.skip_cache_frames = kwargs.get('skip_cache_frames', False)
+self.skip_cache_whisper = kwargs.get('skip_cache_whisper', False)
+self.skip_cache_analysis = kwargs.get('skip_cache_analysis', False)
self.extract_only = kwargs.get('extract_only', False)
self.format = kwargs.get('format', 'detailed')
@@ -58,7 +62,8 @@ class WorkflowConfig:
},
"frame_extraction": {
"method": "scene_detection" if self.scene_detection else "interval",
-"interval_seconds": self.interval if not self.scene_detection else None
+"interval_seconds": self.interval if not self.scene_detection else None,
+"scene_threshold": self.scene_threshold if self.scene_detection else None
},
"analysis": {
"method": "vision" if self.use_vision else "ocr",
@@ -91,7 +96,10 @@ class ProcessingWorkflow:
self.output_mgr.output_dir,
self.output_mgr.frames_dir,
config.video_path.stem,
-use_cache=not config.no_cache
+use_cache=not config.no_cache,
+skip_cache_frames=config.skip_cache_frames,
+skip_cache_whisper=config.skip_cache_whisper,
+skip_cache_analysis=config.skip_cache_analysis
)
def run(self) -> Dict[str, Any]:
@@ -206,11 +214,17 @@ class ProcessingWorkflow:
if cached_frames:
return cached_frames
+# Clean up old frames if regenerating
+if self.config.skip_cache_frames and self.output_mgr.frames_dir.exists():
+logger.info("Cleaning up old frames...")
+for old_frame in self.output_mgr.frames_dir.glob("*.jpg"):
+old_frame.unlink()
# Extract frames
extractor = FrameExtractor(str(self.config.video_path), str(self.output_mgr.frames_dir))
if self.config.scene_detection:
-frames_info = extractor.extract_scene_changes()
+frames_info = extractor.extract_scene_changes(threshold=self.config.scene_threshold)
else:
frames_info = extractor.extract_by_interval(self.config.interval)