This commit is contained in:
Mariano Gabriel
2025-10-20 00:03:41 -03:00
parent a999bc9093
commit cd7b0aed07
11 changed files with 776 additions and 312 deletions

137
meetus/cache_manager.py Normal file
View File

@@ -0,0 +1,137 @@
"""
Manage caching for frames, transcripts, and analysis results.
"""
from pathlib import Path
import json
import logging
from typing import List, Tuple, Dict, Optional
logger = logging.getLogger(__name__)
class CacheManager:
    """Manage caching of intermediate processing results.

    Provides file-based caches for three pipeline artifacts, all keyed by
    the video's stem (``video_name``):

    - Whisper transcript:  ``output_dir / "<video_name>.json"``
    - Extracted frames:    ``frames_dir / "frame_*.jpg"``
    - Analysis results:    ``output_dir / "<video_name>_<type>.json"``
      (``type`` is e.g. ``'vision'`` or ``'ocr'``)
    """

    def __init__(self, output_dir: Path, frames_dir: Path, video_name: str, use_cache: bool = True):
        """
        Initialize cache manager.

        Args:
            output_dir: Output directory for cached JSON files
            frames_dir: Directory for cached frames
            video_name: Name of the video (stem, used as cache key prefix)
            use_cache: Whether lookups should consult the cache at all
        """
        self.output_dir = output_dir
        self.frames_dir = frames_dir
        self.video_name = video_name
        self.use_cache = use_cache

    def get_whisper_cache(self) -> Optional[Path]:
        """
        Check for cached Whisper transcript.

        Returns:
            Path to cached transcript JSON, or None on cache miss or when
            caching is disabled.
        """
        if not self.use_cache:
            return None
        cache_path = self.output_dir / f"{self.video_name}.json"
        if cache_path.exists():
            logger.info(f"✓ Found cached Whisper transcript: {cache_path.name}")
            return cache_path
        return None

    def get_frames_cache(self) -> Optional[List[Tuple[str, float]]]:
        """
        Check for cached frames.

        Returns:
            List of (frame_path, timestamp) tuples sorted by filename, or
            None when caching is disabled or no frames are present.
        """
        if not self.use_cache or not self.frames_dir.exists():
            return None
        existing_frames = list(self.frames_dir.glob("frame_*.jpg"))
        if not existing_frames:
            return None
        logger.info(f"✓ Found {len(existing_frames)} cached frames in {self.frames_dir.name}/")
        # Build frames_info from existing files
        frames_info = []
        for frame_path in sorted(existing_frames):
            # Try to extract timestamp from filename (e.g., frame_00001_12.34s.jpg):
            # take the last '_'-separated token and drop the trailing 's'.
            try:
                timestamp_str = frame_path.stem.split('_')[-1].rstrip('s')
                timestamp = float(timestamp_str)
            except ValueError:
                # Filename doesn't follow the expected pattern; fall back to 0.0
                # rather than failing the whole cache load. (Was a bare `except:`,
                # which also swallowed KeyboardInterrupt/SystemExit.)
                timestamp = 0.0
            frames_info.append((str(frame_path), timestamp))
        return frames_info

    def get_analysis_cache(self, analysis_type: str) -> Optional[List[Dict]]:
        """
        Check for cached analysis results.

        Args:
            analysis_type: 'vision' or 'ocr'

        Returns:
            List of per-frame analysis dicts loaded from the cache, or None
            on cache miss or when caching is disabled.
        """
        if not self.use_cache:
            return None
        cache_path = self.output_dir / f"{self.video_name}_{analysis_type}.json"
        if cache_path.exists():
            logger.info(f"✓ Found cached {analysis_type} analysis: {cache_path.name}")
            with open(cache_path, 'r', encoding='utf-8') as f:
                results = json.load(f)
            logger.info(f"✓ Loaded {len(results)} analyzed frames from cache")
            return results
        return None

    def save_analysis(self, analysis_type: str, results: List[Dict]):
        """
        Save analysis results to cache.

        Note: writes unconditionally, even when ``use_cache`` is False —
        the flag only gates reads.

        Args:
            analysis_type: 'vision' or 'ocr'
            results: Analysis results to save
        """
        cache_path = self.output_dir / f"{self.video_name}_{analysis_type}.json"
        with open(cache_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2, ensure_ascii=False)
        logger.info(f"✓ Saved {analysis_type} analysis to: {cache_path.name}")

    def cache_exists(self, analysis_type: Optional[str] = None) -> Dict[str, bool]:
        """
        Check what caches exist (ignores ``use_cache``; pure existence probe).

        Args:
            analysis_type: Optional specific analysis type to check; when
                omitted, both 'vision' and 'ocr' are reported.

        Returns:
            Dictionary mapping cache name ('whisper', 'frames', and the
            analysis type(s)) to a boolean existence flag.
        """
        status = {
            "whisper": (self.output_dir / f"{self.video_name}.json").exists(),
            "frames": len(list(self.frames_dir.glob("frame_*.jpg"))) > 0 if self.frames_dir.exists() else False,
        }
        if analysis_type:
            status[analysis_type] = (self.output_dir / f"{self.video_name}_{analysis_type}.json").exists()
        else:
            status["vision"] = (self.output_dir / f"{self.video_name}_vision.json").exists()
            status["ocr"] = (self.output_dir / f"{self.video_name}_ocr.json").exists()
        return status