""" Manage output directories and manifest files. Creates timestamped folders for each video and tracks processing options. """ from pathlib import Path from datetime import datetime import json import logging from typing import Dict, Any, Optional logger = logging.getLogger(__name__) class OutputManager: """Manage output directories and manifest files for video processing.""" def __init__(self, video_path: Path, base_output_dir: str = "output", use_cache: bool = True): """ Initialize output manager. Args: video_path: Path to the video file being processed base_output_dir: Base directory for all outputs use_cache: Whether to use existing directories if found """ self.video_path = video_path self.base_output_dir = Path(base_output_dir) self.use_cache = use_cache # Find or create output directory self.output_dir = self._get_or_create_output_dir() self.frames_dir = self.output_dir / "frames" self.frames_dir.mkdir(exist_ok=True) logger.info(f"Output directory: {self.output_dir}") def _get_or_create_output_dir(self) -> Path: """ Get existing output directory or create a new timestamped one. Returns: Path to output directory """ video_name = self.video_path.stem # Look for existing directories if caching is enabled if self.use_cache and self.base_output_dir.exists(): existing_dirs = sorted([ d for d in self.base_output_dir.iterdir() if d.is_dir() and d.name.endswith(f"-{video_name}") ], reverse=True) # Most recent first if existing_dirs: logger.info(f"Found existing output: {existing_dirs[0].name}") return existing_dirs[0] # Create new timestamped directory timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") dir_name = f"{timestamp}-{video_name}" output_dir = self.base_output_dir / dir_name output_dir.mkdir(parents=True, exist_ok=True) logger.info(f"Created new output directory: {dir_name}") return output_dir def get_path(self, filename: str) -> Path: """Get full path for a file in the output directory.""" return self.output_dir / filename def get_frames_path(self, filename: str) -> Path: """Get full path for a file in the frames directory.""" return self.frames_dir / filename def save_manifest(self, config: Dict[str, Any]): """ Save processing configuration to manifest.json. Args: config: Dictionary of processing options """ manifest_path = self.output_dir / "manifest.json" manifest = { "video": { "name": self.video_path.name, "path": str(self.video_path.absolute()), }, "processed_at": datetime.now().isoformat(), "configuration": config, "outputs": { "frames": str(self.frames_dir.relative_to(self.output_dir)), "enhanced_transcript": f"{self.video_path.stem}_enhanced.txt", "whisper_transcript": f"{self.video_path.stem}.json" if config.get("run_whisper") else None, "analysis": f"{self.video_path.stem}_{'vision' if config.get('use_vision') else 'ocr'}.json" } } with open(manifest_path, 'w', encoding='utf-8') as f: json.dump(manifest, f, indent=2, ensure_ascii=False) logger.info(f"Saved manifest: {manifest_path}") def load_manifest(self) -> Optional[Dict[str, Any]]: """ Load existing manifest if it exists. Returns: Manifest dictionary or None """ manifest_path = self.output_dir / "manifest.json" if manifest_path.exists(): with open(manifest_path, 'r', encoding='utf-8') as f: return json.load(f) return None def list_outputs(self) -> Dict[str, Any]: """ List all output files in the directory. Returns: Dictionary of output files and their status """ video_name = self.video_path.stem return { "output_dir": str(self.output_dir), "manifest": (self.output_dir / "manifest.json").exists(), "enhanced_transcript": (self.output_dir / f"{video_name}_enhanced.txt").exists(), "whisper_transcript": (self.output_dir / f"{video_name}.json").exists(), "vision_analysis": (self.output_dir / f"{video_name}_vision.json").exists(), "ocr_analysis": (self.output_dir / f"{video_name}_ocr.json").exists(), "frames": len(list(self.frames_dir.glob("*.jpg"))) if self.frames_dir.exists() else 0 }