Files
mitus/meetus/output_manager.py
Mariano Gabriel cd7b0aed07 refactor
2025-10-20 00:03:41 -03:00

136 lines
4.8 KiB
Python

"""
Manage output directories and manifest files.
Creates timestamped folders for each video and tracks processing options.
"""
from pathlib import Path
from datetime import datetime
import json
import logging
from typing import Dict, Any, Optional
logger = logging.getLogger(__name__)


class OutputManager:
    """Manage the output directory and manifest file for one processed video.

    Every video gets a directory named ``<YYYYmmdd_HHMMSS>-<video stem>``
    below the base output directory. That directory holds a ``frames``
    subdirectory and a ``manifest.json`` describing the processing options.
    """

    # Format of the timestamp prefix used in output directory names.
    _TIMESTAMP_FORMAT = "%Y%m%d_%H%M%S"
    # Length of a timestamp rendered with _TIMESTAMP_FORMAT (8 + 1 + 6).
    _TIMESTAMP_LENGTH = 15

    def __init__(self, video_path: Path, base_output_dir: str = "output", use_cache: bool = True):
        """
        Initialize output manager.

        Creates (or reuses) the output directory and its ``frames``
        subdirectory as a side effect.

        Args:
            video_path: Path to the video file being processed
            base_output_dir: Base directory for all outputs
            use_cache: Whether to use existing directories if found
        """
        # Normalise so that plain strings work as well as Path objects.
        self.video_path = Path(video_path)
        self.base_output_dir = Path(base_output_dir)
        self.use_cache = use_cache

        # Find or create output directory
        self.output_dir = self._get_or_create_output_dir()
        self.frames_dir = self.output_dir / "frames"
        self.frames_dir.mkdir(exist_ok=True)

        logger.info("Output directory: %s", self.output_dir)

    @classmethod
    def _is_output_dir_for(cls, dir_name: str, video_name: str) -> bool:
        """Return True if *dir_name* is exactly ``<timestamp>-<video_name>``.

        A plain suffix check is not enough: the video ``call`` would also
        match ``20250101_000000-team-call``, which belongs to another video.
        The timestamp itself never contains ``-``, so the first ``-`` is the
        separator between timestamp and video name.
        """
        timestamp, separator, remainder = dir_name.partition("-")
        if not separator or remainder != video_name:
            return False
        if len(timestamp) != cls._TIMESTAMP_LENGTH:
            return False
        try:
            datetime.strptime(timestamp, cls._TIMESTAMP_FORMAT)
        except ValueError:
            return False
        return True

    def _get_or_create_output_dir(self) -> Path:
        """
        Get existing output directory or create a new timestamped one.

        Returns:
            Path to output directory
        """
        video_name = self.video_path.stem

        # Look for existing directories if caching is enabled
        if self.use_cache and self.base_output_dir.is_dir():
            matching_dirs = (
                entry
                for entry in self.base_output_dir.iterdir()
                if entry.is_dir() and self._is_output_dir_for(entry.name, video_name)
            )
            # Timestamps are fixed-width, so the lexicographically greatest
            # name is also the most recent one.
            latest = max(matching_dirs, key=lambda entry: entry.name, default=None)
            if latest is not None:
                logger.info("Found existing output: %s", latest.name)
                return latest

        # Create new timestamped directory
        timestamp = datetime.now().strftime(self._TIMESTAMP_FORMAT)
        dir_name = f"{timestamp}-{video_name}"
        output_dir = self.base_output_dir / dir_name
        output_dir.mkdir(parents=True, exist_ok=True)

        logger.info("Created new output directory: %s", dir_name)
        return output_dir

    def get_path(self, filename: str) -> Path:
        """Get full path for a file in the output directory."""
        return self.output_dir / filename

    def get_frames_path(self, filename: str) -> Path:
        """Get full path for a file in the frames directory."""
        return self.frames_dir / filename

    def save_manifest(self, config: Dict[str, Any]) -> None:
        """
        Save processing configuration to manifest.json.

        Args:
            config: Dictionary of processing options

        Raises:
            TypeError: If ``config`` contains values that are not JSON
                serializable. An existing manifest is left untouched then.
        """
        manifest_path = self.output_dir / "manifest.json"
        stem = self.video_path.stem
        analysis_kind = "vision" if config.get("use_vision") else "ocr"

        manifest = {
            "video": {
                "name": self.video_path.name,
                "path": str(self.video_path.absolute()),
            },
            "processed_at": datetime.now().isoformat(),
            "configuration": config,
            "outputs": {
                "frames": str(self.frames_dir.relative_to(self.output_dir)),
                "enhanced_transcript": f"{stem}_enhanced.txt",
                "whisper_transcript": f"{stem}.json" if config.get("run_whisper") else None,
                "analysis": f"{stem}_{analysis_kind}.json",
            },
        }

        # Serialize before touching the file: a failure here must not leave
        # a truncated manifest.json behind.
        payload = json.dumps(manifest, indent=2, ensure_ascii=False)
        manifest_path.write_text(payload, encoding="utf-8")

        logger.info("Saved manifest: %s", manifest_path)

    def load_manifest(self) -> Optional[Dict[str, Any]]:
        """
        Load existing manifest if it exists.

        Returns:
            Manifest dictionary or None

        Raises:
            json.JSONDecodeError: If the manifest exists but is not valid JSON.
        """
        manifest_path = self.output_dir / "manifest.json"
        try:
            with open(manifest_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            return None

    def list_outputs(self) -> Dict[str, Any]:
        """
        List all output files in the directory.

        Returns:
            Dictionary with the output directory path, one boolean per
            expected output file telling whether it exists, and the number
            of ``*.jpg`` files in the frames directory.
        """
        video_name = self.video_path.stem

        if self.frames_dir.exists():
            frame_count = sum(1 for _ in self.frames_dir.glob("*.jpg"))
        else:
            frame_count = 0

        return {
            "output_dir": str(self.output_dir),
            "manifest": (self.output_dir / "manifest.json").exists(),
            "enhanced_transcript": (self.output_dir / f"{video_name}_enhanced.txt").exists(),
            "whisper_transcript": (self.output_dir / f"{video_name}.json").exists(),
            "vision_analysis": (self.output_dir / f"{video_name}_vision.json").exists(),
            "ocr_analysis": (self.output_dir / f"{video_name}_ocr.json").exists(),
            "frames": frame_count,
        }