refactor
This commit is contained in:
135
meetus/output_manager.py
Normal file
135
meetus/output_manager.py
Normal file
@@ -0,0 +1,135 @@
|
||||
"""
|
||||
Manage output directories and manifest files.
|
||||
Creates timestamped folders for each video and tracks processing options.
|
||||
"""
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import json
|
||||
import logging
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class OutputManager:
    """Manage output directories and manifest files for video processing.

    Each video gets a directory named ``<YYYYMMDD_HHMMSS>-<video stem>`` under
    the base output directory, containing a ``frames`` sub-directory and,
    once saved, a ``manifest.json`` describing the processing options.
    """

    # Layout of the timestamp that prefixes every output directory name.
    # It is fixed-width, so lexicographic order equals chronological order.
    _TIMESTAMP_FORMAT = "%Y%m%d_%H%M%S"
    _TIMESTAMP_LENGTH = 15

    def __init__(self, video_path: Path, base_output_dir: str = "output", use_cache: bool = True):
        """
        Initialize output manager.

        Args:
            video_path: Path to the video file being processed
            base_output_dir: Base directory for all outputs
            use_cache: Whether to use existing directories if found
        """
        # Coerce so that a plain string path works as well as a Path.
        self.video_path = Path(video_path)
        self.base_output_dir = Path(base_output_dir)
        self.use_cache = use_cache

        # Find or create output directory
        self.output_dir = self._get_or_create_output_dir()
        self.frames_dir = self.output_dir / "frames"
        self.frames_dir.mkdir(parents=True, exist_ok=True)

        logger.info("Output directory: %s", self.output_dir)

    @classmethod
    def _is_output_dir_name(cls, dir_name: str, video_name: str) -> bool:
        """Return True if dir_name is exactly ``<timestamp>-<video_name>``."""
        # The timestamp contains no "-", so the first "-" separates it from
        # the video name (which may itself contain hyphens).
        prefix, separator, remainder = dir_name.partition("-")
        if not separator or remainder != video_name:
            return False
        if len(prefix) != cls._TIMESTAMP_LENGTH:
            return False
        try:
            datetime.strptime(prefix, cls._TIMESTAMP_FORMAT)
        except ValueError:
            return False
        return True

    def _get_or_create_output_dir(self) -> Path:
        """
        Get existing output directory or create a new timestamped one.

        When caching is enabled, the most recent directory whose name is
        exactly ``<timestamp>-<video stem>`` is reused. A plain suffix match
        is not enough: it would also pick up directories of other videos
        whose stem merely ends with this video's stem.

        Returns:
            Path to output directory
        """
        video_name = self.video_path.stem

        # Look for existing directories if caching is enabled
        if self.use_cache and self.base_output_dir.is_dir():
            candidates = [
                entry for entry in self.base_output_dir.iterdir()
                if entry.is_dir() and self._is_output_dir_name(entry.name, video_name)
            ]
            if candidates:
                # Largest name == latest timestamp (fixed-width prefix).
                latest = max(candidates, key=lambda entry: entry.name)
                logger.info("Found existing output: %s", latest.name)
                return latest

        # Create new timestamped directory
        timestamp = datetime.now().strftime(self._TIMESTAMP_FORMAT)
        dir_name = f"{timestamp}-{video_name}"
        output_dir = self.base_output_dir / dir_name
        output_dir.mkdir(parents=True, exist_ok=True)
        logger.info("Created new output directory: %s", dir_name)

        return output_dir

    def get_path(self, filename: str) -> Path:
        """Get full path for a file in the output directory."""
        return self.output_dir / filename

    def get_frames_path(self, filename: str) -> Path:
        """Get full path for a file in the frames directory."""
        return self.frames_dir / filename

    def save_manifest(self, config: Dict[str, Any]) -> None:
        """
        Save processing configuration to manifest.json.

        The ``run_whisper`` and ``use_vision`` keys of ``config`` decide which
        transcript/analysis file names are recorded under ``outputs``.

        Args:
            config: Dictionary of processing options
        """
        manifest_path = self.output_dir / "manifest.json"
        stem = self.video_path.stem
        analysis_kind = "vision" if config.get("use_vision") else "ocr"

        manifest = {
            "video": {
                "name": self.video_path.name,
                "path": str(self.video_path.absolute()),
            },
            "processed_at": datetime.now().isoformat(),
            "configuration": config,
            "outputs": {
                "frames": str(self.frames_dir.relative_to(self.output_dir)),
                "enhanced_transcript": f"{stem}_enhanced.txt",
                "whisper_transcript": f"{stem}.json" if config.get("run_whisper") else None,
                "analysis": f"{stem}_{analysis_kind}.json",
            },
        }

        with open(manifest_path, 'w', encoding='utf-8') as f:
            json.dump(manifest, f, indent=2, ensure_ascii=False)

        logger.info("Saved manifest: %s", manifest_path)

    def load_manifest(self) -> Optional[Dict[str, Any]]:
        """
        Load existing manifest if it exists.

        Returns:
            Manifest dictionary or None
        """
        manifest_path = self.output_dir / "manifest.json"

        # Open directly instead of checking exists() first, so a file that
        # disappears between the check and the open cannot raise.
        try:
            with open(manifest_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            return None

    def list_outputs(self) -> Dict[str, Any]:
        """
        List all output files in the directory.

        Returns:
            Dictionary of output files and their status
        """
        video_name = self.video_path.stem

        if self.frames_dir.exists():
            # Count lazily; no need to materialize the list of frame paths.
            frame_count = sum(1 for _ in self.frames_dir.glob("*.jpg"))
        else:
            frame_count = 0

        return {
            "output_dir": str(self.output_dir),
            "manifest": (self.output_dir / "manifest.json").exists(),
            "enhanced_transcript": (self.output_dir / f"{video_name}_enhanced.txt").exists(),
            "whisper_transcript": (self.output_dir / f"{video_name}.json").exists(),
            "vision_analysis": (self.output_dir / f"{video_name}_vision.json").exists(),
            "ocr_analysis": (self.output_dir / f"{video_name}_ocr.json").exists(),
            "frames": frame_count,
        }
|
||||
Reference in New Issue
Block a user