156 lines
5.5 KiB
Python
156 lines
5.5 KiB
Python
"""
|
|
Manage output directories and manifest files.
|
|
Creates timestamped folders for each video and tracks processing options.
|
|
"""
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
import json
|
|
import logging
|
|
from typing import Dict, Any, Optional
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class OutputManager:
    """Manage output directories and manifest files for video processing.

    Every video gets a directory named ``YYYYMMDD-NNN-<video stem>`` below
    ``base_output_dir``, containing a ``frames`` sub-directory and, once
    :meth:`save_manifest` has been called, a ``manifest.json`` file.
    """

    def __init__(self, video_path: Path, base_output_dir: str = "output", use_cache: bool = True):
        """
        Initialize output manager.

        Resolves (or creates) the output directory for the video and makes
        sure its ``frames`` sub-directory exists.

        Args:
            video_path: Path to the video file being processed
            base_output_dir: Base directory for all outputs
            use_cache: Whether to use existing directories if found
        """
        # Coerce so that a plain string path works as well as a Path.
        self.video_path = Path(video_path)
        self.base_output_dir = Path(base_output_dir)
        self.use_cache = use_cache

        # Find or create output directory
        self.output_dir = self._get_or_create_output_dir()
        self.frames_dir = self.output_dir / "frames"
        self.frames_dir.mkdir(parents=True, exist_ok=True)

        logger.info("Output directory: %s", self.output_dir)

    @staticmethod
    def _parse_run_dir(dir_name: str, video_name: str) -> Optional[tuple]:
        """Split a ``YYYYMMDD-NNN-<video_name>`` directory name.

        Returns:
            ``(date_str, run_number)`` when ``dir_name`` has exactly that
            layout for ``video_name``, otherwise ``None``.
        """
        suffix = f"-{video_name}"
        if not dir_name.endswith(suffix):
            return None

        # What remains must be exactly "<8 digits>-<digits>".  A plain
        # endswith() test would also accept directories of other videos whose
        # name merely ends with "-<video_name>" (e.g. "my-intro" vs "intro").
        date_part, sep, run_part = dir_name[: -len(suffix)].partition("-")
        if not sep or len(date_part) != 8:
            return None
        # isdecimal() guarantees that int() below cannot fail.
        if not (date_part.isdecimal() and run_part.isdecimal()):
            return None
        return date_part, int(run_part)

    def _existing_runs(self, video_name: str) -> list:
        """Collect ``(date_str, run_number, path)`` for this video's output directories."""
        if not self.base_output_dir.exists():
            return []

        runs = []
        for entry in self.base_output_dir.iterdir():
            if not entry.is_dir():
                continue
            parsed = self._parse_run_dir(entry.name, video_name)
            if parsed is not None:
                runs.append((parsed[0], parsed[1], entry))
        return runs

    def _get_or_create_output_dir(self) -> Path:
        """
        Get existing output directory or create a new one with incremental number.

        With ``use_cache`` enabled, the most recent existing directory for
        this video (latest date, then highest run number) is reused.
        Otherwise a new ``<today>-<next run>-<video stem>`` directory is
        created, where the run number continues from today's highest run for
        this video.

        Returns:
            Path to output directory
        """
        video_name = self.video_path.stem
        runs = self._existing_runs(video_name)

        # Look for existing directories if caching is enabled
        if self.use_cache and runs:
            # Compare the run number numerically so that run 1000 is newer
            # than run 999 (a string sort would order them the other way).
            latest = max(runs, key=lambda run: (run[0], run[1]))[2]
            logger.info("Found existing output: %s", latest.name)
            return latest

        # Create new directory with date + incremental number
        date_str = datetime.now().strftime("%Y%m%d")
        todays_runs = [number for date, number, _ in runs if date == date_str]
        next_run = max(todays_runs, default=0) + 1

        dir_name = f"{date_str}-{next_run:03d}-{video_name}"
        output_dir = self.base_output_dir / dir_name
        output_dir.mkdir(parents=True, exist_ok=True)
        logger.info("Created new output directory: %s", dir_name)

        return output_dir

    def get_path(self, filename: str) -> Path:
        """Get full path for a file in the output directory."""
        return self.output_dir / filename

    def get_frames_path(self, filename: str) -> Path:
        """Get full path for a file in the frames directory."""
        return self.frames_dir / filename

    def save_manifest(self, config: Dict[str, Any]) -> None:
        """
        Save processing configuration to manifest.json.

        The manifest records the video name and absolute path, the time of
        writing, the given configuration and the expected output file names.

        Args:
            config: Dictionary of processing options. ``run_whisper`` and
                ``use_vision`` are read to decide which output names are
                listed; the whole dictionary is stored under
                ``configuration`` and must be JSON-serializable.
        """
        manifest_path = self.output_dir / "manifest.json"
        stem = self.video_path.stem
        analysis_kind = "vision" if config.get("use_vision") else "ocr"

        manifest = {
            "video": {
                "name": self.video_path.name,
                "path": str(self.video_path.absolute()),
            },
            "processed_at": datetime.now().isoformat(),
            "configuration": config,
            "outputs": {
                "frames": str(self.frames_dir.relative_to(self.output_dir)),
                "enhanced_transcript": f"{stem}_enhanced.txt",
                "whisper_transcript": f"{stem}.json" if config.get("run_whisper") else None,
                "analysis": f"{stem}_{analysis_kind}.json",
            },
        }

        with open(manifest_path, 'w', encoding='utf-8') as f:
            json.dump(manifest, f, indent=2, ensure_ascii=False)

        logger.info("Saved manifest: %s", manifest_path)

    def load_manifest(self) -> Optional[Dict[str, Any]]:
        """
        Load existing manifest if it exists.

        Returns:
            Manifest dictionary or None

        Raises:
            json.JSONDecodeError: If the manifest file is not valid JSON.
        """
        manifest_path = self.output_dir / "manifest.json"

        # Open directly instead of checking exists() first, so a file removed
        # between the check and the open cannot cause a crash.
        try:
            with open(manifest_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            return None

    def list_outputs(self) -> Dict[str, Any]:
        """
        List all output files in the directory.

        Returns:
            Dictionary with the output directory path, an existence flag for
            each known output file, and the number of ``*.jpg`` files in the
            frames directory (0 if that directory is missing).
        """
        video_name = self.video_path.stem

        if self.frames_dir.exists():
            frame_count = sum(1 for _ in self.frames_dir.glob("*.jpg"))
        else:
            frame_count = 0

        return {
            "output_dir": str(self.output_dir),
            "manifest": (self.output_dir / "manifest.json").exists(),
            "enhanced_transcript": (self.output_dir / f"{video_name}_enhanced.txt").exists(),
            "whisper_transcript": (self.output_dir / f"{video_name}.json").exists(),
            "vision_analysis": (self.output_dir / f"{video_name}_vision.json").exists(),
            "ocr_analysis": (self.output_dir / f"{video_name}_ocr.json").exists(),
            "frames": frame_count,
        }
|