refactor
This commit is contained in:
@@ -6,6 +6,7 @@ from typing import List, Tuple, Dict, Optional
|
||||
from pathlib import Path
|
||||
import logging
|
||||
from difflib import SequenceMatcher
|
||||
import os
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -13,15 +14,24 @@ logger = logging.getLogger(__name__)
|
||||
class VisionProcessor:
|
||||
"""Process frames using local vision models via Ollama."""
|
||||
|
||||
def __init__(self, model: str = "llava:13b"):
|
||||
def __init__(self, model: str = "llava:13b", prompts_dir: Optional[str] = None):
    """
    Set up the processor's model name, prompt location and client.

    Args:
        model: Ollama vision model to use (llava:13b, llava:7b, llava-llama3, bakllava)
        prompts_dir: Directory containing prompt files. Any falsy value
            (None or an empty string) selects the "prompts" directory that
            sits next to this module.
    """
    self.model = model
    # Placeholder until _init_client() runs; presumably that call replaces it.
    self._client = None

    # An explicit directory wins; otherwise use the one shipped beside this file.
    self.prompts_dir = (
        Path(prompts_dir) if prompts_dir else Path(__file__).parent / "prompts"
    )

    self._init_client()
|
||||
|
||||
def _init_client(self):
|
||||
@@ -53,6 +63,26 @@ class VisionProcessor:
|
||||
"Also install Ollama: https://ollama.ai/download"
|
||||
)
|
||||
|
||||
def _load_prompt(self, context: str) -> str:
    """
    Load the prompt text for a context from the prompts directory.

    Reads ``<prompts_dir>/<context>.txt`` as UTF-8 and returns its content
    with leading/trailing whitespace stripped. When the file is missing, a
    warning is logged and a generic default prompt is returned instead.

    Args:
        context: Context name (meeting, dashboard, code, console)

    Returns:
        Prompt text
    """
    prompt_file = self.prompts_dir / f"{context}.txt"

    try:
        # Attempt the read directly instead of checking exists() first, so a
        # file removed between the check and the open cannot raise here.
        return prompt_file.read_text(encoding="utf-8").strip()
    except (FileNotFoundError, NotADirectoryError):
        # Both errors mean "no such prompt file" (the second occurs when a
        # parent path component is not a directory); fall back to the default.
        logger.warning(f"Prompt file not found: {prompt_file}, using default")
        return "Analyze this image and describe what you see in detail."
|
||||
|
||||
def analyze_frame(self, image_path: str, context: str = "meeting") -> str:
|
||||
"""
|
||||
Analyze a single frame using local vision model.
|
||||
@@ -64,50 +94,8 @@ class VisionProcessor:
|
||||
Returns:
|
||||
Analyzed content description
|
||||
"""
|
||||
# Context-specific prompts
|
||||
prompts = {
|
||||
"meeting": """Analyze this screen capture from a meeting recording. Extract:
|
||||
1. Any visible text (titles, labels, headings)
|
||||
2. Key metrics, numbers, or data points shown
|
||||
3. Dashboard panels or visualizations (describe what they show)
|
||||
4. Code snippets (preserve formatting and context)
|
||||
5. Console/terminal output (commands and results)
|
||||
6. Application names or UI elements
|
||||
|
||||
Focus on information that would help someone understand what was being discussed.
|
||||
Be concise but include all important details. If there's code, preserve it exactly.""",
|
||||
|
||||
"dashboard": """Analyze this dashboard/monitoring panel. Extract:
|
||||
1. Panel titles and metrics names
|
||||
2. Current values and units
|
||||
3. Trends (up/down/stable)
|
||||
4. Alerts or warnings
|
||||
5. Time ranges shown
|
||||
6. Any anomalies or notable patterns
|
||||
|
||||
Format as structured data.""",
|
||||
|
||||
"code": """Analyze this code screenshot. Extract:
|
||||
1. Programming language
|
||||
2. File name or path (if visible)
|
||||
3. Code content (preserve exact formatting)
|
||||
4. Comments
|
||||
5. Function/class names
|
||||
6. Any error messages or warnings
|
||||
|
||||
Preserve code exactly as shown.""",
|
||||
|
||||
"console": """Analyze this console/terminal output. Extract:
|
||||
1. Commands executed
|
||||
2. Output/results
|
||||
3. Error messages
|
||||
4. Warnings or status messages
|
||||
5. File paths or URLs
|
||||
|
||||
Preserve formatting and structure."""
|
||||
}
|
||||
|
||||
prompt = prompts.get(context, prompts["meeting"])
|
||||
# Load prompt from file
|
||||
prompt = self._load_prompt(context)
|
||||
|
||||
try:
|
||||
# Use Ollama's chat API with vision
|
||||
|
||||
Reference in New Issue
Block a user