refactor

2025-10-20 00:03:41 -03:00
parent a999bc9093
commit cd7b0aed07
11 changed files with 776 additions and 312 deletions
--- a/meetus/vision_processor.py
+++ b/meetus/vision_processor.py
@@ -6,6 +6,7 @@ from typing import List, Tuple, Dict, Optional
 from pathlib import Path
 import logging
 from difflib import SequenceMatcher
+import os

 logger = logging.getLogger(__name__)

@@ -13,15 +14,24 @@ logger = logging.getLogger(__name__)
 class VisionProcessor:
    """Process frames using local vision models via Ollama."""

-    def __init__(self, model: str = "llava:13b"):
+    def __init__(self, model: str = "llava:13b", prompts_dir: Optional[str] = None):
        """
        Initialize vision processor.

        Args:
            model: Ollama vision model to use (llava:13b, llava:7b, llava-llama3, bakllava)
+            prompts_dir: Directory containing prompt files (default: meetus/prompts/)
        """
        self.model = model
        self._client = None
+
+        # Set prompts directory
+        if prompts_dir:
+            self.prompts_dir = Path(prompts_dir)
+        else:
+            # Default to meetus/prompts/ relative to this file
+            self.prompts_dir = Path(__file__).parent / "prompts"
+
        self._init_client()

    def _init_client(self):
@@ -53,6 +63,26 @@ class VisionProcessor:
                "Also install Ollama: https://ollama.ai/download"
            )

+    def _load_prompt(self, context: str) -> str:
+        """
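+        Load the prompt text for a context from "<context>.txt" in the prompts directory.
+
+        Args:
+            context: Context name (meeting, dashboard, code, console); used as the file's base name
+
+        Returns:
+            Stripped file contents, or a generic default prompt if the file does not exist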
+        """
+        prompt_file = self.prompts_dir / f"{context}.txt"
+
+        if prompt_file.exists():
+            with open(prompt_file, 'r', encoding='utf-8') as f:
+                return f.read().strip()
+        else:
+            # Fall back to a generic, context-independent default prompt
+            logger.warning(f"Prompt file not found: {prompt_file}, using default")
+            return "Analyze this image and describe what you see in detail."
+
    def analyze_frame(self, image_path: str, context: str = "meeting") -> str:
        """
        Analyze a single frame using local vision model.
@@ -64,50 +94,8 @@ class VisionProcessor:
        Returns:
            Analyzed content description
        """
-        # Context-specific prompts
-        prompts = {
-            "meeting": """Analyze this screen capture from a meeting recording. Extract:
-1. Any visible text (titles, labels, headings)
-2. Key metrics, numbers, or data points shown
-3. Dashboard panels or visualizations (describe what they show)
-4. Code snippets (preserve formatting and context)
-5. Console/terminal output (commands and results)
-6. Application names or UI elements
-
-Focus on information that would help someone understand what was being discussed.
-Be concise but include all important details. If there's code, preserve it exactly.""",
-
-            "dashboard": """Analyze this dashboard/monitoring panel. Extract:
-1. Panel titles and metrics names
-2. Current values and units
-3. Trends (up/down/stable)
-4. Alerts or warnings
-5. Time ranges shown
-6. Any anomalies or notable patterns
-
-Format as structured data.""",
-
-            "code": """Analyze this code screenshot. Extract:
-1. Programming language
-2. File name or path (if visible)
-3. Code content (preserve exact formatting)
-4. Comments
-5. Function/class names
-6. Any error messages or warnings
-
-Preserve code exactly as shown.""",
-
-            "console": """Analyze this console/terminal output. Extract:
-1. Commands executed
-2. Output/results
-3. Error messages
-4. Warnings or status messages
-5. File paths or URLs
-
-Preserve formatting and structure."""
-        }
-
-        prompt = prompts.get(context, prompts["meeting"])
+        # Load prompt from file
+        prompt = self._load_prompt(context)

        try:
            # Use Ollama's chat API with vision