embed images
This commit is contained in:
@@ -6,6 +6,8 @@ from typing import List, Dict, Optional
|
||||
import json
|
||||
from pathlib import Path
|
||||
import logging
|
||||
import base64
|
||||
from io import BytesIO
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -13,9 +15,16 @@ logger = logging.getLogger(__name__)
|
||||
class TranscriptMerger:
|
||||
"""Merge audio transcripts with screen OCR text."""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize transcript merger."""
|
||||
pass
|
||||
def __init__(self, embed_images: bool = False, embed_quality: int = 80):
|
||||
"""
|
||||
Initialize transcript merger.
|
||||
|
||||
Args:
|
||||
embed_images: Whether to embed frame images as base64
|
||||
embed_quality: JPEG quality for embedded images (0-100)
|
||||
"""
|
||||
self.embed_images = embed_images
|
||||
self.embed_quality = embed_quality
|
||||
|
||||
def load_whisper_transcript(self, transcript_path: str, group_interval: Optional[int] = None) -> List[Dict]:
|
||||
"""
|
||||
@@ -120,6 +129,32 @@ class TranscriptMerger:
|
||||
logger.info(f"Grouped {len(segments)} segments into {len(intervals)} intervals of {interval_seconds}s")
|
||||
return intervals
|
||||
|
||||
def _encode_image_base64(self, image_path: str) -> tuple[str, int]:
    """
    Read an image file and return its contents as base64 text.

    The file bytes are encoded exactly as stored on disk; no decoding,
    resizing or re-compression happens here.

    Args:
        image_path: Path to image file

    Returns:
        Tuple of (base64_string, size_in_bytes), where the size is the
        length of the raw file before encoding. On any failure the error
        is logged and ("", 0) is returned instead of raising.
    """
    try:
        with open(image_path, 'rb') as image_file:
            raw = image_file.read()

        raw_size = len(raw)
        # b64encode yields ASCII-only bytes, so turning them into str is lossless
        encoded = str(base64.b64encode(raw), 'utf-8')

        logger.debug(f"Encoded {Path(image_path).name}: {raw_size} bytes")
    except Exception as e:
        # Best-effort: a bad frame must not abort the whole transcript
        logger.error(f"Failed to encode image {image_path}: {e}")
        return "", 0

    return encoded, raw_size
|
||||
|
||||
def merge_transcripts(
|
||||
self,
|
||||
audio_segments: List[Dict],
|
||||
@@ -172,10 +207,15 @@ class TranscriptMerger:
|
||||
lines = []
|
||||
lines.append("=" * 80)
|
||||
lines.append("ENHANCED MEETING TRANSCRIPT")
|
||||
lines.append("Audio transcript + Screen content")
|
||||
if self.embed_images:
|
||||
lines.append("Audio transcript + Embedded frame images (base64)")
|
||||
else:
|
||||
lines.append("Audio transcript + Screen content")
|
||||
lines.append("=" * 80)
|
||||
lines.append("")
|
||||
|
||||
total_image_bytes = 0
|
||||
|
||||
for seg in segments:
|
||||
timestamp = self._format_timestamp(seg['timestamp'])
|
||||
|
||||
@@ -186,11 +226,31 @@ class TranscriptMerger:
|
||||
|
||||
else: # screen
|
||||
lines.append(f"[{timestamp}] SCREEN CONTENT:")
|
||||
# Indent screen text for visibility
|
||||
screen_text = seg['text'].replace('\n', '\n | ')
|
||||
lines.append(f" | {screen_text}")
|
||||
|
||||
# Embed image if requested
|
||||
if self.embed_images and 'frame_path' in seg:
|
||||
b64_img, img_size = self._encode_image_base64(seg['frame_path'])
|
||||
total_image_bytes += img_size
|
||||
|
||||
if b64_img:
|
||||
lines.append(f" IMAGE (base64, {img_size // 1024}KB):")
|
||||
lines.append(f" <image>data:image/jpeg;base64,{b64_img}</image>")
|
||||
lines.append("")
|
||||
|
||||
# Include text content if available (fallback or additional context)
|
||||
if 'text' in seg and seg['text'].strip():
|
||||
screen_text = seg['text'].replace('\n', '\n | ')
|
||||
lines.append(f" TEXT:")
|
||||
lines.append(f" | {screen_text}")
|
||||
|
||||
lines.append("")
|
||||
|
||||
if self.embed_images and total_image_bytes > 0:
|
||||
total_mb = total_image_bytes / (1024 * 1024)
|
||||
lines.append("")
|
||||
lines.append(f"Total embedded images size: {total_mb:.2f} MB")
|
||||
logger.info(f"Embedded {len([s for s in segments if s['type'] == 'screen'])} images, total size: {total_mb:.2f} MB")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
def _format_compact(self, segments: List[Dict]) -> str:
|
||||
|
||||
Reference in New Issue
Block a user