This commit is contained in:
2026-03-30 09:53:10 -03:00
parent 4220b0418e
commit aac27b8504
32 changed files with 1068 additions and 329 deletions

View File

@@ -1,7 +1,19 @@
"""Frame image storage — save/load to S3/MinIO as JPEGs."""
"""
Frame cache — per-timeline frame storage in blob storage (S3/MinIO).
Frames are extracted from chunks once, cached as JPEGs at
cache/timelines/{timeline_id}/frames/{seq}.jpg in the app's
blob storage. Any job on the timeline reads from the cache.
Cache is clearable and rebuildable from chunks.
Uses the same storage backend as the rest of the app, so it
works across lambdas, GPU boxes, and local dev.
"""
from __future__ import annotations
import base64
import io
import logging
import os
import tempfile
@@ -14,25 +26,39 @@ from core.detect.models import Frame
logger = logging.getLogger(__name__)
BUCKET = os.environ.get("S3_BUCKET", "mpr")
CHECKPOINT_PREFIX = "checkpoints"
CACHE_PREFIX = "cache/timelines"
def save_frames(job_id: str, frames: list[Frame]) -> dict[int, str]:
def _frame_key(timeline_id: str, seq: int) -> str:
    """Blob-storage key for one cached frame JPEG of a timeline."""
    return "/".join((CACHE_PREFIX, timeline_id, "frames", f"{seq}.jpg"))
def _list_prefix(timeline_id: str) -> str:
    """Key prefix under which all cached frames of a timeline live."""
    return CACHE_PREFIX + "/" + timeline_id + "/frames/"
def cache_exists(timeline_id: str) -> bool:
    """Check if frame cache exists for a timeline.

    True when at least one object is stored under the timeline's
    frame prefix in the app's blob storage.
    """
    from core.storage.s3 import list_objects

    return bool(list_objects(BUCKET, _list_prefix(timeline_id)))
def cache_frames(timeline_id: str, frames: list[Frame], quality: int = 85) -> int:
# NOTE(review): this span is a rendered diff, not clean source. Removed
# (old save_frames) lines are interleaved with the added cache_frames lines,
# and the unchanged line inside the try block -- presumably the actual
# upload_file(...) call -- is elided behind the @@ hunk header below.
# Code is left byte-identical; comments only.
"""
Save frame images to S3 as JPEGs.
Write frames to blob storage as JPEGs.
Returns manifest: {sequence: s3_key}
Returns number of frames cached.
"""
from core.storage.s3 import upload_file
# Old version accumulated a {sequence: key} manifest; new version only counts.
manifest = {}
for frame in frames:
key = f"{CHECKPOINT_PREFIX}/{job_id}/frames/{frame.sequence}.jpg"
key = _frame_key(timeline_id, frame.sequence)
# Encode the frame's image array to JPEG in a temp file. delete=False so
# the closed file can be uploaded by path, then removed in the finally.
with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
img = Image.fromarray(frame.image)
img.save(tmp, format="JPEG", quality=85)
# New version parameterizes JPEG quality (default 85, same as before).
img.save(tmp, format="JPEG", quality=quality)
tmp_path = tmp.name
try:
@@ -40,25 +66,30 @@ def save_frames(job_id: str, frames: list[Frame]) -> dict[int, str]:
finally:
os.unlink(tmp_path)
manifest[frame.sequence] = key
logger.info("Saved %d frames to s3://%s/%s/%s/frames/",
len(frames), BUCKET, CHECKPOINT_PREFIX, job_id)
return manifest
logger.info("Cached %d frames for timeline %s", len(frames), timeline_id)
return len(frames)
def load_frames(manifest: dict[int, str], frame_metadata: list[dict]) -> list[Frame]:
def load_cached_frames(timeline_id: str) -> list[Frame]:
# NOTE(review): rendered diff span -- old load_frames lines are interleaved
# with the new load_cached_frames lines, and the assignment of image_array
# (presumably converting the PIL image to a numpy array -- confirm in the
# full file) is elided behind the @@ hunk header below.
# Code is left byte-identical; comments only.
"""
Load frame images from S3 and reconstitute Frame objects.
Load all cached frames as Frame objects with numpy arrays.
frame_metadata: list of dicts with sequence, chunk_id, timestamp, perceptual_hash.
Returns empty list if cache doesn't exist.
"""
from core.storage.s3 import download_to_temp
from core.storage.s3 import list_objects, download_to_temp
objects = list_objects(BUCKET, _list_prefix(timeline_id))
if not objects:
return []
meta_map = {m["sequence"]: m for m in frame_metadata}
frames = []
for obj in objects:
key = obj["key"]
# Sequence number is recovered from the object key's filename.
filename = key.rsplit("/", 1)[-1]
if not filename.endswith(".jpg"):
continue
# NOTE(review): str.replace strips every ".jpg" occurrence in the name,
# not just the trailing extension; a suffix slice would be safer.
seq = int(filename.replace(".jpg", ""))
for seq, key in manifest.items():
tmp_path = download_to_temp(BUCKET, key)
try:
img = Image.open(tmp_path).convert("RGB")
@@ -66,13 +97,12 @@ def load_frames(manifest: dict[int, str], frame_metadata: list[dict]) -> list[Fr
finally:
os.unlink(tmp_path)
meta = meta_map.get(seq, {})
frame = Frame(
sequence=seq,
# New version no longer carries per-frame metadata: chunk_id, timestamp
# and perceptual_hash are zeroed rather than looked up from a manifest.
chunk_id=meta.get("chunk_id", 0),
timestamp=meta.get("timestamp", 0.0),
chunk_id=0,
timestamp=0.0,
image=image_array,
perceptual_hash=meta.get("perceptual_hash", ""),
perceptual_hash="",
)
frames.append(frame)
@@ -80,32 +110,70 @@ def load_frames(manifest: dict[int, str], frame_metadata: list[dict]) -> list[Fr
return frames
def load_cached_frames_b64(timeline_id: str) -> list[dict]:
    """
    Load cached frames as base64 JPEGs for the UI.

    Lightweight path: JPEG bytes are passed through as-is (no image
    decode, no numpy). Timestamp metadata is not stored in the cache,
    so it is reported as 0.0.

    Returns list of {seq, timestamp, jpeg_b64} sorted by seq; empty
    list if the cache doesn't exist.
    """
    from core.storage.s3 import list_objects, download_to_temp

    objects = list_objects(BUCKET, _list_prefix(timeline_id))
    if not objects:
        return []
    result = []
    for obj in objects:
        key = obj["key"]
        filename = key.rsplit("/", 1)[-1]
        if not filename.endswith(".jpg"):
            continue
        # Slice the extension off rather than str.replace, which would
        # drop every ".jpg" occurrence in the name (e.g. "3.jpg.jpg").
        seq = int(filename[: -len(".jpg")])
        tmp_path = download_to_temp(BUCKET, key)
        try:
            with open(tmp_path, "rb") as f:
                jpeg_b64 = base64.b64encode(f.read()).decode()
        finally:
            # Always remove the downloaded temp file, even on read failure.
            os.unlink(tmp_path)
        result.append({
            "seq": seq,
            "timestamp": 0.0,  # not persisted in the frame cache
            "jpeg_b64": jpeg_b64,
        })
    result.sort(key=lambda f: f["seq"])
    return result
def clear_cache(timeline_id: str):
    """Delete every cached frame object for a timeline.

    The cache is rebuildable from chunks, so clearing is always safe.
    """
    from core.storage.s3 import delete_objects

    target = _list_prefix(timeline_id)
    delete_objects(BUCKET, target)
    logger.info("Cleared frame cache for timeline %s", timeline_id)
def frames_to_b64(frames: list[Frame], quality: int = 75) -> list[dict]:
    """
    Encode in-memory Frame objects as base64 JPEG dicts.

    For API responses when frames are already in memory (no storage
    round-trip). Returns [{seq, timestamp, jpeg_b64}, ...] sorted by seq.
    """
    encoded = []
    for frame in frames:
        buffer = io.BytesIO()
        # JPEG-compress the frame's image array entirely in memory.
        Image.fromarray(frame.image).save(buffer, format="JPEG", quality=quality)
        encoded.append({
            "seq": frame.sequence,
            "timestamp": frame.timestamp,
            "jpeg_b64": base64.b64encode(buffer.getvalue()).decode(),
        })
    return sorted(encoded, key=lambda item: item["seq"])