This commit is contained in:
2026-03-30 09:53:10 -03:00
parent 4220b0418e
commit aac27b8504
32 changed files with 1068 additions and 329 deletions

View File

@@ -1,7 +1,19 @@
"""Frame image storage — save/load to S3/MinIO as JPEGs."""
"""
Frame cache — per-timeline frame storage in blob storage (S3/MinIO).
Frames are extracted from chunks once, cached as JPEGs at
cache/timelines/{timeline_id}/frames/{seq}.jpg in the app's
blob storage. Any job on the timeline reads from the cache.
Cache is clearable and rebuildable from chunks.
Uses the same storage backend as the rest of the app, so it
works across lambdas, GPU boxes, and local dev.
"""
from __future__ import annotations
import base64
import io
import logging
import os
import tempfile
@@ -14,25 +26,39 @@ from core.detect.models import Frame
logger = logging.getLogger(__name__)
BUCKET = os.environ.get("S3_BUCKET", "mpr")
CHECKPOINT_PREFIX = "checkpoints"
CACHE_PREFIX = "cache/timelines"
def save_frames(job_id: str, frames: list[Frame]) -> dict[int, str]:
def _frame_key(timeline_id: str, seq: int) -> str:
    """Blob-storage key for one cached frame JPEG of a timeline."""
    return "/".join((CACHE_PREFIX, timeline_id, "frames", f"{seq}.jpg"))
def _list_prefix(timeline_id: str) -> str:
    """Key prefix under which all cached frames of a timeline live."""
    return CACHE_PREFIX + "/" + timeline_id + "/frames/"
def cache_exists(timeline_id: str) -> bool:
    """Check if frame cache exists for a timeline.

    True when at least one object is stored under the timeline's
    frame prefix in the app's blob storage.
    """
    from core.storage.s3 import list_objects

    return bool(list_objects(BUCKET, _list_prefix(timeline_id)))
def cache_frames(timeline_id: str, frames: list[Frame], quality: int = 85) -> int:
# NOTE(review): this span is a rendered diff, not clean source. Removed
# (old save_frames) lines are interleaved with the added cache_frames lines,
# and the unchanged line inside the try block -- presumably the actual
# upload_file(...) call -- is elided behind the @@ hunk header below.
# Code is left byte-identical; comments only.
"""
Save frame images to S3 as JPEGs.
Write frames to blob storage as JPEGs.
Returns manifest: {sequence: s3_key}
Returns number of frames cached.
"""
from core.storage.s3 import upload_file
# Old version accumulated a {sequence: key} manifest; new version only counts.
manifest = {}
for frame in frames:
key = f"{CHECKPOINT_PREFIX}/{job_id}/frames/{frame.sequence}.jpg"
key = _frame_key(timeline_id, frame.sequence)
# Encode the frame's image array to JPEG in a temp file. delete=False so
# the closed file can be uploaded by path, then removed in the finally.
with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
img = Image.fromarray(frame.image)
img.save(tmp, format="JPEG", quality=85)
# New version parameterizes JPEG quality (default 85, same as before).
img.save(tmp, format="JPEG", quality=quality)
tmp_path = tmp.name
try:
@@ -40,25 +66,30 @@ def save_frames(job_id: str, frames: list[Frame]) -> dict[int, str]:
finally:
os.unlink(tmp_path)
manifest[frame.sequence] = key
logger.info("Saved %d frames to s3://%s/%s/%s/frames/",
len(frames), BUCKET, CHECKPOINT_PREFIX, job_id)
return manifest
logger.info("Cached %d frames for timeline %s", len(frames), timeline_id)
return len(frames)
def load_frames(manifest: dict[int, str], frame_metadata: list[dict]) -> list[Frame]:
def load_cached_frames(timeline_id: str) -> list[Frame]:
# NOTE(review): rendered diff span -- old load_frames lines are interleaved
# with the new load_cached_frames lines, and the assignment of image_array
# (presumably converting the PIL image to a numpy array -- confirm in the
# full file) is elided behind the @@ hunk header below.
# Code is left byte-identical; comments only.
"""
Load frame images from S3 and reconstitute Frame objects.
Load all cached frames as Frame objects with numpy arrays.
frame_metadata: list of dicts with sequence, chunk_id, timestamp, perceptual_hash.
Returns empty list if cache doesn't exist.
"""
from core.storage.s3 import download_to_temp
from core.storage.s3 import list_objects, download_to_temp
objects = list_objects(BUCKET, _list_prefix(timeline_id))
if not objects:
return []
meta_map = {m["sequence"]: m for m in frame_metadata}
frames = []
for obj in objects:
key = obj["key"]
# Sequence number is recovered from the object key's filename.
filename = key.rsplit("/", 1)[-1]
if not filename.endswith(".jpg"):
continue
# NOTE(review): str.replace strips every ".jpg" occurrence in the name,
# not just the trailing extension; a suffix slice would be safer.
seq = int(filename.replace(".jpg", ""))
for seq, key in manifest.items():
tmp_path = download_to_temp(BUCKET, key)
try:
img = Image.open(tmp_path).convert("RGB")
@@ -66,13 +97,12 @@ def load_frames(manifest: dict[int, str], frame_metadata: list[dict]) -> list[Fr
finally:
os.unlink(tmp_path)
meta = meta_map.get(seq, {})
frame = Frame(
sequence=seq,
# New version no longer carries per-frame metadata: chunk_id, timestamp
# and perceptual_hash are zeroed rather than looked up from a manifest.
chunk_id=meta.get("chunk_id", 0),
timestamp=meta.get("timestamp", 0.0),
chunk_id=0,
timestamp=0.0,
image=image_array,
perceptual_hash=meta.get("perceptual_hash", ""),
perceptual_hash="",
)
frames.append(frame)
@@ -80,32 +110,70 @@ def load_frames(manifest: dict[int, str], frame_metadata: list[dict]) -> list[Fr
return frames
def load_cached_frames_b64(timeline_id: str) -> list[dict]:
    """
    Load cached frames as base64 JPEGs for the UI.

    Lightweight path: JPEG bytes are passed through as-is (no image
    decode, no numpy). Timestamp metadata is not stored in the cache,
    so it is reported as 0.0.

    Returns list of {seq, timestamp, jpeg_b64} sorted by seq; empty
    list if the cache doesn't exist.
    """
    from core.storage.s3 import list_objects, download_to_temp

    objects = list_objects(BUCKET, _list_prefix(timeline_id))
    if not objects:
        return []
    result = []
    for obj in objects:
        key = obj["key"]
        filename = key.rsplit("/", 1)[-1]
        if not filename.endswith(".jpg"):
            continue
        # Slice the extension off rather than str.replace, which would
        # drop every ".jpg" occurrence in the name (e.g. "3.jpg.jpg").
        seq = int(filename[: -len(".jpg")])
        tmp_path = download_to_temp(BUCKET, key)
        try:
            with open(tmp_path, "rb") as f:
                jpeg_b64 = base64.b64encode(f.read()).decode()
        finally:
            # Always remove the downloaded temp file, even on read failure.
            os.unlink(tmp_path)
        result.append({
            "seq": seq,
            "timestamp": 0.0,  # not persisted in the frame cache
            "jpeg_b64": jpeg_b64,
        })
    result.sort(key=lambda f: f["seq"])
    return result
def clear_cache(timeline_id: str):
    """Delete every cached frame object for a timeline.

    The cache is rebuildable from chunks, so clearing is always safe.
    """
    from core.storage.s3 import delete_objects

    target = _list_prefix(timeline_id)
    delete_objects(BUCKET, target)
    logger.info("Cleared frame cache for timeline %s", timeline_id)
def frames_to_b64(frames: list[Frame], quality: int = 75) -> list[dict]:
    """
    Encode in-memory Frame objects as base64 JPEG dicts.

    For API responses when frames are already in memory (no storage
    round-trip). Returns [{seq, timestamp, jpeg_b64}, ...] sorted by seq.
    """
    encoded = []
    for frame in frames:
        buffer = io.BytesIO()
        # JPEG-compress the frame's image array entirely in memory.
        Image.fromarray(frame.image).save(buffer, format="JPEG", quality=quality)
        encoded.append({
            "seq": frame.sequence,
            "timestamp": frame.timestamp,
            "jpeg_b64": base64.b64encode(buffer.getvalue()).decode(),
        })
    return sorted(encoded, key=lambda item: item["seq"])