"""
|
|
Cloud-agnostic blob storage interface.
|
|
|
|
All file-based sources (chunks, uploads, checkpoints) go through MinIO.
|
|
Local dev runs MinIO in docker-compose — same code path as production.
|
|
Production changes S3_ENDPOINT_URL; nothing else changes.
|
|
|
|
Single bucket, multiple prefixes:
|
|
in/ — source media
|
|
out/ — transcoded chunks
|
|
checkpoints/ — detection intermediate blobs (frames, crops)
|
|
|
|
Each prefix is independently configurable via env vars so they can
|
|
be split into separate buckets later if needed.
|
|
|
|
Nothing outside core/storage/ should import boto3 directly.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
from dataclasses import dataclass
|
|
from typing import Optional
|
|
|
|
|
|
# Single bucket, prefix-based layout
|
|
BUCKET = os.environ.get("S3_BUCKET", "mpr")
|
|
PREFIX_IN = os.environ.get("S3_PREFIX_IN", "in/")
|
|
PREFIX_OUT = os.environ.get("S3_PREFIX_OUT", "out/")
|
|
PREFIX_CHECKPOINTS = os.environ.get("S3_PREFIX_CHECKPOINTS", "checkpoints/")
|
|
|
|
|
|
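
# The endpoint and credentials are read by core/storage/s3.py, not here.
# A sketch of the assumed environment contract — only S3_ENDPOINT_URL,
# S3_BUCKET, and the S3_PREFIX_* names above appear in this codebase; the
# example values are illustrative:
#
#   S3_ENDPOINT_URL=http://localhost:9000            # local docker-compose MinIO
#   S3_ENDPOINT_URL=https://s3.amazonaws.com         # production AWS S3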


@dataclass
class BlobObject:
    """Metadata for a single object in the bucket."""

    key: str
    filename: str
    size_bytes: int


class BlobStore:
    """
    Thin wrapper over the S3-compatible storage backend (MinIO / AWS S3).

    All configuration (endpoint URL, credentials, region) is read from
    environment variables by the underlying s3 module.
    """

    def __init__(self, bucket: str, prefix: str = ""):
        self.bucket = bucket
        self.prefix = prefix

    def _full_prefix(self, prefix: str) -> str:
        """Combine the store prefix with a caller-supplied prefix."""
        return self.prefix + prefix

    def list(
        self,
        prefix: str = "",
        extensions: Optional[set[str]] = None,
    ) -> list[BlobObject]:
        """List objects under the store's prefix, optionally filtered by extension."""
        # Imported lazily so boto3 stays an implementation detail of core/storage/.
        from core.storage.s3 import list_objects

        full = self._full_prefix(prefix)
        raw = list_objects(self.bucket, prefix=full, extensions=extensions)
        return [
            BlobObject(
                key=obj["key"],
                filename=obj["filename"],
                size_bytes=obj["size"],
            )
            for obj in raw
        ]

    def download_to_temp(self, key: str) -> str:
        """Download a blob to a temp file. Caller is responsible for cleanup."""
        from core.storage.s3 import download_to_temp

        return download_to_temp(self.bucket, key)
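
    # Sketch of the expected call pattern — the processing step is
    # hypothetical; the point is the try/finally cleanup:
    #
    #     path = store.download_to_temp(obj.key)
    #     try:
    #         process(path)
    #     finally:
    #         os.remove(path)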

    def upload(self, local_path: str, key: str) -> None:
        """Upload a local file to the bucket."""
        from core.storage.s3 import upload_file

        upload_file(local_path, self.bucket, key)

    def get_url(self, key: str, expires: int = 3600) -> str:
        """Return a presigned URL for the given key, valid for `expires` seconds."""
        from core.storage.s3 import get_presigned_url

        return get_presigned_url(self.bucket, key, expires=expires)


def get_store(purpose: str = "out") -> BlobStore:
    """
    Return a BlobStore for the given purpose.

    Purposes map to prefixes:

        "in"          → source media (S3_PREFIX_IN)
        "out"         → transcoded output (S3_PREFIX_OUT)
        "checkpoints" → detection blobs (S3_PREFIX_CHECKPOINTS)

    All share the same bucket (S3_BUCKET), each scoped to its prefix.
    An unknown purpose falls back to the bucket root (empty prefix).
    """
    prefix_map = {
        "in": PREFIX_IN,
        "out": PREFIX_OUT,
        "checkpoints": PREFIX_CHECKPOINTS,
    }
    prefix = prefix_map.get(purpose, "")
    return BlobStore(BUCKET, prefix=prefix)
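

# Minimal smoke test, assuming a reachable MinIO/S3 endpoint and the helpers
# in core/storage/s3.py; the "in/demo.mp4" key and the extension set are
# illustrative, not part of the real layout.
if __name__ == "__main__":
    store = get_store("in")

    # List source media, filtered to a hypothetical extension set.
    for blob in store.list(extensions={".mp4", ".mov"}):
        print(f"{blob.key} ({blob.size_bytes} bytes)")

    # Download / cleanup round trip for one illustrative key.
    path = store.download_to_temp("in/demo.mp4")
    try:
        print(f"downloaded to {path}")
    finally:
        os.remove(path)

    # Presigned URL valid for ten minutes (default is 3600 seconds).
    print(store.get_url("in/demo.mp4", expires=600))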