""" Cloud-agnostic blob storage interface. All file-based sources (chunks, uploads, checkpoints) go through MinIO. Local dev runs MinIO in docker-compose — same code path as production. Production changes S3_ENDPOINT_URL; nothing else changes. Single bucket, multiple prefixes: in/ — source media out/ — transcoded chunks checkpoints/ — detection intermediate blobs (frames, crops) Each prefix is independently configurable via env vars so they can be split into separate buckets later if needed. Nothing outside core/storage/ should import boto3 directly. """ from __future__ import annotations import os from dataclasses import dataclass from typing import Optional # Single bucket, prefix-based layout BUCKET = os.environ.get("S3_BUCKET", "mpr") PREFIX_IN = os.environ.get("S3_PREFIX_IN", "in/") PREFIX_OUT = os.environ.get("S3_PREFIX_OUT", "out/") PREFIX_CHECKPOINTS = os.environ.get("S3_PREFIX_CHECKPOINTS", "checkpoints/") @dataclass class BlobObject: key: str filename: str size_bytes: int class BlobStore: """ Thin wrapper over the S3-compatible storage backend (MinIO / AWS S3). All configuration (endpoint URL, credentials, region) is read from environment variables by the underlying s3 module. """ def __init__(self, bucket: str, prefix: str = ""): self.bucket = bucket self.prefix = prefix def _full_prefix(self, prefix: str) -> str: """Combine store prefix with caller prefix.""" return self.prefix + prefix def list( self, prefix: str = "", extensions: Optional[set[str]] = None, ) -> list[BlobObject]: """List objects in the bucket, optionally filtered by extension.""" from core.storage.s3 import list_objects full = self._full_prefix(prefix) raw = list_objects(self.bucket, prefix=full, extensions=extensions) objects = [] for obj in raw: blob = BlobObject( key=obj["key"], filename=obj["filename"], size_bytes=obj["size"], ) objects.append(blob) return objects def download_to_temp(self, key: str) -> str: """Download a blob to a temp file. Caller is responsible for cleanup.""" from core.storage.s3 import download_to_temp return download_to_temp(self.bucket, key) def upload(self, local_path: str, key: str) -> None: """Upload a local file to the bucket.""" from core.storage.s3 import upload_file upload_file(local_path, self.bucket, key) def get_url(self, key: str, expires: int = 3600) -> str: """Return a presigned URL for the given key.""" from core.storage.s3 import get_presigned_url return get_presigned_url(self.bucket, key, expires=expires) def get_store(purpose: str = "out") -> BlobStore: """ Return a BlobStore for the given purpose. Purposes map to prefixes: "in" → source media (S3_PREFIX_IN) "out" → transcoded output (S3_PREFIX_OUT) "checkpoints" → detection blobs (S3_PREFIX_CHECKPOINTS) All share the same bucket (S3_BUCKET), each scoped to its prefix. """ prefix_map = { "in": PREFIX_IN, "out": PREFIX_OUT, "checkpoints": PREFIX_CHECKPOINTS, } prefix = prefix_map.get(purpose, "") return BlobStore(BUCKET, prefix=prefix)