refactor storage minio for k8s
This commit is contained in:
112
core/storage/blob.py
Normal file
112
core/storage/blob.py
Normal file
@@ -0,0 +1,112 @@
|
||||
"""
|
||||
Cloud-agnostic blob storage interface.
|
||||
|
||||
All file-based sources (chunks, uploads, checkpoints) go through MinIO.
|
||||
Local dev runs MinIO in docker-compose — same code path as production.
|
||||
Production changes S3_ENDPOINT_URL; nothing else changes.
|
||||
|
||||
Single bucket, multiple prefixes:
|
||||
in/ — source media
|
||||
out/ — transcoded chunks
|
||||
checkpoints/ — detection intermediate blobs (frames, crops)
|
||||
|
||||
Each prefix is independently configurable via env vars so they can
|
||||
be split into separate buckets later if needed.
|
||||
|
||||
Nothing outside core/storage/ should import boto3 directly.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# Single-bucket layout: every blob class shares one bucket and is
# separated by key prefix. Each value is independently overridable via
# the environment so prefixes can later be split into dedicated buckets.
BUCKET = os.getenv("S3_BUCKET", "mpr")
PREFIX_IN = os.getenv("S3_PREFIX_IN", "in/")
PREFIX_OUT = os.getenv("S3_PREFIX_OUT", "out/")
PREFIX_CHECKPOINTS = os.getenv("S3_PREFIX_CHECKPOINTS", "checkpoints/")
|
||||
|
||||
|
||||
@dataclass
class BlobObject:
    """Lightweight descriptor for a single object stored in the bucket."""

    # Full object key as returned by the s3 listing helper (includes any
    # store prefix, e.g. "out/clip.mp4").
    key: str
    # Basename portion of the key, as reported by the s3 listing helper.
    filename: str
    # Object size in bytes (mapped from the listing's "size" field).
    size_bytes: int
|
||||
|
||||
|
||||
class BlobStore:
    """
    Thin wrapper over the S3-compatible storage backend (MinIO / AWS S3).

    All configuration (endpoint URL, credentials, region) is read from
    environment variables by the underlying s3 module. That module is
    imported lazily inside each method so importing this module never
    pulls in boto3.

    NOTE: the store's ``prefix`` is applied only by :meth:`list`; the
    ``key`` arguments to :meth:`download_to_temp`, :meth:`upload`, and
    :meth:`get_url` are used as-is (full keys within the bucket).
    """

    def __init__(self, bucket: str, prefix: str = ""):
        """
        Args:
            bucket: Name of the target bucket.
            prefix: Key prefix this store is scoped to when listing.
        """
        self.bucket = bucket
        self.prefix = prefix

    def _full_prefix(self, prefix: str) -> str:
        """Combine the store's scoping prefix with a caller prefix."""
        return self.prefix + prefix

    def list(
        self,
        prefix: str = "",
        extensions: Optional[set[str]] = None,
    ) -> list[BlobObject]:
        """
        List objects under the store's prefix.

        Args:
            prefix: Additional prefix appended after the store prefix.
            extensions: If given, only objects whose extension is in this
                set are returned (filtering is done by the s3 helper).

        Returns:
            One BlobObject per matching object.
        """
        from core.storage.s3 import list_objects

        raw = list_objects(
            self.bucket,
            prefix=self._full_prefix(prefix),
            extensions=extensions,
        )
        # Build the result in a single comprehension instead of an
        # append-in-loop (same objects, clearer and faster).
        return [
            BlobObject(
                key=obj["key"],
                filename=obj["filename"],
                size_bytes=obj["size"],
            )
            for obj in raw
        ]

    def download_to_temp(self, key: str) -> str:
        """Download *key* to a temp file and return the local path.

        Caller is responsible for deleting the temp file.
        """
        from core.storage.s3 import download_to_temp

        return download_to_temp(self.bucket, key)

    def upload(self, local_path: str, key: str) -> None:
        """Upload the file at *local_path* to the bucket under *key*."""
        from core.storage.s3 import upload_file

        upload_file(local_path, self.bucket, key)

    def get_url(self, key: str, expires: int = 3600) -> str:
        """Return a presigned URL for *key*, valid for *expires* seconds."""
        from core.storage.s3 import get_presigned_url

        return get_presigned_url(self.bucket, key, expires=expires)
|
||||
|
||||
|
||||
def get_store(purpose: str = "out") -> BlobStore:
    """
    Build a BlobStore scoped to the prefix for *purpose*.

    Purposes map to prefixes:
        "in"          → source media       (S3_PREFIX_IN)
        "out"         → transcoded output  (S3_PREFIX_OUT)
        "checkpoints" → detection blobs    (S3_PREFIX_CHECKPOINTS)

    All purposes share one bucket (S3_BUCKET). An unrecognized purpose
    yields a store with no prefix, i.e. scoped to the bucket root.
    """
    if purpose == "in":
        scoped = PREFIX_IN
    elif purpose == "out":
        scoped = PREFIX_OUT
    elif purpose == "checkpoints":
        scoped = PREFIX_CHECKPOINTS
    else:
        scoped = ""
    return BlobStore(BUCKET, prefix=scoped)
|
||||
Reference in New Issue
Block a user