Refactor MinIO storage for Kubernetes

This commit is contained in:
2026-03-26 09:20:23 -03:00
parent e27cb5bcc3
commit c9ba9e4f5f
22 changed files with 961 additions and 18 deletions

112
core/storage/blob.py Normal file
View File

@@ -0,0 +1,112 @@
"""
Cloud-agnostic blob storage interface.
All file-based sources (chunks, uploads, checkpoints) go through MinIO.
Local dev runs MinIO in docker-compose — same code path as production.
Production changes S3_ENDPOINT_URL; nothing else changes.
Single bucket, multiple prefixes:
in/ — source media
out/ — transcoded chunks
checkpoints/ — detection intermediate blobs (frames, crops)
Each prefix is independently configurable via env vars so they can
be split into separate buckets later if needed.
Nothing outside core/storage/ should import boto3 directly.
"""
from __future__ import annotations
import os
from dataclasses import dataclass
from typing import Optional
# Single bucket, prefix-based layout. Each prefix is read from its own
# env var so a prefix can later be promoted to a dedicated bucket
# without touching call sites (they go through get_store()).
BUCKET = os.environ.get("S3_BUCKET", "mpr")  # shared bucket name
PREFIX_IN = os.environ.get("S3_PREFIX_IN", "in/")  # source media
PREFIX_OUT = os.environ.get("S3_PREFIX_OUT", "out/")  # transcoded chunks
PREFIX_CHECKPOINTS = os.environ.get("S3_PREFIX_CHECKPOINTS", "checkpoints/")  # detection blobs
@dataclass
class BlobObject:
    """A single object listed from the blob store."""
    # Full object key within the bucket (includes any prefix).
    key: str
    # Base filename portion of the key, as reported by the s3 module.
    filename: str
    # Object size in bytes.
    size_bytes: int
class BlobStore:
    """
    Thin wrapper over the S3-compatible storage backend (MinIO / AWS S3).

    All configuration (endpoint URL, credentials, region) is read from
    environment variables by the underlying s3 module.

    NOTE: the core.storage.s3 module is imported lazily inside each
    method so that importing this module never pulls in boto3 — nothing
    outside core/storage/ should depend on it directly (see module doc).
    """

    def __init__(self, bucket: str, prefix: str = ""):
        # `prefix` scopes every key this store reads/writes (e.g. "out/").
        self.bucket = bucket
        self.prefix = prefix

    def _full_prefix(self, prefix: str) -> str:
        """Combine the store-level prefix with a caller-supplied prefix."""
        return self.prefix + prefix

    def list(
        self,
        prefix: str = "",
        extensions: Optional[set[str]] = None,
    ) -> list[BlobObject]:
        """List objects in the bucket, optionally filtered by extension.

        Args:
            prefix: extra prefix appended to the store's own prefix.
            extensions: if given, only objects whose extension is in the
                set are returned (filtering is done by the s3 module).

        Returns:
            One BlobObject per listed object.
        """
        from core.storage.s3 import list_objects

        raw = list_objects(
            self.bucket,
            prefix=self._full_prefix(prefix),
            extensions=extensions,
        )
        # Comprehension over a manual append loop (clearer, ruff PERF401).
        return [
            BlobObject(
                key=obj["key"],
                filename=obj["filename"],
                size_bytes=obj["size"],
            )
            for obj in raw
        ]

    def download_to_temp(self, key: str) -> str:
        """Download a blob to a temp file. Caller is responsible for cleanup."""
        from core.storage.s3 import download_to_temp
        return download_to_temp(self.bucket, key)

    def upload(self, local_path: str, key: str) -> None:
        """Upload a local file to the bucket under `key`."""
        from core.storage.s3 import upload_file
        upload_file(local_path, self.bucket, key)

    def get_url(self, key: str, expires: int = 3600) -> str:
        """Return a presigned URL for `key`, valid for `expires` seconds."""
        from core.storage.s3 import get_presigned_url
        return get_presigned_url(self.bucket, key, expires=expires)
def get_store(purpose: str = "out") -> BlobStore:
    """
    Return a BlobStore scoped to the prefix for the given purpose.

    Purposes map to prefixes:
        "in"          → source media (S3_PREFIX_IN)
        "out"         → transcoded output (S3_PREFIX_OUT)
        "checkpoints" → detection blobs (S3_PREFIX_CHECKPOINTS)

    Every purpose shares the same bucket (S3_BUCKET); an unrecognized
    purpose yields an unscoped store (empty prefix), matching the
    previous behavior.
    """
    if purpose == "in":
        scope = PREFIX_IN
    elif purpose == "out":
        scope = PREFIX_OUT
    elif purpose == "checkpoints":
        scope = PREFIX_CHECKPOINTS
    else:
        scope = ""
    return BlobStore(BUCKET, prefix=scope)