""" Source browser for detection pipeline. Lists available media sources from blob storage (MinIO). All file-based sources go through MinIO — no host filesystem access. The pipeline downloads chunks to a temp path before processing. Source types (current and future): - chunk_job: pre-chunked segments in MinIO (current) - upload: user-uploaded file, lands in MinIO via upload endpoint (future) - device: local camera/capture card via ffmpeg, no MinIO (future) - stream: RTMP/HLS URL via ffmpeg, no MinIO (future) GET /detect/sources — list chunk jobs from blob store GET /detect/sources/{job_id}/chunks — list chunks for a specific job POST /detect/run — launch pipeline on selected source """ from __future__ import annotations import logging import os import threading import uuid from fastapi import APIRouter, HTTPException from pydantic import BaseModel logger = logging.getLogger(__name__) router = APIRouter(prefix="/detect", tags=["detect"]) # In-process pipeline tracking _running_jobs: dict[str, "threading.Thread"] = {} _cancelled_jobs: set[str] = set() class ChunkInfo(BaseModel): filename: str key: str size_bytes: int class SourceInfo(BaseModel): job_id: str source_type: str = "chunk_job" chunk_count: int total_bytes: int = 0 class RunRequest(BaseModel): video_path: str # storage key profile_name: str = "soccer_broadcast" source_asset_id: str = "" checkpoint: bool = True skip_vlm: bool = False skip_cloud: bool = False log_level: str = "INFO" # INFO | DEBUG class RunResponse(BaseModel): status: str job_id: str video_path: str # --------------------------------------------------------------------------- # Source listing # --------------------------------------------------------------------------- def _list_sources() -> list[SourceInfo]: """List chunk jobs from blob storage.""" from core.storage.blob import get_store store = get_store("out") try: objects = store.list(prefix="chunks/") except Exception as e: logger.warning("Failed to list blob sources: %s", e) return [] jobs: dict[str, int] = {} job_bytes: dict[str, int] = {} for obj in objects: # Keys include store prefix: out/chunks/{job_id}/file.mp4 # Strip prefix to get: chunks/{job_id}/file.mp4 rel_key = obj.key.removeprefix(store.prefix) parts = rel_key.split("/") if len(parts) >= 3 and parts[0] == "chunks": job_id = parts[1] jobs[job_id] = jobs.get(job_id, 0) + 1 job_bytes[job_id] = job_bytes.get(job_id, 0) + obj.size_bytes sources = [] for job_id, count in sorted(jobs.items()): source = SourceInfo( job_id=job_id, source_type="chunk_job", chunk_count=count, total_bytes=job_bytes.get(job_id, 0), ) sources.append(source) return sources @router.get("/sources", response_model=list[SourceInfo]) def list_sources(): """List available chunk jobs from blob storage.""" return _list_sources() @router.get("/sources/{source_job_id}/chunks", response_model=list[ChunkInfo]) def list_chunks(source_job_id: str): """List chunks for a specific source job.""" from core.storage.blob import get_store store = get_store("out") try: objects = store.list(prefix=f"chunks/{source_job_id}/", extensions={".mp4"}) except Exception as e: logger.warning("Failed to list chunks for %s: %s", source_job_id, e) raise HTTPException(status_code=503, detail=f"Blob storage unavailable: {e}") if not objects: raise HTTPException(status_code=404, detail=f"Source not found: {source_job_id}") chunks = [] for obj in objects: info = ChunkInfo(filename=obj.filename, key=obj.key, size_bytes=obj.size_bytes) chunks.append(info) return sorted(chunks, key=lambda c: c.filename) 
@router.get("/sources/{source_job_id}/chunks/{filename}/url") def get_chunk_url(source_job_id: str, filename: str): """Return a presigned URL for previewing a chunk in the browser.""" from core.storage.blob import get_store store = get_store("out") key = f"chunks/{source_job_id}/{filename}" try: url = store.get_url(key, expires=3600) except Exception as e: raise HTTPException(status_code=503, detail=f"Could not generate URL: {e}") return {"url": url} # --------------------------------------------------------------------------- # Run pipeline # --------------------------------------------------------------------------- def _resolve_video_path(video_path: str) -> str: """Download a chunk from blob storage to a temp file.""" from core.storage.blob import get_store store = get_store("out") try: return store.download_to_temp(video_path) except Exception as e: raise HTTPException(status_code=400, detail=f"Failed to download chunk: {e}") @router.post("/run", response_model=RunResponse) def run_pipeline(req: RunRequest): """Launch a detection pipeline run on a source chunk.""" from detect import emit from detect.graph import get_pipeline from detect.state import DetectState local_path = _resolve_video_path(req.video_path) job_id = str(uuid.uuid4()) if req.skip_vlm: os.environ["SKIP_VLM"] = "1" elif "SKIP_VLM" in os.environ: del os.environ["SKIP_VLM"] if req.skip_cloud: os.environ["SKIP_CLOUD"] = "1" elif "SKIP_CLOUD" in os.environ: del os.environ["SKIP_CLOUD"] # Clear any stale events from a previous run with same job_id from core.events import _get_redis from detect.events import DETECT_EVENTS_PREFIX r = _get_redis() r.delete(f"{DETECT_EVENTS_PREFIX}:{job_id}") emit.set_run_context( run_id=job_id, parent_job_id=job_id, run_type="initial", log_level=req.log_level, ) pipeline = get_pipeline(checkpoint=req.checkpoint) initial_state = DetectState( video_path=local_path, job_id=job_id, profile_name=req.profile_name, source_asset_id=req.source_asset_id, ) from detect.graph import PipelineCancelled, set_cancel_check, clear_cancel_check set_cancel_check(job_id, lambda: job_id in _cancelled_jobs) def _run(): try: emit.log(job_id, "Pipeline", "INFO", f"Starting pipeline: {req.video_path} (profile={req.profile_name})") pipeline.invoke(initial_state) emit.log(job_id, "Pipeline", "INFO", "Pipeline completed successfully") emit.job_complete(job_id, {"status": "completed"}) except PipelineCancelled: emit.log(job_id, "Pipeline", "INFO", "Pipeline cancelled") emit.job_complete(job_id, {"status": "cancelled"}) except Exception as e: logger.exception("Pipeline run %s failed: %s", job_id, e) # Mark the current/last stage as error in the graph from detect.graph import _node_states, NODES if job_id in _node_states: states = _node_states[job_id] for node in reversed(NODES): if states.get(node) in ("running", "done"): states[node] = "error" break nodes = [{"id": n, "status": states[n]} for n in NODES] emit.graph_update(job_id, nodes) emit.log(job_id, "Pipeline", "ERROR", str(e)) emit.job_complete(job_id, {"status": "failed", "error": str(e)}) finally: _running_jobs.pop(job_id, None) _cancelled_jobs.discard(job_id) clear_cancel_check(job_id) emit.clear_run_context() thread = threading.Thread(target=_run, daemon=True, name=f"pipeline-{job_id}") _running_jobs[job_id] = thread thread.start() return RunResponse(status="started", job_id=job_id, video_path=req.video_path) @router.post("/stop/{job_id}") def stop_pipeline(job_id: str): """Stop a running pipeline. 
@router.post("/stop/{job_id}")
def stop_pipeline(job_id: str):
    """Stop a running pipeline.

    Signals cancellation; the worker thread checks the flag at the next
    stage boundary, so the current stage runs to completion first.
    """
    from detect import emit

    if job_id not in _running_jobs:
        raise HTTPException(status_code=404, detail=f"No running pipeline: {job_id}")
    _cancelled_jobs.add(job_id)
    emit.log(job_id, "Pipeline", "INFO", "Stop requested; cancelling after current stage")
    return {"status": "stopping", "job_id": job_id}


@router.post("/clear/{job_id}")
def clear_pipeline(job_id: str):
    """Clear events for a job from Redis."""
    from core.events import _get_redis
    from detect.events import DETECT_EVENTS_PREFIX

    r = _get_redis()
    r.delete(f"{DETECT_EVENTS_PREFIX}:{job_id}")
    return {"status": "cleared", "job_id": job_id}
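
# Minimal smoke test, a sketch rather than part of the router: mounts this
# router on a throwaway FastAPI app and hits the listing endpoint. It assumes
# core.storage.blob is importable and that a blob store is reachable (if not,
# _list_sources logs a warning and the endpoint returns an empty list).
if __name__ == "__main__":  # pragma: no cover
    from fastapi import FastAPI
    from fastapi.testclient import TestClient

    app = FastAPI()
    app.include_router(router)
    client = TestClient(app)
    print(client.get("/detect/sources").json())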