phase 1

2026-03-28 08:46:06 -03:00
parent acc99e691d
commit 0bd3888155
30 changed files with 390 additions and 1044 deletions
--- a/core/api/chunker_sse.py
+++ b/core/api/chunker_sse.py
@@ -1,8 +1,8 @@
 """
 SSE endpoint for chunker pipeline events.

-Uses Redis as the event bus between Celery workers and the SSE stream.
-Celery worker pushes events via core.events, SSE endpoint polls them.
+Uses Redis as the event bus. Pipeline pushes events via core.events,
+SSE endpoint polls them.

 GET /chunker/stream/{job_id} → text/event-stream
 """
--- a/core/api/detect/__init__.py
+++ b/core/api/detect/__init__.py
@@ -0,0 +1,20 @@
+"""
+Detection API — aggregated router.
+
+Combines all detect sub-routers into a single include for main.py.
+"""
+
+from fastapi import APIRouter
+
+from .sources import router as sources_router
+from .run import router as run_router
+from .sse import router as sse_router
+from .replay import router as replay_router
+from .config import router as config_router
+
+router = APIRouter()
+router.include_router(sources_router)
+router.include_router(run_router)
+router.include_router(sse_router)
+router.include_router(replay_router)
+router.include_router(config_router)
--- a/core/api/detect/config.py
+++ b/core/api/detect/config.py
--- a/core/api/detect/replay.py
+++ b/core/api/detect/replay.py
--- a/core/api/detect_sources.py
+++ b/core/api/detect_sources.py
@@ -1,19 +1,9 @@
 """
-Source browser for detection pipeline.
+Pipeline run endpoints.

-Lists available media sources from blob storage (MinIO).
-All file-based sources go through MinIO — no host filesystem access.
-The pipeline downloads chunks to a temp path before processing.
-
-Source types (current and future):
-  - chunk_job: pre-chunked segments in MinIO (current)
-  - upload:    user-uploaded file, lands in MinIO via upload endpoint (future)
-  - device:    local camera/capture card via ffmpeg, no MinIO (future)
-  - stream:    RTMP/HLS URL via ffmpeg, no MinIO (future)
-
-GET  /detect/sources                    — list chunk jobs from blob store
-GET  /detect/sources/{job_id}/chunks    — list chunks for a specific job
-POST /detect/run                        — launch pipeline on selected source
+POST /detect/run            — launch pipeline on selected source
+POST /detect/stop/{job_id}  — cancel a running pipeline
+POST /detect/clear/{job_id} — clear events from Redis
 """

 from __future__ import annotations
@@ -31,23 +21,10 @@ logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/detect", tags=["detect"])

 # In-process pipeline tracking
-_running_jobs: dict[str, "threading.Thread"] = {}
+_running_jobs: dict[str, threading.Thread] = {}
 _cancelled_jobs: set[str] = set()


-class ChunkInfo(BaseModel):
-    filename: str
-    key: str
-    size_bytes: int
-
-
-class SourceInfo(BaseModel):
-    job_id: str
-    source_type: str = "chunk_job"
-    chunk_count: int
-    total_bytes: int = 0
-
-
 class RunRequest(BaseModel):
    video_path: str           # storage key
    profile_name: str = "soccer_broadcast"
@@ -64,91 +41,6 @@ class RunResponse(BaseModel):
    video_path: str


-# ---------------------------------------------------------------------------
-# Source listing
-# ---------------------------------------------------------------------------
-
-def _list_sources() -> list[SourceInfo]:
-    """List chunk jobs from blob storage."""
-    from core.storage.blob import get_store
-
-    store = get_store("out")
-    try:
-        objects = store.list(prefix="chunks/")
-    except Exception as e:
-        logger.warning("Failed to list blob sources: %s", e)
-        return []
-
-    jobs: dict[str, int] = {}
-    job_bytes: dict[str, int] = {}
-    for obj in objects:
-        # Keys include store prefix: out/chunks/{job_id}/file.mp4
-        # Strip prefix to get: chunks/{job_id}/file.mp4
-        rel_key = obj.key.removeprefix(store.prefix)
-        parts = rel_key.split("/")
-        if len(parts) >= 3 and parts[0] == "chunks":
-            job_id = parts[1]
-            jobs[job_id] = jobs.get(job_id, 0) + 1
-            job_bytes[job_id] = job_bytes.get(job_id, 0) + obj.size_bytes
-
-    sources = []
-    for job_id, count in sorted(jobs.items()):
-        source = SourceInfo(
-            job_id=job_id,
-            source_type="chunk_job",
-            chunk_count=count,
-            total_bytes=job_bytes.get(job_id, 0),
-        )
-        sources.append(source)
-    return sources
-
-
-@router.get("/sources", response_model=list[SourceInfo])
-def list_sources():
-    """List available chunk jobs from blob storage."""
-    return _list_sources()
-
-
-@router.get("/sources/{source_job_id}/chunks", response_model=list[ChunkInfo])
-def list_chunks(source_job_id: str):
-    """List chunks for a specific source job."""
-    from core.storage.blob import get_store
-
-    store = get_store("out")
-    try:
-        objects = store.list(prefix=f"chunks/{source_job_id}/", extensions={".mp4"})
-    except Exception as e:
-        logger.warning("Failed to list chunks for %s: %s", source_job_id, e)
-        raise HTTPException(status_code=503, detail=f"Blob storage unavailable: {e}")
-
-    if not objects:
-        raise HTTPException(status_code=404, detail=f"Source not found: {source_job_id}")
-
-    chunks = []
-    for obj in objects:
-        info = ChunkInfo(filename=obj.filename, key=obj.key, size_bytes=obj.size_bytes)
-        chunks.append(info)
-    return sorted(chunks, key=lambda c: c.filename)
-
-
-@router.get("/sources/{source_job_id}/chunks/{filename}/url")
-def get_chunk_url(source_job_id: str, filename: str):
-    """Return a presigned URL for previewing a chunk in the browser."""
-    from core.storage.blob import get_store
-
-    store = get_store("out")
-    key = f"chunks/{source_job_id}/{filename}"
-    try:
-        url = store.get_url(key, expires=3600)
-    except Exception as e:
-        raise HTTPException(status_code=503, detail=f"Could not generate URL: {e}")
-    return {"url": url}
-
-
-# ---------------------------------------------------------------------------
-# Run pipeline
-# ---------------------------------------------------------------------------
-
 def _resolve_video_path(video_path: str) -> str:
    """Download a chunk from blob storage to a temp file."""
    from core.storage.blob import get_store
@@ -216,7 +108,6 @@ def run_pipeline(req: RunRequest):
            emit.job_complete(job_id, {"status": "cancelled"})
        except Exception as e:
            logger.exception("Pipeline run %s failed: %s", job_id, e)
-            # Mark the current/last stage as error in the graph
            from detect.graph import _node_states, NODES
            if job_id in _node_states:
                states = _node_states[job_id]
--- a/core/api/detect/sources.py
+++ b/core/api/detect/sources.py
@@ -0,0 +1,108 @@
+"""
+Source browser for detection pipeline.
+
+Lists available media sources from blob storage (MinIO).
+
+GET  /detect/sources                              — list chunk jobs
+GET  /detect/sources/{job_id}/chunks              — list chunks for a job
+GET  /detect/sources/{job_id}/chunks/{name}/url   — presigned preview URL
+"""
+
+from __future__ import annotations
+
+import logging
+
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/detect", tags=["detect"])
+
+
+class ChunkInfoResponse(BaseModel):
+    filename: str
+    key: str
+    size_bytes: int
+
+
+class SourceInfoResponse(BaseModel):
+    job_id: str
+    source_type: str = "chunk_job"
+    chunk_count: int
+    total_bytes: int = 0
+
+
+def _list_sources() -> list[SourceInfoResponse]:
+    """List chunk jobs from blob storage."""
+    from core.storage.blob import get_store
+
+    store = get_store("out")
+    try:
+        objects = store.list(prefix="chunks/")
+    except Exception as e:
+        logger.warning("Failed to list blob sources: %s", e)
+        return []
+
+    jobs: dict[str, int] = {}
+    job_bytes: dict[str, int] = {}
+    for obj in objects:
+        rel_key = obj.key.removeprefix(store.prefix)
+        parts = rel_key.split("/")
+        if len(parts) >= 3 and parts[0] == "chunks":
+            job_id = parts[1]
+            jobs[job_id] = jobs.get(job_id, 0) + 1
+            job_bytes[job_id] = job_bytes.get(job_id, 0) + obj.size_bytes
+
+    sources = []
+    for job_id, count in sorted(jobs.items()):
+        source = SourceInfoResponse(
+            job_id=job_id,
+            source_type="chunk_job",
+            chunk_count=count,
+            total_bytes=job_bytes.get(job_id, 0),
+        )
+        sources.append(source)
+    return sources
+
+
+@router.get("/sources", response_model=list[SourceInfoResponse])
+def list_sources():
+    """List available chunk jobs from blob storage."""
+    return _list_sources()
+
+
+@router.get("/sources/{source_job_id}/chunks", response_model=list[ChunkInfoResponse])
+def list_chunks(source_job_id: str):
+    """List chunks for a specific source job."""
+    from core.storage.blob import get_store
+
+    store = get_store("out")
+    try:
+        objects = store.list(prefix=f"chunks/{source_job_id}/", extensions={".mp4"})
+    except Exception as e:
+        logger.warning("Failed to list chunks for %s: %s", source_job_id, e)
+        raise HTTPException(status_code=503, detail=f"Blob storage unavailable: {e}")
+
+    if not objects:
+        raise HTTPException(status_code=404, detail=f"Source not found: {source_job_id}")
+
+    chunks = []
+    for obj in objects:
+        info = ChunkInfoResponse(filename=obj.filename, key=obj.key, size_bytes=obj.size_bytes)
+        chunks.append(info)
+    return sorted(chunks, key=lambda c: c.filename)
+
+
+@router.get("/sources/{source_job_id}/chunks/{filename}/url")
+def get_chunk_url(source_job_id: str, filename: str):
+    """Return a presigned URL for previewing a chunk in the browser."""
+    from core.storage.blob import get_store
+
+    store = get_store("out")
+    key = f"chunks/{source_job_id}/{filename}"
+    try:
+        url = store.get_url(key, expires=3600)
+    except Exception as e:
+        raise HTTPException(status_code=503, detail=f"Could not generate URL: {e}")
+    return {"url": url}
--- a/core/api/detect/sse.py
+++ b/core/api/detect/sse.py
--- a/core/api/main.py
+++ b/core/api/main.py
@@ -19,10 +19,7 @@ from fastapi.middleware.cors import CORSMiddleware
 from strawberry.fastapi import GraphQLRouter

 from core.api.chunker_sse import router as chunker_router
-from core.api.detect_sse import router as detect_router
-from core.api.detect_replay import router as detect_replay_router
-from core.api.detect_config import router as detect_config_router
-from core.api.detect_sources import router as detect_sources_router
+from core.api.detect import router as detect_router
 from core.api.graphql import schema as graphql_schema

 CALLBACK_API_KEY = os.environ.get("CALLBACK_API_KEY", "")
@@ -61,18 +58,9 @@ app.include_router(graphql_router, prefix="/graphql")
 # Chunker SSE
 app.include_router(chunker_router)

-# Detection SSE
+# Detection API (sources, run, SSE, replay, config)
 app.include_router(detect_router)

-# Detection replay/retry
-app.include_router(detect_replay_router)
-
-# Detection config
-app.include_router(detect_config_router)
-
-# Detection sources + run launcher
-app.include_router(detect_sources_router)
-

 @app.get("/health")
 def health():