Files
mediaproc/core/api/detect/run.py
2026-03-28 10:05:59 -03:00

236 lines
7.4 KiB
Python

"""
Pipeline run endpoints.
POST /detect/run — launch pipeline on selected source
POST /detect/stop/{job_id} — cancel a running pipeline
POST /detect/pause/{job_id} — pause after current stage
POST /detect/resume/{job_id} — resume a paused pipeline
POST /detect/step/{job_id} — run one stage then pause
POST /detect/clear/{job_id} — clear events from Redis
GET /detect/status/{job_id} — pipeline run status
"""
from __future__ import annotations
import logging
import os
import threading
import uuid
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/detect", tags=["detect"])

# In-process pipeline tracking:
#   _running_jobs   — job_id -> worker thread for runs launched by /run;
#                     entries are removed by the worker's `finally` block.
#   _cancelled_jobs — job_ids flagged by /stop; the pipeline's cancel-check
#                     callback polls this set between stages.
# NOTE(review): no lock around these — assumes CPython's atomic dict/set ops
# are sufficient for the endpoint/worker interleavings here; confirm.
_running_jobs: dict[str, threading.Thread] = {}
_cancelled_jobs: set[str] = set()
class RunRequest(BaseModel):
    """Request body for POST /detect/run."""
    video_path: str  # storage key of the source chunk in the "out" blob store
    profile_name: str = "soccer_broadcast"  # detection profile passed to get_pipeline
    source_asset_id: str = ""  # optional upstream asset reference carried into DetectState
    checkpoint: bool = True  # enable pipeline checkpointing (get_pipeline flag)
    skip_vlm: bool = False  # sets SKIP_VLM=1 in the process environment for this run
    skip_cloud: bool = False  # sets SKIP_CLOUD=1 in the process environment for this run
    log_level: str = "INFO"  # INFO | DEBUG
    pause_after_stage: bool = False  # start the run in pause-after-each-stage mode
class RunResponse(BaseModel):
    """Response body for POST /detect/run."""
    status: str  # "started" on successful launch
    job_id: str  # freshly generated UUID identifying this run
    video_path: str  # echoes the requested storage key (not the local temp path)
def _resolve_video_path(video_path: str) -> str:
    """Download a chunk from blob storage to a local temp file.

    Args:
        video_path: Storage key of the source chunk in the "out" store.

    Returns:
        Filesystem path of the downloaded temporary file.

    Raises:
        HTTPException: 400 when the download fails for any reason.
    """
    from core.storage.blob import get_store

    store = get_store("out")
    try:
        return store.download_to_temp(video_path)
    except Exception as e:
        # Chain the original exception so server logs show the root cause
        # instead of "During handling of the above exception ...".
        raise HTTPException(
            status_code=400, detail=f"Failed to download chunk: {e}"
        ) from e
@router.post("/run", response_model=RunResponse)
def run_pipeline(req: RunRequest):
"""Launch a detection pipeline run on a source chunk."""
from detect import emit
from detect.graph import get_pipeline
from detect.state import DetectState
local_path = _resolve_video_path(req.video_path)
job_id = str(uuid.uuid4())
if req.skip_vlm:
os.environ["SKIP_VLM"] = "1"
elif "SKIP_VLM" in os.environ:
del os.environ["SKIP_VLM"]
if req.skip_cloud:
os.environ["SKIP_CLOUD"] = "1"
elif "SKIP_CLOUD" in os.environ:
del os.environ["SKIP_CLOUD"]
# Clear any stale events from a previous run with same job_id
from core.events import _get_redis
from detect.events import DETECT_EVENTS_PREFIX
r = _get_redis()
r.delete(f"{DETECT_EVENTS_PREFIX}:{job_id}")
emit.set_run_context(
run_id=job_id, parent_job_id=job_id, run_type="initial",
log_level=req.log_level,
)
pipeline = get_pipeline(checkpoint=req.checkpoint, profile_name=req.profile_name)
initial_state = DetectState(
video_path=local_path,
job_id=job_id,
profile_name=req.profile_name,
source_asset_id=req.source_asset_id,
)
from detect.graph import (
PipelineCancelled, set_cancel_check, clear_cancel_check,
init_pause, clear_pause,
)
set_cancel_check(job_id, lambda: job_id in _cancelled_jobs)
init_pause(job_id, pause_after_stage=req.pause_after_stage)
def _run():
try:
emit.log(job_id, "Pipeline", "INFO",
f"Starting pipeline: {req.video_path} (profile={req.profile_name})")
pipeline.invoke(initial_state)
emit.log(job_id, "Pipeline", "INFO", "Pipeline completed successfully")
emit.job_complete(job_id, {"status": "completed"})
except PipelineCancelled:
emit.log(job_id, "Pipeline", "INFO", "Pipeline cancelled")
emit.job_complete(job_id, {"status": "cancelled"})
except Exception as e:
logger.exception("Pipeline run %s failed: %s", job_id, e)
from detect.graph import _node_states, NODES
if job_id in _node_states:
states = _node_states[job_id]
for node in reversed(NODES):
if states.get(node) in ("running", "done"):
states[node] = "error"
break
nodes = [{"id": n, "status": states[n]} for n in NODES]
emit.graph_update(job_id, nodes)
emit.log(job_id, "Pipeline", "ERROR", str(e))
emit.job_complete(job_id, {"status": "failed", "error": str(e)})
finally:
_running_jobs.pop(job_id, None)
_cancelled_jobs.discard(job_id)
clear_cancel_check(job_id)
clear_pause(job_id)
emit.clear_run_context()
thread = threading.Thread(target=_run, daemon=True, name=f"pipeline-{job_id}")
_running_jobs[job_id] = thread
thread.start()
return RunResponse(status="started", job_id=job_id, video_path=req.video_path)
@router.post("/stop/{job_id}")
def stop_pipeline(job_id: str):
"""Stop a running pipeline. Signals cancellation; the thread checks on next stage."""
from detect import emit
if job_id not in _running_jobs:
raise HTTPException(status_code=404, detail=f"No running pipeline: {job_id}")
_cancelled_jobs.add(job_id)
emit.log(job_id, "Pipeline", "INFO", "Stop requested — cancelling after current stage")
return {"status": "stopping", "job_id": job_id}
@router.post("/pause/{job_id}")
def pause(job_id: str):
"""Pause a running pipeline after the current stage completes."""
from detect.graph import pause_pipeline
if job_id not in _running_jobs:
raise HTTPException(status_code=404, detail=f"No running pipeline: {job_id}")
pause_pipeline(job_id)
return {"status": "pausing", "job_id": job_id}
@router.post("/resume/{job_id}")
def resume(job_id: str):
"""Resume a paused pipeline."""
from detect.graph import resume_pipeline
if job_id not in _running_jobs:
raise HTTPException(status_code=404, detail=f"No running pipeline: {job_id}")
resume_pipeline(job_id)
return {"status": "running", "job_id": job_id}
@router.post("/step/{job_id}")
def step(job_id: str):
"""Run one stage then pause again."""
from detect.graph import step_pipeline
if job_id not in _running_jobs:
raise HTTPException(status_code=404, detail=f"No running pipeline: {job_id}")
step_pipeline(job_id)
return {"status": "stepping", "job_id": job_id}
@router.post("/pause-after-stage/{job_id}")
def toggle_pause_after_stage(job_id: str, enabled: bool = True):
"""Toggle pause-after-each-stage mode."""
from detect.graph import set_pause_after_stage
if job_id not in _running_jobs:
raise HTTPException(status_code=404, detail=f"No running pipeline: {job_id}")
set_pause_after_stage(job_id, enabled)
return {"status": "ok", "pause_after_stage": enabled, "job_id": job_id}
@router.get("/status/{job_id}")
def pipeline_status(job_id: str):
"""Get pipeline run status."""
from detect.graph import is_paused
running = job_id in _running_jobs
paused = is_paused(job_id)
cancelling = job_id in _cancelled_jobs
if cancelling:
status = "cancelling"
elif paused:
status = "paused"
elif running:
status = "running"
else:
status = "idle"
return {"status": status, "job_id": job_id}
@router.post("/clear/{job_id}")
def clear_pipeline(job_id: str):
"""Clear events for a job from Redis."""
from core.events import _get_redis
from detect.events import DETECT_EVENTS_PREFIX
r = _get_redis()
r.delete(f"{DETECT_EVENTS_PREFIX}:{job_id}")
return {"status": "cleared", "job_id": job_id}