This commit is contained in:
2026-03-28 09:40:07 -03:00
parent 0bd3888155
commit e46bbc419c
10 changed files with 508 additions and 49 deletions

View File

@@ -4,7 +4,7 @@ Detection pipeline graph.
detect/graph/
nodes.py — node functions (one per stage)
events.py — graph_update SSE emission
runner.py — pipeline execution (LangGraph wrapper, checkpoint, cancel)
runner.py — pipeline execution (LangGraph wrapper, checkpoint, cancel, pause)
"""
from .nodes import NODES, NODE_FUNCTIONS
@@ -12,8 +12,15 @@ from .runner import (
PipelineCancelled,
build_graph,
clear_cancel_check,
clear_pause,
get_pipeline,
init_pause,
is_paused,
pause_pipeline,
resume_pipeline,
set_cancel_check,
set_pause_after_stage,
step_pipeline,
)
from .events import _node_states
@@ -25,5 +32,12 @@ __all__ = [
"get_pipeline",
"set_cancel_check",
"clear_cancel_check",
"init_pause",
"clear_pause",
"pause_pipeline",
"resume_pipeline",
"step_pipeline",
"set_pause_after_stage",
"is_paused",
"_node_states",
]

View File

@@ -7,13 +7,17 @@ custom runner in Phase 3, with an executor socket for distributed dispatch.
from __future__ import annotations
import logging
import os
import threading
from langgraph.graph import END, StateGraph
from detect.state import DetectState
from .nodes import NODES, NODE_FUNCTIONS
logger = logging.getLogger(__name__)
# --- Checkpoint wrapper ---
@@ -27,7 +31,15 @@ class PipelineCancelled(Exception):
pass
# Cancellation hook — set by the run endpoint, checked before each node
class PipelinePaused(Exception):
    """Signal that a pipeline is paused; intended for internal flow control."""
# ---------------------------------------------------------------------------
# Cancellation — checked before each node
# ---------------------------------------------------------------------------
# Per-job cancellation probes, keyed by job id. A probe is invoked with no
# arguments and the job is treated as cancelled when it returns a truthy value.
# NOTE(review): the value annotation is the builtin ``callable`` function, not a
# typing construct; ``Callable[[], bool]`` looks like the intended type — confirm.
_cancel_check: dict[str, callable] = {}
@@ -39,6 +51,92 @@ def clear_cancel_check(job_id: str):
_cancel_check.pop(job_id, None)
# ---------------------------------------------------------------------------
# Pause / Resume / Step — checked after each node completes
#
# _pause_gate: threading.Event per job. When cleared, the runner blocks.
# When set, the runner proceeds to the next node.
# _pause_after_stage: if True, automatically clear the gate after each node.
# ---------------------------------------------------------------------------
# job_id -> gate event. While the event is cleared the runner blocks in its
# pause check; while it is set the runner carries on.
_pause_gate: dict[str, threading.Event] = {}
# job_id -> True when the gate should be cleared again after every node.
_pause_after_stage: dict[str, bool] = {}
def init_pause(job_id: str, pause_after_stage: bool = False):
    """Register fresh pause state for ``job_id``.

    A new gate event is created in the set (running) state, so the job is
    not held until something clears it. The initial pause-after-each-stage
    flag is stored alongside. Any state already registered for the job is
    replaced.
    """
    run_gate = threading.Event()
    run_gate.set()  # a set gate means "keep running"

    _pause_gate[job_id] = run_gate
    _pause_after_stage[job_id] = pause_after_stage
def clear_pause(job_id: str):
    """Forget all pause state held for ``job_id``.

    Safe to call for a job that has no state registered.
    """
    for registry in (_pause_gate, _pause_after_stage):
        registry.pop(job_id, None)
def pause_pipeline(job_id: str):
    """Request a pause for ``job_id``.

    Clears the job's gate, so the runner blocks at its next pause check
    (made after a stage completes). Does nothing when the job has no gate
    registered.
    """
    gate = _pause_gate.get(job_id)
    if not gate:
        return

    gate.clear()
    logger.info("Pipeline %s paused", job_id)
def resume_pipeline(job_id: str):
    """Release a paused pipeline by setting the gate for ``job_id``.

    Does nothing when the job has no gate registered.
    """
    gate = _pause_gate.get(job_id)
    if not gate:
        return

    gate.set()
    logger.info("Pipeline %s resumed", job_id)
def step_pipeline(job_id: str):
    """Let the pipeline run one more stage, then pause again.

    Turns on pause-after-stage mode for the job and sets its gate. The pause
    check made after the next stage clears the gate again, so exactly one
    stage runs per call.

    Does nothing when the job has no gate registered (never initialised, or
    already cleaned up). In particular no pause-after-stage entry is written
    for such a job, because no pause check would ever read it.
    """
    gate = _pause_gate.get(job_id)
    if gate is None:
        return

    # Flag first, then release: the flag must be visible before the runner
    # can reach its next pause check.
    _pause_after_stage[job_id] = True
    gate.set()
    logger.info("Pipeline %s stepping", job_id)
def set_pause_after_stage(job_id: str, enabled: bool):
    """Turn pause-after-each-stage mode on or off for ``job_id``.

    Disabling the mode also sets the job's gate, so a pipeline that is
    currently held resumes.

    Does nothing when the job has no gate registered (never initialised, or
    already cleaned up). No flag entry is written for such a job, because no
    pause check would ever read it.
    """
    gate = _pause_gate.get(job_id)
    if gate is None:
        return

    _pause_after_stage[job_id] = enabled
    if not enabled:
        # Release the runner in case it is blocked at a pause check right now.
        gate.set()
def is_paused(job_id: str) -> bool:
    """Return True when ``job_id`` has a gate registered and it is cleared.

    A job with no gate registered is reported as not paused.
    """
    gate = _pause_gate.get(job_id)
    if gate is None:
        return False
    return not gate.is_set()
def _wait_if_paused(job_id: str, node_name: str):
    """Hold the calling thread while the job's pause gate is cleared.

    Called after a node completes. Jobs without registered pause state
    return immediately. When pause-after-stage mode is on for the job, the
    gate is cleared here first and a "Paused after <node>" log line is
    emitted, so the wait below blocks until something sets the gate again.

    Args:
        job_id: Job whose gate is consulted.
        node_name: Name of the node that just completed; used in the log line.

    Raises:
        PipelineCancelled: If the job's registered cancel check returns a
            truthy value while the thread is waiting.
    """
    gate = _pause_gate.get(job_id)
    if gate is None:
        return

    if _pause_after_stage.get(job_id, False):
        gate.clear()
        # Imported at function scope, as in the original code.
        # NOTE(review): presumably this avoids an import cycle — confirm.
        from detect import emit

        emit.log(job_id, "Pipeline", "INFO", f"Paused after {node_name}")

    # Wake up every 0.5 s so a cancellation is noticed even while paused.
    while not gate.wait(timeout=0.5):
        check = _cancel_check.get(job_id)
        if check and check():
            # Plain literal: the message has no placeholders to interpolate.
            raise PipelineCancelled("Cancelled while paused before next stage")
def _checkpointing_node(node_name: str, node_fn):
"""Wrap a node function to auto-checkpoint after completion."""
@@ -81,6 +179,10 @@ def _checkpointing_node(node_name: str, node_fn):
output_json=output_json,
)
_latest_checkpoint[job_id] = new_checkpoint_id
# Pause check — blocks if paused, respects cancel while waiting
_wait_if_paused(job_id, node_name)
return result
wrapper.__name__ = node_fn.__name__