phase 2
This commit is contained in:
@@ -7,13 +7,17 @@ custom runner in Phase 3, with an executor socket for distributed dispatch.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
|
||||
from langgraph.graph import END, StateGraph
|
||||
|
||||
from detect.state import DetectState
|
||||
from .nodes import NODES, NODE_FUNCTIONS
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# --- Checkpoint wrapper ---
|
||||
|
||||
@@ -27,7 +31,15 @@ class PipelineCancelled(Exception):
|
||||
pass
|
||||
|
||||
|
||||
# Cancellation hook — set by the run endpoint, checked before each node
|
||||
class PipelinePaused(Exception):
    """Signal that a pipeline has been paused.

    Used internally for flow control rather than to report a failure.
    """
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cancellation — checked before each node
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# job_id -> zero-argument callable; a truthy return value means the job has
# been cancelled (polled by _wait_if_paused while the runner is blocked).
# NOTE(review): `callable` here is the builtin function, not a type; a checker
# would expect something like `Callable[[], bool]` -- confirm before changing.
_cancel_check: dict[str, callable] = {}
|
||||
|
||||
|
||||
@@ -39,6 +51,92 @@ def clear_cancel_check(job_id: str):
|
||||
_cancel_check.pop(job_id, None)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pause / Resume / Step — checked after each node completes
|
||||
#
|
||||
# _pause_gate: threading.Event per job. When cleared, the runner blocks.
|
||||
# When set, the runner proceeds to the next node.
|
||||
# _pause_after_stage: if True, automatically clear the gate after each node.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# job_id -> gate Event: set means the runner may proceed, cleared means it
# blocks in _wait_if_paused until resume/step sets it again.
_pause_gate: dict[str, threading.Event] = {}
|
||||
# job_id -> True when _wait_if_paused should clear the gate after every node.
_pause_after_stage: dict[str, bool] = {}
|
||||
|
||||
|
||||
def init_pause(job_id: str, pause_after_stage: bool = False):
    """Register fresh pause state for ``job_id`` when its pipeline starts.

    The job's gate is created already set, so the runner is not blocked until
    something pauses it. ``pause_after_stage`` seeds the per-job flag that
    makes the runner pause again after every node.
    """
    open_gate = threading.Event()
    open_gate.set()  # set == running; a cleared gate blocks the runner
    _pause_gate[job_id], _pause_after_stage[job_id] = open_gate, pause_after_stage
|
||||
|
||||
|
||||
def clear_pause(job_id: str):
    """Drop all pause bookkeeping for ``job_id`` once its pipeline finishes.

    Missing entries are ignored, so this is safe for a job that was never
    registered.
    """
    for registry in (_pause_gate, _pause_after_stage):
        registry.pop(job_id, None)
|
||||
|
||||
|
||||
def pause_pipeline(job_id: str):
    """Request a pause for the pipeline running as ``job_id``.

    Clears the job's gate so the runner blocks at its next pause check, i.e.
    once the stage currently in progress has completed. Jobs without a
    registered gate are ignored.
    """
    job_gate = _pause_gate.get(job_id)
    if not job_gate:
        return
    job_gate.clear()
    logger.info("Pipeline %s paused", job_id)
|
||||
|
||||
|
||||
def resume_pipeline(job_id: str):
    """Release a paused pipeline so it continues with its next stage.

    Sets the job's gate, which unblocks a runner waiting on it. Jobs without
    a registered gate are ignored.
    """
    job_gate = _pause_gate.get(job_id)
    if not job_gate:
        return
    job_gate.set()
    logger.info("Pipeline %s resumed", job_id)
|
||||
|
||||
|
||||
def step_pipeline(job_id: str):
    """Let a paused pipeline run one more stage, then pause again.

    Turns on pause-after-stage for the job and opens its gate; the runner's
    post-node pause check clears the gate again after the next node.

    Jobs without registered pause state are ignored entirely, so a late or
    mistaken call cannot leave an orphaned ``_pause_after_stage`` entry
    behind (``clear_pause`` would never be called to remove it).
    """
    gate = _pause_gate.get(job_id)
    if gate is None:
        return

    # Arm the re-pause flag before opening the gate so the runner cannot slip
    # past the next pause check without seeing it.
    _pause_after_stage[job_id] = True
    gate.set()  # unblock for one stage; the pause check re-clears the gate
    logger.info("Pipeline %s stepping", job_id)
|
||||
|
||||
|
||||
def set_pause_after_stage(job_id: str, enabled: bool):
    """Toggle pause-after-each-stage mode for a registered job.

    When the mode is switched off, the job's gate is also set so a pipeline
    that is currently paused resumes instead of staying blocked.

    Jobs without registered pause state are ignored, so the call cannot
    create a ``_pause_after_stage`` entry that nothing will clean up.
    """
    gate = _pause_gate.get(job_id)
    if gate is None:
        return

    _pause_after_stage[job_id] = enabled
    if not enabled:
        # Disabling the mode also resumes, in case we are paused right now.
        gate.set()
|
||||
|
||||
|
||||
def is_paused(job_id: str) -> bool:
    """Report whether ``job_id`` is registered and its gate is cleared.

    Returns False for jobs that have no pause state at all.
    """
    job_gate = _pause_gate.get(job_id)
    if job_gate is None:
        return False
    return not job_gate.is_set()
|
||||
|
||||
|
||||
def _wait_if_paused(job_id: str, node_name: str, poll_interval: float = 0.5):
    """Block the runner while ``job_id`` is paused; called after each node.

    Args:
        job_id: Job whose pause gate is consulted. Jobs without a gate
            return immediately.
        node_name: Name of the node that just completed; used only in the
            emitted "Paused after ..." log line.
        poll_interval: Seconds to wait on the gate between cancellation
            checks. Defaults to the previously hard-coded 0.5.

    Raises:
        PipelineCancelled: If the job's cancel check returns a truthy value
            while the runner is blocked on the gate.
    """
    gate = _pause_gate.get(job_id)
    if gate is None:
        return

    # Pause-after-stage mode: close the gate ourselves so the wait below
    # blocks until resume/step opens it again.
    if _pause_after_stage.get(job_id, False):
        gate.clear()
        # Kept as a function-scope import, as in the original.
        # NOTE(review): presumably avoids an import cycle with `detect` -- confirm.
        from detect import emit

        emit.log(job_id, "Pipeline", "INFO", f"Paused after {node_name}")

    # Wake up periodically rather than blocking forever, so a cancellation
    # requested while paused is noticed.
    while not gate.wait(timeout=poll_interval):
        check = _cancel_check.get(job_id)
        if check and check():
            raise PipelineCancelled("Cancelled while paused before next stage")
|
||||
|
||||
|
||||
def _checkpointing_node(node_name: str, node_fn):
|
||||
"""Wrap a node function to auto-checkpoint after completion."""
|
||||
|
||||
@@ -81,6 +179,10 @@ def _checkpointing_node(node_name: str, node_fn):
|
||||
output_json=output_json,
|
||||
)
|
||||
_latest_checkpoint[job_id] = new_checkpoint_id
|
||||
|
||||
# Pause check — blocks if paused, respects cancel while waiting
|
||||
_wait_if_paused(job_id, node_name)
|
||||
|
||||
return result
|
||||
|
||||
wrapper.__name__ = node_fn.__name__
|
||||
|
||||
Reference in New Issue
Block a user