"""
Pipeline replay — re-run from any stage with different config.

Loads a checkpoint, applies config overrides, builds a subgraph
starting from the target stage, and invokes it.
"""

from __future__ import annotations

import copy
import logging
import uuid

from detect import emit
from detect.checkpoint import load_checkpoint, list_checkpoints
from detect.graph import NODES, build_graph

logger = logging.getLogger(__name__)


class OverrideProfile:
    """
    Wraps a ContentTypeProfile and patches config methods with overrides.

    Override dict structure:
    {
        "frame_extraction": {"fps": 1.0},
        "scene_filter": {"hamming_threshold": 12},
        "detection": {"confidence_threshold": 0.5},
        "ocr": {"languages": ["en", "es"], "min_confidence": 0.3},
        "resolver": {"fuzzy_threshold": 60},
    }

    Each top-level key selects one ``*_config()`` method; the nested dict maps
    attribute names on the returned config object to replacement values.
    Anything not defined on this wrapper is delegated to the wrapped profile.
    """

    def __init__(self, base, overrides: dict):
        """
        Args:
            base: The profile being wrapped; must provide the ``*_config()``
                methods and the other profile methods forwarded below.
            overrides: Per-section override mapping (see class docstring).
                ``None`` is treated as "no overrides".
        """
        self._base = base
        self._overrides = overrides if overrides is not None else {}

    def __getattr__(self, name):
        """Delegate any attribute not found on the wrapper to the base profile."""
        # __getattr__ only runs when normal lookup fails.  If ``_base`` itself
        # is missing (instance created via __new__ by copy/pickle, before
        # __init__ ran), touching ``self._base`` would re-enter this method
        # forever, so fail fast instead.
        if name == "_base":
            raise AttributeError(name)
        return getattr(self._base, name)

    def _patch(self, config, key: str):
        """
        Return ``config`` with the overrides registered under ``key`` applied.

        Args:
            config: Config object produced by the base profile.
            key: Section name in the overrides dict (e.g. ``"ocr"``).

        Returns:
            ``config`` itself when the section has no overrides; otherwise a
            shallow copy with each overridden attribute replaced.
        """
        patches = self._overrides.get(key) or {}
        if not patches:
            return config

        # Patch a copy so the overrides never leak into an object the base
        # profile may be caching or sharing with other runs.
        patched = copy.copy(config)
        for attr, value in patches.items():
            if hasattr(patched, attr):
                setattr(patched, attr, value)
            else:
                # Unknown names are still ignored, but say so: a typo in an
                # override would otherwise go unnoticed.
                logger.warning(
                    "Ignoring unknown override %r for %r config (%s has no such attribute)",
                    attr, key, type(patched).__name__,
                )
        return patched

    def frame_extraction_config(self):
        """Base frame-extraction config with ``"frame_extraction"`` overrides applied."""
        return self._patch(self._base.frame_extraction_config(), "frame_extraction")

    def scene_filter_config(self):
        """Base scene-filter config with ``"scene_filter"`` overrides applied."""
        return self._patch(self._base.scene_filter_config(), "scene_filter")

    def detection_config(self):
        """Base detection config with ``"detection"`` overrides applied."""
        return self._patch(self._base.detection_config(), "detection")

    def ocr_config(self):
        """Base OCR config with ``"ocr"`` overrides applied."""
        return self._patch(self._base.ocr_config(), "ocr")

    def resolver_config(self):
        """Base resolver config with ``"resolver"`` overrides applied."""
        return self._patch(self._base.resolver_config(), "resolver")

    def vlm_prompt(self, crop_context):
        """Forward unchanged to the base profile."""
        return self._base.vlm_prompt(crop_context)

    def aggregate(self, detections):
        """Forward unchanged to the base profile."""
        return self._base.aggregate(detections)

    def auxiliary_detections(self, source):
        """Forward unchanged to the base profile."""
        return self._base.auxiliary_detections(source)


def replay_from(
    job_id: str,
    start_stage: str,
    config_overrides: dict | None = None,
    checkpoint: bool = True,
) -> dict:
    """
    Replay the pipeline from a specific stage.

    Loads the checkpoint from the stage immediately before start_stage,
    applies config overrides, and runs the subgraph from start_stage onward.

    Args:
        job_id: Job whose checkpoints are replayed.
        start_stage: Name of the stage to restart from; must be in ``NODES``
            and must not be the first entry.
        config_overrides: Optional override mapping; when non-empty it is
            stored in the loaded state under ``"config_overrides"``.
        checkpoint: Passed through to ``build_graph(checkpoint=...)``.

    Returns:
        The final state dict (the result of invoking the compiled subgraph).

    Raises:
        ValueError: If ``start_stage`` is unknown, is the first stage, or the
            preceding stage has no checkpoint for this job.
    """
    if start_stage not in NODES:
        raise ValueError(f"Unknown stage: {start_stage!r}. Options: {NODES}")

    start_idx = NODES.index(start_stage)

    # Load checkpoint from the stage before start_stage
    if start_idx == 0:
        raise ValueError("Cannot replay from the first stage — just run the full pipeline")

    previous_stage = NODES[start_idx - 1]
    available = list_checkpoints(job_id)
    if previous_stage not in available:
        raise ValueError(
            f"No checkpoint for stage {previous_stage!r} (job {job_id}). "
            f"Available: {available}"
        )

    logger.info("Replaying job %s from %s (loading checkpoint: %s)",
                job_id, start_stage, previous_stage)

    state = load_checkpoint(job_id, previous_stage)

    # Apply config overrides
    if config_overrides:
        state["config_overrides"] = config_overrides

    # Set run context for SSE events
    run_id = str(uuid.uuid4())[:8]
    emit.set_run_context(
        run_id=run_id,
        parent_job_id=job_id,
        run_type="replay",
    )

    # Everything after set_run_context lives inside the try so the context is
    # cleared even when building or compiling the subgraph fails, not only
    # when the invocation itself does.
    try:
        # Build subgraph starting from start_stage
        graph = build_graph(checkpoint=checkpoint, start_from=start_stage)
        pipeline = graph.compile()
        return pipeline.invoke(state)
    finally:
        emit.clear_run_context()