phase 12

2026-03-26 06:10:19 -03:00
parent 731964ca10
commit e27cb5bcc3
41 changed files with 2079 additions and 95 deletions
--- a/detect/stages/preprocess.py
+++ b/detect/stages/preprocess.py
@@ -0,0 +1,128 @@
+"""
+Stage 3.5 — Preprocessing
+
+Runs between YOLO detection and OCR. Applies configurable image
+preprocessing to each detected region crop: contrast enhancement,
+deskewing, binarization.
+
+Operates on the crops derived from boxes_by_frame and produces
+preprocessed crops keyed by the string "{frame_sequence}_{box_index}".
+"""
+
+from __future__ import annotations
+
+import logging
+
+import numpy as np
+
+from detect import emit
+from detect.models import BoundingBox, Frame
+
+logger = logging.getLogger(__name__)
+
+
+def _crop_region(frame: Frame, box: BoundingBox) -> np.ndarray:
+    """
+    Return the part of ``frame.image`` covered by ``box``, clipped to the frame.
+
+    The result is a plain slice of ``frame.image``. It is empty
+    (``size == 0``) when the box lies entirely outside the frame or has a
+    non-positive width or height.
+    """
+    h, w = frame.image.shape[:2]
+    x1 = max(0, box.x)
+    y1 = max(0, box.y)
+    # Never let an end index fall below its start index. A negative end
+    # (box entirely left of / above the frame) would otherwise be read by
+    # numpy as "count from the end" and yield a bogus non-empty crop.
+    x2 = max(x1, min(w, box.x + box.w))
+    y2 = max(y1, min(h, box.y + box.h))
+    return frame.image[y1:y2, x1:x2]
+
+
+def preprocess_regions(
+    frames: list[Frame],
+    boxes_by_frame: dict[int, list[BoundingBox]],
+    do_contrast: bool = True,
+    do_deskew: bool = False,
+    do_binarize: bool = False,
+    inference_url: str | None = None,
+    job_id: str | None = None,
+) -> dict[str, np.ndarray]:
+    """
+    Preprocess cropped regions from YOLO detections.
+
+    Args:
+        frames: Frames; each one's ``sequence`` is matched against the keys
+            of ``boxes_by_frame``.
+        boxes_by_frame: Detected boxes keyed by frame sequence.
+        do_contrast: Request contrast enhancement.
+        do_deskew: Request deskewing.
+        do_binarize: Request binarization.
+        inference_url: When set, every crop is sent to this server's
+            ``/preprocess`` endpoint; otherwise preprocessing runs in-process.
+        job_id: Identifier forwarded to ``emit.log``.
+
+    Returns:
+        Dict keyed by "{frame_seq}_{box_idx}" → preprocessed crop, where
+        ``box_idx`` is the box's position in its ``boxes_by_frame`` list.
+        The dict is empty when all three steps are disabled. Boxes whose
+        frame is not in ``frames``, or whose clipped crop is empty, get no
+        entry. These are passed to the OCR stage instead of raw crops.
+    """
+    total_regions = sum(len(boxes) for boxes in boxes_by_frame.values())
+
+    if not (do_contrast or do_deskew or do_binarize):
+        emit.log(job_id, "Preprocess", "INFO",
+                 f"Preprocessing disabled, passing {total_regions} regions through")
+        return {}
+
+    mode = "remote" if inference_url else "local"
+    emit.log(job_id, "Preprocess", "INFO",
+             f"Preprocessing {total_regions} regions (mode={mode}, "
+             f"contrast={do_contrast}, deskew={do_deskew}, binarize={do_binarize})")
+
+    frame_map = {f.sequence: f for f in frames}
+    preprocessed: dict[str, np.ndarray] = {}
+
+    for seq, boxes in boxes_by_frame.items():
+        frame = frame_map.get(seq)
+        # Test for absence explicitly: a Frame object that happens to be
+        # falsy must not be mistaken for a missing one.
+        if frame is None:
+            logger.warning(
+                "Preprocess: no frame with sequence %s, skipping %d regions",
+                seq, len(boxes))
+            continue
+
+        for idx, box in enumerate(boxes):
+            crop = _crop_region(frame, box)
+            if crop.size == 0:
+                # Nothing left after clipping the box to the frame.
+                logger.debug(
+                    "Preprocess: empty crop for frame %s box %d, skipping",
+                    seq, idx)
+                continue
+
+            if inference_url:
+                result = _preprocess_remote(crop, inference_url,
+                                            do_contrast, do_deskew, do_binarize)
+            else:
+                result = _preprocess_local(crop, do_contrast, do_deskew, do_binarize)
+
+            preprocessed[f"{seq}_{idx}"] = result
+
+    # Each processed region adds exactly one key, so the dict size is the
+    # processed-region count.
+    processed_count = len(preprocessed)
+    emit.log(job_id, "Preprocess", "INFO",
+             f"Preprocessed {processed_count} regions")
+
+    return preprocessed
+
+
+def _preprocess_remote(crop: np.ndarray, inference_url: str,
+                       do_contrast: bool, do_deskew: bool, do_binarize: bool) -> np.ndarray:
+    """
+    Preprocess one crop via the GPU server's /preprocess endpoint.
+
+    The crop is JPEG-encoded (quality 85), base64-wrapped and POSTed as JSON
+    together with the three step flags. The base64 image found under the
+    response's "image" key is decoded and returned as an RGB array.
+
+    An error HTTP status is raised through ``raise_for_status``; the
+    request uses a 30 second timeout.
+    """
+    import base64
+    import io
+
+    import requests
+    from PIL import Image
+
+    # Serialise the crop into an in-memory JPEG, then into base64 text.
+    jpeg_buf = io.BytesIO()
+    Image.fromarray(crop).save(jpeg_buf, format="JPEG", quality=85)
+    payload = {
+        "image": base64.b64encode(jpeg_buf.getvalue()).decode(),
+        "contrast": do_contrast,
+        "deskew": do_deskew,
+        "binarize": do_binarize,
+    }
+
+    # Strip trailing slashes so the path is joined with a single "/".
+    endpoint = f"{inference_url.rstrip('/')}/preprocess"
+    response = requests.post(endpoint, json=payload, timeout=30)
+    response.raise_for_status()
+
+    # Decode the returned image and force it to three channels.
+    returned_bytes = base64.b64decode(response.json()["image"])
+    returned_img = Image.open(io.BytesIO(returned_bytes))
+    return np.array(returned_img.convert("RGB"))
+
+
+def _preprocess_local(crop: np.ndarray,
+                      do_contrast: bool, do_deskew: bool, do_binarize: bool) -> np.ndarray:
+    """
+    Preprocess one crop in-process (requires opencv-python-headless).
+
+    The backend is imported at call time, so ``gpu.models.preprocess`` is
+    only loaded when this function is actually called.
+    """
+    from gpu.models.preprocess import preprocess as run_steps
+
+    step_flags = {
+        "do_binarize": do_binarize,
+        "do_deskew": do_deskew,
+        "do_contrast": do_contrast,
+    }
+    return run_steps(crop, **step_flags)