"""
Image preprocessing pipeline for crops before OCR.

Each step is independently toggleable via config.
Operates on numpy arrays (BGR or RGB), returns processed array.
"""

from __future__ import annotations

import logging

import numpy as np

logger = logging.getLogger(__name__)


def binarize(image: np.ndarray, threshold: int = 128) -> np.ndarray:
    """Convert to grayscale and apply Otsu binarization.

    Args:
        image: 2-D grayscale array or 3-D color array. Color input is
            converted with ``cv2.COLOR_RGB2GRAY``.
        threshold: Forwarded to ``cv2.threshold``. Because the
            ``THRESH_OTSU`` flag is set, OpenCV computes the threshold itself
            and this value does not influence the result; it is kept for
            interface compatibility.

    Returns:
        A 3-channel array holding the binary (0 / 255) image.
    """
    import cv2

    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) if image.ndim == 3 else image
    _, binary = cv2.threshold(
        gray, threshold, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    # Convert back to 3-channel for downstream compatibility.
    return cv2.cvtColor(binary, cv2.COLOR_GRAY2RGB)


def _normalize_skew_angle(angle: float) -> float:
    """Map a ``cv2.minAreaRect`` angle to the corrective rotation in degrees.

    OpenCV before 4.5.1 reports the rectangle angle in [-90, 0); newer
    releases report it in (0, 90]. A rectangle's orientation is only defined
    modulo 90 degrees, so both conventions are folded into [-45, 45] and then
    negated to obtain the rotation that undoes the skew. For the legacy range
    this yields exactly the same values as the previous
    ``-(90 + angle)`` / ``-angle`` rule.
    """
    if angle < -45:
        angle += 90
    elif angle > 45:
        angle -= 90
    return -angle


def deskew(image: np.ndarray) -> np.ndarray:
    """Correct slight rotation using minimum area rectangle.

    Pixels darker than 128 are treated as foreground. The minimum-area
    rectangle around them provides the skew estimate, and the image is rotated
    about its center to compensate.

    Args:
        image: 2-D grayscale array or 3-D color array. Color input is
            converted with ``cv2.COLOR_RGB2GRAY`` for the skew estimate only.

    Returns:
        The rotated image (same width and height, borders replicated), or the
        input object itself when fewer than 10 foreground pixels are found or
        the estimated correction is below 0.5 degrees.
    """
    import cv2

    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) if image.ndim == 3 else image

    # np.where yields (row, col) pairs; they are passed to minAreaRect in that
    # order, which is the convention the sign handling below relies on.
    coords = np.column_stack(np.where(gray < 128))
    if len(coords) < 10:
        # Too little foreground for a meaningful estimate.
        return image

    angle = _normalize_skew_angle(cv2.minAreaRect(coords)[-1])
    if abs(angle) < 0.5:
        # Not worth resampling the image for a negligible rotation.
        return image

    h, w = image.shape[:2]
    center = (w // 2, h // 2)
    rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
    return cv2.warpAffine(
        image,
        rotation_matrix,
        (w, h),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_REPLICATE,
    )


def enhance_contrast(image: np.ndarray) -> np.ndarray:
    """Apply CLAHE (adaptive histogram equalization) for contrast normalization.

    Args:
        image: 2-D grayscale array or 3-D color array.

    Returns:
        For color input, the image with CLAHE applied to the L channel of its
        LAB representation (converted with the RGB<->LAB constants). For 2-D
        input, the CLAHE result of the array itself.
    """
    import cv2

    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

    if image.ndim != 3:
        return clahe.apply(image)

    # Equalize lightness only so the chroma channels are left untouched.
    lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)
    lab[:, :, 0] = clahe.apply(lab[:, :, 0])
    return cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)


def preprocess(
    image: np.ndarray,
    do_binarize: bool = False,
    do_deskew: bool = False,
    do_contrast: bool = True,
) -> np.ndarray:
    """
    Run the preprocessing pipeline on a crop image.

    Each step is independently toggleable.
    Order: contrast → deskew → binarize.
    Contrast first (works best on color), binarize last (destroys color info).

    Args:
        image: Crop to process.
        do_binarize: Run :func:`binarize` as the final step.
        do_deskew: Run :func:`deskew` as the middle step.
        do_contrast: Run :func:`enhance_contrast` as the first step.

    Returns:
        The processed array; the input object itself when every step is
        disabled.
    """
    result = image

    if do_contrast:
        result = enhance_contrast(result)
        logger.debug("Preprocessing: contrast enhanced")

    if do_deskew:
        result = deskew(result)
        logger.debug("Preprocessing: deskewed")

    if do_binarize:
        result = binarize(result)
        logger.debug("Preprocessing: binarized")

    return result