This commit is contained in:
2026-03-30 07:22:14 -03:00
parent d0707333fd
commit 4220b0418e
182 changed files with 3668 additions and 5231 deletions

View File

@@ -0,0 +1,117 @@
"""
Image preprocessing pipeline for crops before OCR.
Each step is independently toggleable via config.
Operates on numpy arrays (BGR or RGB), returns processed array.
"""
from __future__ import annotations
import logging
import numpy as np
logger = logging.getLogger(__name__)
def binarize(image: np.ndarray, threshold: int = 128) -> np.ndarray:
    """Binarize an image with Otsu's method and return it as three channels.

    A 3-D input is first reduced to grayscale with ``cv2.COLOR_RGB2GRAY``;
    any other input is thresholded as-is.

    Args:
        image: Image array. NOTE(review): the conversion constant assumes RGB
            channel order -- confirm callers never pass BGR.
        threshold: Value forwarded to ``cv2.threshold``. Because
            ``THRESH_OTSU`` is set, OpenCV derives the cut-off from the
            histogram and, per its documentation, uses that instead of this
            value.

    Returns:
        The thresholded image (pixel values 0 or 255) expanded back to three
        channels with ``cv2.COLOR_GRAY2RGB``.
    """
    import cv2

    is_multichannel = image.ndim == 3
    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) if is_multichannel else image

    otsu_mode = cv2.THRESH_BINARY | cv2.THRESH_OTSU
    black_and_white = cv2.threshold(grayscale, threshold, 255, otsu_mode)[1]

    # Downstream steps expect a 3-channel array, so expand the mask again.
    return cv2.cvtColor(black_and_white, cv2.COLOR_GRAY2RGB)
def deskew(image: np.ndarray) -> np.ndarray:
    """Correct slight rotation using the minimum-area rectangle of dark pixels.

    Pixels darker than 128 are treated as ink. The tilt of the smallest
    rectangle enclosing them is taken as the skew, and the image is rotated
    about its centre to cancel it.

    Args:
        image: 2-D grayscale array, or 3-D array that is reduced to grayscale
            with ``cv2.COLOR_RGB2GRAY`` for the analysis only.

    Returns:
        ``image`` itself when fewer than 10 dark pixels are found or the
        detected skew is below 0.5 degrees; otherwise a rotated copy with the
        same width and height, with uncovered borders filled by replicating
        edge pixels.
    """
    import cv2

    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    else:
        gray = image

    # np.where yields (row, col) pairs; the sign handling below is written
    # for that ordering, so keep it as-is.
    coords = np.column_stack(np.where(gray < 128))
    if len(coords) < 10:
        # Too little ink to estimate an orientation reliably.
        return image

    rect_angle = float(cv2.minAreaRect(coords)[-1])

    # A rectangle's orientation is only defined modulo 90 degrees, and the
    # range minAreaRect reports differs between OpenCV releases ([-90, 0)
    # before 4.5.1, (0, 90] afterwards). Folding into (-45, 45] makes the
    # result independent of that convention; the old "< -45" test silently
    # turned an upright crop into a 90 degree rotation on newer releases.
    skew = rect_angle % 90.0
    if skew >= 45.0:
        skew -= 90.0
    angle = -skew

    if abs(angle) < 0.5:
        # Not worth resampling (and blurring) the crop for a negligible tilt.
        return image

    h, w = image.shape[:2]
    center = (w // 2, h // 2)
    rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
    return cv2.warpAffine(
        image,
        rotation_matrix,
        (w, h),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_REPLICATE,
    )
def enhance_contrast(image: np.ndarray) -> np.ndarray:
    """Normalize contrast with CLAHE (clip limit 2.0, 8x8 tile grid).

    A 3-D input is converted to LAB with ``cv2.COLOR_RGB2LAB``, only its
    first (lightness) channel is equalized, and the result is converted back
    with ``cv2.COLOR_LAB2RGB``. Any other input is equalized directly.

    Args:
        image: Image array. NOTE(review): the conversion constants assume RGB
            channel order -- confirm callers never pass BGR.

    Returns:
        The contrast-enhanced image as a new array.
    """
    import cv2

    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

    if image.ndim != 3:
        # Grayscale input: the image itself is the channel to equalize.
        return equalizer.apply(image)

    # Work on lightness only so the colour components stay untouched.
    lab_image = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)
    lab_image[:, :, 0] = equalizer.apply(lab_image[:, :, 0])
    return cv2.cvtColor(lab_image, cv2.COLOR_LAB2RGB)
def preprocess(
    image: np.ndarray,
    do_binarize: bool = False,
    do_deskew: bool = False,
    do_contrast: bool = True,
) -> np.ndarray:
    """Apply the enabled preprocessing steps to a crop image.

    Steps always run in the order contrast -> deskew -> binarize: contrast
    enhancement works best while colour is still present, and binarization
    goes last because it discards colour information.

    Args:
        image: Crop to process.
        do_binarize: Run ``binarize`` as the final step.
        do_deskew: Run ``deskew`` after contrast enhancement.
        do_contrast: Run ``enhance_contrast`` as the first step.

    Returns:
        The output of the last enabled step, or ``image`` itself when every
        step is disabled.
    """
    if not (do_contrast or do_deskew or do_binarize):
        # Nothing enabled: hand the input back untouched.
        return image

    # (enabled, step, log message) in execution order.
    stages = (
        (do_contrast, enhance_contrast, "Preprocessing: contrast enhanced"),
        (do_deskew, deskew, "Preprocessing: deskewed"),
        (do_binarize, binarize, "Preprocessing: binarized"),
    )

    current = image
    for enabled, step, message in stages:
        if enabled:
            current = step(current)
            logger.debug(message)
    return current