118 lines
2.9 KiB
Python
118 lines
2.9 KiB
Python
"""
Image preprocessing pipeline for crops before OCR.

Each step is independently toggleable via config.
Operates on numpy arrays (BGR or RGB), returns processed array.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
|
|
import numpy as np
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def binarize(image: np.ndarray, threshold: int = 128) -> np.ndarray:
    """Reduce an image to pure black and white using Otsu thresholding.

    Args:
        image: A 2-D grayscale array, or a 3-D array that is first converted
            to grayscale with ``cv2.COLOR_RGB2GRAY``.
        threshold: Value forwarded to ``cv2.threshold`` as ``thresh``.
            NOTE(review): the call also sets ``cv2.THRESH_OTSU``, which
            OpenCV documents as choosing the threshold automatically, so
            this value likely has no effect -- confirm before relying on it.

    Returns:
        The thresholded image expanded back to three channels via
        ``cv2.COLOR_GRAY2RGB``; thresholding uses 255 as the maximum value.
    """
    import cv2

    is_color = image.ndim == 3
    mono = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) if is_color else image

    otsu_binary = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    black_white = cv2.threshold(mono, threshold, 255, otsu_binary)[1]

    # Later pipeline stages expect three channels, so expand the mask again.
    return cv2.cvtColor(black_white, cv2.COLOR_GRAY2RGB)
|
|
|
|
|
|
def deskew(image: np.ndarray) -> np.ndarray:
    """Correct slight rotation using the minimum-area rectangle of dark pixels.

    Pixels darker than 128 are treated as ink. The tilt of the smallest
    rotated rectangle enclosing them is taken as the skew, and the image is
    rotated about its centre to cancel it.

    Args:
        image: A 2-D grayscale array, or a 3-D array that is converted with
            ``cv2.COLOR_RGB2GRAY`` to locate the dark pixels (the rotation
            itself is applied to the original array).

    Returns:
        The input array itself when fewer than 10 dark pixels are found or
        the measured skew is below 0.5 degrees; otherwise a rotated array of
        the same width and height, with edges filled by border replication.
    """
    import cv2

    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) if image.ndim == 3 else image

    # (row, col) positions of every dark pixel.
    coords = np.column_stack(np.where(gray < 128))
    if len(coords) < 10:
        # Too little ink to estimate an orientation reliably.
        return image

    raw_angle = cv2.minAreaRect(coords)[-1]

    # A rectangle's orientation is only defined modulo 90 degrees, and the
    # range minAreaRect reports differs between OpenCV releases: older builds
    # use [-90, 0), newer ones (0, 90]. Folding into [-45, 45) gives the same
    # tilt under either convention; the previous `angle < -45` test handled
    # only the older range and turned an upright crop (reported as 90) into a
    # quarter-turn rotation.
    tilt = (raw_angle + 45.0) % 90.0 - 45.0
    angle = -tilt

    if abs(angle) < 0.5:
        # Not worth the interpolation blur for a negligible correction.
        return image

    h, w = image.shape[:2]
    center = (w // 2, h // 2)
    rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
    return cv2.warpAffine(
        image,
        rotation_matrix,
        (w, h),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_REPLICATE,
    )
|
|
|
|
|
|
def enhance_contrast(image: np.ndarray) -> np.ndarray:
    """Normalize contrast with CLAHE (contrast-limited adaptive equalization).

    Args:
        image: A 2-D array, equalized directly, or a 3-D array, which is
            converted with ``cv2.COLOR_RGB2LAB`` so that only channel 0 of
            the LAB image is equalized.

    Returns:
        The equalized 2-D array, or for 3-D input the LAB image converted
        back with ``cv2.COLOR_LAB2RGB``.
    """
    import cv2

    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

    if image.ndim != 3:
        # Single-channel input: equalize it as-is.
        return equalizer.apply(image)

    # Work in LAB and touch only channel 0, leaving the other two unchanged.
    lab_image = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)
    lab_image[:, :, 0] = equalizer.apply(lab_image[:, :, 0])
    return cv2.cvtColor(lab_image, cv2.COLOR_LAB2RGB)
|
|
|
|
|
|
def preprocess(
    image: np.ndarray,
    do_binarize: bool = False,
    do_deskew: bool = False,
    do_contrast: bool = True,
) -> np.ndarray:
    """Apply the enabled preprocessing stages to a crop image.

    Stages always run in the order contrast -> deskew -> binarize: contrast
    enhancement works best while colour is still present, and binarization
    goes last because it destroys colour information.

    Args:
        image: The crop to process.
        do_binarize: Run ``binarize`` as the final stage.
        do_deskew: Run ``deskew`` as the middle stage.
        do_contrast: Run ``enhance_contrast`` as the first stage.

    Returns:
        The output of the last enabled stage, or ``image`` itself when every
        stage is disabled.
    """
    current = image

    if do_contrast:
        current = enhance_contrast(current)
        logger.debug("Preprocessing: contrast enhanced")

    if do_deskew:
        current = deskew(current)
        logger.debug("Preprocessing: deskewed")

    if do_binarize:
        current = binarize(current)
        logger.debug("Preprocessing: binarized")

    return current
|