Files
mediaproc/gpu/models/preprocess.py
2026-03-26 07:40:14 -03:00

118 lines
2.9 KiB
Python

"""
Image preprocessing pipeline for crops before OCR.
Each step is independently toggleable via config.
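Operates on numpy arrays (BGR or RGB) and returns the processed array.
Color conversions use OpenCV's RGB constants, so BGR input is processed
with its red and blue channels swapped.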
"""
from __future__ import annotations
import logging
import numpy as np
logger = logging.getLogger(__name__)
def binarize(image: np.ndarray, threshold: int = 128) -> np.ndarray:
    """Return a black-and-white, 3-channel version of *image* using Otsu's method.

    Args:
        image: 2-D grayscale array, or 3-D color array that is first reduced
            to grayscale with ``COLOR_RGB2GRAY``.
        threshold: Forwarded to ``cv2.threshold`` as ``thresh``. Because the
            ``THRESH_OTSU`` flag is set, OpenCV computes the cut-off itself
            and this value does not influence the result.

    Returns:
        The binarized image expanded back to three channels
        (``COLOR_GRAY2RGB``) so later stages can treat it like a color crop.
    """
    import cv2

    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) if image.ndim == 3 else image

    otsu_mode = cv2.THRESH_BINARY | cv2.THRESH_OTSU
    # cv2.threshold returns (threshold_used, image); only the image is needed.
    binary = cv2.threshold(gray, threshold, 255, otsu_mode)[1]

    # Back to 3 channels for downstream compatibility.
    return cv2.cvtColor(binary, cv2.COLOR_GRAY2RGB)
def _fold_skew_angle(raw_angle: float) -> float:
    """Turn a ``cv2.minAreaRect`` angle into the rotation to apply, in degrees.

    A rectangle's orientation is only defined modulo 90 degrees, and OpenCV
    releases differ in which representative they report: older versions use
    [-90, 0), newer ones (reported from 4.5.1) use (0, 90]. Folding the value
    into [-45, 45] first makes the result independent of that convention;
    the sign is then flipped to obtain the correcting rotation.
    """
    if raw_angle < -45:
        raw_angle += 90
    elif raw_angle > 45:
        raw_angle -= 90
    return -raw_angle


def deskew(image: np.ndarray) -> np.ndarray:
    """Correct slight rotation using the minimum-area rectangle of dark pixels.

    Args:
        image: 2-D grayscale array, or 3-D color array that is reduced to
            grayscale with ``COLOR_RGB2GRAY``. Pixels with a gray value below
            128 are treated as ink.

    Returns:
        The input object itself when fewer than 10 ink pixels are found or
        the estimated skew is below 0.5 degrees. Otherwise a rotated image of
        the same width and height, with borders filled by edge replication.
    """
    if image.ndim == 3:
        import cv2

        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    else:
        gray = image

    # (row, col) coordinates of every dark pixel.
    ink = np.column_stack(np.where(gray < 128))
    if len(ink) < 10:
        # Too little ink to estimate an orientation from.
        return image

    # Imported at first use: the grayscale no-op path above never needs OpenCV.
    import cv2

    angle = _fold_skew_angle(cv2.minAreaRect(ink)[-1])
    if abs(angle) < 0.5:
        # Not worth the interpolation blur of a warp.
        return image

    h, w = image.shape[:2]
    rotation_matrix = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
    return cv2.warpAffine(
        image,
        rotation_matrix,
        (w, h),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_REPLICATE,
    )
def enhance_contrast(image: np.ndarray) -> np.ndarray:
    """Normalize contrast with CLAHE (contrast-limited adaptive histogram equalization).

    Args:
        image: 2-D grayscale array, or 3-D color array converted with
            ``COLOR_RGB2LAB`` so that only the first (lightness) channel is
            equalized.

    Returns:
        The equalized array: single-channel for grayscale input, or converted
        back with ``COLOR_LAB2RGB`` for color input.
    """
    import cv2

    is_color = image.ndim == 3
    if is_color:
        lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)
        lightness = lab[:, :, 0]
    else:
        lightness = image

    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    equalized = equalizer.apply(lightness)

    if not is_color:
        return equalized

    # Write the equalized lightness back and leave the other two channels as-is.
    lab[:, :, 0] = equalized
    return cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)
def preprocess(
    image: np.ndarray,
    do_binarize: bool = False,
    do_deskew: bool = False,
    do_contrast: bool = True,
) -> np.ndarray:
    """Apply the enabled preprocessing stages to a crop, in a fixed order.

    Stages run as contrast -> deskew -> binarize. Contrast goes first because
    it works best on color; binarization goes last because it destroys color
    information.

    Args:
        image: Crop to process.
        do_binarize: Run ``binarize`` as the final stage.
        do_deskew: Run ``deskew`` as the middle stage.
        do_contrast: Run ``enhance_contrast`` as the first stage.

    Returns:
        The output of the last enabled stage, or *image* itself when every
        stage is disabled.
    """
    current = image

    if do_contrast:
        current = enhance_contrast(current)
        logger.debug("Preprocessing: contrast enhanced")

    if do_deskew:
        current = deskew(current)
        logger.debug("Preprocessing: deskewed")

    if do_binarize:
        current = binarize(current)
        logger.debug("Preprocessing: binarized")

    return current