phase 4
This commit is contained in:
117
core/gpu/models/preprocess.py
Normal file
117
core/gpu/models/preprocess.py
Normal file
@@ -0,0 +1,117 @@
|
||||
"""
|
||||
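Image preprocessing pipeline for crops before OCR.

Each step is independently toggleable via config.
Operates on NumPy arrays and returns the processed array. All colour
conversions use OpenCV's RGB constants, so 3-channel input is treated as RGB;
BGR input is accepted but its channels are weighted as if they were RGB.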
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
import numpy as np
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def binarize(
    image: np.ndarray,
    threshold: int = 128,
    *,
    use_otsu: bool = True,
) -> np.ndarray:
    """Convert an image to grayscale and binarize it to pure black and white.

    By default Otsu's method picks the cut-off automatically. OpenCV ignores
    the supplied threshold value whenever ``THRESH_OTSU`` is set, so
    ``threshold`` only takes effect when ``use_otsu`` is False.

    Args:
        image: 2-D grayscale array, or a 3-D array that is converted with
            ``cv2.COLOR_RGB2GRAY``.
        threshold: Fixed cut-off used when ``use_otsu`` is False.
        use_otsu: When True (the default, matching the historical behaviour),
            let Otsu's method choose the cut-off and ignore ``threshold``.

    Returns:
        The binarized image expanded back to three channels with
        ``cv2.COLOR_GRAY2RGB``.
    """
    import cv2

    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) if image.ndim == 3 else image

    mode = cv2.THRESH_BINARY
    if use_otsu:
        # With this flag set OpenCV computes its own threshold and the
        # `threshold` argument passed below is not used.
        mode |= cv2.THRESH_OTSU

    _, binary = cv2.threshold(gray, threshold, 255, mode)

    # Convert back to 3-channel for downstream compatibility
    return cv2.cvtColor(binary, cv2.COLOR_GRAY2RGB)
|
||||
|
||||
|
||||
def deskew(image: np.ndarray) -> np.ndarray:
    """Correct slight rotation using the minimum-area rectangle of dark pixels.

    The skew is estimated from the rectangle that ``cv2.minAreaRect`` fits
    around all pixels darker than 128, and the image is rotated about its
    centre to cancel it. The input is returned unchanged when fewer than 10
    dark pixels are found or when the correction would be under 0.5 degrees.

    Args:
        image: 2-D grayscale array, or a 3-D array that is converted with
            ``cv2.COLOR_RGB2GRAY`` for the skew estimate.

    Returns:
        The rotated image with the same width and height as the input
        (borders filled by edge replication), or ``image`` itself when no
        correction is applied.
    """
    import cv2

    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) if image.ndim == 3 else image

    # Pixels below mid-grey are taken as ink.
    # NOTE(review): presumably crops are dark text on a light background — confirm.
    # np.where yields (row, col) pairs; the sign convention below relies on
    # the points being passed to minAreaRect in that order.
    dark_pixels = np.column_stack(np.where(gray < 128))
    if len(dark_pixels) < 10:
        return image

    raw_angle = cv2.minAreaRect(dark_pixels)[-1]

    # A rotated rectangle is only defined modulo 90 degrees (swapping its
    # width and height), and OpenCV releases differ in which representative
    # they report: older ones use [-90, 0), newer ones (0, 90]. Folding the
    # value into [-45, 45) gives the same small skew for either convention;
    # on the old range this is identical to the previous
    # `-(90 + angle)` / `-angle` branches. Without the fold, an upright crop
    # reported as 90 degrees would be rotated by a quarter turn.
    skew = (raw_angle + 45.0) % 90.0 - 45.0
    angle = -skew

    if abs(angle) < 0.5:
        return image

    h, w = image.shape[:2]
    center = (w // 2, h // 2)
    rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
    return cv2.warpAffine(
        image,
        rotation_matrix,
        (w, h),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_REPLICATE,
    )
|
||||
|
||||
|
||||
def enhance_contrast(image: np.ndarray) -> np.ndarray:
    """Normalize contrast with CLAHE (adaptive histogram equalization).

    A 3-D input is converted with ``cv2.COLOR_RGB2LAB``, equalized on its
    first (lightness) channel only, and converted back with
    ``cv2.COLOR_LAB2RGB``. Any other input is passed to CLAHE directly.

    Args:
        image: 3-D colour array or a single-channel array.

    Returns:
        The contrast-enhanced array.
    """
    import cv2

    is_color = image.ndim == 3

    if is_color:
        # Work in LAB so only lightness is equalized.
        lab_image = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)
        lightness = lab_image[:, :, 0]
    else:
        lightness = image

    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    equalized = equalizer.apply(lightness)

    if not is_color:
        return equalized

    # Write the equalized lightness back and return to RGB.
    lab_image[:, :, 0] = equalized
    return cv2.cvtColor(lab_image, cv2.COLOR_LAB2RGB)
|
||||
|
||||
|
||||
def preprocess(
    image: np.ndarray,
    do_binarize: bool = False,
    do_deskew: bool = False,
    do_contrast: bool = True,
) -> np.ndarray:
    """
    Apply the enabled preprocessing stages to a crop image.

    Stages run in a fixed order — contrast, then deskew, then binarize — and
    each one is skipped unless its flag is truthy. Contrast goes first
    because it works best on colour; binarization goes last because it
    destroys colour information.

    Args:
        image: Crop to process.
        do_binarize: Run ``binarize`` as the final stage.
        do_deskew: Run ``deskew`` as the middle stage.
        do_contrast: Run ``enhance_contrast`` as the first stage.

    Returns:
        The output of the last enabled stage, or ``image`` itself when every
        stage is disabled.
    """
    current = image

    # Stage 1: contrast normalization.
    if do_contrast:
        current = enhance_contrast(current)
        logger.debug("Preprocessing: contrast enhanced")

    # Stage 2: rotation correction.
    if do_deskew:
        current = deskew(current)
        logger.debug("Preprocessing: deskewed")

    # Stage 3: black-and-white conversion.
    if do_binarize:
        current = binarize(current)
        logger.debug("Preprocessing: binarized")

    return current
|
||||
Reference in New Issue
Block a user