Process

Utility functions for processing model input/output.

INTERPOLATION_MAP module-attribute

INTERPOLATION_MAP: dict[ImageInterpolation, int] = {
    "nearest": INTER_NEAREST,
    "linear": INTER_LINEAR,
    "cubic": INTER_CUBIC,
    "area": INTER_AREA,
    "lanczos4": INTER_LANCZOS4,
}

Mapping from interpolation method name to OpenCV constant.
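
Example (a minimal lookup sketch; it only shows that the mapping resolves a method name to the corresponding cv2 constant):

import cv2
from fast_plate_ocr.core.process import INTERPOLATION_MAP

assert INTERPOLATION_MAP["linear"] == cv2.INTER_LINEAR
assert INTERPOLATION_MAP["lanczos4"] == cv2.INTER_LANCZOS4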

read_plate_image

read_plate_image(
    image_path: PathLike,
    image_color_mode: ImageColorMode = "grayscale",
) -> ndarray

Reads an image from disk in the requested colour mode.

Parameters:

    image_path (PathLike, required): Path to the image file.
    image_color_mode (ImageColorMode, default "grayscale"): "grayscale" for single-channel
        or "rgb" for three-channel colour.

Returns:

    ndarray: The image as a NumPy array. Grayscale images have shape (H, W), RGB images have
        shape (H, W, 3).

Raises:

    FileNotFoundError: If the image file does not exist.
    ValueError: If the image cannot be decoded.

Source code in fast_plate_ocr/core/process.py
def read_plate_image(
    image_path: PathLike,
    image_color_mode: ImageColorMode = "grayscale",
) -> np.ndarray:
    """
    Reads an image from disk in the requested colour mode.

    Args:
        image_path: Path to the image file.
        image_color_mode: ``"grayscale"`` for single-channel or ``"rgb"`` for three-channel
            colour. Defaults to ``"grayscale"``.

    Returns:
        The image as a NumPy array.
            Grayscale images have shape ``(H, W)``, RGB images have shape ``(H, W, 3)``.

    Raises:
        FileNotFoundError: If the image file does not exist.
        ValueError: If the image cannot be decoded.
    """
    image_path = str(image_path)

    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image not found: {image_path}")

    if image_color_mode == "rgb":
        raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if raw is None:
            raise ValueError(f"Failed to decode image: {image_path}")
        img = cv2.cvtColor(raw, cv2.COLOR_BGR2RGB)
    else:
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            raise ValueError(f"Failed to decode image: {image_path}")

    return img
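
Example (a minimal usage sketch; "plate.png" is a placeholder path and must point to a readable image):

from fast_plate_ocr.core.process import read_plate_image

gray = read_plate_image("plate.png")                           # shape (H, W)
rgb = read_plate_image("plate.png", image_color_mode="rgb")    # shape (H, W, 3)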

resize_image

resize_image(
    img: ndarray,
    img_height: int,
    img_width: int,
    image_color_mode: ImageColorMode = "grayscale",
    keep_aspect_ratio: bool = False,
    interpolation_method: ImageInterpolation = "linear",
    padding_color: PaddingColor = (114, 114, 114),
) -> ndarray

Resizes an in-memory image with optional aspect-ratio preservation and padding.

Parameters:

    img (ndarray, required): Input image.
    img_height (int, required): Target image height.
    img_width (int, required): Target image width.
    image_color_mode (ImageColorMode, default "grayscale"): Output colour mode, "grayscale"
        or "rgb".
    keep_aspect_ratio (bool, default False): If True, maintain the original aspect ratio
        using letter-box padding.
    interpolation_method (ImageInterpolation, default "linear"): Interpolation method used
        for resizing.
    padding_color (PaddingColor, default (114, 114, 114)): Padding colour (scalar for
        grayscale, tuple for RGB).

Returns:

    ndarray: The resized image with shape (H, W, C) (a channel axis is added for grayscale).

Raises:

    ValueError: If padding_color length is not 3 for RGB output.

Source code in fast_plate_ocr/core/process.py
def resize_image(
    img: np.ndarray,
    img_height: int,
    img_width: int,
    image_color_mode: ImageColorMode = "grayscale",
    keep_aspect_ratio: bool = False,
    interpolation_method: ImageInterpolation = "linear",
    padding_color: PaddingColor = (114, 114, 114),
) -> np.ndarray:
    """
    Resizes an in-memory image with optional aspect-ratio preservation and padding.

    Args:
        img: Input image.
        img_height: Target image height.
        img_width: Target image width.
        image_color_mode: Output colour mode, ``"grayscale"`` or ``"rgb"``.
        keep_aspect_ratio: If ``True``, maintain the original aspect ratio using letter-box
            padding. Defaults to ``False``.
        interpolation_method: Interpolation method used for resizing. Defaults to ``"linear"``.
        padding_color: Padding colour (scalar for grayscale, tuple for RGB). Defaults to
            ``(114, 114, 114)``.

    Returns:
        The resized image with shape ``(H, W, C)`` (a channel axis is added for grayscale).

    Raises:
        ValueError: If ``padding_color`` length is not 3 for RGB output.
    """
    # pylint: disable=too-many-locals

    interpolation = INTERPOLATION_MAP[interpolation_method]

    if not keep_aspect_ratio:
        img = cv2.resize(img, (img_width, img_height), interpolation=interpolation)

    else:
        orig_h, orig_w = img.shape[:2]
        # Scale ratio (new / old) - choose the limiting dimension
        r = min(img_height / orig_h, img_width / orig_w)
        # Compute the size of the resized (unpadded) image
        new_unpad_w, new_unpad_h = round(orig_w * r), round(orig_h * r)
        # Resize if necessary
        if (orig_w, orig_h) != (new_unpad_w, new_unpad_h):
            img = cv2.resize(img, (new_unpad_w, new_unpad_h), interpolation=interpolation)
        # Padding on each side
        dw, dh = (img_width - new_unpad_w) / 2, (img_height - new_unpad_h) / 2
        top, bottom, left, right = (
            round(dh - 0.1),
            round(dh + 0.1),
            round(dw - 0.1),
            round(dw + 0.1),
        )
        border_color: PaddingColor
        # Ensure padding colour matches channel count
        if image_color_mode == "grayscale":
            if isinstance(padding_color, tuple):
                border_color = int(padding_color[0])
            else:
                border_color = int(padding_color)
        elif image_color_mode == "rgb":
            if isinstance(padding_color, tuple):
                if len(padding_color) != 3:
                    raise ValueError("padding_color must be length-3 for RGB images")
                border_color = tuple(int(c) for c in padding_color)  # type: ignore[assignment]
            else:
                border_color = (int(padding_color),) * 3
        img = cv2.copyMakeBorder(
            img,
            top,
            bottom,
            left,
            right,
            borderType=cv2.BORDER_CONSTANT,
            value=border_color,  # type: ignore[arg-type]
        )
    # Add channel axis for gray so output is HxWxC
    if image_color_mode == "grayscale" and img.ndim == 2:
        img = np.expand_dims(img, axis=-1)

    return img
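
Example (a brief letter-box sketch on a synthetic grayscale crop; the 64x128 target size is only illustrative):

import numpy as np
from fast_plate_ocr.core.process import resize_image

img = np.zeros((50, 200), dtype=np.uint8)  # synthetic 50x200 grayscale plate crop
out = resize_image(img, img_height=64, img_width=128, keep_aspect_ratio=True)
print(out.shape)  # (64, 128, 1): scaled to fit, padded with 114, channel axis added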

read_and_resize_plate_image

read_and_resize_plate_image(
    image_path: PathLike,
    img_height: int,
    img_width: int,
    image_color_mode: ImageColorMode = "grayscale",
    keep_aspect_ratio: bool = False,
    interpolation_method: ImageInterpolation = "linear",
    padding_color: PaddingColor = (114, 114, 114),
) -> ndarray

Reads an image from disk and resizes it for model input.

Parameters:

    image_path (PathLike, required): Path to the image.
    img_height (int, required): Desired output height.
    img_width (int, required): Desired output width.
    image_color_mode (ImageColorMode, default "grayscale"): "grayscale" or "rgb".
    keep_aspect_ratio (bool, default False): Whether to preserve aspect ratio via
        letter-boxing.
    interpolation_method (ImageInterpolation, default "linear"): Interpolation method to use.
    padding_color (PaddingColor, default (114, 114, 114)): Colour used for padding when
        aspect ratio is preserved.

Returns:

    ndarray: The resized (and possibly padded) image with shape (H, W, C).

Source code in fast_plate_ocr/core/process.py
def read_and_resize_plate_image(
    image_path: PathLike,
    img_height: int,
    img_width: int,
    image_color_mode: ImageColorMode = "grayscale",
    keep_aspect_ratio: bool = False,
    interpolation_method: ImageInterpolation = "linear",
    padding_color: PaddingColor = (114, 114, 114),
) -> np.ndarray:
    """
    Reads an image from disk and resizes it for model input.

    Args:
        image_path: Path to the image.
        img_height: Desired output height.
        img_width: Desired output width.
        image_color_mode: ``"grayscale"`` or ``"rgb"``. Defaults to ``"grayscale"``.
        keep_aspect_ratio: Whether to preserve aspect ratio via letter-boxing. Defaults to
            ``False``.
        interpolation_method: Interpolation method to use. Defaults to ``"linear"``.
        padding_color: Colour used for padding when aspect ratio is preserved. Defaults to
            ``(114, 114, 114)``.

    Returns:
        The resized (and possibly padded) image with shape ``(H, W, C)``.
    """
    img = read_plate_image(image_path, image_color_mode=image_color_mode)
    return resize_image(
        img,
        img_height,
        img_width,
        image_color_mode=image_color_mode,
        keep_aspect_ratio=keep_aspect_ratio,
        interpolation_method=interpolation_method,
        padding_color=padding_color,
    )
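
Example (a minimal end-to-end sketch; "plate.png" is a placeholder path and the 70x140 target size is only illustrative):

from fast_plate_ocr.core.process import read_and_resize_plate_image

img = read_and_resize_plate_image(
    "plate.png", img_height=70, img_width=140, keep_aspect_ratio=True
)
print(img.shape)  # (70, 140, 1) in the default grayscale mode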

preprocess_image

preprocess_image(images: ndarray) -> ndarray

Converts image data to the format expected by the model.

The model itself handles pixel-value normalisation, so this function only ensures the batch-dimension and dtype are correct.

Parameters:

    images (ndarray, required): Image or batch of images with shape (H, W, C)
        or (N, H, W, C).

Returns:

    ndarray: A NumPy array with shape (N, H, W, C) and dtype uint8.

Raises:

    ValueError: If the input does not have 3 or 4 dimensions.

Source code in fast_plate_ocr/core/process.py
def preprocess_image(images: np.ndarray) -> np.ndarray:
    """
    Converts image data to the format expected by the model.

    The model itself handles pixel-value normalisation, so this function only ensures the
    batch-dimension and dtype are correct.

    Args:
        images: Image or batch of images with shape ``(H, W, C)`` or ``(N, H, W, C)``.

    Returns:
        A NumPy array with shape ``(N, H, W, C)`` and dtype ``uint8``.

    Raises:
        ValueError: If the input does not have 3 or 4 dimensions.
    """
    # single sample (H, W, C)
    if images.ndim == 3:
        images = np.expand_dims(images, axis=0)

    if images.ndim != 4:
        raise ValueError("Expected input of shape (N, H, W, C).")

    return images.astype(np.uint8)
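
Example (a short sketch of the batch handling on a synthetic single image):

import numpy as np
from fast_plate_ocr.core.process import preprocess_image

single = np.random.randint(0, 256, (70, 140, 1), dtype=np.uint8)  # one (H, W, C) sample
batch = preprocess_image(single)
print(batch.shape, batch.dtype)  # (1, 70, 140, 1) uint8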

postprocess_output

postprocess_output(
    model_output: ndarray,
    max_plate_slots: int,
    model_alphabet: str,
    return_confidence: bool = False,
) -> tuple[list[str], ndarray] | list[str]

Decodes model predictions into licence-plate strings.

Parameters:

    model_output (ndarray, required): Raw output tensor from the model.
    max_plate_slots (int, required): Maximum number of character positions.
    model_alphabet (str, required): Alphabet used by the model.
    return_confidence (bool, default False): If True, also return per-character confidence
        scores.

Returns:

    tuple[list[str], ndarray] | list[str]: If return_confidence is False: a list of decoded
        plate strings. If True: a two-tuple (plates, probs) where

        • plates is the list of decoded strings, and
        • probs is an array of shape (N, max_plate_slots) with the corresponding confidence
          scores.

Source code in fast_plate_ocr/core/process.py
def postprocess_output(
    model_output: np.ndarray,
    max_plate_slots: int,
    model_alphabet: str,
    return_confidence: bool = False,
) -> tuple[list[str], np.ndarray] | list[str]:
    """
    Decodes model predictions into licence-plate strings.

    Args:
        model_output: Raw output tensor from the model.
        max_plate_slots: Maximum number of character positions.
        model_alphabet: Alphabet used by the model.
        return_confidence: If ``True``, also return per-character confidence scores.
            Defaults to ``False``.

    Returns:
        If ``return_confidence`` is ``False``: a list of decoded plate strings.
            If ``True``: a two-tuple ``(plates, probs)`` where

            * ``plates`` is the list of decoded strings, and
            * ``probs`` is an array of shape ``(N, max_plate_slots)`` with the corresponding
              confidence scores.
    """
    predictions = model_output.reshape((-1, max_plate_slots, len(model_alphabet)))
    prediction_indices = np.argmax(predictions, axis=-1)
    alphabet_array = np.array(list(model_alphabet))
    plate_chars = alphabet_array[prediction_indices]
    plates: list[str] = np.apply_along_axis("".join, 1, plate_chars).tolist()
    if return_confidence:
        probs = np.max(predictions, axis=-1)
        return plates, probs
    return plates
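
Example (an illustrative sketch with a tiny synthetic alphabet and output tensor; real models define their own alphabet and slot count):

import numpy as np
from fast_plate_ocr.core.process import postprocess_output

alphabet = "AB1_"  # hypothetical 4-character alphabet
max_slots = 3      # hypothetical number of character positions

# One fake prediction: the highest score per slot picks "A", "B", "1".
scores = np.array([[[0.9, 0.05, 0.03, 0.02],
                    [0.1, 0.80, 0.05, 0.05],
                    [0.2, 0.10, 0.60, 0.10]]], dtype=np.float32)

plates, probs = postprocess_output(scores, max_slots, alphabet, return_confidence=True)
print(plates)       # ['AB1']
print(probs.shape)  # (1, 3)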