albumentations-team · ternaus · Oct 18, 2024 · Oct 13, 2024 · Oct 15, 2024 · Oct 15, 2024
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -71,7 +71,7 @@ jobs:
     strategy:
       fail-fast: true
       matrix:
-        python-version: ["3.8"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
     steps:
     - name: Checkout
       uses: actions/checkout@v4
@@ -87,3 +87,5 @@ jobs:
       run: uv pip install --system -r requirements-dev.txt
     - name: Run checks
       run: pre-commit run --all-files  # CI has no staged files, so lint the whole repo
+    - name: Run tests
+      run: pytest
diff --git a/.github/workflows/upload_to_pypi.yml b/.github/workflows/upload_to_pypi.yml
@@ -12,7 +12,7 @@ jobs:
     - name: Set up Python
       uses: actions/setup-python@v5
       with:
-        python-version: '3.8'
+        python-version: '3.9'
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip

diff --git a/.gitignore b/.gitignore
@@ -166,3 +166,5 @@ conda_build/
 *.ipynb
 
 .ruff_cache/
+
+*.csv
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -12,7 +12,7 @@ ci:
 
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.6.0
+    rev: v5.0.0
     hooks:
       - id: check-added-large-files
       - id: check-ast
@@ -53,13 +53,13 @@ repos:
   #   hooks:
   #     - id: markdownlint
   - repo: https://github.com/tox-dev/pyproject-fmt
-    rev: "2.2.4"
+    rev: "2.3.1"
     hooks:
       - id: pyproject-fmt
         additional_dependencies: ["tomli"]
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.6.6
+    rev: v0.6.9
     hooks:
       # Run the linter.
       - id: ruff
@@ -68,9 +68,9 @@ repos:
       # Run the formatter.
       - id: ruff-format
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.11.2
+    rev: v1.12.0
     hooks:
       - id: mypy
-        files: ^albucore/
+        files: ^(albucore|benchmark)/
         args:
           [ --config-file=pyproject.toml ]
diff --git a/README.md b/README.md
@@ -1,45 +1,62 @@
-# Albucore
+# Albucore: High-Performance Image Processing Functions
 
-Albucore is a high-performance image processing library designed to optimize operations on images using Python and OpenCV, building upon the foundations laid by the popular Albumentations library. It offers specialized optimizations for different image data types and aims to provide faster processing times through efficient algorithm implementations.
+Albucore is a library of optimized atomic functions designed for efficient image processing. These functions serve as the foundation for [Albumentations](https://github.com/albumentations-team/albumentations), a popular image augmentation library.
 
-## Features
+## Overview
 
-- Optimized image multiplication operations for both `uint8` and `float32` data types.
-- Support for single-channel and multi-channel images.
-- Custom decorators to manage channel dimensions and output constraints.
+Image processing operations can be implemented in various ways, each with its own performance characteristics depending on the image type, size, and number of channels. Albucore aims to provide the fastest implementation for each operation by leveraging different backends such as NumPy, OpenCV, and custom optimized code.
 
-## Installation
+Key features:
+
+- Optimized atomic image processing functions
+- Automatic selection of the fastest implementation based on input image characteristics
+- Seamless integration with Albumentations
+- Extensive benchmarking for performance validation
 
-Install Albucore using pip:
+## Installation
 
 ```bash
-pip install -U albucore
+pip install albucore
 ```
 
-## Example
-
-Here's how you can use Albucore to multiply an image by a constant or a vector:
+## Usage
 
 ```python
-import cv2
 import numpy as np
-from albucore import multiply
+import albucore
+# Create a sample image
+image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
+# Apply a function
+result = albucore.multiply(image, 1.5)
+```
 
-# Load an image
-img = cv2.imread('path_to_your_image.jpg')
+Albucore automatically selects the most efficient implementation based on the input image type and characteristics.
 
-# Multiply by a constant
-multiplied_image = multiply(img, 1.5)
+## Functions
 
-# Multiply by a vector
-multiplier = [1.5, 1.2, 0.9]  # Different multiplier for each channel
-multiplied_image = multiply(img, multiplier)
-```
+Albucore includes optimized implementations for various image processing operations, including:
 
-## Benchmarks
+- Arithmetic operations (add, multiply, power)
+- Normalization (per-channel, global)
+- Geometric transformations (vertical flip, horizontal flip)
+- Helper decorators (to_float, to_uint8)
 
-For detailed benchmark results, including other configurations and data types, refer to the [Benchmark](benchmark/results/) in the repository.
+## Performance
+
+Albucore uses a combination of techniques to achieve high performance:
+
+1. **Multiple Implementations**: Each function may have several implementations using different backends (NumPy, OpenCV, custom code).
+2. **Automatic Selection**: The library automatically chooses the fastest implementation based on the input image type, size, and number of channels.
+3. **Optimized Algorithms**: Custom implementations are optimized for specific use cases, often outperforming general-purpose libraries.
+
+### Benchmarks
+
+We maintain an extensive benchmark suite to ensure Albucore's performance across various scenarios. You can find the benchmarks and their results in the [benchmark](./benchmark/) directory.
 
 ## License
 
-Distributed under the MIT License. See LICENSE for more information.
+Distributed under the MIT License. See [LICENSE](LICENSE) for more information.
+
+## Acknowledgements
+
+Albucore is part of the [Albumentations](https://github.com/albumentations-team/albumentations) project. We'd like to thank all contributors to [Albumentations](https://albumentations.ai/) and the broader computer vision community for their inspiration and support.
diff --git a/albucore/functions.py b/albucore/functions.py
@@ -5,6 +5,7 @@
 
 import cv2
 import numpy as np
+import stringzilla as sz
 
 from albucore.decorators import contiguous, preserve_channel_dim
 from albucore.utils import (
@@ -26,7 +27,9 @@
 
 
 def create_lut_array(
-    dtype: type[np.number], value: float | np.ndarray, operation: Literal["add", "multiply", "power"]
+    dtype: type[np.number],
+    value: float | np.ndarray,
+    operation: Literal["add", "multiply", "power"],
 ) -> np.ndarray:
     max_value = MAX_VALUES_BY_DTYPE[dtype]
 
@@ -42,16 +45,30 @@ def create_lut_array(
     raise ValueError(f"Unsupported operation: {operation}")
 
 
-def apply_lut(img: np.ndarray, value: float | np.ndarray, operation: Literal["add", "multiply", "power"]) -> np.ndarray:
+@contiguous
+def sz_lut(img: np.ndarray, lut: np.ndarray, inplace: bool = True) -> np.ndarray:
+    if not inplace:
+        img = img.copy()
+
+    sz.translate(memoryview(img), memoryview(lut), inplace=True)
+    return img
+
+
+def apply_lut(
+    img: np.ndarray,
+    value: float | np.ndarray,
+    operation: Literal["add", "multiply", "power"],
+    inplace: bool,
+) -> np.ndarray:
     dtype = img.dtype
 
     if isinstance(value, (int, float)):
         lut = create_lut_array(dtype, value, operation)
-        return cv2.LUT(img, clip(lut, dtype))
+        return sz_lut(img, clip(lut, dtype), inplace)
 
     num_channels = img.shape[-1]
     luts = create_lut_array(dtype, value, operation)
-    return cv2.merge([cv2.LUT(img[:, :, i], clip(luts[i], dtype)) for i in range(num_channels)])
+    return cv2.merge([sz_lut(img[:, :, i], clip(luts[i], dtype), inplace) for i in range(num_channels)])
 
 
 def prepare_value_opencv(
@@ -84,17 +101,18 @@ def prepare_value_opencv(
 
 
 def apply_numpy(
-    img: np.ndarray, value: float | np.ndarray, operation: Literal["add", "multiply", "power"]
+    img: np.ndarray,
+    value: float | np.ndarray,
+    operation: Literal["add", "multiply", "power"],
 ) -> np.ndarray:
     if operation == "add" and img.dtype == np.uint8:
         value = np.int16(value)
 
     return np_operations[operation](img.astype(np.float32), value)
 
 
-@preserve_channel_dim
-def multiply_lut(img: np.ndarray, value: np.ndarray | float) -> np.ndarray:
-    return apply_lut(img, value, "multiply")
+def multiply_lut(img: np.ndarray, value: np.ndarray | float, inplace: bool) -> np.ndarray:
+    return apply_lut(img, value, "multiply", inplace)
 
 
 @preserve_channel_dim
@@ -109,18 +127,18 @@ def multiply_numpy(img: np.ndarray, value: float | np.ndarray) -> np.ndarray:
     return apply_numpy(img, value, "multiply")
 
 
-def multiply_by_constant(img: np.ndarray, value: float) -> np.ndarray:
+def multiply_by_constant(img: np.ndarray, value: float, inplace: bool) -> np.ndarray:
     if img.dtype == np.uint8:
-        return multiply_lut(img, value)
+        return multiply_lut(img, value, inplace)
     if img.dtype == np.float32:
         return multiply_numpy(img, value)
     return multiply_opencv(img, value)
 
 
-def multiply_by_vector(img: np.ndarray, value: np.ndarray, num_channels: int) -> np.ndarray:
+def multiply_by_vector(img: np.ndarray, value: np.ndarray, num_channels: int, inplace: bool) -> np.ndarray:
     # Handle uint8 images separately to use a lookup table for performance
     if img.dtype == np.uint8:
-        return multiply_lut(img, value)
+        return multiply_lut(img, value, inplace)
     # Check if the number of channels exceeds the maximum that OpenCV can handle
     if num_channels > MAX_OPENCV_WORKING_CHANNELS:
         return multiply_numpy(img, value)
@@ -132,15 +150,15 @@ def multiply_by_array(img: np.ndarray, value: np.ndarray) -> np.ndarray:
 
 
 @clipped
-def multiply(img: np.ndarray, value: ValueType) -> np.ndarray:
+def multiply(img: np.ndarray, value: ValueType, inplace: bool = False) -> np.ndarray:
     num_channels = get_num_channels(img)
     value = convert_value(value, num_channels)
 
     if isinstance(value, (float, int)):
-        return multiply_by_constant(img, value)
+        return multiply_by_constant(img, value, inplace)
 
     if isinstance(value, np.ndarray) and value.ndim == 1:
-        return multiply_by_vector(img, value, num_channels)
+        return multiply_by_vector(img, value, num_channels, inplace)
 
     return multiply_by_array(img, value)
 
@@ -164,19 +182,18 @@ def add_numpy(img: np.ndarray, value: float | np.ndarray) -> np.ndarray:
     return apply_numpy(img, value, "add")
 
 
-@preserve_channel_dim
-def add_lut(img: np.ndarray, value: np.ndarray | float) -> np.ndarray:
-    return apply_lut(img, value, "add")
+def add_lut(img: np.ndarray, value: np.ndarray | float, inplace: bool) -> np.ndarray:
+    return apply_lut(img, value, "add", inplace)
 
 
 def add_constant(img: np.ndarray, value: float) -> np.ndarray:
     return add_opencv(img, value)
 
 
 @clipped
-def add_vector(img: np.ndarray, value: np.ndarray) -> np.ndarray:
+def add_vector(img: np.ndarray, value: np.ndarray, inplace: bool) -> np.ndarray:
     if img.dtype == np.uint8:
-        return add_lut(img, value)
+        return add_lut(img, value, inplace)
     return add_opencv(img, value)
 
 
@@ -185,7 +202,7 @@ def add_array(img: np.ndarray, value: np.ndarray) -> np.ndarray:
 
 
 @clipped
-def add(img: np.ndarray, value: ValueType) -> np.ndarray:
+def add(img: np.ndarray, value: ValueType, inplace: bool = False) -> np.ndarray:
     num_channels = get_num_channels(img)
     value = convert_value(value, num_channels)
 
@@ -201,7 +218,7 @@ def add(img: np.ndarray, value: ValueType) -> np.ndarray:
     if img.dtype == np.uint8:
         value = value.astype(np.int16)
 
-    return add_vector(img, value) if value.ndim == 1 else add_array(img, value)
+    return add_vector(img, value, inplace) if value.ndim == 1 else add_array(img, value)
 
 
 def normalize_numpy(img: np.ndarray, mean: float | np.ndarray, denominator: float | np.ndarray) -> np.ndarray:
@@ -282,17 +299,17 @@ def power_opencv(img: np.ndarray, value: float) -> np.ndarray:
     raise ValueError(f"Unsupported image type {img.dtype} for power operation with value {value}")
 
 
-@preserve_channel_dim
-def power_lut(img: np.ndarray, exponent: float | np.ndarray) -> np.ndarray:
-    return apply_lut(img, exponent, "power")
+# @preserve_channel_dim
+def power_lut(img: np.ndarray, exponent: float | np.ndarray, inplace: bool = False) -> np.ndarray:
+    return apply_lut(img, exponent, "power", inplace)
 
 
 @clipped
-def power(img: np.ndarray, exponent: ValueType) -> np.ndarray:
+def power(img: np.ndarray, exponent: ValueType, inplace: bool = False) -> np.ndarray:
     num_channels = get_num_channels(img)
     exponent = convert_value(exponent, num_channels)
     if img.dtype == np.uint8:
-        return power_lut(img, exponent)
+        return power_lut(img, exponent, inplace)
 
     if isinstance(exponent, (float, int)):
         return power_opencv(img, exponent)
@@ -349,10 +366,7 @@ def multiply_add_numpy(img: np.ndarray, factor: ValueType, value: ValueType) ->
 
     result = np.multiply(img, factor) if factor != 0 else np.zeros_like(img)
 
-    if value != 0:
-        return np.add(result, value)
-
-    return result
+    return result if value == 0 else np.add(result, value)
 
 
 @preserve_channel_dim
@@ -366,20 +380,17 @@ def multiply_add_opencv(img: np.ndarray, factor: ValueType, value: ValueType) ->
         if factor != 0
         else np.zeros_like(result, dtype=img.dtype)
     )
-    if value != 0:
-        result = cv2.add(result, np.ones_like(result) * value, dtype=cv2.CV_64F)
-    return result
+    return result if value == 0 else cv2.add(result, np.ones_like(result) * value, dtype=cv2.CV_64F)
 
 
-@preserve_channel_dim
-def multiply_add_lut(img: np.ndarray, factor: ValueType, value: ValueType) -> np.ndarray:
+def multiply_add_lut(img: np.ndarray, factor: ValueType, value: ValueType, inplace: bool) -> np.ndarray:
     dtype = img.dtype
     max_value = MAX_VALUES_BY_DTYPE[dtype]
     num_channels = get_num_channels(img)
 
     if isinstance(factor, (float, int)) and isinstance(value, (float, int)):
         lut = clip(np.arange(0, max_value + 1, dtype=np.float32) * factor + value, dtype)
-        return cv2.LUT(img, lut)
+        return sz_lut(img, lut, inplace)
 
     if isinstance(factor, np.ndarray) and factor.shape != ():
         factor = factor.reshape(-1, 1)
@@ -389,17 +400,17 @@ def multiply_add_lut(img: np.ndarray, factor: ValueType, value: ValueType) -> np
 
     luts = clip(np.arange(0, max_value + 1, dtype=np.float32) * factor + value, dtype)
 
-    return cv2.merge([cv2.LUT(img[:, :, i], luts[i]) for i in range(num_channels)])
+    return cv2.merge([sz_lut(img[:, :, i], luts[i], inplace) for i in range(num_channels)])
 
 
 @clipped
-def multiply_add(img: np.ndarray, factor: ValueType, value: ValueType) -> np.ndarray:
+def multiply_add(img: np.ndarray, factor: ValueType, value: ValueType, inplace: bool = False) -> np.ndarray:
     num_channels = get_num_channels(img)
     factor = convert_value(factor, num_channels)
     value = convert_value(value, num_channels)
 
     if img.dtype == np.uint8:
-        return multiply_add_lut(img, factor, value)
+        return multiply_add_lut(img, factor, value, inplace)
 
     return multiply_add_opencv(img, factor, value)