I have two versions of an ROI analysis class - one using NumPy and one using CuPy. The CuPy version is running much slower despite using an RTX 4090. Both versions perform the same operations:
# Standard library imports
import itertools
import json
from typing import Any, Dict, List, Optional, Tuple, Union
# Third-party library imports
import cupy as cp
import pandas as pd
from cupyx.scipy import stats
from cupyx.scipy.ndimage import label
from cucim.skimage.measure import regionprops_table
import cudf
# Local imports
from HiTMicTools.utils import adjust_dimensions, stack_indexer
# Type hints
from numpy.typing import NDArray
from pandas import DataFrame, Series
def roi_skewness(regionmask, intensity):
"""Cupy version for the ROI standard deviation as defined in analysis_tools.utils"""
roi_intensities = intensity[regionmask]
try:
# Check if there are enough unique values in roi_intensities
unique_values = cp.unique(roi_intensities)
if len(unique_values) < 10:
return 0
return float(stats.skew(roi_intensities, bias=False))
except Exception:
return 0
def roi_std_dev(regionmask, intensity):
"""Cupy version for the ROI standard deviation as defined in analysis_tools.utils"""
roi_intensities = intensity[regionmask]
return float(cp.std(roi_intensities))
def coords_centroid(coords):
centroid = cp.mean(coords, axis=0)
    return pd.Series(cp.asnumpy(centroid), index=["slice", "y", "x"])
def convert_to_list_and_dump(row):
return json.dumps(row.tolist())
def stack_indexer_ingpu(
nframes: Union[int, List[int], range] = [0],
nslices: Union[int, List[int], range] = [0],
nchannels: Union[int, List[int], range] = [0],
) -> cp.ndarray:
"""
Generate an index table for accessing specific frames, slices, and channels in an image stack.
This aims to simplify the process of iterating over different combinations of frame, slice,
and channel indices with for loops.
Args:
nframes (Union[int, List[int], range], optional): Frame indices. Defaults to [0].
nslices (Union[int, List[int], range], optional): Slice indices. Defaults to [0].
nchannels (Union[int, List[int], range], optional): Channel indices. Defaults to [0].
Returns:
cp.ndarray: Index table with shape (n_combinations, 3), where each row represents a combination
of frame, slice, and channel indices.
Raises:
ValueError: If any dimension contains negative integers.
TypeError: If any dimension is not an integer, list of integers, or range object.
"""
dimensions = []
for dimension in [nframes, nslices, nchannels]:
if isinstance(dimension, int):
if dimension < 0:
raise ValueError("Dimensions must be positive integers or lists.")
dimensions.append([dimension])
elif isinstance(dimension, (list, range)):
if not all(isinstance(i, int) and i >= 0 for i in dimension):
raise ValueError(
"All elements in the list dimensions must be positive integers."
)
dimensions.append(dimension)
else:
raise TypeError(
"All dimensions must be either positive integers or lists of positive integers."
)
combinations = list(itertools.product(*dimensions))
index_table = cp.array(combinations)
return index_table
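# Usage sketch (my illustration, not part of the original module):
# stack_indexer_ingpu([0, 1], [0], range(3)) returns a cp.ndarray of shape (6, 3),
# one row per (frame, slice, channel) combination: [0, 0, 0], [0, 0, 1], ..., [1, 0, 2].
# Note that itertools.product runs on the CPU; only the final cp.array lives on the GPU.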
class RoiAnalyser:
def __init__(self, image, probability_map, stack_order=("TSCXY", "TXY")):
image = adjust_dimensions(image, stack_order[0])
probability_map = adjust_dimensions(probability_map, stack_order[1])
self.img = cp.asarray(image)
self.proba_map = cp.asarray(probability_map)
self.stack_order = stack_order
def create_binary_mask(self, threshold=0.5):
"""
Create a binary mask from an image stack of probabilities.
        Args:
            threshold (float): The threshold value to use for binarization (default: 0.5).

        Returns:
            None. Stores a boolean mask in self.binary_mask with the same shape as the
            probability map, True where the probability exceeds the threshold.
"""
# Convert probabilities to binary values
self.binary_mask = self.proba_map > threshold
def clean_binmask(self, min_pixel_size=20):
"""
Clean the binary mask by removing small ROIs.
Args:
min_pixel_size (int): Minimum ROI size in pixels.
        Returns:
            None. The cleaned mask replaces self.binary_mask.
"""
labeled, num_features = self.get_labels(return_value=True)
        sizes = cp.bincount(labeled.ravel())[1:]  # Exclude background (label 0)
mask_sizes = sizes >= min_pixel_size
label_map = cp.zeros(num_features + 1, dtype=int)
label_map[1:][mask_sizes] = cp.arange(1, cp.sum(mask_sizes) + 1)
cleaned_labeled = label_map[labeled]
cleaned_mask = cleaned_labeled > 0
self.binary_mask = cleaned_mask
def get_labels(self, return_value=False):
"""
Get the labeled mask for the binary mask.
Returns:
None
"""
labeled_mask = cp.empty_like(self.binary_mask, dtype=int)
num_rois = 0
max_label = 0
for i in range(self.binary_mask.shape[0]):
labeled_frame, num = label(self.binary_mask[i])
labeled_mask[i] = cp.where(
labeled_frame != 0, labeled_frame + max_label, labeled_frame
)
max_label += num
num_rois += num
if return_value:
return labeled_mask, num_rois
else:
self.total_rois = num_rois
self.labeled_mask = labeled_mask
def get_roi_measurements(
self,
target_channel=0,
target_slice=0,
properties=["label", "centroid", "mean_intensity"],
extra_properties=None,
):
"""
Get measurements for each ROI in the labeled mask for a specific channel and all frames.
        Args:
            target_channel (int, optional): Channel index to measure. Defaults to 0.
            target_slice (int, optional): Slice index to measure. Defaults to 0.
            properties (list, optional): Region properties to compute for each ROI.
                Defaults to ["label", "centroid", "mean_intensity"].
            extra_properties (list, optional): Extra per-ROI callables passed through
                to regionprops_table. Defaults to None.

        Returns:
            pandas.DataFrame: One row per ROI with the requested properties plus
            frame, slice, and channel columns (computed on GPU, converted to pandas).
"""
assert (
self.labeled_mask is not None
), "Run get_labels() first to generate labeled mask"
img = self.img[:, target_slice, target_channel, :, :]
labeled_mask = self.labeled_mask[:, target_slice, 0, :, :]
all_roi_properties = []
for frame in range(img.shape[0]):
img_frame = img[frame]
labeled_mask_frame = labeled_mask[frame]
roi_properties = regionprops_table(
labeled_mask_frame,
intensity_image=img_frame,
properties=properties,
separator="_",
extra_properties=extra_properties,
)
roi_properties_df = cudf.DataFrame(roi_properties)
roi_properties_df["frame"] = frame
roi_properties_df["slice"] = target_channel
roi_properties_df["channel"] = target_slice
all_roi_properties.append(roi_properties_df)
all_roi_properties_cudf = cudf.concat(all_roi_properties, ignore_index=True)
if "coords" in all_roi_properties_cudf.columns:
all_roi_properties_cudf["coords"] = all_roi_properties_cudf["coords"].apply(
convert_to_list_and_dump
)
# rearrange the columns
required_cols = ["label", "frame", "slice", "channel"]
other_cols = [
col for col in all_roi_properties_cudf.columns if col not in required_cols
]
cols = required_cols + other_cols
all_roi_properties_cudf = all_roi_properties_cudf[cols]
# Convert to pandas DataFrame at the very end
all_roi_properties_df = all_roi_properties_cudf.to_pandas()
return all_roi_properties_df
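For reference, a minimal way to time the whole GPU pipeline end to end looks like this. It is only a sketch with synthetic data: the shapes are placeholders, stack_order=("TXY", "TXY") assumes that adjust_dimensions pads the missing slice/channel axes out to the 5D layout the class indexes into, and the final synchronize is there so asynchronous kernel launches do not cut the measurement short.

import time

import cupy as cp
import numpy as np

# Synthetic stand-ins for the real microscopy stacks (placeholder shapes).
frames, height, width = 50, 1024, 1024
rng = np.random.default_rng(0)
image = rng.random((frames, height, width), dtype=np.float32)
proba_map = rng.random((frames, height, width), dtype=np.float32)

analyser = RoiAnalyser(image, proba_map, stack_order=("TXY", "TXY"))

start = time.perf_counter()
analyser.create_binary_mask(threshold=0.5)
analyser.clean_binmask(min_pixel_size=20)
analyser.get_labels()
df = analyser.get_roi_measurements(target_channel=0, target_slice=0)
cp.cuda.get_current_stream().synchronize()  # flush pending GPU work before stopping the clock
print(f"GPU pipeline: {time.perf_counter() - start:.2f} s, {len(df)} ROIs")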
The changes in the GPU version are the drop-in library swaps:
- NumPy -> CuPy for array operations
- scipy -> cupyx.scipy for label()
- skimage -> cucim.skimage for regionprops_table()
- pandas -> cudf for DataFrames
The GPU version uses identical algorithms but additionally moves data between CPU and GPU memory (cp.asarray on the inputs, .to_pandas() on the output); a kernel-level timing check is sketched below.
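To check whether the individual CuPy kernels are themselves slow, single operations can be timed with proper stream synchronization via cupyx.profiler.benchmark (a minimal sketch; the array size is arbitrary):

import time

import cupy as cp
import numpy as np
from cupyx.profiler import benchmark

proba_cpu = np.random.rand(1024, 1024).astype(np.float32)
proba_gpu = cp.asarray(proba_cpu)

# GPU: benchmark() adds the synchronization that naive wall-clock timing misses.
print(benchmark(lambda: proba_gpu > 0.5, n_repeat=100))

# CPU: plain wall-clock timing of the same elementwise threshold.
start = time.perf_counter()
for _ in range(100):
    _ = proba_cpu > 0.5
print(f"NumPy threshold: {(time.perf_counter() - start) / 100 * 1e6:.1f} us per call")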
The processing pipeline includes (each stage is timed separately in the sketch after this list):
- Binary mask creation
- Connected component labeling
- Region property measurements
- DataFrame operations
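To see which of these stages dominates, each one can be timed with the device synchronized after every call, so asynchronous kernel launches do not hide where the time goes. This is a sketch: timed is a throwaway helper, and analyser is the instance built in the GPU timing sketch above.

import time

import cupy as cp

def timed(stage_name, fn, *args, **kwargs):
    """Run one pipeline stage and print its wall-clock time with GPU work flushed."""
    start = time.perf_counter()
    result = fn(*args, **kwargs)
    cp.cuda.get_current_stream().synchronize()
    print(f"{stage_name:<22s} {time.perf_counter() - start:8.3f} s")
    return result

timed("create_binary_mask", analyser.create_binary_mask, 0.5)
timed("clean_binmask", analyser.clean_binmask, 20)
timed("get_labels", analyser.get_labels)
df = timed("get_roi_measurements", analyser.get_roi_measurements)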
Environment (see the version-check snippet after this list):
- RTX 4090 24GB
- CUDA 11.8
- CuPy 12.0.0
- Python 3.9
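A quick check that this driver/toolkit/GPU combination is actually the one in use:

import cupy as cp

cp.show_config()  # CuPy build info, CUDA versions, detected devices
print(cp.cuda.runtime.runtimeGetVersion())             # e.g. 11080 for CUDA 11.8
print(cp.cuda.runtime.getDeviceProperties(0)["name"])  # should report the RTX 4090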
What could be causing this performance degradation? Are there specific optimizations I should implement for the GPU version?
CPU RoiAnalyser:
# Standard library imports
import json
from typing import Any, Dict, List, Optional, Tuple, Union
# Third-party library imports
import numpy as np
import pandas as pd
from scipy.ndimage import label
from skimage.measure import regionprops_table
# Local imports
from HiTMicTools.utils import adjust_dimensions, stack_indexer
# Type hints
from numpy.typing import NDArray
from pandas import DataFrame, Series
def coords_centroid(coords):
centroid = np.mean(coords, axis=0)
return pd.Series(centroid, index=["slice", "y", "x"])
def convert_to_list_and_dump(row):
return json.dumps(row.tolist())
class RoiAnalyser:
def __init__(self, image, probability_map, stack_order=("TSCXY", "TXY")):
image = adjust_dimensions(image, stack_order[0])
probability_map = adjust_dimensions(probability_map, stack_order[1])
self.img = image
self.proba_map = probability_map
self.stack_order = stack_order
def create_binary_mask(self, threshold=0.5):
"""
Create a binary mask from an image stack of probabilities.
        Args:
            threshold (float): The threshold value to use for binarization (default: 0.5).

        Returns:
            None. Stores a boolean mask in self.binary_mask with the same shape as the
            probability map, True where the probability exceeds the threshold.
"""
# Convert probabilities to binary values
self.binary_mask = self.proba_map > threshold
def clean_binmask(self, min_pixel_size=20):
"""
Clean the binary mask by removing small ROIs.
Args:
min_pixel_size (int): Minimum ROI size in pixels.
        Returns:
            None. The cleaned mask replaces self.binary_mask.
"""
labeled, num_features = self.get_labels(return_value=True)
sizes = np.bincount(labeled.ravel())[1:] # Exclude background (label 0)
mask_sizes = sizes >= min_pixel_size
label_map = np.zeros(num_features + 1, dtype=int)
label_map[1:][mask_sizes] = np.arange(1, np.sum(mask_sizes) + 1)
cleaned_labeled = label_map[labeled]
cleaned_mask = cleaned_labeled > 0
self.binary_mask = cleaned_mask
def get_labels(self, return_value=False):
"""
Get the labeled mask for the binary mask.
Returns:
None
"""
labeled_mask = np.empty_like(self.binary_mask, dtype=int)
num_rois = 0
max_label = 0
for i in range(self.binary_mask.shape[0]):
labeled_frame, num = label(self.binary_mask[i])
labeled_mask[i] = np.where(
labeled_frame != 0, labeled_frame + max_label, labeled_frame
)
max_label += num
num_rois += num
if return_value:
return labeled_mask, num_rois
else:
self.total_rois = num_rois
self.labeled_mask = labeled_mask
def get_roi_measurements(
self,
target_channel=0,
target_slice=0,
properties=["label", "centroid", "mean_intensity"],
extra_properties=None,
):
"""
Get measurements for each ROI in the labeled mask for a specific channel and all frames.
        Args:
            target_channel (int, optional): Channel index to measure. Defaults to 0.
            target_slice (int, optional): Slice index to measure. Defaults to 0.
            properties (list, optional): Region properties to compute for each ROI.
                Defaults to ["label", "centroid", "mean_intensity"].
            extra_properties (list, optional): Extra per-ROI callables passed through
                to regionprops_table. Defaults to None.

        Returns:
            pandas.DataFrame: One row per ROI with the requested properties plus
            frame, slice, and channel columns.
"""
assert (
self.labeled_mask is not None
), "Run get_labels() first to generate labeled mask"
img = self.img[:, target_slice, target_channel, :, :]
labeled_mask = self.labeled_mask[:, target_slice, 0, :, :]
all_roi_properties = []
for frame in range(img.shape[0]):
img_frame = img[frame]
labeled_mask_frame = labeled_mask[frame]
roi_properties = regionprops_table(
labeled_mask_frame,
intensity_image=img_frame,
properties=properties,
separator="_",
extra_properties=extra_properties,
)
roi_properties_df = pd.DataFrame(roi_properties)
roi_properties_df["frame"] = frame
roi_properties_df["slice"] = target_channel
roi_properties_df["channel"] = target_slice
all_roi_properties.append(roi_properties_df)
all_roi_properties_df = pd.concat(all_roi_properties, ignore_index=True)
if "coords" in all_roi_properties_df.columns:
all_roi_properties_df["coords"] = all_roi_properties_df["coords"].apply(
convert_to_list_and_dump
)
# rearrange the columns
required_cols = ["label", "frame", "slice", "channel"]
other_cols = [
col for col in all_roi_properties_df.columns if col not in required_cols
]
cols = required_cols + other_cols
all_roi_properties_df = all_roi_properties_df[cols]
return all_roi_properties_df
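For completeness, a matching CPU-side driver (same synthetic data and the same adjust_dimensions assumption as the GPU sketch above):

import time

import numpy as np

frames, height, width = 50, 1024, 1024
rng = np.random.default_rng(0)
image = rng.random((frames, height, width), dtype=np.float32)
proba_map = rng.random((frames, height, width), dtype=np.float32)

analyser_cpu = RoiAnalyser(image, proba_map, stack_order=("TXY", "TXY"))

start = time.perf_counter()
analyser_cpu.create_binary_mask(threshold=0.5)
analyser_cpu.clean_binmask(min_pixel_size=20)
analyser_cpu.get_labels()
df_cpu = analyser_cpu.get_roi_measurements(target_channel=0, target_slice=0)
print(f"CPU pipeline: {time.perf_counter() - start:.2f} s, {len(df_cpu)} ROIs")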