I have a very basic group-by function that I want to use in a Cython object, but it is something like 400 times slower than a similar Python function JIT-compiled by Numba.
This is my Cython function:
@cython.boundscheck(False)  # Deactivate bounds checking.
@cython.wraparound(False)  # Deactivate negative indexing.
# NOTE(review): the original decorator here was the undefined name
# `cythonall`; `cython.ccall` is the closest real Cython decorator - confirm.
@cython.ccall
def groupby(
    in_index: cython.long[:], in_values: cython.float[:], number_of_results: cython.long
) -> cython.double[:]:
    """Sum ``in_values`` into buckets selected by ``in_index``.

    For every position ``n``, ``in_values[n]`` is added to element
    ``in_index[n]`` of a zero-initialised float64 array of length
    ``number_of_results``, and that array is returned.

    Bounds checking and negative-index wraparound are disabled, so every
    entry of ``in_index`` must lie in ``[0, number_of_results)``.

    NOTE(review): ``in_values`` is declared as a view of 32-bit C floats and
    ``in_index`` as C ``long`` (whose width is platform dependent); arrays of
    another dtype, e.g. float64 values, are presumably rejected with a buffer
    dtype mismatch - confirm the intended dtypes against the callers.
    """
    results = np.zeros(number_of_results, dtype=np.float64)
    # The view's element type has to match the float64 buffer allocated above;
    # a `cython.float` (32-bit) view over it fails with a dtype mismatch.
    results_view: cython.double[:] = results
    index_max: cython.Py_ssize_t = in_index.shape[0]
    n: cython.Py_ssize_t
    for n in range(index_max):
        results_view[in_index[n]] += in_values[n]
    return results
This is the equivalent function with Numba:
@jit(nopython=True, fastmath=True)
def agg_sims(group_idx, a, number_of_sims):
    """Accumulate the values of ``a`` into one total per group.

    Element ``a[k]`` is added to slot ``group_idx[k]`` of a zero-initialised
    float64 array of length ``number_of_sims``; that array is returned.
    """
    totals = np.zeros(number_of_sims, dtype=np.float64)
    n_items = group_idx.shape[0]
    for pos in range(n_items):
        slot = group_idx[pos]
        totals[slot] = totals[slot] + a[pos]
    return totals
I'd like to use Cython because Numba doesn't deal well with the object I built to hold the index and values.
I isolated the function into its own file, compiled it with Cython (on Windows with the VS Build Tools), and tested it with a set of random data in which the index is ordered.
This is an example of the data:
# Small illustrative sample of the input data.
# sim_index holds one group id per entry, listed in non-decreasing order.
sim_index=np.array([0,1,2,2,3,3,3,3,3,4,4,5,5,5,5,6,6,7,7,7,7,7,8,8,9,9,9])
# loss_values holds one amount per entry; presumably element k belongs to
# group sim_index[k], matching how the group-by functions pair their inputs.
loss_values=np.array([983218.798545568,78773.246069412,427915.701586101,384401.565066934,976474.931385909,
491002.57968459,449277.841899304,2157814.69086177,634849.51236713,683151.164470444,951717.254327894,
338593.160096285,111153.600572457,327118.874300972,585857.183862426,1793115.50344123,775898.742468206,
236320.911958699,412790.930873261,894075.886633843,400713.081209467,1142732.59966746,
651378.893317142,238021.483213581,50557.1083702021,75622.454060533,153180.479257057])
The actual data is about 2.5 million values.
Timing the Cython groupby with timeit gave me:
805 ms ± 30.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
whereas the Numba version with the same data gave:
4.35 ms ± 667 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)
I'm at a loss as to why the Cython function is so much slower than the Numba function.
The Cython function doesn't show any major Python interactions in the annotation HTML file, really just the NumPy function call.
Any suggestions would be really helpful.
Update: I put all the code into a Jupyter notebook to show a full case, but in this example the Cython version is about the same speed as the Numba version. I did have to change the longs to longlongs. I made the same change to the original function and recompiled, and it still has performance issues, so I'm guessing there is a compiler setting that the notebook uses and that I'm missing in the setup.py.
'''
#cell
%load_ext cython
import numpy as np
import cython
from numba import jit
#cell
%%cython
import numpy as np
import cython
@cython.boundscheck(False)  # Skip index bounds checks inside the loop.
@cython.wraparound(False)  # Skip negative-index handling inside the loop.
def groupby_cy(
    in_index: cython.longlong[::1],
    in_values: cython.double[::1],
    number_of_results: cython.longlong,
) -> cython.double[:]:
    """Add each value to the running total of the group it belongs to.

    ``in_values[k]`` is accumulated into slot ``in_index[k]`` of a
    zero-initialised float64 buffer of length ``number_of_results``, which is
    returned as a memoryview. With bounds checking and wraparound disabled,
    every entry of ``in_index`` must lie in ``[0, number_of_results)``.
    """
    totals: cython.double[::1] = np.zeros(number_of_results, dtype=np.float64)
    n_items: cython.Py_ssize_t = in_index.shape[0]
    pos: cython.Py_ssize_t
    for pos in range(n_items):
        totals[in_index[pos]] += in_values[pos]
    return totals
#cell
@jit("float64[:](int64[:], float64[:], int64)", nopython=True, fastmath=True)
def groupby_nb(group_idx, a, number_of_sims):
    """Return per-group sums of ``a``, grouped by ``group_idx``.

    Each ``a[k]`` is added to position ``group_idx[k]`` of a float64 array of
    ``number_of_sims`` zeros. The explicit signature in the decorator fixes the
    argument types to int64 indices, float64 values and an int64 count.
    """
    sums = np.zeros(number_of_sims, dtype=np.float64)
    count = group_idx.shape[0]
    for k in range(count):
        target = group_idx[k]
        sums[target] = sums[target] + a[k]
    return sums
#cell
#sample data
# Benchmark input: one Poisson-distributed event count per simulation and one
# normally distributed value per event. Every draw comes from the same seeded
# generator so the data is identical from run to run (the global
# `np.random` state is unseeded and would give different counts each time).
number_of_sims = 1_000_000
rng_generator = np.random.default_rng(100)
number_of_events = rng_generator.poisson(6, number_of_sims)
# Repeat each simulation id once per event, giving an ordered group index.
sim_index = np.arange(number_of_sims).repeat(number_of_events)
sim_values = rng_generator.normal(10000, 10, len(sim_index))
#cell
#timings
%%timeit
# Time the Numba implementation on the generated sample data.
test_nb = groupby_nb(sim_index, sim_values, number_of_sims)
#cell
%%timeit
# Time the Cython implementation on the same sample data.
test_cy = groupby_cy(sim_index, sim_values, number_of_sims)
''' This is the current setup.py
'''
from typing import Annotated
from setuptools import setup, Extension
from Cython.Build import cythonize
import numpy
# Describe the single extension to build: the group-by module, compiled from
# its .py source with the /O2 optimisation flag.
groupby_extension = Extension(
    name="groupby_cy",
    sources=[r".\libs\groupby_cy.py"],
    extra_compile_args=["/O2"],
)
extensions = [groupby_extension]

# Cython settings: Python 3 language level, plus an annotated report.
cython_directives = {
    "language_level": "3",
}
cythonized_modules = cythonize(
    module_list=extensions,
    compiler_directives=cython_directives,
    annotate=True,
)

# The NumPy headers are passed to setup() so the build can find them.
setup(
    ext_modules=cythonized_modules,
    include_dirs=[numpy.get_include()],
)
'''