I have this function to calculate Cohen's D for some csv files
import statistics
import numpy as np
def cohen_d(a,b):
float_a = []
float_b = []
for x in a:
try:
float_a.append(float(x))
except ValueError:
return "NAN"
for y in b:
try:
float_b.append(float(y))
except ValueError:
return "NAN"
try:
res=(statistics.fmean(float_a) - statistics.fmean(float_b)) / statistics.stdev(float_a+float_b)
except ZeroDivisionError:
res=0
return res
def cohen_d_np(a,b):
float_a = []
float_b = []
for x in a:
try:
float_a.append(float(x))
except ValueError:
return "NAN"
for y in b:
try:
float_b.append(float(y))
except ValueError:
return "NAN"
float_a = np.array(float_a)
float_b = np.array(float_b)
try:
res=(statistics.fmean(float_a) - statistics.fmean(float_b)) / statistics.stdev(np.concatenate([float_a,float_b]))
except ZeroDivisionError:
res=0
return res
When I try to run it on my data, I systematically get this error for the np
version, or just 'float' object has no attribute 'numerator'
for the list based version:
RuleException:
AttributeError in file /work/user/cburnard/PIPELINES/ChIPseq/Snakefile, line 111:
'numpy.float64' object has no attribute 'numerator'
File "/work/user/cburnard/PIPELINES/ChIPseq/Snakefile", line 903, in __rule_calculate_multimapbw_indiv_scores
File "/work/user/cburnard/PIPELINES/ChIPseq/Snakefile", line 162, in effect_size_mmb
File "/work/user/cburnard/PIPELINES/ChIPseq/Snakefile", line 111, in cohen_d_np
File "/tools/devel/python/Python-3.11.1/lib/python3.11/statistics.py", line 922, in stdev
File "/tools/devel/python/Python-3.11.1/lib/python3.11/concurrent/futures/thread.py", line 58, in run
Where is this error coming from? I don't know exactly on what line of data it is occurring, but in theory even if it's non-numerical due to some bug, I should still get the return "NAN"
instead of this error.
I have this function to calculate Cohen's D for some csv files
import statistics
import numpy as np
def cohen_d(a,b):
float_a = []
float_b = []
for x in a:
try:
float_a.append(float(x))
except ValueError:
return "NAN"
for y in b:
try:
float_b.append(float(y))
except ValueError:
return "NAN"
try:
res=(statistics.fmean(float_a) - statistics.fmean(float_b)) / statistics.stdev(float_a+float_b)
except ZeroDivisionError:
res=0
return res
def cohen_d_np(a,b):
float_a = []
float_b = []
for x in a:
try:
float_a.append(float(x))
except ValueError:
return "NAN"
for y in b:
try:
float_b.append(float(y))
except ValueError:
return "NAN"
float_a = np.array(float_a)
float_b = np.array(float_b)
try:
res=(statistics.fmean(float_a) - statistics.fmean(float_b)) / statistics.stdev(np.concatenate([float_a,float_b]))
except ZeroDivisionError:
res=0
return res
When I try to run it on my data, I systematically get this error for the np
version, or just 'float' object has no attribute 'numerator'
for the list based version:
RuleException:
AttributeError in file /work/user/cburnard/PIPELINES/ChIPseq/Snakefile, line 111:
'numpy.float64' object has no attribute 'numerator'
File "/work/user/cburnard/PIPELINES/ChIPseq/Snakefile", line 903, in __rule_calculate_multimapbw_indiv_scores
File "/work/user/cburnard/PIPELINES/ChIPseq/Snakefile", line 162, in effect_size_mmb
File "/work/user/cburnard/PIPELINES/ChIPseq/Snakefile", line 111, in cohen_d_np
File "/tools/devel/python/Python-3.11.1/lib/python3.11/statistics.py", line 922, in stdev
File "/tools/devel/python/Python-3.11.1/lib/python3.11/concurrent/futures/thread.py", line 58, in run
Where is this error coming from? I don't know exactly on what line of data it is occurring, but in theory even if it's non-numerical due to some bug, I should still get the return "NAN"
instead of this error.
1 Answer
Reset to default 1It looks like you are calling standard library statistics
functions on a NumPy array. statistics
is not meant to accept NumPy arrays, so it is raising the error. To fix the problem, use +
instead of np.concatenate
to concatenate the lists, or use the corresponding NumPy functions; e.g., np.mean
instead of statistics.mean
and np.std
(with ddof=1
) instead of statistics.stdev
.
import numpy as np
import statistics
a = [1, 2, 3, 4, 5]
b = [6, 7, 8, 9]
# statistics.stdev(np.concatenate([a, b])) # AttributeError
statistics.stdev(a + b) # 2.7386127875258306
np.std(np.concatenate([a, b]), ddof=1) # np.float64(2.7386127875258306)
np.std(a + b, ddof=1) # np.float64(2.7386127875258306)
import
s and sample data in your code so it runs, and so it's obvious thatstatistics
is from the standard library. – Matt Haberland Commented Mar 24 at 18:09statistics
rather than the Numpy functions I suggested. – Matt Haberland Commented Mar 25 at 15:11