I am trying to fit a GPD (generalized Pareto distribution) in Python. The standard genpareto.fit from SciPy works, but I would also like the parameter errors (sensitivities), which it does not provide. When I try to fit the GPD myself by minimizing the log-likelihood with differential_evolution, the results are very unsatisfactory. Could you help me write a proper fitting function for the GPD, or suggest a trick to get the sensitivity of the parameters? Thank you.
import numpy as np
from scipy.stats import genpareto
from scipy.optimize import differential_evolution
shape_fit, loc_fit, scale_fit = genpareto.fit(exceedances, floc=0)
def gpd_neg_log_likelihood(params, data):
    xi, beta = params[0], params[1]
    if beta <= 0:
        return np.inf
    if xi == 0:
        log_likelihood = -np.sum(np.log(1 / beta) + data / beta)
    else:
        log_likelihood = -np.sum(
            -np.log(beta) - (1 + (1 / xi)) * np.log(1 + (xi * data) / beta)
        )
    return -log_likelihood
result_global = differential_evolution(
    gpd_neg_log_likelihood,
    bounds,
    args=(exceedances,),
    strategy="best2bin",
    popsize=50,
    maxiter=1000,
    tol=1e-6,
    seed=42,
)
exceedances = np.array([3.22527379e+00, 2.39897511e+00, 1.82064422e+00, 5.87481698e+00,
2.07146413e+01, 8.62207613e+00, 6.21303075e+00, 6.96559297e+00,
3.75901903e-01, 1.20695461e+01, 1.82064422e+00, 1.82064422e+00,
8.12079063e+00, 1.64619327e+01, 6.38521230e-02, 2.57750366e+02,
4.38286969e+00, 8.19913616e-01, 2.11346999e+00, 1.17481698e+00,
1.35702782e+01, 8.25036603e-01, 3.56515373e-01, 1.39729136e+01,
1.41563041e+00, 2.75688073e+00, 2.86415465e+01, 1.38845347e+00,
3.67431193e+00, 1.54698558e+01, 3.05176933e+00, 7.39515072e+00,
3.27762779e+00, 5.07254260e+01, 2.36369594e+00, 2.05832241e+00,
4.72280472e+00, 9.17889908e+00, 4.37758847e-01, 1.60987025e+00,
2.19331586e+00, 2.95373526e+00, 9.22018349e-01, 4.45655308e+01,
1.98582640e-01, 4.67802378e+00, 5.32045184e+00, 1.94702735e+01,
6.39060642e+00, 1.95600476e-01, 1.57491082e+00, 1.45499405e+01,
7.72889417e-03, 6.02074911e+01, 2.17625945e+01, 1.04262782e+01,
1.67582259e+01, 8.85850178e-02, 7.34705228e-01, 6.17352614e-02,
4.51112347e+00, 4.57230256e+00, 2.49207008e+00, 4.25472747e-01,
7.13181313e+00, 1.41655395e+00, 1.72969967e-01, 7.84816463e+00,
5.93159066e+00, 5.62347052e+00, 3.81413613e+00, 6.12303665e+00,
8.79319372e+00, 8.12303665e+00, 1.31464838e+01, 1.03115183e+01,
1.36623037e+01, 1.33481675e+01, 1.73560209e+00, 2.14397906e+00,
2.03926702e+00, 1.66375670e+01, 1.08000000e+01, 6.43355000e-01,
4.10000000e+01, 7.00000000e-01, 1.58500000e+00, 8.06654000e-01,
1.06300000e+00, 5.00000000e+00, 3.70000000e+00, 6.72500000e+00,
8.73900000e+00, 5.19106360e+01, 3.50000000e-01, 8.80000000e+00,
6.67000000e-01, 8.00000000e+00, 1.20000000e+00, 5.20000000e+00,
1.39000000e+01, 1.73000000e+00, 3.21000000e+00, 1.00000000e-01,
1.29845709e+00, 6.55400193e+00, 1.09416586e+01, 2.35260366e+01,
7.03616201e+00, 2.85920926e-01, 2.85920926e-01, 5.75216972e-01,
1.28220829e+01, 2.85920926e-01, 9.30568949e-02, 3.17888139e+00,
2.60028930e+00, 4.77000964e+00, 1.15684667e+01, 1.22434908e+01,
1.77838590e+01, 1.08627087e+00, 1.64285714e+00, 5.63172542e+00,
7.02319109e+00, 2.51391466e-01, 6.58627087e-02, 2.51391466e-01,
3.31261596e+00, 2.69675325e+01, 2.35371058e+01, 1.30528757e+01,
1.13831169e+01, 2.29220779e+00, 2.23293135e+01, 3.99814471e-01,
6.55936920e+00, 8.07977737e-01, 3.96196660e+00, 2.10667904e+00,
6.09554731e+00, 1.60111317e+00, 1.09152618e+01, 5.15971606e-01,
1.29241349e+01, 2.48580302e+00, 1.01109139e+00, 3.26543922e+01,
2.67524401e-01, 1.33229814e+00, 2.04214729e+00, 1.81455191e-01,
3.72803904e+00, 2.18380657e+01, 2.04214729e+00, 6.40195209e-01,
6.30124224e+00, 1.97883762e+01, 4.70408163e+00, 4.89352263e-01,
1.49525288e+01, 1.59849157e+00, 4.15195209e+01, 1.90785271e+01,
1.03828749e+01, 2.04214729e+00, 1.33096717e-03, 2.04538598e+01,
3.37311446e+00, 7.11180124e-01, 5.32519965e+00, 2.67524401e-01,
2.55559006e+01, 1.90554615e+01, 3.65914479e+01, 8.89458086e+00,
1.53636749e+01, 3.72946655e+00, 4.27180356e-01, 1.64690940e-01,
8.89458086e+00, 4.63717189e+00, 1.27392041e+00, 2.15495343e-01,
9.35224386e-01, 3.81033023e-03, 7.30186283e+00, 1.05880610e+01,
1.46913209e+02, 7.87044877e-01, 8.51354784e+00, 2.86748518e+00,
3.89881456e+00, 2.70533446e-01, 7.20110076e+00, 6.18501270e+00,
5.11854361e-01, 2.68878069e+01, 5.08425064e+00, 1.31282811e+01,
1.92574257e+00, 5.49834983e+00, 2.75082508e+00, 6.87623762e+00,
1.12211221e-01, 9.78465347e+00, 4.68481848e+00, 8.72937294e-01,
2.58580858e+00, 7.00495050e-01, 7.70132013e+00, 1.53267327e+01,
4.40594059e-01, 2.75082508e+00, 9.35148515e+00, 1.39157591e+02,
2.31303630e+01, 1.37656766e+01, 2.80528053e-02, 2.75577558e-01,
1.22392739e+01])
1 Answer
Assuming your data is stored in exceedances, you can produce bootstrap confidence intervals of the parameters like:
from scipy import stats

def statistic(x):
    params = stats.genpareto.fit(x, floc=0)
    return params[0], params[2]  # shape and scale (loc is fixed at 0)

res = stats.bootstrap((exceedances,), statistic)
res.confidence_interval
# ConfidenceInterval(low=array([0.37175838, 3.1441693 ]), high=array([0.89366334, 5.76439797]))
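If the default bootstrap is too slow, you can trade precision for speed by reducing the number of resamples and using the plain percentile method; a minimal sketch (n_resamples, method, and random_state are standard scipy.stats.bootstrap arguments, and the result object also exposes standard_error, the bootstrap estimate of the parameters' standard errors):

# Rougher but much faster interval: fewer resamples, percentile method.
res_fast = stats.bootstrap(
    (exceedances,),
    statistic,
    n_resamples=999,        # default is 9999
    method="percentile",    # skips the BCa bias/acceleration corrections
    random_state=42,        # reproducibility
)
print(res_fast.confidence_interval)
print(res_fast.standard_error)  # bootstrap standard error of (shape, scale)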
This takes a while, so another option is to use the observed Fisher information to estimate confidence intervals. See, for example, equation 3.15 of https://www.stat.umn.edu/geyer/s06/5102/notes/fish.pdf.
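In that notation, the observed information is J(θ̂) = -∇²ℓ(θ̂), the negative Hessian of the log-likelihood evaluated at the MLE, and the approximate standard errors are se(θ̂_i) ≈ sqrt([J(θ̂)⁻¹]_ii); that is exactly what the code below computes.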
import numpy as np
from scipy import differentiate

def llf(p):  # vectorized log-likelihood function
    shape, scale = (pi[..., np.newaxis] for pi in p)
    return np.sum(dist.logpdf(exceedances, shape, 0, scale), axis=-1)

dist = stats.genpareto
params = dist.fit(exceedances, floc=0)
p = np.asarray([params[0], params[2]])

# The observed Fisher information matrix is the negative Hessian
# of the LLF evaluated at the MLE parameters.
res = differentiate.hessian(llf, p, initial_step=0.1)
J = -res.ddf

# To get the standard error of each parameter, invert this matrix,
# extract the diagonal elements, and take the square root.
cov = np.linalg.inv(J)
se = np.sqrt(np.diag(cov))

# These can be used to estimate confidence intervals.
alpha = 0.05
z = stats.norm.isf(alpha/2)
p - z*se, p + z*se
# (array([0.36907457, 3.22570315]), array([0.83608425, 5.44687804]))
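scipy.differentiate.hessian only exists in recent SciPy releases; if it is unavailable, a minimal sketch of the same observed-information calculation using the third-party numdifftools package (an assumption on my part, it must be installed separately) could look like the following, reusing dist, p, z, and exceedances from above:

import numdifftools as nd  # assumption: pip install numdifftools

def nll(q):  # scalar negative log-likelihood in (shape, scale)
    shape, scale = q
    return -np.sum(dist.logpdf(exceedances, shape, 0, scale))

# Observed information = Hessian of the NLL at the MLE (same J as above).
J_nd = nd.Hessian(nll)(p)
cov_nd = np.linalg.inv(J_nd)
se_nd = np.sqrt(np.diag(cov_nd))
p - z*se_nd, p + z*se_nd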
Comment: Use np.log1p(x) for np.log(1 + x). The latter naive algebraic form has terrible rounding error for small x. See np.log1p. – Martin Brown
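To illustrate that comment, a quick check with a small x (output values approximate):

import numpy as np
x = 1e-12
np.log(1 + x)   # ~1.000089e-12: 1 + x is rounded before the log, losing digits
np.log1p(x)     # ~1.000000e-12: accurate, since log1p never forms 1 + x explicitly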