I am trying to fit a GPD (generalized Pareto distribution) in Python. The standard genpareto.fit from SciPy works, but I would also like the parameter errors (sensitivities), which it does not provide. When I try to fit the GPD myself by minimizing the log-likelihood with differential_evolution, the results are very unsatisfactory. Could you help me write a proper fitting function for the GPD, or suggest a trick to get the sensitivity of the parameters? Thank you.
import numpy as np
from scipy.stats import genpareto
from scipy.optimize import differential_evolution
shape_fit, loc_fit, scale_fit = genpareto.fit(exceedances, floc=0)
def gpd_neg_log_likelihood(params, data):
    xi, beta = params[0], params[1]
    if beta <= 0:
        return np.inf
    if xi == 0:
        log_likelihood = -np.sum(np.log(1 / beta) + data / beta)
    else:
        log_likelihood = -np.sum(
            -np.log(beta) - (1 + (1 / xi)) * np.log(1 + (xi * data) / beta)
        )
    return -log_likelihood
result_global = differential_evolution(
    gpd_neg_log_likelihood,
    bounds,
    args=(exceedances,),
    strategy="best2bin",
    popsize=50,
    maxiter=1000,
    tol=1e-6,
    seed=42,
)
exceedances = np.array([3.22527379e+00, 2.39897511e+00, 1.82064422e+00, 5.87481698e+00,
2.07146413e+01, 8.62207613e+00, 6.21303075e+00, 6.96559297e+00,
3.75901903e-01, 1.20695461e+01, 1.82064422e+00, 1.82064422e+00,
8.12079063e+00, 1.64619327e+01, 6.38521230e-02, 2.57750366e+02,
4.38286969e+00, 8.19913616e-01, 2.11346999e+00, 1.17481698e+00,
1.35702782e+01, 8.25036603e-01, 3.56515373e-01, 1.39729136e+01,
1.41563041e+00, 2.75688073e+00, 2.86415465e+01, 1.38845347e+00,
3.67431193e+00, 1.54698558e+01, 3.05176933e+00, 7.39515072e+00,
3.27762779e+00, 5.07254260e+01, 2.36369594e+00, 2.05832241e+00,
4.72280472e+00, 9.17889908e+00, 4.37758847e-01, 1.60987025e+00,
2.19331586e+00, 2.95373526e+00, 9.22018349e-01, 4.45655308e+01,
1.98582640e-01, 4.67802378e+00, 5.32045184e+00, 1.94702735e+01,
6.39060642e+00, 1.95600476e-01, 1.57491082e+00, 1.45499405e+01,
7.72889417e-03, 6.02074911e+01, 2.17625945e+01, 1.04262782e+01,
1.67582259e+01, 8.85850178e-02, 7.34705228e-01, 6.17352614e-02,
4.51112347e+00, 4.57230256e+00, 2.49207008e+00, 4.25472747e-01,
7.13181313e+00, 1.41655395e+00, 1.72969967e-01, 7.84816463e+00,
5.93159066e+00, 5.62347052e+00, 3.81413613e+00, 6.12303665e+00,
8.79319372e+00, 8.12303665e+00, 1.31464838e+01, 1.03115183e+01,
1.36623037e+01, 1.33481675e+01, 1.73560209e+00, 2.14397906e+00,
2.03926702e+00, 1.66375670e+01, 1.08000000e+01, 6.43355000e-01,
4.10000000e+01, 7.00000000e-01, 1.58500000e+00, 8.06654000e-01,
1.06300000e+00, 5.00000000e+00, 3.70000000e+00, 6.72500000e+00,
8.73900000e+00, 5.19106360e+01, 3.50000000e-01, 8.80000000e+00,
6.67000000e-01, 8.00000000e+00, 1.20000000e+00, 5.20000000e+00,
1.39000000e+01, 1.73000000e+00, 3.21000000e+00, 1.00000000e-01,
1.29845709e+00, 6.55400193e+00, 1.09416586e+01, 2.35260366e+01,
7.03616201e+00, 2.85920926e-01, 2.85920926e-01, 5.75216972e-01,
1.28220829e+01, 2.85920926e-01, 9.30568949e-02, 3.17888139e+00,
2.60028930e+00, 4.77000964e+00, 1.15684667e+01, 1.22434908e+01,
1.77838590e+01, 1.08627087e+00, 1.64285714e+00, 5.63172542e+00,
7.02319109e+00, 2.51391466e-01, 6.58627087e-02, 2.51391466e-01,
3.31261596e+00, 2.69675325e+01, 2.35371058e+01, 1.30528757e+01,
1.13831169e+01, 2.29220779e+00, 2.23293135e+01, 3.99814471e-01,
6.55936920e+00, 8.07977737e-01, 3.96196660e+00, 2.10667904e+00,
6.09554731e+00, 1.60111317e+00, 1.09152618e+01, 5.15971606e-01,
1.29241349e+01, 2.48580302e+00, 1.01109139e+00, 3.26543922e+01,
2.67524401e-01, 1.33229814e+00, 2.04214729e+00, 1.81455191e-01,
3.72803904e+00, 2.18380657e+01, 2.04214729e+00, 6.40195209e-01,
6.30124224e+00, 1.97883762e+01, 4.70408163e+00, 4.89352263e-01,
1.49525288e+01, 1.59849157e+00, 4.15195209e+01, 1.90785271e+01,
1.03828749e+01, 2.04214729e+00, 1.33096717e-03, 2.04538598e+01,
3.37311446e+00, 7.11180124e-01, 5.32519965e+00, 2.67524401e-01,
2.55559006e+01, 1.90554615e+01, 3.65914479e+01, 8.89458086e+00,
1.53636749e+01, 3.72946655e+00, 4.27180356e-01, 1.64690940e-01,
8.89458086e+00, 4.63717189e+00, 1.27392041e+00, 2.15495343e-01,
9.35224386e-01, 3.81033023e-03, 7.30186283e+00, 1.05880610e+01,
1.46913209e+02, 7.87044877e-01, 8.51354784e+00, 2.86748518e+00,
3.89881456e+00, 2.70533446e-01, 7.20110076e+00, 6.18501270e+00,
5.11854361e-01, 2.68878069e+01, 5.08425064e+00, 1.31282811e+01,
1.92574257e+00, 5.49834983e+00, 2.75082508e+00, 6.87623762e+00,
1.12211221e-01, 9.78465347e+00, 4.68481848e+00, 8.72937294e-01,
2.58580858e+00, 7.00495050e-01, 7.70132013e+00, 1.53267327e+01,
4.40594059e-01, 2.75082508e+00, 9.35148515e+00, 1.39157591e+02,
2.31303630e+01, 1.37656766e+01, 2.80528053e-02, 2.75577558e-01,
1.22392739e+01])
1 Answer
Assuming your data is stored in exceedances, you can produce bootstrap confidence intervals of the parameters like:
from scipy import stats

def statistic(x):
    params = stats.genpareto.fit(x, floc=0)
    return params[0], params[2]  # shape and scale (loc is fixed at 0)

res = stats.bootstrap((exceedances,), statistic)
res.confidence_interval
# ConfidenceInterval(low=array([0.37175838, 3.1441693 ]), high=array([0.89366334, 5.76439797]))
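If the default bootstrap is too slow, you can trade precision for speed by reducing the number of resamples and using the plain percentile method; a minimal sketch (n_resamples, method, and random_state are standard scipy.stats.bootstrap arguments, and the result object also exposes standard_error, the bootstrap estimate of the parameters' standard errors):

# Rougher but much faster interval: fewer resamples, percentile method.
res_fast = stats.bootstrap(
    (exceedances,),
    statistic,
    n_resamples=999,        # default is 9999
    method="percentile",    # skips the BCa bias/acceleration corrections
    random_state=42,        # reproducibility
)
print(res_fast.confidence_interval)
print(res_fast.standard_error)  # bootstrap standard error of (shape, scale)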
This takes a while, so another option is to use the observed Fisher information to estimate confidence intervals. See, for example, equation 3.15 of https://www.stat.umn.edu/geyer/s06/5102/notes/fish.pdf.
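In that notation, the observed information is J(θ̂) = -∇²ℓ(θ̂), the negative Hessian of the log-likelihood evaluated at the MLE, and the approximate standard errors are se(θ̂_i) ≈ sqrt([J(θ̂)⁻¹]_ii); that is exactly what the code below computes.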
import numpy as np
from scipy import differentiate

def llf(p):  # vectorized log-likelihood function
    shape, scale = (pi[..., np.newaxis] for pi in p)
    return np.sum(dist.logpdf(exceedances, shape, 0, scale), axis=-1)

dist = stats.genpareto
params = dist.fit(exceedances, floc=0)
p = np.asarray([params[0], params[2]])

# The observed Fisher information matrix is the negative Hessian
# of the LLF evaluated at the MLE parameters.
res = differentiate.hessian(llf, p, initial_step=0.1)
J = -res.ddf

# To get the standard error of each parameter, invert this matrix,
# extract the diagonal elements, and take the square root.
cov = np.linalg.inv(J)
se = np.sqrt(np.diag(cov))

# These can be used to estimate confidence intervals.
alpha = 0.05
z = stats.norm.isf(alpha/2)
p - z*se, p + z*se
# (array([0.36907457, 3.22570315]), array([0.83608425, 5.44687804]))
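scipy.differentiate.hessian only exists in recent SciPy releases; if it is unavailable, a minimal sketch of the same observed-information calculation using the third-party numdifftools package (an assumption on my part, it must be installed separately) could look like the following, reusing dist, p, z, and exceedances from above:

import numdifftools as nd  # assumption: pip install numdifftools

def nll(q):  # scalar negative log-likelihood in (shape, scale)
    shape, scale = q
    return -np.sum(dist.logpdf(exceedances, shape, 0, scale))

# Observed information = Hessian of the NLL at the MLE (same J as above).
J_nd = nd.Hessian(nll)(p)
cov_nd = np.linalg.inv(J_nd)
se_nd = np.sqrt(np.diag(cov_nd))
p - z*se_nd, p + z*se_nd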
Comment: Use np.log1p(x) for np.log(1 + x). The latter naive algebraic form has terrible rounding error for small x. See np.log1p. – Martin Brown
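To illustrate that comment, a quick check with a small x (output values approximate):

import numpy as np
x = 1e-12
np.log(1 + x)   # ~1.000089e-12: 1 + x is rounded before the log, losing digits
np.log1p(x)     # ~1.000000e-12: accurate, since log1p never forms 1 + x explicitly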