Пример #1
0
    def test_acorr_breusch_godfrey(self):
        """Check Breusch-Godfrey output against stored reference values.

        The reference numbers are annotated in the original comments as
        coming from ``bgtest`` runs with ``order = 4``.
        """
        fitted = self.res

        # bgf = bgtest(fm, order = 4, type="F")
        reference_f = {
            'statistic': 1.179280833676792,
            'pvalue': 0.321197487261203,
            'parameters': (4, 195),
            'distr': 'f',
        }

        # bgc = bgtest(fm, order = 4, type="Chisq")
        reference_chi2 = {
            'statistic': 4.771042651230007,
            'pvalue': 0.3116067133066697,
            'parameters': (4, ),
            'distr': 'chi2',
        }

        # Expected order of the returned values: LM statistic, LM p-value,
        # F statistic, F p-value.
        expected = [
            reference_chi2[key] for key in ('statistic', 'pvalue')
        ] + [
            reference_f[key] for key in ('statistic', 'pvalue')
        ]
        observed = smsdia.acorr_breusch_godfrey(fitted, nlags=4)
        assert_almost_equal(observed, expected, decimal=13)

        # Lag selection: the test expects the default (nlags=None) to give
        # the same numbers as an explicit nlags=14 on this fixture.
        with_default_lags = smsdia.acorr_breusch_godfrey(fitted, nlags=None)
        with_explicit_lags = smsdia.acorr_breusch_godfrey(fitted, nlags=14)
        assert_almost_equal(with_default_lags, with_explicit_lags, decimal=13)
Пример #2
0
def acorr_breusch_godfrey(resid, nlags=None):
    """
    Run the Breusch-Godfrey Lagrange Multiplier test for residual autocorrelation.

    Thin wrapper around ``diagnostic.acorr_breusch_godfrey`` that keeps only
    the Lagrange Multiplier statistic and its p-value. Upstream documentation:
    https://www.statsmodels.org/stable/generated/statsmodels.stats.diagnostic.acorr_breusch_godfrey.html

    This test looks for serial correlation in a timeseries.

    Definition: serial correlation :=
    Serial or auto correlation is a correlation of
    a signal with a delayed copy of itself.

    The metric of correlation is the Pearson correlation
    and indicates a relationship with previous measurements
    in the series.  The presence of serial correlation can be used
    to understand periodicity.

    See this for more details:
    https://www.mathworks.com/help/signal/ug/find-periodicity-using-autocorrelation.html

    Null hypothesis:
    There is no serial correlation up to nlags.

    Alternative hypothesis:
    There is serial correlation.

    Parameters
    ----------
    resid : estimation results
        Passed unchanged as the first argument of
        ``diagnostic.acorr_breusch_godfrey``. The upstream documentation
        describes this argument as the estimation results whose residuals
        are tested, not a bare residual series.
        NOTE(review): the parameter name suggests callers pass residuals;
        confirm what they actually supply.
    nlags : int, default None
        Number of lags to include in the auxiliary regression (nlags is the
        highest lag). It is forwarded to the underlying test; when ``None``
        the underlying implementation chooses its own default.

    Returns
    -------
    BreuschGodfreyResult
        Named tuple with two fields:

        statistic : float
            Lagrange multiplier test statistic.
        pvalue : float
            The p-value for the Lagrange multiplier test.
    """
    # Forward nlags: previously it was accepted but silently dropped, so every
    # call ran with the library default regardless of the caller's request.
    result = diagnostic.acorr_breusch_godfrey(resid, nlags=nlags)
    AcorrBreuschGodfreyResult = namedtuple('BreuschGodfreyResult',
                                           'statistic pvalue')
    # Only the LM statistic and its p-value are exposed; any further values
    # in the underlying result are discarded.
    return AcorrBreuschGodfreyResult(result[0], result[1])
Пример #3
0
    def test_acorr_breusch_godfrey(self):
        """Verify Breusch-Godfrey results against stored reference numbers."""
        model_results = self.res

        # bgf = bgtest(fm, order = 4, type="F")
        f_reference = dict(
            statistic=1.179280833676792,
            pvalue=0.321197487261203,
            parameters=(4, 195),
            distr='f',
        )

        # bgc = bgtest(fm, order = 4, type="Chisq")
        chi2_reference = dict(
            statistic=4.771042651230007,
            pvalue=0.3116067133066697,
            parameters=(4,),
            distr='chi2',
        )

        # Returned order: LM statistic, LM p-value, F statistic, F p-value.
        reference_values = []
        for reference in (chi2_reference, f_reference):
            reference_values.append(reference['statistic'])
            reference_values.append(reference['pvalue'])

        computed = smsdia.acorr_breusch_godfrey(model_results, nlags=4)
        assert_almost_equal(computed, reference_values, decimal=13)

        # The default lag choice is expected to match nlags=14 here.
        default_choice = smsdia.acorr_breusch_godfrey(model_results,
                                                      nlags=None)
        fourteen_lags = smsdia.acorr_breusch_godfrey(model_results, nlags=14)
        assert_almost_equal(default_choice, fourteen_lags, decimal=13)
def get_bgod(model: pd.DataFrame, lags: int) -> tuple:
    """Run the Breusch-Godfrey autocorrelation test and return its four outputs.

    Parameters
    ----------
    model : OLS Model
        Model containing residual values; handed straight to
        ``acorr_breusch_godfrey``.
        NOTE(review): the annotation says ``pd.DataFrame`` while the
        description says a fitted model - confirm which is intended.
    lags : int
        The amount of lags, forwarded as ``nlags``.

    Returns
    -------
    tuple
        ``(lm_stat, p_value, f_stat, fp_value)`` in the order produced by
        ``acorr_breusch_godfrey``.
    """
    bg_output = acorr_breusch_godfrey(model, nlags=lags)

    # Unpack explicitly so a result of unexpected length fails here.
    lm_statistic, lm_pvalue, f_statistic, f_pvalue = bg_output

    return (lm_statistic, lm_pvalue, f_statistic, f_pvalue)
Пример #5
0
 def test_acorr_breusch_godfrey_multidim(self):
     """A results object with a 2-d ``resid`` array must raise ValueError."""
     two_dim_results = Bunch(resid=np.empty((100, 2)))
     expected_message = 'Model resid must be a 1d array'
     with pytest.raises(ValueError, match=expected_message):
         smsdia.acorr_breusch_godfrey(two_dim_results)
Пример #6
0
def error_analisis(result, plot=False):
    """Print a statistical analysis of regression errors.

    The report covers autocorrelation (Durbin-Watson, Breusch-Godfrey),
    heteroskedasticity (White), stationarity (ADF via ``DFtest``) and
    normality (Shapiro).

    Parameters
    ----------
    result :
        Results object obtained after ``model.fit()``; the function reads
        ``result.resid`` and ``result.model.exog``.
    plot : bool, default False
        When True, also show a Q-Q plot and a KDE plot of the residuals.

    Returns
    -------
    None
        Everything is reported through ``print`` (and plots when requested).
    """
    # Autocorrelation
    print('----------Durbin Watson-------------')
    out = durbin_watson(result.resid)
    print('Durbin Watson is: ' + str(out))

    if plot:
        qqplot(result.resid, line='s')
        pyplot.show()

    print('--------Breusch Autocorr-----------')

    # Best effort: a failure of this test must not abort the whole report.
    # Catch Exception rather than using a bare except so Ctrl-C and
    # SystemExit still propagate.
    try:
        bre = acorr_breusch_godfrey(result, nlags=12)

        print('lm: ' + str(bre[0]))
        print('lmpval: ' + str(bre[1]))
        print('fval: ' + str(bre[2]))
        print('fpval: ' + str(bre[3]))

        if bre[1] < 0.05:
            print('Evidence for autocorrelation')
        else:
            print('Not Evidence for autocorrelation')
    except Exception:
        print('Cant calculate statistic')

    print('-----White Heteroskedasticity------')

    white_test = het_white(result.resid, result.model.exog)

    labels = [
        'LM Statistic', 'LM-Test p-value', 'F-Statistic', 'F-Test p-value'
    ]
    print(dict(zip(labels, white_test)))

    if white_test[1] < 0.05:
        print('Evidence for heteroskedasticity')
    else:
        print('Not Evidence for heteroskedasticity')

    print('----------ADF Test-----------------')
    # Same best-effort policy as for the Breusch-Godfrey section above.
    try:
        DFtest(result.resid)
    except Exception:
        print("Can't calculate ADF test")

    print('----------Shapiro Normality--------')
    stat, p = shapiro(result.resid)
    print('Statistics=%.3f, p=%.3f' % (stat, p))
    # interpret
    alpha = 0.05
    if p > alpha:
        print('Sample looks Gaussian (fail to reject H0)')
    else:
        print('Sample does not look Gaussian (reject H0)')

    if plot:
        residuals = pd.DataFrame(result.resid)
        # NOTE(review): this show() runs before the KDE plot is created and
        # uses the `plt` alias while the Q-Q plot above uses `pyplot`; kept
        # as-is - confirm whether it is needed.
        plt.show()
        residuals.plot(kind='kde')
        plt.show()
Пример #7
0
# 8. Granger causality test, considering up to 2 lags
granger_result = grangercausalitytests(dataframe, maxlag=2)
print(granger_result)

# The null hypothesis for grangercausalitytests is that the time series
# in the second column, x2, does NOT Granger-cause the time series in
# the first column, x1.

# -----------------------------------------------------------------
# 9. Breusch-Godfrey Lagrange Multiplier test for residual autocorrelation

# Residuals of the fitted model.
# NOTE(review): `resid` is called as a method here, and the residuals (not
# the fitted results object) are then passed to acorr_breusch_godfrey.
# statsmodels exposes `resid` as an attribute and documents the test as
# taking the estimation results - confirm what `mod` is and whether this runs.
resid = mod.resid()
print(resid)

# Breusch-Godfrey test with 2 lags
acorr_result = acorr_breusch_godfrey(resid, nlags=2)
print(acorr_result)

'''Returns:	
lm (float) – Lagrange multiplier test statistic
lmpval (float) – p-value for Lagrange multiplier test
fval (float) – fstatistic for F test,
fpval (float) – pvalue for F test'''

#-----------------------------------------------------------------

''' Take another model into consideration
#9. VECM model

#built data
newdata = {'goldPrice': gold_data,'stockIndex': stock_data}
Пример #8
0
 def acorr_breusch_godfrey(self, timeseries):
     """Fit a model on ``timeseries`` and run the Breusch-Godfrey test on it.

     The model comes from ``self.generate_model``; only its second return
     value (the fitted result) is used. Returns a ``BreuschGodfreyResult``
     named tuple with the first two values of the underlying test as
     ``statistic`` and ``pvalue``.
     """
     _, fitted_result = self.generate_model(timeseries)
     bg_output = diagnostic.acorr_breusch_godfrey(fitted_result)
     result_type = namedtuple('BreuschGodfreyResult', 'statistic pvalue')
     return result_type(statistic=bg_output[0], pvalue=bg_output[1])
Пример #9
0
def ols_diag(df, X, model, nlag=1, remove_outliers=False):
    """Print a battery of diagnostic checks for a fitted OLS model.

    Parameters
    ----------
    df :
        Dataset the model was fitted on; its length is reported and it is
        passed on to ``outliers``.
    X :
        Regressors; their length is reported and they are passed to
        ``heatmap`` for the multicollinearity check.
    model :
        Fitted model results; the function reads ``model.resid`` and
        ``model.model.exog``.
    nlag : int, default 1
        Number of lags used in the Breusch-Godfrey autocorrelation test.
    remove_outliers : bool, default False
        Forwarded to ``outliers``.

    Returns
    -------
    None
        All results are reported through ``print`` and the helper functions.
    """
    # Small info
    print("Dataset:", "\t", len(df))
    print("X:", "\t", len(X))

    # 1. Residual normality test
    print("1. Normality Test: ", "Jarque-Bera", "Test")
    jb_h0 = "Residual Normally distributed"
    jb_h1 = "Residual Not Normally distributed"
    jb_p = smt.jarque_bera(model.resid)[1]
    hypo_out(jb_p, jb_h0, jb_h1)

    # 2. Data linearity test
    print("2. Linearity Test: ", "Rainbow", "Test")
    r_h0 = "Data have linear relationship"
    r_h1 = "Data do not have linear relationship"
    r_t, r_p = smd.linear_rainbow(model)
    hypo_out(r_p, r_h0, r_h1)

    # 3. Heteroscedasticity test
    print("3. Heteroscedasticity Test: ", "Breusch-Pagan", "Test")
    bp_h0 = "Data have same variance accross"
    bp_h1 = "Data do not have have same variance accross"
    bp_p = smd.het_breuschpagan(model.resid, model.model.exog)[1]
    hypo_out(bp_p, bp_h0, bp_h1)

    # 4. Autocorrelation test
    print("4. Autocorrelation Test: ", "Breusch Godfrey", "Test")
    bg_h0 = "Data are not related to themself:" + str(nlag) + " lag"
    bg_h1 = "Data are related to themself by:" + str(nlag) + " lag"
    bg_p = smd.acorr_breusch_godfrey(model, nlag)[1]
    hypo_out(bg_p, bg_h0, bg_h1)

    # 5. Sum of residuals == 0 (checked after rounding to one decimal).
    # hypo_out expects a p-value, so the outcome is encoded as 1 (holds)
    # or 0 (does not hold).
    print("5. Sum of residuals == 0")
    sr_h0 = "Sum of residuals = 0"
    sr_h1 = "Sum of residual != 0"
    if round(sum(model.resid), 1) == 0:
        sr_p = 1
    else:
        sr_p = 0
    hypo_out(sr_p, sr_h0, sr_h1)

    # 6. List of outliers.
    # Honour the caller's remove_outliers flag; it used to be hard-coded to
    # False here, so the parameter had no effect.
    print("6. Checking outliers:")
    outliers(df, model, remove_outliers=remove_outliers)

    # 7. Multicollinearity check (best effort: a plotting failure should not
    # abort the report, but Ctrl-C / SystemExit must still propagate).
    print("7. Checking multicolinearity")
    try:
        heatmap(X)
    except Exception:
        print("Cannot perrform this test")
Si el estadistico, esta entre 1,038 y 2,962 entonces podemos concluir
que los errores no estan autocorrelacionados. En efecto como el estadistico
esta en ese intervalo, podemos decir que los errores no estan autocorrelacionados '''

# But what if the errors are correlated with errors further back than the immediately preceding one? The Durbin-Watson test does not address that case,
# so a more general alternative is the Breusch-Godfrey test.
# We would then have an autoregressive model of the following form:
# ei = X * B + p1*ei-1 + p2*ei-2 + p3*ei-3 + ... + pm*ei-m + Vi
# The hypotheses are the following:

# H0: p1=0, p2=0, p3=0... pm=0
# H1: at least one of p1, p2, p3, ..., pm is non-zero

from statsmodels.stats.diagnostic import acorr_breusch_godfrey
acorr_breusch_godfrey(
    modelo
)  # We obtain a p-value greater than 5%, so we can conclude that the errors are not
# autocorrelated
''' Aun asi puede que el modelo no se encuentre bien especificado.
Puede que omitimos una variable e el modelo, puede que no sea una funcion lineal,
por lo tanto, el test que nos permitiria saber eso, es el test de ramsey:
Vamos a crear un modelo auxiliar, en donde tenemos en cuenta que el modelo puede ser especificado en forma polinomica

y = B0 + B1*X1 + B2*y_estimado^2 + u por ejemplo

H0: el modelo esta bien especificado
H1: el modelo no esta bien especificado '''

from statsmodels.stats.diagnostic import linear_reset

linear_reset(
    def check_error_term_autocorrelation(self) -> bool:
        """
        Check correlation between the observations of the error term.

        Two statistical tests are used:
        - Durbin-Watson's statistical test,
        - Breusch-Godfrey's statistical test.

        The share of tests for which the assumption is fulfilled is passed to
        ``check_fulfill_ratio`` together with ``self.min_fulfill_ratio``, and
        the result of that call is returned.

        If:
         - silent_mode = True, method returns:
             a) True (which means that the assumption is fulfilled) if the
                percentage of statistical tests for which the assumption is
                fulfilled is higher than or equal to set min_fulfill_ratio
             b) False (which means that the assumption is not fulfilled) if
                the percentage of statistical tests for which the assumption
                is fulfilled is lower than set min_fulfill_ratio
         - silent_mode = False, method returns True/False as above and shows
           additional statistics and descriptions which are helpful in
           assessing the fulfilment of the assumption
        """

        durbin_watson_statistic = durbin_watson(self.residuals)

        # Keep only the first two Breusch-Godfrey outputs and label them as
        # the LM statistic and its p-value.
        bg_test = pd.DataFrame(
            stats_diag.acorr_breusch_godfrey(self.results)[:2],
            columns=["value"],
            index=["Lagrange multiplier statistic", "p-value"])

        # Durbin-Watson counts as fulfilled when the statistic lies strictly
        # between the two thresholds.
        true_counts = 0
        lower_threshold_dw_stat = 1.5
        upper_threshold_dw_stat = 2.5
        if lower_threshold_dw_stat < durbin_watson_statistic < upper_threshold_dw_stat:
            true_counts = true_counts + 1

        # presumably test_hypothesis returns 1/True when the assumption holds
        # at the given significance level - verify against its definition
        true_counts = true_counts + test_hypothesis(
            significance_level=self.alpha,
            p_value=bg_test.iloc[1].value,
            print_outcome=False)

        # Two tests in total, so the ratio is the count divided by 2.
        true_ratio = true_counts / 2

        if not self.silent_mode:

            print(
                Color.BOLD +
                "Assumption 4. Observations of the error term are uncorrelated with "
                "each other." + Color.END, "\n")

            print("This assumption affects on: \n", "- prediction \n",
                  "- interpretation.", "\n")

            print(
                "One observation of the error term should not predict the next observation. To "
                "resolve this issue, you might need to add an independent variable to the model "
                "that captures this information. Analysts commonly use distributed lag models, "
                "which use both current values of the dependent variable and past values of "
                "independent variables.\n")

            print(
                Color.BOLD + "Durbin-Watson " + Color.END +
                "statistical test: \n",
                "If the value of the statistics equals 2 => no serial correlation. \n",
                "If the value of the statistics equals 0 => strong positive correlation. \n",
                "If the value of the statistics equals 4 => strong negative correlation. \n"
            )
            # The statistic is recomputed here rather than reusing
            # durbin_watson_statistic.
            print("The value of Durbin-Watson statistic is " +
                  f"{np.round(durbin_watson(self.residuals), 4)}\n")
            # The verbose path recounts the fulfilled tests from zero.
            # NOTE(review): here a statistic exactly equal to a threshold
            # falls into the "satisfied" else-branch, whereas the check above
            # uses strict inequalities - the returned value can therefore
            # differ between silent and verbose mode at the boundary; confirm
            # which is intended.
            true_counts = 0
            if durbin_watson_statistic < lower_threshold_dw_stat:
                print("Signs of positive autocorrelation =>" + Color.RED +
                      " Assumption not satisfied" + Color.END + "\n")
            elif durbin_watson_statistic > upper_threshold_dw_stat:
                print("Signs of negative autocorrelation =>" + Color.RED +
                      " Assumption not satisfied" + Color.END + "\n")
            else:
                print("Little to no autocorrelation =>" + Color.GREEN +
                      " Assumption satisfied" + Color.END + "\n")
                true_counts = true_counts + 1

            print(Color.BOLD + "Breusch-Godfrey " + Color.END +
                  "Lagrange Multiplier statistical tests: \n")
            print(bg_test, "\n")

            # Second call to test_hypothesis, this time without
            # print_outcome=False and with the null hypothesis text supplied.
            true_counts = true_counts + test_hypothesis(
                significance_level=self.alpha,
                p_value=bg_test.iloc[1].value,
                null_hypothesis="there doesn't exist "
                "autocorrelation in the "
                "error term.")
            true_ratio = true_counts / 2
            # Called without print_outcome=False; the return value of this
            # call is discarded.
            check_fulfill_ratio(true_fulfill_ratio=true_ratio,
                                min_fulfill_ratio=self.min_fulfill_ratio)

        # The final verdict is produced with print_outcome=False in both modes.
        return check_fulfill_ratio(true_fulfill_ratio=true_ratio,
                                   min_fulfill_ratio=self.min_fulfill_ratio,
                                   print_outcome=False)
# The p-values are in the second group of values
# NOTE(review): depending on the installed statsmodels version,
# acorr_ljungbox may return a DataFrame instead of a tuple, in which case
# lb[1] would not select the p-values - confirm against the version in use.
from statsmodels.stats.diagnostic import acorr_ljungbox
lb = acorr_ljungbox(resid, lags=10)
print(
    "\n", 'Teste de Ljung-Box de independência dos resíduos:', lb[1]
)  # From the 7th lag onward, p-value < 0.05 => those lags show autocorrelation, which violates
# the assumption of independent residuals

## Durbin-Watson test for residual autocorrelation. H_0: residuals have no autocorrelation with their 1st lag
#from statsmodels.stats.stattools import durbin_watson
#DW = durbin_watson(resid)
#print("\n", 'Teste de Durbin-Watson de independência dos resíduos:', DW[0])

# Breusch-Godfrey test for residual autocorrelation. H_0: residuals are not correlated with their "n" lags (here n = 10)
from statsmodels.stats.diagnostic import acorr_breusch_godfrey
bg = acorr_breusch_godfrey(model_output, nlags=10)
print(
    "\n", 'Teste de Breush-Godfrey de independência dos resíduos:', bg
)  # P-value (2nd of the 4 values shown) < 0.05 => the residuals are autocorrelated

# ARCH heteroskedasticity test. H_0: variance is constant. The second value is the p-value
# NOTE(review): the keyword names accepted by het_arch differ between
# statsmodels versions - confirm `maxlag`/`autolag` are valid for the one in use.
from statsmodels.stats.diagnostic import het_arch
archTest = het_arch(resid[0], maxlag=5, autolag=None)
print("\n", 'Teste ARCH de heterocedasticidade:', archTest[1]
      )  # P-value < 0.05 => reject H_0 => variance is not constant

### GJR-GARCH Model ### AR(2) + GJR-GARCH(1,1)
from arch import arch_model

gjrGarch = arch_model(tsReturns, mean="ARX", lags=2,
                      o=1)  # sets up the 3 equations at the same time
Пример #13
0
def breusch_godfrey():
    """Print the Breusch-Godfrey test output for ``res`` as a labelled dict.

    ``res`` is not a parameter; it is read from the enclosing scope. Each
    value of the test output is paired, in order, with a descriptive label.
    """
    outcome = acorr_breusch_godfrey(res)
    labels = (
        "Lagrange multiplier statistic",
        "p-value",
        "f-value",
        "f p-value",
    )
    labelled = {label: value for label, value in zip(labels, outcome)}
    print(labelled)
Пример #14
0
# Money Supply: descriptive statistics (mean, standard deviation, variance)

meanMS = np.mean(DataUse.MS)
sdMS = np.std(DataUse.MS)
varMS = np.var(DataUse.MS)


# Create Model: inflation explained by `foreign` and `MS`, fitted on `work`

Model = sma.wls('work.inflation ~ foreign + MS', work).fit()

print(Model.summary())

# Weighted Least Squares used to fix heteroscedasticity
# NOTE(review): no weights are passed to wls() above - confirm whether the
# fit actually differs from ordinary least squares.

# Test The Model

# White test for heteroscedasticity. The prints below read the result as
# [0] Chi-Square statistic, [1] its p-value, [2] F-statistic, [3] its p-value.
Heteroscedastisity = ds.het_white(Model.resid, exog = work)

print('F-statistic %r' % Heteroscedastisity[2])
print('Prob,F %f' % Heteroscedastisity[3])
print('Chi-Square %s' % Heteroscedastisity[0])
print('Prob,Chi-Square %g' % Heteroscedastisity[1])

# Breusch-Godfrey test for autocorrelation with 2 lags; the result is read
# with the same index layout as above.
Autocorrelation = ds.acorr_breusch_godfrey(Model, nlags=(2))

print('F-statistic %r' % Autocorrelation[2])
print('Prob,F %f' % Autocorrelation[3])
print('Chi-Square %s' % Autocorrelation[0])
print('Prob,Chi-Square %g' % Autocorrelation[1])