Example #1
def test_ic():
    #test information criteria
    #consistency check

    ics = [aic, aicc, bic, hqic]
    ics_sig = [aic_sigma, aicc_sigma, bic_sigma, hqic_sigma]

    for ic, ic_sig in zip(ics, ics_sig):
        assert_(ic(np.array(2), 10, 2).dtype == np.float64, msg=repr(ic))
        assert_(ic_sig(np.array(2), 10, 2).dtype == np.float64, msg=repr(ic_sig))

        assert_almost_equal(ic(-10. / 2. * np.log(2.), 10, 2) / 10,
                            ic_sig(2, 10, 2),
                            decimal=14)

        assert_almost_equal(ic_sig(np.log(2.), 10, 2, islog=True),
                            ic_sig(2, 10, 2),
                            decimal=14)


    #examples penalty directly from formula
    n, k = 10, 2
    assert_almost_equal(aic(0, 10, 2), 2*k, decimal=14)
    #next see Wikipedia
    assert_almost_equal(aicc(0, 10, 2),
                        aic(0, n, k) + 2*k*(k+1.)/(n-k-1.), decimal=14)
    assert_almost_equal(bic(0, 10, 2), np.log(n)*k, decimal=14)
    assert_almost_equal(hqic(0, 10, 2), 2*np.log(np.log(n))*k, decimal=14)
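
The consistency check above hinges on the relationship between the loglikelihood-based criteria and their `*_sigma` counterparts: for a Gaussian model with llf = -n/2 * log(sigma2) (constants dropped), ic(llf, n, k) / n should equal ic_sigma(sigma2, n, k), and passing log(sigma2) with islog=True should give the same value. A minimal standalone sketch of that relationship for AIC, using the usual textbook definitions rather than the exact statsmodels implementations:

import numpy as np

def aic_demo(llf, nobs, df_modelwc):
    # textbook AIC: -2*llf + 2*k
    return -2.0 * llf + 2.0 * df_modelwc

def aic_sigma_demo(sigma2, nobs, df_modelwc, islog=False):
    # AIC per observation in terms of the error variance, constants dropped:
    # log(sigma2) + 2*k/n
    log_sigma2 = sigma2 if islog else np.log(sigma2)
    return log_sigma2 + 2.0 * df_modelwc / nobs

n, k, sigma2 = 10, 2, 2.0
llf = -n / 2.0 * np.log(sigma2)  # Gaussian loglikelihood, constants dropped
assert np.allclose(aic_demo(llf, n, k) / n, aic_sigma_demo(sigma2, n, k))
assert np.allclose(aic_sigma_demo(np.log(sigma2), n, k, islog=True),
                   aic_sigma_demo(sigma2, n, k))

The same per-observation scaling argument carries over to AICc, BIC, and HQIC, which differ only in the penalty term.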
Example #2
def test_ic():
    #test information criteria
    #consistency check

    ics = [aic, aicc, bic, hqic]
    ics_sig = [aic_sigma, aicc_sigma, bic_sigma, hqic_sigma]

    for ic, ic_sig in zip(ics, ics_sig):
        assert_(ic(np.array(2), 10, 2).dtype == np.float64, msg=repr(ic))
        assert_(ic_sig(np.array(2), 10, 2).dtype == np.float64, msg=repr(ic_sig))

        assert_almost_equal(ic(-10. / 2. * np.log(2.), 10, 2) / 10,
                            ic_sig(2, 10, 2),
                            decimal=14)

        assert_almost_equal(ic_sig(np.log(2.), 10, 2, islog=True),
                            ic_sig(2, 10, 2),
                            decimal=14)

    #examples penalty directly from formula
    n, k = 10, 2
    assert_almost_equal(aic(0, 10, 2), 2 * k, decimal=14)
    #next see Wikipedia
    assert_almost_equal(aicc(0, 10, 2),
                        aic(0, n, k) + 2 * k * (k + 1.) / (n - k - 1.),
                        decimal=14)
    assert_almost_equal(bic(0, 10, 2), np.log(n) * k, decimal=14)
    assert_almost_equal(hqic(0, 10, 2), 2 * np.log(np.log(n)) * k, decimal=14)
Example #3
def test_ic():
    # test information criteria

    # examples penalty directly from formula
    n = 10
    k = 2
    assert_almost_equal(aic(0, 10, 2), 2*k, decimal=14)
    # next see Wikipedia
    assert_almost_equal(aicc(0, 10, 2),
                        aic(0, n, k) + 2*k*(k+1.)/(n-k-1.), decimal=14)
    assert_almost_equal(bic(0, 10, 2), np.log(n)*k, decimal=14)
    assert_almost_equal(hqic(0, 10, 2), 2*np.log(np.log(n))*k, decimal=14)
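
With llf = 0 the criteria above reduce to their penalty terms, so the expected values at n = 10, k = 2 can be checked by hand; a quick arithmetic sketch (no statsmodels required), using the same AICc identity cited from Wikipedia:

import numpy as np

n, k = 10, 2
print(2 * k)                       # AIC penalty: 4.0
print(2 * k * n / (n - k - 1.0))   # AICc penalty: 2k + 2k(k+1)/(n-k-1) = 40/7 ~ 5.714
print(np.log(n) * k)               # BIC penalty: ~4.605
print(2 * np.log(np.log(n)) * k)   # HQIC penalty: ~3.336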
Example #5
def test_ols():
    # More comprehensive tests against OLS estimates
    mod = RecursiveLS(endog, dta['m1'])
    res = mod.fit()

    mod_ols = OLS(endog, dta['m1'])
    res_ols = mod_ols.fit()

    # Regression coefficients, standard errors, and estimated scale
    assert_allclose(res.params, res_ols.params)
    assert_allclose(res.bse, res_ols.bse)
    # Note: scale here is computed according to Harvey, 1989, 4.2.5, and is
    # called the ML estimator and sometimes (e.g. later in section 5)
    # denoted \tilde \sigma_*^2
    assert_allclose(res.filter_results.obs_cov[0, 0], res_ols.scale)

    # OLS residuals are equivalent to smoothed forecast errors
    # (the latter are defined as e_t|T by Harvey, 1989, 5.4.5)
    # (this follows since the smoothed state simply contains the
    # full-information estimates of the regression coefficients)
    actual = (mod.endog[:, 0] -
              np.sum(mod['design', 0, :, :] * res.smoothed_state, axis=0))
    assert_allclose(actual, res_ols.resid)

    # Given the estimate of scale as `sum(v_t^2 / f_t) / (T - d)` (see
    # Harvey, 1989, 4.2.5 on p. 183), then llf_recursive is equivalent to the
    # full OLS loglikelihood (i.e. without the scale concentrated out).
    desired = mod_ols.loglike(res_ols.params, scale=res_ols.scale)
    assert_allclose(res.llf_recursive, desired)
    # Alternatively, we can construct the concentrated OLS loglikelihood
    # by computing the scale term with `nobs` in the denominator rather than
    # `nobs - d`.
    scale_alternative = np.sum(
        (res.standardized_forecasts_error[0, 1:] *
         res.filter_results.obs_cov[0, 0]**0.5)**2) / mod.nobs
    llf_alternative = np.log(
        norm.pdf(res.resid_recursive, loc=0,
                 scale=scale_alternative**0.5)).sum()
    assert_allclose(llf_alternative, res_ols.llf)

    # Prediction
    actual = res.forecast(10, design=np.ones((1, 1, 10)))
    assert_allclose(actual, res_ols.predict(np.ones((10, 1))))

    # Sums of squares, R^2
    assert_allclose(res.ess, res_ols.ess)
    assert_allclose(res.ssr, res_ols.ssr)
    assert_allclose(res.centered_tss, res_ols.centered_tss)
    assert_allclose(res.uncentered_tss, res_ols.uncentered_tss)
    assert_allclose(res.rsquared, res_ols.rsquared)

    # Mean squares
    assert_allclose(res.mse_model, res_ols.mse_model)
    assert_allclose(res.mse_resid, res_ols.mse_resid)
    assert_allclose(res.mse_total, res_ols.mse_total)

    # Hypothesis tests
    actual = res.t_test('m1 = 0')
    desired = res_ols.t_test('m1 = 0')
    assert_allclose(actual.statistic, desired.statistic)
    assert_allclose(actual.pvalue, desired.pvalue, atol=1e-15)

    actual = res.f_test('m1 = 0')
    desired = res_ols.f_test('m1 = 0')
    assert_allclose(actual.statistic, desired.statistic)
    assert_allclose(actual.pvalue, desired.pvalue, atol=1e-15)

    # Information criteria
    # Note: the llf and llf_obs given in the results are based on the Kalman
    # filter and so the ic given in results will not be identical to the
    # OLS versions. Additionally, llf_recursive is comparable to the
    # non-concentrated llf, and not the concentrated llf that is by default
    # used in OLS. Compute new ic based on llf_alternative to compare.
    actual_aic = aic(llf_alternative, res.nobs_effective, res.df_model)
    assert_allclose(actual_aic, res_ols.aic)
    actual_bic = bic(llf_alternative, res.nobs_effective, res.df_model)
    assert_allclose(actual_bic, res_ols.bic)
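
The `llf_alternative` construction above rests on a standard identity: summing normal log-densities of the residuals at a fixed scale gives the full Gaussian loglikelihood, and using scale = SSR / nobs turns it into the concentrated loglikelihood that OLS reports by default. A self-contained sketch of that identity on synthetic data (plain least squares via NumPy, not the RecursiveLS machinery):

import numpy as np
from scipy.stats import norm

rng = np.random.default_rng(0)
nobs = 100
x = np.column_stack([np.ones(nobs), rng.normal(size=nobs)])
y = x @ np.array([1.0, 0.5]) + rng.normal(size=nobs)

beta = np.linalg.lstsq(x, y, rcond=None)[0]
resid = y - x @ beta
scale = resid @ resid / nobs  # concentrated scale: nobs in the denominator

# Sum of normal log-densities at that scale ...
llf_sum = norm.logpdf(resid, loc=0, scale=scale**0.5).sum()
# ... equals the usual concentrated OLS loglikelihood formula
llf_conc = -nobs / 2.0 * (np.log(2 * np.pi) + np.log(scale) + 1)
assert np.allclose(llf_sum, llf_conc)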
Example #6
def test_glm(constraints=None):
    # More comprehensive tests against GLM estimates (somewhat redundant
    # given `test_ols`, but mostly here to complement the tests in
    # `test_glm_constrained`)
    endog = dta.infl
    exog = add_constant(dta[['unemp', 'm1']])

    mod = RecursiveLS(endog, exog, constraints=constraints)
    res = mod.fit()

    mod_glm = GLM(endog, exog)
    if constraints is None:
        res_glm = mod_glm.fit()
    else:
        res_glm = mod_glm.fit_constrained(constraints=constraints)

    # Regression coefficients, standard errors, and estimated scale
    assert_allclose(res.params, res_glm.params)
    assert_allclose(res.bse, res_glm.bse, atol=1e-6)
    # Note: scale here is computed according to Harvey, 1989, 4.2.5, and is
    # called the ML estimator and sometimes (e.g. later in section 5)
    # denoted \tilde \sigma_*^2
    assert_allclose(res.filter_results.obs_cov[0, 0], res_glm.scale)

    # DoF
    # Note: GLM does not include intercept in DoF, so modify by -1
    assert_equal(res.df_model - 1, res_glm.df_model)

    # OLS residuals are equivalent to smoothed forecast errors
    # (the latter are defined as e_t|T by Harvey, 1989, 5.4.5)
    # (this follows since the smoothed state simply contains the
    # full-information estimates of the regression coefficients)
    actual = (mod.endog[:, 0] -
              np.sum(mod['design', 0, :, :] * res.smoothed_state, axis=0))
    assert_allclose(actual, res_glm.resid_response, atol=1e-7)

    # Given the estimate of scale as `sum(v_t^2 / f_t) / (T - d)` (see
    # Harvey, 1989, 4.2.5 on p. 183), then llf_recursive is equivalent to the
    # full OLS loglikelihood (i.e. without the scale concentrated out).
    desired = mod_glm.loglike(res_glm.params, scale=res_glm.scale)
    assert_allclose(res.llf_recursive, desired)
    # Alternatively, we can construct the concentrated OLS loglikelihood
    # by computing the scale term with `nobs` in the denominator rather than
    # `nobs - d`.
    scale_alternative = np.sum(
        (res.standardized_forecasts_error[0, 1:] *
         res.filter_results.obs_cov[0, 0]**0.5)**2) / mod.nobs
    llf_alternative = np.log(
        norm.pdf(res.resid_recursive, loc=0,
                 scale=scale_alternative**0.5)).sum()
    assert_allclose(llf_alternative, res_glm.llf)

    # Prediction
    # TODO: prediction in this case is not working.
    if constraints is None:
        design = np.ones((1, 3, 10))
        actual = res.forecast(10, design=design)
        assert_allclose(actual, res_glm.predict(np.ones((10, 3))))
    else:
        design = np.ones((2, 3, 10))
        assert_raises(NotImplementedError, res.forecast, 10, design=design)

    # Hypothesis tests
    actual = res.t_test('m1 = 0')
    desired = res_glm.t_test('m1 = 0')
    assert_allclose(actual.statistic, desired.statistic)
    assert_allclose(actual.pvalue, desired.pvalue, atol=1e-15)

    actual = res.f_test('m1 = 0')
    desired = res_glm.f_test('m1 = 0')
    assert_allclose(actual.statistic, desired.statistic)
    assert_allclose(actual.pvalue, desired.pvalue)

    # Information criteria
    # Note: the llf and llf_obs given in the results are based on the Kalman
    # filter and so the ic given in results will not be identical to the
    # OLS versions. Additionally, llf_recursive is comparable to the
    # non-concentrated llf, and not the concentrated llf that is by default
    # used in OLS. Compute new ic based on llf_alternative to compare.
    actual_aic = aic(llf_alternative, res.nobs_effective, res.df_model)
    assert_allclose(actual_aic, res_glm.aic)
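
The residual comparison above works because, with a Gaussian family and identity link, `resid_response` is simply `endog - exog @ params`, which is exactly what the design-times-smoothed-state expression reproduces in the RecursiveLS parameterization. A small self-contained check of that equality using plain GLM on synthetic data (not the constrained setup from the test):

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(1)
nobs = 50
exog = sm.add_constant(rng.normal(size=(nobs, 2)))
endog = exog @ np.array([0.5, 1.0, -0.3]) + rng.normal(size=nobs)

res_glm = sm.GLM(endog, exog).fit()  # default Gaussian family, identity link
assert np.allclose(res_glm.resid_response, endog - exog @ res_glm.params)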
Example #7
def test_ols():
    # More comprehensive tests against OLS estimates
    mod = RecursiveLS(endog, dta['m1'])
    res = mod.fit()

    mod_ols = OLS(endog, dta['m1'])
    res_ols = mod_ols.fit()

    # Regression coefficients, standard errors, and estimated scale
    assert_allclose(res.params, res_ols.params)
    assert_allclose(res.bse, res_ols.bse)
    # Note: scale here is computed according to Harvey, 1989, 4.2.5, and is
    # called the ML estimator and sometimes (e.g. later in section 5)
    # denoted \tilde \sigma_*^2
    assert_allclose(res.filter_results.obs_cov[0, 0], res_ols.scale)

    # OLS residuals are equivalent to smoothed forecast errors
    # (the latter are defined as e_t|T by Harvey, 1989, 5.4.5)
    # (this follows since the smoothed state simply contains the
    # full-information estimates of the regression coefficients)
    actual = (mod.endog[:, 0] -
              np.sum(mod['design', 0, :, :] * res.smoothed_state, axis=0))
    assert_allclose(actual, res_ols.resid)

    # Given the estimate of scale as `sum(v_t^2 / f_t) / (T - d)` (see
    # Harvey, 1989, 4.2.5 on p. 183), then llf_recursive is equivalent to the
    # full OLS loglikelihood (i.e. without the scale concentrated out).
    desired = mod_ols.loglike(res_ols.params, scale=res_ols.scale)
    assert_allclose(res.llf_recursive, desired)
    # Alternatively, we can construct the concentrated OLS loglikelihood
    # by computing the scale term with `nobs` in the denominator rather than
    # `nobs - d`.
    scale_alternative = np.sum((
        res.standardized_forecasts_error[0, 1:] *
        res.filter_results.obs_cov[0, 0]**0.5)**2) / mod.nobs
    llf_alternative = np.log(norm.pdf(res.resid_recursive, loc=0,
                                      scale=scale_alternative**0.5)).sum()
    assert_allclose(llf_alternative, res_ols.llf)

    # Prediction
    actual = res.forecast(10, design=np.ones((1, 1, 10)))
    assert_allclose(actual, res_ols.predict(np.ones((10, 1))))

    # Sums of squares, R^2
    assert_allclose(res.ess, res_ols.ess)
    assert_allclose(res.ssr, res_ols.ssr)
    assert_allclose(res.centered_tss, res_ols.centered_tss)
    assert_allclose(res.uncentered_tss, res_ols.uncentered_tss)
    assert_allclose(res.rsquared, res_ols.rsquared)

    # Mean squares
    assert_allclose(res.mse_model, res_ols.mse_model)
    assert_allclose(res.mse_resid, res_ols.mse_resid)
    assert_allclose(res.mse_total, res_ols.mse_total)

    # Hypothesis tests
    actual = res.t_test('m1 = 0')
    desired = res_ols.t_test('m1 = 0')
    assert_allclose(actual.statistic, desired.statistic)
    assert_allclose(actual.pvalue, desired.pvalue, atol=1e-15)

    actual = res.f_test('m1 = 0')
    desired = res_ols.f_test('m1 = 0')
    assert_allclose(actual.statistic, desired.statistic)
    assert_allclose(actual.pvalue, desired.pvalue, atol=1e-15)

    # Information criteria
    # Note: the llf and llf_obs given in the results are based on the Kalman
    # filter and so the ic given in results will not be identical to the
    # OLS versions. Additionally, llf_recursive is comparable to the
    # non-concentrated llf, and not the concentrated llf that is by default
    # used in OLS. Compute new ic based on llf_alternative to compare.
    actual_aic = aic(llf_alternative, res.nobs_effective, res.df_model)
    assert_allclose(actual_aic, res_ols.aic)
    actual_bic = bic(llf_alternative, res.nobs_effective, res.df_model)
    assert_allclose(actual_bic, res_ols.bic)
Example #8
def test_glm(constraints=None):
    # More comprehensive tests against GLM estimates (somewhat redundant
    # given `test_ols`, but mostly here to complement the tests in
    # `test_glm_constrained`)
    endog = dta.infl
    exog = add_constant(dta[['unemp', 'm1']])

    mod = RecursiveLS(endog, exog, constraints=constraints)
    res = mod.fit()

    mod_glm = GLM(endog, exog)
    if constraints is None:
        res_glm = mod_glm.fit()
    else:
        res_glm = mod_glm.fit_constrained(constraints=constraints)

    # Regression coefficients, standard errors, and estimated scale
    assert_allclose(res.params, res_glm.params)
    assert_allclose(res.bse, res_glm.bse, atol=1e-6)
    # Note: scale here is computed according to Harvey, 1989, 4.2.5, and is
    # called the ML estimator and sometimes (e.g. later in section 5)
    # denoted \tilde \sigma_*^2
    assert_allclose(res.filter_results.obs_cov[0, 0], res_glm.scale)

    # DoF
    # Note: GLM does not include intercept in DoF, so modify by -1
    assert_equal(res.df_model - 1, res_glm.df_model)

    # OLS residuals are equivalent to smoothed forecast errors
    # (the latter are defined as e_t|T by Harvey, 1989, 5.4.5)
    # (this follows since the smoothed state simply contains the
    # full-information estimates of the regression coefficients)
    actual = (mod.endog[:, 0] -
              np.sum(mod['design', 0, :, :] * res.smoothed_state, axis=0))
    assert_allclose(actual, res_glm.resid_response, atol=1e-7)

    # Given the estimate of scale as `sum(v_t^2 / f_t) / (T - d)` (see
    # Harvey, 1989, 4.2.5 on p. 183), then llf_recursive is equivalent to the
    # full OLS loglikelihood (i.e. without the scale concentrated out).
    desired = mod_glm.loglike(res_glm.params, scale=res_glm.scale)
    assert_allclose(res.llf_recursive, desired)
    # Alternatively, we can construct the concentrated OLS loglikelihood
    # by computing the scale term with `nobs` in the denominator rather than
    # `nobs - d`.
    scale_alternative = np.sum((
        res.standardized_forecasts_error[0, 1:] *
        res.filter_results.obs_cov[0, 0]**0.5)**2) / mod.nobs
    llf_alternative = np.log(norm.pdf(res.resid_recursive, loc=0,
                                      scale=scale_alternative**0.5)).sum()
    assert_allclose(llf_alternative, res_glm.llf)

    # Prediction
    # TODO: prediction in this case is not working.
    if constraints is None:
        design = np.ones((1, 3, 10))
        actual = res.forecast(10, design=design)
        assert_allclose(actual, res_glm.predict(np.ones((10, 3))))
    else:
        design = np.ones((2, 3, 10))
        assert_raises(NotImplementedError, res.forecast, 10, design=design)

    # Hypothesis tests
    actual = res.t_test('m1 = 0')
    desired = res_glm.t_test('m1 = 0')
    assert_allclose(actual.statistic, desired.statistic)
    assert_allclose(actual.pvalue, desired.pvalue, atol=1e-15)

    actual = res.f_test('m1 = 0')
    desired = res_glm.f_test('m1 = 0')
    assert_allclose(actual.statistic, desired.statistic)
    assert_allclose(actual.pvalue, desired.pvalue)

    # Information criteria
    # Note: the llf and llf_obs given in the results are based on the Kalman
    # filter and so the ic given in results will not be identical to the
    # OLS versions. Additionally, llf_recursive is comparable to the
    # non-concentrated llf, and not the concentrated llf that is by default
    # used in OLS. Compute new ic based on llf_alternative to compare.
    actual_aic = aic(llf_alternative, res.nobs_effective, res.df_model)
    assert_allclose(actual_aic, res_glm.aic)
Example #9
 def aic(self):
     """
     (float) Akaike Information Criterion
     """
     # return -2*self.llf + 2*self.params.shape[0]
     return aic(self.llf, self.nobs, self.params.shape[0])
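
The commented-out line shows the formula this property is meant to reproduce; assuming the module-level `aic` is `statsmodels.tools.eval_measures.aic`, the two agree, since that helper computes -2*llf + 2*df_modelwc. A quick sketch with arbitrary numbers:

import numpy as np
from statsmodels.tools.eval_measures import aic

llf, nobs, k_params = -100.0, 50, 3
assert np.allclose(aic(llf, nobs, k_params), -2 * llf + 2 * k_params)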
Example #10
                      '+ C(Thyroid_medication)'
                      '+ C(Drink_alcohol_more_than_once_a_week)'
                      '+ N_previous_admissions'
                      '+ PSQI_sleep_quality_index_global_score'
                      '+ Mean_right_hippocampal_volume'
                      '+ Intracranial_volume'
                      '+ Cholesterol'
                      '+ C(Sex)'
                      '+ Age'
                      '+ C(Smoke)'
                      '+ BMI'
                      '+ med_index',
                      data=logit_df)

    result = model.fit()
    main_effect_aic = aic(llf=-152.36, nobs=322, df_modelwc=22)
    print(result.summary())
    print(np.exp(result.params))
    print(np.exp(result.conf_int()))
    print(main_effect_aic)

    # Getting a dataframe of results
    results_df = logit_results_to_dataframe(results=result)
    logit_p_vals = result.pvalues  # Adding in corrected p values
    corrected_log_p_vals = multipletests(pvals=logit_p_vals,
                                         alpha=0.05,
                                         method='fdr_bh')
    corrected_log_p_vals = list(corrected_log_p_vals[1])
    results_df['fdr_pvals'] = corrected_log_p_vals

    # Re-ordering the output and exporting
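
The `multipletests` call above returns a tuple whose second element holds the corrected p-values, which is why the result is indexed with `[1]`. A small standalone illustration of Benjamini-Hochberg FDR correction on made-up p-values:

import numpy as np
from statsmodels.stats.multitest import multipletests

pvals = np.array([0.001, 0.02, 0.04, 0.30, 0.75])
reject, pvals_fdr, _, _ = multipletests(pvals, alpha=0.05, method='fdr_bh')
print(reject)     # boolean mask of hypotheses rejected at the 5% FDR level
print(pvals_fdr)  # BH-adjusted p-values, in the same order as the input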
Example #11
 def aic(self):
     """
     (float) Akaike Information Criterion
     """
     return aic(self.llf, self.nobs_effective, self.df_model)