def break_test(endog, exog, nbreaks, trim=0.15, vcov=None):
    """
    Test that `nbreaks` exists in the sample.

    The breakpoints are estimated with `find_breakpoints`, the regressors
    returned by `build_exog` are used in an OLS fit, and a quadratic form in
    the contrasts between adjacent entries of the parameter vector is
    returned.

    TODO again, better cache the SSRs
    TODO add support for p-value calculation (Hansen 1997)

    Parameters
    ----------
    endog : array-like
        The endogenous variable.
    exog : array-like
        The exogenous matrix. A one-dimensional input is turned into a
        single column.
    nbreaks : integer
        The number of breakpoints in the null hypothesis
    trim : float or int, optional
        If a float, the minimum percentage of observations in each regime,
        if an integer, the minimum number of observations in each regime.
        Values below 1 are converted to ``int(floor(trim * nobs))``.
    vcov : callback, optional
        Optionally provide a callback to modify the variance / covariance
        matrix used in calculating the test statistic. It is called with
        the OLS results instance.

    Returns
    -------
    fstat : ndarray
        The test statistic, as produced by the matrix products below.

    Notes
    -----
    Only the statistic is returned; critical values are not computed here.
    """
    nobs = len(endog)
    # A fractional trim is expressed as a number of observations.
    if trim < 1:
        trim = int(np.floor(trim * nobs))

    exog = np.asarray(exog)
    # TODO Is there a better way to test for and fix this? (the problem is
    # that if the exog argument is a list, so that exog is 1dim,
    # np.concatenate fails to create a matrix, instead just makes a
    # long vector)
    if exog.ndim == 1:
        exog = exog[:, None]

    breakpoints, _ = find_breakpoints(endog, exog, nbreaks, trim)
    regime_exog, _, _ = build_exog(exog, breakpoints)
    res = OLS(endog, regime_exog).fit()

    # number of parameters subject to break, hard-coded to entire exog for now
    q = exog.shape[1]
    # number of parameters not subject to break, hard-coded to zero for now
    p = 0

    # Restriction matrix: -1 on the main diagonal, +1 on the first
    # superdiagonal, so row i selects params[i + 1] - params[i].
    shape = (nbreaks, nbreaks + 1)
    R = np.eye(*shape, k=1) - np.eye(*shape)
    contrasts = R.dot(res.params[:, None])

    V = res.cov_params() if not vcov else vcov(res)
    scale = (nobs - (nbreaks + 1) * q - p) / (nobs * nbreaks * q)
    inv_cov = np.linalg.inv(R.dot(V).dot(R.T))
    return scale * contrasts.T.dot(inv_cov).dot(contrasts)
def ocsb_test_value(diff_series, x_reg, period):
    """Return the ratio of the ``"x2"`` coefficient to its standard error.

    An OLS regression of ``diff_series`` on ``x_reg`` is fit. The
    coefficient labelled ``"x2"`` is divided by the square root of its
    entry on the diagonal of the parameter covariance.

    Parameters
    ----------
    diff_series : array-like
        Dependent variable of the regression.
    x_reg : array-like
        Regressors; the results are indexed by the label ``"x2"``, so
        labelled (pandas-style) input is presumably expected - TODO confirm.
    period : int
        Not used in the body.

    Returns
    -------
    float
        The ratio described above, or ``-np.inf`` when fitting raises
        ``ValueError``.
    """
    try:
        fit = OLS(diff_series, x_reg).fit()
    except ValueError:
        # Regression Model cannot be fit
        return -np.inf
    else:
        x2_variance = fit.cov_params()["x2"]["x2"]
        x2_std_err = np.sqrt(x2_variance)
        return fit.params["x2"] / x2_std_err
def test_compatibility(self):
    """Hypothesis test for the compatibility of prior mean with data

    An OLS fit of the model's ``endog`` on ``exog`` is computed from
    scratch. The gap between ``q_matrix`` and the restricted OLS
    parameters is weighted by the inverse of the sum of the restricted OLS
    covariance and ``sigma_prior``.

    Returns
    -------
    statistic : ndarray
        The quadratic-form test statistic.
    pvalue : ndarray
        Chi-square survival function evaluated at ``statistic``.
    df : int
        Rank of ``sigma_prior``, used as degrees of freedom.
    """
    from scipy import stats

    model = self.model

    # TODO: should we store the OLS results ? not needed so far, but maybe
    # cache. (Computed from scratch here rather than through wald_test.)
    ols_fit = OLS(model.endog, model.exog).fit()

    restriction = model.r_matrix
    gap = model.q_matrix - restriction.dot(ols_fit.params)[:, None]
    restricted_cov = ols_fit.cov_params(r_matrix=restriction)

    weighted_gap = np.linalg.solve(restricted_cov + model.sigma_prior, gap)
    statistic = gap.T.dot(weighted_gap)

    # same as r_mat.shape[0]
    df = np.linalg.matrix_rank(model.sigma_prior)
    pvalue = stats.chi2.sf(statistic, df)

    # TODO: return results class
    return statistic, pvalue, df
def test_combine_subset_regression(self):
    """Use an OLS fit on one half of the sample as prior for the other half.

    The TheilGLS estimate built this way is compared with OLS on the full
    (shuffled) sample, and the sandwich standard errors are compared with
    stored regression values.
    """
    # split sample into two, use first sample as prior for second
    n_total = len(self.endog)
    half = n_total // 2

    # shuffle to get random subsamples
    np.random.seed(987125)
    order = np.random.permutation(np.arange(n_total))
    ys = self.endog[order]
    xs = self.exog[order]

    k = 10
    y_prior, x_prior = ys[:half], xs[:half, :k]
    y_data, x_data = ys[half:], xs[half:, :k]

    res_prior = OLS(y_prior, x_prior).fit()
    res_data = OLS(y_data, x_data).fit()
    scale_ratio = res_data.scale / res_prior.scale  # 1.01

    mod_1 = TheilGLS(
        y_data,
        x_data,
        r_matrix=np.eye(k),
        q_matrix=res_prior.params,
        sigma_prior=scale_ratio * res_prior.cov_params(),
    )
    res_data_prior = mod_1.fit(cov_type='data-prior')
    res_sandwich = mod_1.fit(cov_type='sandwich')
    res_full = OLS(ys, xs[:, :k]).fit()

    assert_allclose(res_data_prior.params, res_full.params, rtol=1e-9)

    # correct for differences in scale computation
    scale_adj = np.sqrt(res_data_prior.scale / res_full.scale)
    assert_allclose(res_data_prior.bse, res_full.bse * scale_adj, rtol=1e-3)

    # regression test, does not verify numbers
    # especially why are these smaller than OLS on full sample
    # in larger sample, nobs=600, those were close to full OLS
    expected_bse = np.array([
        0.26589869, 0.15224812, 0.38407399, 0.75679949, 0.66084200,
        0.54174080, 0.53697607, 0.66006377, 0.38228551, 0.53920485,
    ])
    assert_allclose(res_sandwich.bse, expected_bse, rtol=1e-7)
def test_combine_subset_regression(self):
    """Use an OLS fit on one half of the sample as prior for the other half.

    The TheilGLS estimate built this way is compared with OLS on the full
    (shuffled) sample, using a stored correction factor for the standard
    errors, and the sandwich standard errors are compared with stored
    regression values.
    """
    # split sample into two, use first sample as prior for second
    n_total = len(self.endog)
    half = n_total // 2

    # shuffle to get random subsamples
    np.random.seed(987125)
    order = np.random.permutation(np.arange(n_total))
    ys = self.endog[order]
    xs = self.exog[order]

    k = 10
    y_prior, x_prior = ys[:half], xs[:half, :k]
    y_data, x_data = ys[half:], xs[half:, :k]

    res_prior = OLS(y_prior, x_prior).fit()
    res_data = OLS(y_data, x_data).fit()
    scale_ratio = res_data.scale / res_prior.scale  # 1.01

    mod_1 = TheilGLS(
        y_data,
        x_data,
        r_matrix=np.eye(k),
        q_matrix=res_prior.params,
        sigma_prior=scale_ratio * res_prior.cov_params(),
    )
    res_data_prior = mod_1.fit(cov_type='data-prior')
    res_sandwich = mod_1.fit(cov_type='sandwich')
    res_full = OLS(ys, xs[:, :k]).fit()

    assert_allclose(res_data_prior.params, res_full.params, rtol=1e-9)

    # correct for differences in scale computation (stored constant)
    scale_adj = 0.96156318
    assert_allclose(res_data_prior.bse, res_full.bse * scale_adj, rtol=1e-3)

    # regression test, does not verify numbers
    # especially why are these smaller than OLS on full sample
    # in larger sample, nobs=600, those were close to full OLS
    expected_bse = np.array([
        0.27609914, 0.15808869, 0.39880789, 0.78583194, 0.68619331,
        0.56252314, 0.55757562, 0.68538523, 0.39695081, 0.55988991,
    ])
    assert_allclose(res_sandwich.bse, expected_bse, rtol=1e-7)
def test_combine_subset_regression(self):
    """Combine two random half-samples through a TheilGLS prior.

    OLS on the first half supplies the prior mean and (rescaled) prior
    covariance for a TheilGLS model on the second half; the result is
    checked against OLS on the whole shuffled sample.
    """
    # split sample into two, use first sample as prior for second
    sample_size = len(self.endog)
    split_at = sample_size // 2

    # shuffle to get random subsamples
    np.random.seed(987125)
    shuffled = np.random.permutation(np.arange(sample_size))
    ys = self.endog[shuffled]
    xs = self.exog[shuffled]

    k = 10
    first_fit = OLS(ys[:split_at], xs[:split_at, :k]).fit()
    second_fit = OLS(ys[split_at:], xs[split_at:, :k]).fit()
    w = second_fit.scale / first_fit.scale  # 1.01

    theil_kwargs = dict(
        r_matrix=np.eye(k),
        q_matrix=first_fit.params,
        sigma_prior=w * first_fit.cov_params(),
    )
    mod_1 = TheilGLS(ys[split_at:], xs[split_at:, :k], **theil_kwargs)
    fit_data_prior = mod_1.fit(cov_type='data-prior')
    fit_sandwich = mod_1.fit(cov_type='sandwich')

    full_fit = OLS(ys, xs[:, :k]).fit()
    assert_allclose(fit_data_prior.params, full_fit.params, rtol=1e-9)

    # correct for differences in scale computation
    corr_fact = np.sqrt(fit_data_prior.scale / full_fit.scale)
    assert_allclose(fit_data_prior.bse, full_fit.bse * corr_fact, rtol=1e-3)

    # regression test, does not verify numbers
    # especially why are these smaller than OLS on full sample
    # in larger sample, nobs=600, those were close to full OLS
    bse_reference = np.array([
        0.26589869, 0.15224812, 0.38407399, 0.75679949, 0.66084200,
        0.54174080, 0.53697607, 0.66006377, 0.38228551, 0.53920485,
    ])
    assert_allclose(fit_sandwich.bse, bse_reference, rtol=1e-7)
def test_predict_se():
    """Consistency checks for `wls_prediction_std` on OLS and WLS results.

    This test doesn't use reference values: it checks consistency across
    options, and compares to a direct calculation of the prediction
    standard error.
    """
    npt = np.testing
    # stats.t.isf(0.05/2., 50 - 2)
    t_crit = 2.0106347546964458

    def direct_predstd(covb, noise_var, exog):
        # direct calculation: noise variance plus the row-wise quadratic
        # form of exog with the parameter covariance
        predvar = noise_var + (exog * np.dot(covb, exog.T).T).sum(1)
        return np.sqrt(predvar)

    def check_bounds(lower, upper, center, half_width):
        npt.assert_allclose(upper, center + half_width, rtol=1e-12)
        npt.assert_allclose(lower, center - half_width, rtol=1e-12)

    # generate dataset
    nsample = 50
    x1 = np.linspace(0, 20, nsample)
    x = np.column_stack((x1, (x1 - 5)**2, np.ones(nsample)))
    np.random.seed(0)  # 9876789) #9876543)
    beta = [0.5, -0.01, 5.]
    y_true2 = np.dot(x, beta)
    w = np.ones(nsample)
    w[int(nsample * 6. / 10):] = 3
    sig = 0.5
    y2 = y_true2 + sig * w * np.random.normal(size=nsample)
    x2 = x[:, [0, 2]]

    # estimate OLS
    res2 = OLS(y2, x2).fit()
    predstd = direct_predstd(res2.cov_params(), res2.mse_resid, x2)

    prstd, iv_l, iv_u = wls_prediction_std(res2)
    npt.assert_almost_equal(prstd, predstd, 15)
    ci_half = t_crit * predstd
    check_bounds(iv_l, iv_u, res2.fittedvalues, ci_half)

    prstd, iv_l, iv_u = wls_prediction_std(res2, x2[:3, :])
    # NOTE(review): this compares prstd with a slice of itself, so it only
    # constrains the length; presumably predstd[:3] was intended - confirm.
    npt.assert_equal(prstd, prstd[:3])
    check_bounds(iv_l, iv_u, res2.fittedvalues[:3], ci_half[:3])

    # check WLS
    res3 = WLS(y2, x2, 1. / w).fit()
    predstd = direct_predstd(res3.cov_params(), res3.mse_resid * w, x2)

    prstd, iv_l, iv_u = wls_prediction_std(res3)
    npt.assert_almost_equal(prstd, predstd, 15)
    ci_half = t_crit * predstd
    check_bounds(iv_l, iv_u, res3.fittedvalues, ci_half)

    # testing shapes of exog
    # NOTE(review): as above, each result is compared with (part of) itself.
    shape_cases = [
        (x2[-1:, :], 3., -1),
        (x2[-1, :], 3., -1),
        (x2[-2:, :], 3., slice(-2, None)),
        (x2[-2:, :], [3, 3], slice(-2, None)),
    ]
    for exog_new, new_weights, selector in shape_cases:
        prstd, iv_l, iv_u = wls_prediction_std(res3, exog_new,
                                               weights=new_weights)
        npt.assert_equal(prstd, prstd[selector])

    prstd, iv_l, iv_u = wls_prediction_std(res3, x2[:3, :])
    npt.assert_equal(prstd, prstd[:3])
    check_bounds(iv_l, iv_u, res3.fittedvalues[:3], ci_half[:3])

    # use wrong size for exog
    with npt.assert_raises(ValueError):
        wls_prediction_std(res3, x2[-1, 0], weights=3.)

    # check some weight values
    sew1 = wls_prediction_std(res3, x2[-3:, :])[0]**2
    for wv in np.linspace(0.5, 3, 5):
        sew = wls_prediction_std(res3, x2[-3:, :], weights=1. / wv)[0]**2
        npt.assert_allclose(sew, sew1 + res3.scale * (wv - 1))
def test_predict_se():
    """Check `wls_prediction_std` against a direct calculation.

    No reference values are used. The prediction standard errors and
    interval bounds returned for OLS and WLS results are compared with
    values computed directly from the fitted results, and several exog /
    weights shapes are exercised.
    """
    assert_allclose = np.testing.assert_allclose
    assert_equal = np.testing.assert_equal

    # stats.t.isf(0.05/2., 50 - 2)
    q = 2.0106347546964458

    # generate dataset
    nsample = 50
    x1 = np.linspace(0, 20, nsample)
    x = np.column_stack((x1, (x1 - 5)**2, np.ones(nsample)))
    np.random.seed(0)  # 9876789) #9876543)
    beta = [0.5, -0.01, 5.]
    y_true2 = np.dot(x, beta)
    w = np.ones(nsample)
    w[int(nsample * 6. / 10):] = 3
    sig = 0.5
    noise = np.random.normal(size=nsample)
    y2 = y_true2 + sig * w * noise
    x2 = x[:, [0, 2]]
    head = x2[:3, :]

    # estimate OLS
    res2 = OLS(y2, x2).fit()

    # direct calculation
    covb = res2.cov_params()
    predvar = res2.mse_resid + (x2 * np.dot(covb, x2.T).T).sum(1)
    predstd = np.sqrt(predvar)

    prstd, iv_l, iv_u = wls_prediction_std(res2)
    np.testing.assert_almost_equal(prstd, predstd, 15)

    ci_half = q * predstd
    fitted = res2.fittedvalues
    assert_allclose(iv_u, fitted + ci_half, rtol=1e-12)
    assert_allclose(iv_l, fitted - ci_half, rtol=1e-12)

    prstd, iv_l, iv_u = wls_prediction_std(res2, head)
    # NOTE(review): prstd is compared with a slice of itself here, so only
    # its length is constrained; presumably predstd[:3] was meant - confirm.
    assert_equal(prstd, prstd[:3])
    assert_allclose(iv_u, fitted[:3] + ci_half[:3], rtol=1e-12)
    assert_allclose(iv_l, fitted[:3] - ci_half[:3], rtol=1e-12)

    # check WLS
    res3 = WLS(y2, x2, 1. / w).fit()

    # direct calculation
    covb = res3.cov_params()
    predvar = res3.mse_resid * w + (x2 * np.dot(covb, x2.T).T).sum(1)
    predstd = np.sqrt(predvar)

    prstd, iv_l, iv_u = wls_prediction_std(res3)
    np.testing.assert_almost_equal(prstd, predstd, 15)

    ci_half = q * predstd
    fitted = res3.fittedvalues
    assert_allclose(iv_u, fitted + ci_half, rtol=1e-12)
    assert_allclose(iv_l, fitted - ci_half, rtol=1e-12)

    # testing shapes of exog
    # NOTE(review): the assert_equal calls below are self-comparisons too.
    prstd, iv_l, iv_u = wls_prediction_std(res3, x2[-1:, :], weights=3.)
    assert_equal(prstd, prstd[-1])
    prstd, iv_l, iv_u = wls_prediction_std(res3, x2[-1, :], weights=3.)
    assert_equal(prstd, prstd[-1])

    last_two = x2[-2:, :]
    prstd, iv_l, iv_u = wls_prediction_std(res3, last_two, weights=3.)
    assert_equal(prstd, prstd[-2:])
    prstd, iv_l, iv_u = wls_prediction_std(res3, last_two, weights=[3, 3])
    assert_equal(prstd, prstd[-2:])

    prstd, iv_l, iv_u = wls_prediction_std(res3, head)
    assert_equal(prstd, prstd[:3])
    assert_allclose(iv_u, fitted[:3] + ci_half[:3], rtol=1e-12)
    assert_allclose(iv_l, fitted[:3] - ci_half[:3], rtol=1e-12)

    # use wrong size for exog
    with np.testing.assert_raises(ValueError):
        wls_prediction_std(res3, x2[-1, 0], weights=3.)

    # check some weight values
    last_three = x2[-3:, :]
    sew1 = wls_prediction_std(res3, last_three)[0]**2
    for wv in np.linspace(0.5, 3, 5):
        sew = wls_prediction_std(res3, last_three, weights=1. / wv)[0]**2
        assert_allclose(sew, sew1 + res3.scale * (wv - 1))
# Example / smoke-test script for the heteroscedasticity tests.
# NOTE(review): `nobs` and the test functions used below are defined outside
# this fragment.

# Design matrix: a column of ones and a second column np.arange(nobs)/20.
x = np.ones((nobs,2))
x[:,1] = np.arange(nobs)/20.

# Noise is scaled by 1 + 1.5 where the second column exceeds 10.
y = x.sum(1) + 1.01*(1+1.5*(x[:,1]>10))*np.random.rand(nobs)
print(het_goldfeldquandt(y,x, 1))

# Same construction with a smaller scale increase (factor 1 + 0.5).
y = x.sum(1) + 1.01*(1+0.5*(x[:,1]>10))*np.random.rand(nobs)
print(het_goldfeldquandt(y,x, 1))

# Noise scale decreases (factor 1 - 0.5) where the second column exceeds 10.
y = x.sum(1) + 1.01*(1-0.5*(x[:,1]>10))*np.random.rand(nobs)
print(het_goldfeldquandt(y,x, 1))

# Other tests on the same (last) sample.
print(het_breuschpagan(y,x))
print(het_white(y,x))

# The Goldfeld-Quandt call returns three values here; only the first two
# are printed.
f, fp, fo = het_goldfeldquandt(y,x, 1)
print(f, fp)
resgq = het_goldfeldquandt(y,x, 1, retres=True)
print(resgq)

#this is just a syntax check:
print(_neweywestcov(y, x))

# Covariance-related output of an OLS fit on the same data.
resols1 = OLS(y, x).fit()
print(_neweywestcov(resols1.resid, x))
print(resols1.cov_params())
print(resols1.HC0_se)
print(resols1.cov_HC0)

# Larger noise amplitude (10.), class-based interface with alternative='dec'.
y = x.sum(1) + 10.*(1-0.5*(x[:,1]>10))*np.random.rand(nobs)
print(HetGoldfeldQuandt().run(y,x, 1, alternative='dec'))
#transf = TransformRestriction(np.eye(exog.shape[1])[:2], res2.params[:2] / 2) transf3 = TransformRestriction([[0, 0, 0, 1, 0],[0, 0, 0, 0, 1]], [0, 1]) exog3_st = transf3.reduce(exog) res3 = OLS(endog, exog3_st).fit() # need to correct for constant/offset in the optimization res3 = OLS(endog - exog.dot(transf3.constant.squeeze()), exog3_st).fit() params = transf3.expand(res3.params).squeeze() assert_allclose(params[:-2], res3_ols.params, rtol=1e-13) print(res3.params) print(params) print(res3_ols.params) print(res3_ols.bse) # the following raises `ValueError: cannot test a constant constraint` #tt = res3.t_test(transf3.transf_mat, transf3.constant.squeeze()) #print tt.sd cov_params3 = transf3.transf_mat.dot(res3.cov_params()).dot(transf3.transf_mat.T) bse3 = np.sqrt(np.diag(cov_params3)) print(bse3) tp = transform_params_constraint(res2.params, res2.normalized_cov_params, transf3.R, transf3.q) tp = transform_params_constraint(res2.params, res2.cov_params(), transf3.R, transf3.q) import statsmodels.api as sm rand_data = sm.datasets.randhie.load(as_pandas=False) rand_exog = rand_data.exog.view(float).reshape(len(rand_data.exog), -1) rand_exog = sm.add_constant(rand_exog, prepend=False) # Fit Poisson model: poisson_mod0 = sm.Poisson(rand_data.endog, rand_exog)
#transf = TransformRestriction(np.eye(exog.shape[1])[:2], res2.params[:2] / 2) transf3 = TransformRestriction([[0, 0, 0, 1, 0],[0, 0, 0, 0, 1]], [0, 1]) exog3_st = transf3.reduce(exog) res3 = OLS(endog, exog3_st).fit() # need to correct for constant/offset in the optimization res3 = OLS(endog - exog.dot(transf3.constant.squeeze()), exog3_st).fit() params = transf3.expand(res3.params).squeeze() assert_allclose(params[:-2], res3_ols.params, rtol=1e-13) print(res3.params) print(params) print(res3_ols.params) print(res3_ols.bse) # the following raises `ValueError: can't test a constant constraint` #tt = res3.t_test(transf3.transf_mat, transf3.constant.squeeze()) #print tt.sd cov_params3 = transf3.transf_mat.dot(res3.cov_params()).dot(transf3.transf_mat.T) bse3 = np.sqrt(np.diag(cov_params3)) print(bse3) tp = transform_params_constraint(res2.params, res2.normalized_cov_params, transf3.R, transf3.q) tp = transform_params_constraint(res2.params, res2.cov_params(), transf3.R, transf3.q) from statsmodels.discrete.discrete_model import Poisson import statsmodels.api as sm rand_data = sm.datasets.randhie.load(as_pandas=False) rand_exog = rand_data.exog.view(float).reshape(len(rand_data.exog), -1) rand_exog = sm.add_constant(rand_exog, prepend=False)
def test_combine_subset_regression(self):
    """Combine two random half-samples through a TheilGLS prior.

    OLS on the first half supplies the prior mean and (rescaled) prior
    covariance for a TheilGLS model on the second half; the result is
    checked against OLS on the whole shuffled sample, using a stored
    correction factor for the standard errors.
    """
    # split sample into two, use first sample as prior for second
    sample_size = len(self.endog)
    split_at = sample_size // 2

    # shuffle to get random subsamples
    np.random.seed(987125)
    shuffled = np.random.permutation(np.arange(sample_size))
    ys = self.endog[shuffled]
    xs = self.exog[shuffled]

    k = 10
    first_fit = OLS(ys[:split_at], xs[:split_at, :k]).fit()
    second_fit = OLS(ys[split_at:], xs[split_at:, :k]).fit()
    w = second_fit.scale / first_fit.scale  # 1.01

    theil_kwargs = dict(
        r_matrix=np.eye(k),
        q_matrix=first_fit.params,
        sigma_prior=w * first_fit.cov_params(),
    )
    mod_1 = TheilGLS(ys[split_at:], xs[split_at:, :k], **theil_kwargs)
    fit_data_prior = mod_1.fit(cov_type='data-prior')
    fit_sandwich = mod_1.fit(cov_type='sandwich')

    full_fit = OLS(ys, xs[:, :k]).fit()
    assert_allclose(fit_data_prior.params, full_fit.params, rtol=1e-9)

    # correct for differences in scale computation (stored constant)
    corr_fact = 0.96156318
    assert_allclose(fit_data_prior.bse, full_fit.bse * corr_fact, rtol=1e-3)

    # regression test, does not verify numbers
    # especially why are these smaller than OLS on full sample
    # in larger sample, nobs=600, those were close to full OLS
    bse_reference = np.array([
        0.27609914, 0.15808869, 0.39880789, 0.78583194, 0.68619331,
        0.56252314, 0.55757562, 0.68538523, 0.39695081, 0.55988991,
    ])
    assert_allclose(fit_sandwich.bse, bse_reference, rtol=1e-7)
def test_ols_noncentrality(self):
    """Cross-check Wald-test noncentrality helpers against an OLS fit.

    The noncentrality computed by `wald_test_noncent_generic` and
    `wald_test_noncent` is compared with the OLS `wald_test` statistic,
    with the one-way effect size and power helpers, and with itself in
    vectorized (``joint=False``) form.
    """
    tol = dict(rtol=1e-13)

    k = self.k_groups
    res_ols = OLS(self.y, self.ex).fit()
    nobs_t = res_ols.model.nobs

    # constraint: each row contrasts the first parameter with one other
    c_equal = -np.eye(k)[1:]
    c_equal[:, 0] = 1
    n_constraints = c_equal.shape[0]
    v = np.zeros(n_constraints)

    # noncentrality at estimated parameters
    wt = res_ols.wald_test(c_equal, scalar=True)
    df_num = wt.df_num
    df_denom = wt.df_denom

    cov_p = res_ols.cov_params()
    nc_wt = wald_test_noncent_generic(
        res_ols.params, c_equal, v, cov_p, diff=None, joint=True)
    assert_allclose(nc_wt, wt.statistic * wt.df_num, **tol)

    nc_wt2 = wald_test_noncent(
        res_ols.params, c_equal, v, res_ols, diff=None, joint=True)
    assert_allclose(nc_wt2, nc_wt, **tol)

    # effect size from OLS versus the one-way helper
    es_ols = nc_wt / nobs_t
    es_oneway = smo.effectsize_oneway(
        res_ols.params, res_ols.scale, self.nobs, use_var="equal")
    assert_allclose(es_ols, es_oneway, **tol)

    # power from both effect sizes
    alpha = 0.05
    pow_ols = smpwr.ftest_power(
        np.sqrt(es_ols), df_denom, df_num, alpha, ncc=1)
    pow_oneway = smpwr.ftest_anova_power(
        np.sqrt(es_oneway), nobs_t, alpha, k_groups=k, df=None)
    assert_allclose(pow_ols, pow_oneway, **tol)

    # noncentrality at other params
    params_alt = res_ols.params * 0.75
    # compute constraint value so we can get noncentrality from wald_test
    v_off = _offset_constraint(c_equal, res_ols.params, params_alt)
    wt_off = res_ols.wald_test((c_equal, v + v_off), scalar=True)
    nc_wt_off = wald_test_noncent_generic(
        params_alt, c_equal, v, cov_p, diff=None, joint=True)
    assert_allclose(nc_wt_off, wt_off.statistic * wt_off.df_num, **tol)

    # check vectorized version, joint=False
    nc_wt_vec = wald_test_noncent_generic(
        params_alt, c_equal, v, cov_p, diff=None, joint=False)
    for i in range(n_constraints):
        # one-row slices keep the constraint matrix two-dimensional
        row = slice(i, i + 1)
        nc_wt_i = wald_test_noncent_generic(
            params_alt, c_equal[row], v[row], cov_p, diff=None, joint=False)
        assert_allclose(nc_wt_vec[i], nc_wt_i, **tol)
def sequential_break_test(endog, exog, nbreaks_null=0, trim=0.15, vcov=None):
    """
    Test that one more break exists in the sample, given that there are at
    least `nbreaks_null` breaks.

    The breakpoints under the null are estimated first. Each resulting
    segment is then searched for one additional breakpoint, the segment
    with the smallest SSR is kept, and the test statistic is computed from
    an OLS fit on that segment.

    TODO obviously in sequential estimation of breakpoints, this will right
         now recalculate the SSRs for the segments of the model many times.
         Easy optimization is possible
    TODO add support for dates, and return breakdates as well
    TODO add support for p-value calculation (Hansen 1997)
    TODO add support for different trimming values when calculating the
         null model and when estimating the additional break (see
         Footnote 4, Bai and Perron 2003)

    Parameters
    ----------
    endog : array-like
        The endogenous variable.
    exog : array-like
        The exogenous matrix. A one-dimensional input is turned into a
        single column.
    nbreaks_null : integer
        The number of breakpoints in the null hypothesis
    trim : float or int, optional
        If a float, the minimum percentage of observations in each regime,
        if an integer, the minimum number of observations in each regime.
        Values below 1 are converted to ``int(floor(trim * nobs))``.
    vcov : callback, optional
        Optionally provide a callback to modify the variance / covariance
        matrix used in calculating the test statistic. It is called with
        the OLS results instance.

    Returns
    -------
    fstat : ndarray
        The test statistic, as produced by the matrix products below.
        Critical values are not computed here.

    Raises
    ------
    ValueError
        If no segment admits an additional breakpoint (every candidate
        search raised ``InvalidRegimeError``).
    """
    nobs = len(endog)
    if trim < 1:
        trim = int(np.floor(trim * nobs))

    exog = np.asarray(exog)
    # TODO Is there a better way to test for and fix this? (the problem is
    # that if the exog argument is a list, so that exog is 1dim,
    # np.concatenate fails to create a matrix, instead just makes a
    # long vector)
    if exog.ndim == 1:
        exog = exog[:, None]

    # TODO add test to make sure trim is consistent with the number of breaks
    # in both the null and alternative hypotheses

    # Estimate the breakpoints under the null
    breakpoints, _ = find_breakpoints(endog, exog, nbreaks_null, trim)

    # Get the indices for the start and end of each segment. The pairs are
    # indexed below, so they must be materialized: on Python 3 `zip` returns
    # a one-shot iterator that does not support subscripting.
    # NOTE(review): with `end += 1` below, consecutive segments share the
    # breakpoint observation and the last slice ends at nobs + 1 - confirm
    # this matches the convention of find_breakpoints.
    segments = list(zip((0,) + breakpoints, breakpoints + (nobs,)))

    # For each segment (there are nbreaks_null+1), estimate an additional
    # breakpoint
    optimal_segment = None
    new_breakpoints = None
    min_ssr = np.inf
    for segment in range(nbreaks_null + 1):
        start, end = segments[segment]
        # Add one to the end, since breakpoint is actually the last
        # observation in the previous regime
        end += 1

        # TODO this involves re-calculating SSR for lots of segments. Should
        # use a cache of the upper-triangular set of SSRs from the
        # find_breakpoints estimation above
        try:
            candidate, ssr = find_breakpoints(
                endog[start:end], exog[start:end], 1, trim)
        except InvalidRegimeError:
            # No admissible split for this segment; try the next one.
            continue

        if ssr < min_ssr:
            min_ssr = ssr
            optimal_segment = segment
            new_breakpoints = candidate

    if optimal_segment is None:
        raise ValueError(
            "no segment admits an additional breakpoint with the given trim")

    # Find the parameters
    start, end = segments[optimal_segment]
    end += 1
    segment_exog, _, _ = build_exog(exog[start:end], new_breakpoints)
    res = OLS(endog[start:end], segment_exog).fit()

    # Calculate the test statistic
    nbreaks = 1
    # number of parameters subject to break, hard-coded to entire exog for now
    q = exog.shape[1]
    # number of parameters not subject to break, hard-coded to zero for now
    p = 0

    # Restriction matrix: -1 on the main diagonal, +1 on the first
    # superdiagonal, so row i selects params[i + 1] - params[i].
    R = np.eye(nbreaks, nbreaks + 1, k=1) - np.eye(nbreaks, nbreaks + 1)
    Rd = R.dot(res.params[:, None])

    V = vcov(res) if vcov else res.cov_params()

    nobs_segment = end - start
    const = ((nobs_segment - (nbreaks + 1) * q - p)
             / (nobs_segment * nbreaks * q))
    fstat = const * Rd.T.dot(np.linalg.inv(R.dot(V).dot(R.T))).dot(Rd)

    return fstat