# Fit a pooled OLS on the observations that survive the missing-value mask
# and print its parameters, its default standard errors and the standard
# errors taken from sw.cov_cluster.
#
# This fragment relies on names bound outside it: sm, sw, np, x, y, group.
# print(...) with a single argument behaves the same on Python 2 and 3.

# xx = sm.add_constant(x, prepend=True)
xx = sm.add_constant(x, prepend=False)  # for Stata compatibility

# remove nan observation: keep only rows in which no column equals -999.0
mask = (xx != -999.0).all(1)  # nan code in dta file
mask.shape  # bare expression; produces no output when run as a script
y = y[mask]
xx = xx[mask]
group = group[mask]

res_srs = sm.OLS(y, xx).fit()
print(res_srs.params)
print(res_srs.bse)

# Element [1] of cov_cluster's return value is kept; going by the name
# ``bse_cr`` this is presumably the cluster-robust standard errors.
bse_cr = sw.cov_cluster(res_srs, group.astype(int))[1]
print(bse_cr)

# NOTE(review): the ``res_stata`` reference table is cut off at this point
# in the available source (the list is never closed and any remaining rows
# or dtype are not visible).  It is kept verbatim as a comment so the
# fragment stays syntactically valid; restore it from the complete file.
#
# res_stata = np.rec.array(
#     [
#         (
#             "growth",
#             "|",
#             -0.1027121,
#             0.22917029999999999,
#             -0.45000000000000001,
#             0.65500000000000003,
#             -0.55483519999999997,
#             0.34941109999999997,
#         ),
#         ("emer", "|", -5.4449319999999997, 0.72939690000000001, -7.46, 0.0, -6.8839379999999997, -4.0059269999999998),
# Compare the fitted short-panel model against pooled OLS, sandwich
# covariance estimators and a second ShortPanelGLS fit.
#
# This fragment relies on names bound outside it: np, mod, res, y, dgp,
# n_groups, ShortPanelGLS.
# print(...) with a single argument behaves the same on Python 2 and 3.

# res.resid is of transformed model
# np.corrcoef(res.resid.reshape(-1,n_groups, order='F'))
y_pred = np.dot(mod.exog, res.params)
resid = y - y_pred
print(np.corrcoef(resid.reshape(-1, n_groups, order='F')))
print(resid.std())
err = y_pred - dgp.y_true
print(err.std())

# OLS standard errors are too small
# (the three lines below are bare expressions: they only display a value
# in an interactive session and print nothing when run as a script)
mod.res_pooled.params
mod.res_pooled.bse
# heteroscedasticity robust doesn't help
mod.res_pooled.HC1_se

# compare with cluster robust se
import scikits.statsmodels.sandbox.panel.sandwich_covariance as sw
print(sw.cov_cluster(mod.res_pooled, dgp.groups.astype(int))[1])
# not bad, pretty close to panel estimator

# and with Newey-West Hac
print(sw.se_cov(sw.cov_nw_panel(mod.res_pooled, 5, mod.group.groupidx)))
# too small, assuming no bugs,
# see Petersen assuming it refers to same kind of model

print(dgp.cov)

mod2 = ShortPanelGLS(y, dgp.exog, dgp.groups)
res2 = mod2.fit_iterative(2)
print(res2.params)
print(res2.bse)

# both implementations produce the same results:
from numpy.testing import assert_almost_equal
assert_almost_equal(res.params, res2.params, decimal=14)
assert_almost_equal(res.bse, res2.bse, decimal=14)
# Check the White-type standard errors from sw.cov_white_simple against
# the results attributes HC0_se / HC1_se, then print sw.cov_cluster output
# for several group layouts.
#
# This fragment relies on names bound outside it: np, sw,
# assert_almost_equal, self (used here as a module-level name), endog, bse_w.
# print(...) with a single argument behaves the same on Python 2 and 3.

# test White
assert_almost_equal(bse_w, self.HC0_se, 15)

bse_wc = sw.cov_white_simple(self, use_correction=True)[1]
print(bse_wc)
# test White
assert_almost_equal(bse_wc, self.HC1_se, 15)

# 5 groups with 20 observations each
groups = np.repeat(np.arange(5), 20)

# idx holds every position i where groups[i] != groups[i + 1]
idx = np.nonzero(np.diff(groups))[0].tolist()
# list(...) is required so that len() below also works on Python 3, where
# zip returns an iterator.
# NOTE(review): idx marks the last row of each group, so pairs such as
# (0, 19), (19, 39) look off by one if they are meant as [start, end) slice
# bounds - confirm against whatever consumes groupidx.
groupidx = list(zip([0] + idx, idx + [len(groups)]))
ngroups = len(groupidx)

print(sw.cov_cluster(self, groups)[1])

# two strange looking corner cases BUG?
print(sw.cov_cluster(self, np.ones(len(endog), int), use_correction=False)[1])
print(sw.cov_crosssection_0(self, np.arange(len(endog)))[1])

# these results are close to simple (no group) white, 50 groups 2 obs each
groups = np.repeat(np.arange(50), 100//50)
print(sw.cov_cluster(self, groups)[1])

# 2 groups with 50 obs each, what was the interpretation again?
groups = np.repeat(np.arange(2), 100//2)
print(sw.cov_cluster(self, groups)[1])

# bare string literal: has no runtime effect
"http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.txt"

# NOTE(review): a triple-quoted string holding an R snippet opens here but
# is cut off in the available source (no closing quotes are visible).  The
# visible part is kept verbatim as comments so the fragment stays
# syntactically valid; restore it from the complete file.
#
# '''
# test <- read.table(
#       url(paste("http://www.kellogg.northwestern.edu/",
#             "faculty/petersen/htm/papers/se/",