def test_panel_no_effects(data):
    """A PanelOLS fit with no effects requested must equal the PooledOLS fit."""
    panel_res = PanelOLS(data.y, data.x).fit()
    pooled_res = PooledOLS(data.y, data.x).fit()
    assert_results_equal(panel_res, pooled_res)
def test_panel_other_lsdv(data):
    """Compare PanelOLS with ``other_effects`` to an explicit dummy regression.

    Each column of the model's other-effect categories is expanded into dummy
    columns, the dummies are appended to the regressors, and the augmented
    design is fit with ``IV2SLS`` (``None`` is passed for both the third and
    fourth positional arguments).  Results are compared for the unadjusted,
    robust and clustered covariance options.
    """
    # Options passed to every PanelOLS fit in this test.
    fit_opts = dict(auto_df=False, count_effects=False, debiased=False)

    mod = PanelOLS(data.y, data.x, other_effects=data.c)
    assert "Num Other Effects: 2" in str(mod)
    res = mod.fit(**fit_opts)

    y = mod.dependent.dataframe.copy()
    x = mod.exog.dataframe.copy()
    cats = mod._other_effect_cats.dataframe.copy()

    dummy_blocks = []
    dummy_names = []
    for pos, col in enumerate(cats):
        series = cats[col].copy()
        # The first level is kept only for the first effect of a model
        # without a constant; otherwise it is dropped.
        dummies = pd.get_dummies(
            series.astype(np.int64), drop_first=(mod.has_constant or pos > 0)
        )
        dummies.columns = [
            series.name + "_val_" + str(level) for level in dummies.columns
        ]
        dummy_names.extend(list(dummies.columns))
        dummy_blocks.append(dummies.values)
    d = np.column_stack(dummy_blocks)
    if mod.has_constant:
        # Remove the least-squares projection of the dummies on a column of ones.
        z = np.ones_like(y)
        d = d - z @ lstsq(z, d)[0]

    xd = np.c_[x.values, d]
    xd = pd.DataFrame(
        xd, index=x.index, columns=list(x.columns) + list(dummy_names)
    )
    ols_mod = IV2SLS(y, xd, None, None)

    res2 = ols_mod.fit(cov_type="unadjusted")
    assert_results_equal(res, res2, test_fit=False)

    # Requesting the unadjusted covariance explicitly must match the first fit.
    res3 = mod.fit(cov_type="unadjusted", **fit_opts)
    assert_results_equal(res, res3)

    res = mod.fit(cov_type="robust", **fit_opts)
    res2 = ols_mod.fit(cov_type="robust")
    assert_results_equal(res, res2, test_fit=False)

    for clusters in (data.vc1, data.vc2):
        ols_clusters = mod.reformat_clusters(clusters)
        res = mod.fit(cov_type="clustered", clusters=clusters, **fit_opts)
        res2 = ols_mod.fit(cov_type="clustered", clusters=ols_clusters.dataframe)
        assert_results_equal(res, res2, test_fit=False)

    res = mod.fit(cov_type="clustered", cluster_time=True, **fit_opts)
    time_clusters = pd.DataFrame(
        mod.dependent.time_ids, index=mod.dependent.index, columns=["var.clust"]
    )
    res2 = ols_mod.fit(cov_type="clustered", clusters=time_clusters)
    assert_results_equal(res, res2, test_fit=False)

    res = mod.fit(cov_type="clustered", cluster_entity=True, **fit_opts)
    entity_clusters = pd.DataFrame(
        mod.dependent.entity_ids, index=mod.dependent.index, columns=["var.clust"]
    )
    res2 = ols_mod.fit(cov_type="clustered", clusters=entity_clusters)
    assert_results_equal(res, res2, test_fit=False)
def test_lsdv_options(data):
    """``fit(use_lsdv=True)`` must match the default ``fit()`` for each setup.

    The setups cover weights, entity effects, time effects, both together,
    and combinations with one or two other effects.
    """

    def check(**model_kwargs):
        # Build one model and compare its default fit with the use_lsdv fit.
        mod = PanelOLS(data.y, data.x, **model_kwargs)
        default_res = mod.fit()
        lsdv_res = mod.fit(use_lsdv=True)
        assert_results_equal(default_res, lsdv_res)

    check(weights=data.w)
    check(weights=data.w, entity_effects=True)
    check(time_effects=True)
    check(time_effects=True, entity_effects=True)

    # Only the first column of the other-effects data.
    c1 = PanelData(data.c).dataframe.iloc[:, [0]]
    check(entity_effects=True, other_effects=c1)
    check(time_effects=True, other_effects=c1)
    check(weights=data.w, entity_effects=True, other_effects=c1)
    check(weights=data.w, time_effects=True, other_effects=c1)

    check(weights=data.w, other_effects=data.c)
def test_panel_time_lsdv(large_data):
    """Compare PanelOLS with time effects to an explicit time-dummy regression.

    Time dummies from ``mod.dependent.dummies`` are appended to the regressors
    and the augmented design is fit with ``IV2SLS``.  Results are compared for
    the unadjusted, robust and clustered covariance options, and the panel
    model's ``rsquared_inclusive`` is checked against the dummy regression's
    ``rsquared``.
    """
    # Options passed to every PanelOLS fit in this test.
    fit_opts = dict(auto_df=False, count_effects=False, debiased=False)

    mod = PanelOLS(large_data.y, large_data.x, time_effects=True)
    res = mod.fit(**fit_opts)

    y = mod.dependent.dataframe
    x = mod.exog.dataframe
    time_dummies = mod.dependent.dummies("time", drop_first=mod.has_constant)
    dummy_names = list(time_dummies.columns)
    d = time_dummies.values
    if mod.has_constant:
        # Remove the least-squares projection of the dummies on a column of ones.
        z = np.ones_like(y)
        d = d - z @ lstsq(z, d)[0]

    xd = np.c_[x.values, d]
    xd = pd.DataFrame(xd, index=x.index, columns=list(x.columns) + dummy_names)
    ols_mod = IV2SLS(y, xd, None, None)

    res2 = ols_mod.fit(cov_type="unadjusted")
    assert_results_equal(res, res2, test_fit=False)
    assert_allclose(res.rsquared_inclusive, res2.rsquared)

    res = mod.fit(cov_type="robust", **fit_opts)
    res2 = ols_mod.fit(cov_type="robust")
    assert_results_equal(res, res2, test_fit=False)

    for clusters in (large_data.vc1, large_data.vc2):
        ols_clusters = mod.reformat_clusters(clusters)
        res = mod.fit(cov_type="clustered", clusters=clusters, **fit_opts)
        res2 = ols_mod.fit(cov_type="clustered", clusters=ols_clusters.dataframe)
        assert_results_equal(res, res2, test_fit=False)

    res = mod.fit(cov_type="clustered", cluster_time=True, **fit_opts)
    time_clusters = pd.DataFrame(
        mod.dependent.time_ids, index=mod.dependent.index, columns=["var.clust"]
    )
    res2 = ols_mod.fit(cov_type="clustered", clusters=time_clusters)
    assert_results_equal(res, res2, test_fit=False)

    res = mod.fit(cov_type="clustered", cluster_entity=True, **fit_opts)
    entity_clusters = pd.DataFrame(
        mod.dependent.entity_ids, index=mod.dependent.index, columns=["var.clust"]
    )
    res2 = ols_mod.fit(cov_type="clustered", clusters=entity_clusters)
    assert_results_equal(res, res2, test_fit=False)
def test_panel_both_lsdv_weighted(data):
    """Compare weighted PanelOLS with entity and time effects to a dummy regression.

    Entity and time dummies are appended to the regressors and the augmented
    design is fit with a weighted ``IV2SLS``.  Results are compared for the
    unadjusted, robust and clustered covariance options, and the panel model's
    ``rsquared_inclusive`` is checked against the dummy regression's
    ``rsquared``.
    """
    # Options passed to every PanelOLS fit in this test.
    fit_opts = dict(auto_df=False, count_effects=False, debiased=False)

    mod = PanelOLS(
        data.y, data.x, entity_effects=True, time_effects=True, weights=data.w
    )
    res = mod.fit(**fit_opts)

    y = mod.dependent.dataframe
    x = mod.exog.dataframe
    w = mod.weights.dataframe
    entity_dummies = mod.dependent.dummies("entity", drop_first=mod.has_constant)
    # The first time dummy is always dropped.
    time_dummies = mod.dependent.dummies("time", drop_first=True)
    d = np.c_[entity_dummies.values, time_dummies.values]
    if mod.has_constant:
        # Projection coefficients come from a least-squares fit on data scaled
        # by sqrt(w); the unscaled column of ones is used to form the
        # projection that is subtracted.
        z = np.ones_like(y)
        root_w = np.sqrt(w.values)
        wd = root_w * d
        wz = root_w * z
        d = d - z @ lstsq(wz, wd)[0]

    xd = np.c_[x.values, d]
    all_names = (
        list(x.columns) + list(entity_dummies.columns) + list(time_dummies.columns)
    )
    xd = pd.DataFrame(xd, index=x.index, columns=all_names)
    ols_mod = IV2SLS(y, xd, None, None, weights=w)

    res2 = ols_mod.fit(cov_type="unadjusted")
    assert_results_equal(res, res2, test_fit=False)
    assert_allclose(res.rsquared_inclusive, res2.rsquared)

    res = mod.fit(cov_type="robust", **fit_opts)
    res2 = ols_mod.fit(cov_type="robust")
    assert_results_equal(res, res2, test_fit=False)

    for clusters in (data.vc1, data.vc2):
        ols_clusters = mod.reformat_clusters(clusters)
        res = mod.fit(cov_type="clustered", clusters=clusters, **fit_opts)
        res2 = ols_mod.fit(cov_type="clustered", clusters=ols_clusters.dataframe)
        assert_results_equal(res, res2, test_fit=False)

    res = mod.fit(cov_type="clustered", cluster_time=True, **fit_opts)
    time_clusters = pd.DataFrame(
        mod.dependent.time_ids, index=mod.dependent.index, columns=["var.clust"]
    )
    res2 = ols_mod.fit(cov_type="clustered", clusters=time_clusters)
    assert_results_equal(res, res2, test_fit=False)

    res = mod.fit(cov_type="clustered", cluster_entity=True, **fit_opts)
    entity_clusters = pd.DataFrame(
        mod.dependent.entity_ids, index=mod.dependent.index, columns=["var.clust"]
    )
    res2 = ols_mod.fit(cov_type="clustered", clusters=entity_clusters)
    assert_results_equal(res, res2, test_fit=False)
def test_panel_no_effects_weighted(data):
    """A weighted PanelOLS fit with no effects must equal the weighted PooledOLS fit."""
    panel_res = PanelOLS(data.y, data.x, weights=data.w).fit()
    pooled_res = PooledOLS(data.y, data.x, weights=data.w).fit()
    assert_results_equal(panel_res, pooled_res)
def test_firstdifference_ols_weighted(data):
    """Compare weighted FirstDifferenceOLS to a hand-built differenced regression.

    Differences of the dependent and exogenous arrays are taken along the
    second axis of the model's 3-d values, stacked, and aligned to the model
    index.  The regression weight is the normalised reciprocal of the sum of
    the reciprocal weights of the two differenced observations.  The resulting
    data is fit with a weighted ``IV2SLS`` and compared for the unadjusted,
    robust and clustered covariance options.
    """
    mod = FirstDifferenceOLS(data.y, data.x, weights=data.w)
    res = mod.fit(debiased=False)

    def to_stacked(values):
        # Label a 2-d block of differences, then stack and align to the model index.
        frame = pd.DataFrame(
            values,
            index=mod.dependent.panel.major_axis[1:],
            columns=mod.dependent.panel.minor_axis,
        )
        return frame.T.stack().reindex(mod.dependent.index)

    y = mod.dependent.values3d
    x = mod.exog.values3d
    dy = to_stacked(np.array(y[0, 1:] - y[0, :-1]))

    pieces = {}
    for pos, diff in enumerate(x[:, 1:] - x[:, :-1]):
        stacked = to_stacked(diff)
        pieces[mod.exog.vars[pos]] = stacked
    # Start from the index of the last stacked regressor, then add the columns.
    dx = pd.DataFrame(index=stacked.index)
    for name, column in pieces.items():
        dx[name] = column
    dx = dx[mod.exog.vars]

    inv_w = 1.0 / mod.weights.values3d
    sw = to_stacked(inv_w[0, 1:] + inv_w[0, :-1])
    sw = 1.0 / sw
    sw = sw / sw.mean()

    # Drop every row with a missing difference or weight.
    drop = dy.isnull() | np.any(dx.isnull(), axis=1) | sw.isnull()
    keep = ~drop
    dy = dy.loc[keep]
    dx = dx.loc[keep]
    sw = sw.loc[keep]

    ols_mod = IV2SLS(dy, dx, None, None, weights=sw)
    ols_res = ols_mod.fit(cov_type="unadjusted")
    assert_results_equal(res, ols_res)

    res = mod.fit(cov_type="robust", debiased=False)
    ols_res = ols_mod.fit(cov_type="robust")
    assert_results_equal(res, ols_res)

    clusters = data.vc1
    ols_clusters = mod.reformat_clusters(data.vc1)
    fd = mod.dependent.first_difference()
    # Restrict the clusters to the index of the first-differenced data.
    ols_clusters = ols_clusters.dataframe.loc[fd.index]
    res = mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
    ols_res = ols_mod.fit(cov_type="clustered", clusters=ols_clusters)
    assert_results_equal(res, ols_res)
def test_firstdifference_ols(data):
    """Compare FirstDifferenceOLS to a hand-built differenced regression.

    Differences of the dependent and exogenous arrays are taken along the
    second axis of the model's 3-d values, stacked, and aligned to the model
    index.  The resulting data is fit with ``IV2SLS`` and compared for the
    unadjusted, robust and clustered covariance options, including entity
    clustering alone and combined with user-supplied clusters.
    """
    mod = FirstDifferenceOLS(data.y, data.x)
    res = mod.fit(debiased=False)

    def to_stacked(values):
        # Label a 2-d block of differences, then stack and align to the model index.
        frame = pd.DataFrame(
            values,
            index=mod.dependent.panel.major_axis[1:],
            columns=mod.dependent.panel.minor_axis,
        )
        return frame.T.stack().reindex(mod.dependent.index)

    y = mod.dependent.values3d
    x = mod.exog.values3d
    dy = to_stacked(np.array(y[0, 1:] - y[0, :-1]))

    pieces = {}
    for pos, diff in enumerate(x[:, 1:] - x[:, :-1]):
        stacked = to_stacked(diff)
        pieces[mod.exog.vars[pos]] = stacked
    # Start from the index of the last stacked regressor, then add the columns.
    dx = pd.DataFrame(index=stacked.index)
    for name, column in pieces.items():
        dx[name] = column
    dx = dx[mod.exog.vars]

    # Drop every row with a missing difference.
    drop = dy.isnull() | np.any(dx.isnull(), axis=1)
    keep = ~drop
    dy = dy.loc[keep]
    dx = dx.loc[keep]

    ols_mod = IV2SLS(dy, dx, None, None)
    ols_res = ols_mod.fit(cov_type="unadjusted")
    assert_results_equal(res, ols_res)

    res = mod.fit(cov_type="robust", debiased=False)
    ols_res = ols_mod.fit(cov_type="robust")
    assert_results_equal(res, ols_res)

    clusters = data.vc1
    ols_clusters = mod.reformat_clusters(data.vc1)
    fd = mod.dependent.first_difference()
    # Restrict the clusters to the index of the first-differenced data.
    ols_clusters = ols_clusters.dataframe.loc[fd.index]
    res = mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
    ols_res = ols_mod.fit(cov_type="clustered", clusters=ols_clusters)
    assert_results_equal(res, ols_res)

    res = mod.fit(cov_type="clustered", cluster_entity=True, debiased=False)
    entity_clusters = mod.dependent.first_difference().entity_ids
    ols_res = ols_mod.fit(cov_type="clustered", clusters=entity_clusters)
    assert_results_equal(res, ols_res)

    # Entity clustering combined with the user-supplied clusters.
    ols_clusters["entity.clusters"] = entity_clusters
    ols_clusters = ols_clusters.astype(np.int32)
    res = mod.fit(
        cov_type="clustered",
        cluster_entity=True,
        clusters=data.vc1,
        debiased=False,
    )
    ols_res = ols_mod.fit(cov_type="clustered", clusters=ols_clusters)
    assert_results_equal(res, ols_res)
def test_panel_entity_lsdv(data):
    """Compare PanelOLS with entity effects to an explicit entity-dummy regression.

    Entity dummies from ``mod.dependent.dummies`` are appended to the
    regressors and the augmented design is fit with ``IV2SLS``.  Results are
    compared for the unadjusted, robust and clustered covariance options, and
    the panel model's ``rsquared_inclusive`` is checked against the dummy
    regression's ``rsquared``.
    """
    # Options passed to every PanelOLS fit in this test.
    fit_opts = dict(auto_df=False, count_effects=False, debiased=False)

    mod = PanelOLS(data.y, data.x, entity_effects=True)
    res = mod.fit(**fit_opts)

    y = mod.dependent.dataframe
    x = mod.exog.dataframe
    if mod.has_constant:
        d = mod.dependent.dummies("entity", drop_first=True)
        z = np.ones_like(y)
        # Use the module-level ``lstsq`` that the sibling LSDV tests call
        # instead of ``np.linalg.lstsq`` without ``rcond``, which emits a
        # FutureWarning on NumPy 1.14-1.x.
        # NOTE(review): presumably the shared ``lstsq`` sets ``rcond``
        # explicitly - confirm at its import site.
        d_demean = d.values - z @ lstsq(z, d.values)[0]
    else:
        d = mod.dependent.dummies("entity", drop_first=False)
        d_demean = d.values

    xd = np.c_[x.values, d_demean]
    xd = pd.DataFrame(xd, index=x.index, columns=list(x.columns) + list(d.columns))
    ols_mod = IV2SLS(y, xd, None, None)

    res2 = ols_mod.fit(cov_type="unadjusted", debiased=False)
    assert_results_equal(res, res2, test_fit=False)
    assert_allclose(res.rsquared_inclusive, res2.rsquared)

    res = mod.fit(cov_type="robust", **fit_opts)
    res2 = ols_mod.fit(cov_type="robust")
    assert_results_equal(res, res2, test_fit=False)

    for clusters in (data.vc1, data.vc2):
        ols_clusters = mod.reformat_clusters(clusters)
        res = mod.fit(cov_type="clustered", clusters=clusters, **fit_opts)
        res2 = ols_mod.fit(cov_type="clustered", clusters=ols_clusters.dataframe)
        assert_results_equal(res, res2, test_fit=False)

    res = mod.fit(cov_type="clustered", cluster_time=True, **fit_opts)
    time_clusters = pd.DataFrame(
        mod.dependent.time_ids, index=mod.dependent.index, columns=["var.clust"]
    )
    res2 = ols_mod.fit(cov_type="clustered", clusters=time_clusters)
    assert_results_equal(res, res2, test_fit=False)

    res = mod.fit(cov_type="clustered", cluster_entity=True, **fit_opts)
    entity_clusters = pd.DataFrame(
        mod.dependent.entity_ids, index=mod.dependent.index, columns=["var.clust"]
    )
    res2 = ols_mod.fit(cov_type="clustered", clusters=entity_clusters)
    assert_results_equal(res, res2, test_fit=False)
def test_absorbed_option(data):
    """Entity-effects fits with ``drop_absorbed`` True and False must agree."""
    fit_opts = dict(auto_df=False, count_effects=False, debiased=False)

    dropping = PanelOLS(data.y, data.x, entity_effects=True, drop_absorbed=True)
    res = dropping.fit(**fit_opts)

    keeping = PanelOLS(data.y, data.x, entity_effects=True, drop_absorbed=False)
    res_false = keeping.fit(**fit_opts)

    assert_results_equal(res, res_false)