def test_categorical_input(data):
    """Smoke test: categorical other_effects and categorical clusters are accepted."""
    y = PanelData(data.y)
    nobs = y.values2d.shape[0]
    raw_effects = np.random.randint(0, 5, size=(nobs, 2))
    effect_cols = {}
    for i, col in enumerate(raw_effects.T):
        label = "effect." + str(i)
        effect_cols[label] = pd.Categorical(pd.Series(col, index=y.index, name=label))
    effects = pd.DataFrame(effect_cols, index=y.index)
    mod = PanelOLS(data.y, data.x, other_effects=effects)
    mod.fit()
    raw_clusters = np.random.randint(0, y.shape[2] // 2, size=(nobs, 2))
    cluster_cols = {}
    for i, col in enumerate(raw_clusters.T):
        label = "effect." + str(i)
        cluster_cols[label] = pd.Categorical(pd.Series(col, index=y.index, name=label))
    clusters = pd.DataFrame(cluster_cols, index=y.index)
    mod.fit(cov_type="clustered", clusters=clusters)
def test_incorrect_type(data):
    """compare() must raise TypeError when mixing panel and cross-section results."""
    indexed = data.set_index(['nr', 'year'])
    dependent = indexed.lwage
    exog = sm.add_constant(indexed[['expersq', 'married', 'union']])
    mod = PanelOLS(dependent, exog)
    res = mod.fit()
    ols = IV2SLS(mod.dependent.dataframe, mod.exog.dataframe, None, None)
    res2 = ols.fit()
    with pytest.raises(TypeError):
        compare(dict(model1=res, model2=res2))
def test_mixed_input(data):
    """Smoke test: mixed categorical/string effects and mixed-type clusters work."""
    y = PanelData(data.y)
    nobs = y.values2d.shape[0]
    codes = np.random.randint(0, 5, size=(nobs))
    letters = ['a', 'b', 'c', 'd', 'e']
    cols = {
        'effect.0': pd.Categorical(pd.Series(codes, index=y.index)),
        'effect.1': pd.Series(np.random.choice(letters, size=(nobs)), index=y.index),
    }
    effects = pd.DataFrame(cols, index=y.index)
    mod = PanelOLS(data.y, data.x, other_effects=effects)
    mod.fit()
    raw_clusters = np.random.randint(0, y.shape[2] // 2, size=(nobs, 2))
    pairs = [''.join(p) for p in product(ascii_lowercase, ascii_lowercase)]
    cluster_cols = {
        'var.cluster.0': pd.Series(np.random.choice(pairs, size=(nobs)), index=y.index),
        'var.cluster.1': pd.Series(raw_clusters[:, 1], index=y.index),
    }
    clusters = pd.DataFrame(cluster_cols, index=y.index)
    mod.fit(cov_type='clustered', clusters=clusters)
def test_incorrect_type(data):
    """compare() must raise TypeError when mixing panel and cross-section results."""
    indexed = data.set_index(["nr", "year"])
    dependent = indexed.lwage
    exog = add_constant(indexed[["expersq", "married", "union"]])
    mod = PanelOLS(dependent, exog)
    res = mod.fit()
    ols = IV2SLS(mod.dependent.dataframe, mod.exog.dataframe, None, None)
    res2 = ols.fit()
    with pytest.raises(TypeError):
        compare(dict(model1=res, model2=res2))
def test_predict_no_selection(generated_data):
    """predict() must raise ValueError when no output component is selected."""
    res = PanelOLS(generated_data.y, generated_data.x, entity_effects=True).fit()
    with pytest.raises(ValueError):
        res.predict(fitted=False)
    with pytest.raises(ValueError):
        res.predict(fitted=False, effects=False, idiosyncratic=False, missing=True)
def test_singleton_removal_mixed(singleton_data, other_effects):
    """Dropping singletons must not change parameter estimates (LSMR path)."""
    if other_effects == 1:
        other_effects = PanelData(singleton_data.c).dataframe.iloc[:, [0]]
    elif other_effects == 2:
        other_effects = singleton_data.c
    keep_mod = PanelOLS(singleton_data.y, singleton_data.x,
                        other_effects=other_effects)
    res_keep = keep_mod.fit(use_lsmr=True)
    drop_mod = PanelOLS(singleton_data.y, singleton_data.x,
                        other_effects=other_effects, singletons=False)
    res = drop_mod.fit(cov_type='clustered', clusters=singleton_data.vc2,
                       use_lsmr=True)
    assert_allclose(res_keep.params, res.params)
    assert res.nobs <= res_keep.nobs
def test_methods_equivalent(data, lsdv_config):
    """Default, LSDV and LSMR estimation paths must agree on every spec."""
    if lsdv_config.other_effects == 1:
        other_effects = PanelData(data.c).dataframe.iloc[:, [0]]
    elif lsdv_config.other_effects == 2:
        other_effects = data.c
    else:
        other_effects = None
    weights = data.w if lsdv_config.weights else None
    mod = PanelOLS(
        data.y,
        data.x,
        weights=weights,
        entity_effects=lsdv_config.entity_effects,
        time_effects=lsdv_config.time_effects,
        other_effects=other_effects,
    )
    res_default = mod.fit()
    res_lsdv = mod.fit(use_lsdv=True)
    res_lsmr = mod.fit(use_lsmr=True)
    assert_results_equal(res_default, res_lsdv)
    assert_results_equal(res_lsdv, res_lsmr, strict=False)
def _check_prediction_output(res, generated_data, nobs):
    """Verify predict() columns, shapes, and agreement with result attributes."""
    pred = res.predict()
    assert list(pred.columns) == ["fitted_values"]
    assert pred.shape == (nobs, 1)
    pred = res.predict(effects=True, idiosyncratic=True)
    expected_cols = ["fitted_values", "estimated_effects", "idiosyncratic"]
    assert list(pred.columns) == expected_cols
    assert pred.shape == (nobs, 3)
    assert_series_equal(pred.fitted_values, res.fitted_values.iloc[:, 0])
    assert_series_equal(pred.estimated_effects, res.estimated_effects.iloc[:, 0])
    assert_series_equal(pred.idiosyncratic, res.idiosyncratic.iloc[:, 0])
    # missing=True reindexes to the full (pre-dropna) panel
    pred = res.predict(effects=True, idiosyncratic=True, missing=True)
    assert list(pred.columns) == expected_cols
    assert pred.shape == (PanelData(generated_data.y).dataframe.shape[0], 3)


def test_predict(generated_data):
    """predict() output is consistent with and without entity effects.

    The original body repeated the identical assertion stanza twice; the
    shared checks now live in ``_check_prediction_output``.
    """
    mod = PanelOLS(generated_data.y, generated_data.x, entity_effects=True)
    res = mod.fit()
    nobs = mod.dependent.dataframe.shape[0]
    _check_prediction_output(res, generated_data, nobs)
    mod = PanelOLS(generated_data.y, generated_data.x)
    res = mod.fit()
    _check_prediction_output(res, generated_data, nobs)
def test_string_input(data):
    """Smoke test: string-valued effects and string-valued clusters are accepted."""
    y = PanelData(data.y)
    nobs = y.values2d.shape[0]
    letters = ['a', 'b', 'c', 'd', 'e']
    cols = {}
    for i in range(2):
        label = 'effect.' + str(i)
        cols[label] = pd.Series(np.random.choice(letters, size=nobs),
                                index=y.index, name=label)
    effects = pd.DataFrame(cols, index=y.index)
    mod = PanelOLS(data.y, data.x, other_effects=effects)
    mod.fit()
    clusters = np.random.randint(0, y.shape[2] // 2, size=(nobs, 2))
    pairs = [''.join(p) for p in product(ascii_lowercase, ascii_lowercase)]
    cols = {}
    for i in range(clusters.shape[1]):
        label = 'effect.' + str(i)
        cols[label] = pd.Series(np.random.choice(pairs, size=nobs),
                                index=y.index, name=label)
    clusters = pd.DataFrame(cols, index=y.index)
    mod.fit(cov_type='clustered', clusters=clusters)
def test_pickle(data):
    """Models and results survive a pickle round trip with identical estimates."""
    mod = PanelOLS(data.y, data.x, entity_effects=True, time_effects=True)
    remod = pickle.loads(pickle.dumps(mod))
    res = mod.fit()
    reres = remod.fit()
    rereres = pickle.loads(pickle.dumps(res))
    assert_allclose(res.params, reres.params)
    assert_allclose(res.params, rereres.params)
    assert_allclose(res.cov, reres.cov)
    assert_allclose(res.cov, rereres.cov)
    assert isinstance(res.f_statistic_robust, WaldTestStatistic)
    assert isinstance(reres.f_statistic_robust, WaldTestStatistic)
    # BUG FIX: the third assertion previously re-checked ``res`` instead of the
    # unpickled result, leaving ``rereres.f_statistic_robust`` untested.
    assert isinstance(rereres.f_statistic_robust, WaldTestStatistic)
def test_absorbed(absorbed_data):
    """Dropping an absorbed regressor yields the same fit as excluding it manually."""
    mod = PanelOLS(absorbed_data.y, absorbed_data.x, drop_absorbed=True,
                   entity_effects=True)
    match = "x_absorbed" if isinstance(absorbed_data.y, pd.DataFrame) else "Exog.3"
    with pytest.warns(AbsorbingEffectWarning, match=match):
        res = mod.fit()
    if isinstance(absorbed_data.x, np.ndarray):
        trimmed = absorbed_data.x[:-1]
    else:
        trimmed = absorbed_data.x.iloc[:, :-1]
    res_no = PanelOLS(absorbed_data.y, trimmed, drop_absorbed=False,
                      entity_effects=True).fit()
    assert_allclose(res.params, res_no.params)
    assert_results_equal(res, res_no)
def test_panel_time_fwl(data):
    """FWL: partialling time dummies out of y and x matches time-effects estimation."""
    mod = PanelOLS(data.y, data.x, time_effects=True)
    res = mod.fit(auto_df=False, count_effects=False, debiased=False)
    y = mod.dependent.dataframe
    x = mod.exog.dataframe
    dummies = mod.dependent.dummies('time', drop_first=mod.has_constant).values
    if mod.has_constant:
        ones = np.ones_like(y)
        dummies = dummies - ones @ lstsq(ones, dummies)[0]
    x = x - dummies @ lstsq(dummies, x)[0]
    y = y - dummies @ lstsq(dummies, y)[0]
    ols_mod = IV2SLS(y, x, None, None)
    assert_results_equal(res, ols_mod.fit(cov_type='unadjusted'), test_df=False)
    res = mod.fit(cov_type='robust', auto_df=False, count_effects=False,
                  debiased=False)
    assert_results_equal(res, ols_mod.fit(cov_type='robust'), test_df=False)
def test_mixed_input(data):
    """Smoke test: mixed categorical/string effects and mixed-type clusters work."""
    y = PanelData(data.y)
    nobs = y.values2d.shape[0]
    int_codes = np.random.randint(0, 5, size=nobs)
    letters = ["a", "b", "c", "d", "e"]
    effects = pd.DataFrame(
        {
            "effect.0": pd.Categorical(pd.Series(int_codes, index=y.index)),
            "effect.1": pd.Series(np.random.choice(letters, size=nobs),
                                  index=y.index),
        },
        index=y.index,
    )
    mod = PanelOLS(data.y, data.x, other_effects=effects)
    mod.fit()
    raw_clusters = np.random.randint(0, y.shape[2] // 2, size=(nobs, 2))
    pairs = ["".join(p) for p in product(ascii_lowercase, ascii_lowercase)]
    clusters = pd.DataFrame(
        {
            "var.cluster.0": pd.Series(np.random.choice(pairs, size=nobs),
                                       index=y.index),
            "var.cluster.1": pd.Series(raw_clusters[:, 1], index=y.index),
        },
        index=y.index,
    )
    mod.fit(cov_type="clustered", clusters=clusters)
def test_panel_entity_lsdv_weighted(data):
    """Weighted entity-effects estimates must match weighted LSDV with explicit
    entity dummies under every covariance estimator (unadjusted, robust, and
    clustered by provided clusters, by time, and by entity)."""
    mod = PanelOLS(data.y, data.x, entity_effects=True, weights=data.w)
    res = mod.fit(auto_df=False, count_effects=False, debiased=False)
    y = mod.dependent.dataframe
    x = mod.exog.dataframe
    w = mod.weights.dataframe
    # One dummy per entity; drop the first only when a constant is present
    d = mod.dependent.dummies('entity', drop_first=mod.has_constant)
    d_cols = d.columns
    d = d.values
    if mod.has_constant:
        # Partial the constant out of the dummies using the *weighted*
        # projection so the dummy block is orthogonal to the constant
        z = np.ones_like(y)
        root_w = np.sqrt(w.values)
        wd = root_w * d
        wz = root_w * z
        d = d - z @ lstsq(wz, wd)[0]
    xd = np.c_[x.values, d]
    xd = pd.DataFrame(xd, index=x.index, columns=list(x.columns) + list(d_cols))
    # Reference: weighted OLS on regressors + dummies
    ols_mod = IV2SLS(y, xd, None, None, weights=w)
    res2 = ols_mod.fit(cov_type='unadjusted')
    assert_results_equal(res, res2, test_fit=False)
    assert_allclose(res.rsquared_inclusive, res2.rsquared)
    # Robust (heteroskedasticity-consistent) covariance
    res = mod.fit(cov_type='robust', auto_df=False, count_effects=False, debiased=False)
    res2 = ols_mod.fit(cov_type='robust')
    assert_results_equal(res, res2, test_fit=False)
    # Clustered covariance with user-supplied one-way clusters
    clusters = data.vc1
    ols_clusters = mod.reformat_clusters(clusters)
    res = mod.fit(cov_type='clustered', clusters=clusters, auto_df=False, count_effects=False, debiased=False)
    res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
    assert_results_equal(res, res2, test_fit=False)
    # Clustered covariance with user-supplied two-way clusters
    clusters = data.vc2
    ols_clusters = mod.reformat_clusters(clusters)
    res = mod.fit(cov_type='clustered', clusters=clusters, auto_df=False, count_effects=False, debiased=False)
    res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
    assert_results_equal(res, res2, test_fit=False)
    # cluster_time=True must equal clustering on the time ids explicitly
    res = mod.fit(cov_type='clustered', cluster_time=True, auto_df=False, count_effects=False, debiased=False)
    clusters = pd.DataFrame(mod.dependent.time_ids, index=mod.dependent.index, columns=['var.clust'])
    res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
    assert_results_equal(res, res2, test_fit=False)
    # cluster_entity=True must equal clustering on the entity ids explicitly
    res = mod.fit(cov_type='clustered', cluster_entity=True, auto_df=False, count_effects=False, debiased=False)
    clusters = pd.DataFrame(mod.dependent.entity_ids, index=mod.dependent.index, columns=['var.clust'])
    res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
    assert_results_equal(res, res2, test_fit=False)
def test_results_access(data):
    """Smoke test: every public result attribute is accessible for several specs."""
    specs = [
        dict(entity_effects=True),
        dict(other_effects=data.c),
        dict(time_effects=True, entity_effects=True),
        dict(),
    ]
    for kwargs in specs:
        access_attributes(PanelOLS(data.y, data.x, **kwargs).fit())
    const = PanelData(data.y).copy()
    const.dataframe.iloc[:, :] = 1
    const.dataframe.columns = ["const"]
    access_attributes(PanelOLS(data.y, const).fit())
def test_masked_singleton_removal():
    """Entity A appears once at time 0 (a singleton); time 0 then has a single
    observation, so two rows are removed and six remain."""
    nobs = 8
    entities = ["A", "B", "C", "D"] * 2
    times = [0, 1, 1, 1, 1, 2, 2, 2]
    idx = pd.MultiIndex.from_arrays((entities, times))
    exog = pd.DataFrame(np.random.randn(nobs, 1), index=idx, columns=["x"])
    dep = pd.DataFrame(np.random.randn(nobs, 1), index=idx)
    res = PanelOLS(dep, exog, singletons=False, entity_effects=True,
                   time_effects=True).fit()
    assert res.nobs == 6
def test_const_data_entity(const_data):
    """Constant-only regression with entity effects matches explicit dummies."""
    y, x = const_data.y, const_data.x
    mod = PanelOLS(y, x, entity_effects=True)
    res = mod.fit(debiased=False)
    x = mod.exog.dataframe
    dummies = mod.dependent.dummies('entity', drop_first=True)
    # Partial the constant out of the dummy columns
    dummies.iloc[:, :] = dummies.values - x.values @ lstsq(x.values, dummies.values)[0]
    combined = pd.DataFrame(
        np.c_[x.values, dummies.values],
        index=x.index,
        columns=list(x.columns) + list(dummies.columns),
    )
    res2 = IV2SLS(mod.dependent.dataframe, combined, None, None).fit()
    assert_allclose(res.params, res2.params.iloc[:1])
def test_panel_both_lsdv(data):
    """Two-way (entity + time) effects estimates must match LSDV with explicit
    dummies under every covariance estimator (unadjusted, robust, and clustered
    by provided clusters, by time, and by entity)."""
    mod = PanelOLS(data.y, data.x, entity_effects=True, time_effects=True)
    res = mod.fit(auto_df=False, count_effects=False, debiased=False)
    y = mod.dependent.dataframe
    x = mod.exog.dataframe
    # Entity dummies drop the first column only when a constant is present;
    # time dummies always drop one to avoid collinearity with entity dummies
    d1 = mod.dependent.dummies('entity', drop_first=mod.has_constant)
    d2 = mod.dependent.dummies('time', drop_first=True)
    d = np.c_[d1.values, d2.values]
    if mod.has_constant:
        # Partial the constant out of the combined dummy block
        z = np.ones_like(y)
        d = d - z @ lstsq(z, d)[0]
    xd = np.c_[x.values, d]
    xd = pd.DataFrame(xd, index=x.index, columns=list(x.columns) + list(d1.columns) + list(d2.columns))
    # Reference: OLS on regressors + both dummy sets
    ols_mod = IV2SLS(y, xd, None, None)
    res2 = ols_mod.fit(cov_type='unadjusted')
    assert_results_equal(res, res2, test_fit=False)
    assert_allclose(res.rsquared_inclusive, res2.rsquared)
    # Robust (heteroskedasticity-consistent) covariance
    res = mod.fit(cov_type='robust', auto_df=False, count_effects=False, debiased=False)
    res2 = ols_mod.fit(cov_type='robust')
    assert_results_equal(res, res2, test_fit=False)
    # Clustered covariance with user-supplied one-way clusters
    clusters = data.vc1
    ols_clusters = mod.reformat_clusters(clusters)
    res = mod.fit(cov_type='clustered', clusters=clusters, auto_df=False, count_effects=False, debiased=False)
    res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
    assert_results_equal(res, res2, test_fit=False)
    # Clustered covariance with user-supplied two-way clusters
    clusters = data.vc2
    ols_clusters = mod.reformat_clusters(clusters)
    res = mod.fit(cov_type='clustered', clusters=clusters, auto_df=False, count_effects=False, debiased=False)
    res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
    assert_results_equal(res, res2, test_fit=False)
    # cluster_time=True must equal clustering on the time ids explicitly
    res = mod.fit(cov_type='clustered', cluster_time=True, auto_df=False, count_effects=False, debiased=False)
    clusters = pd.DataFrame(mod.dependent.time_ids, index=mod.dependent.index, columns=['var.clust'])
    res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
    assert_results_equal(res, res2, test_fit=False)
    # cluster_entity=True must equal clustering on the entity ids explicitly
    res = mod.fit(cov_type='clustered', cluster_entity=True, auto_df=False, count_effects=False, debiased=False)
    clusters = pd.DataFrame(mod.dependent.entity_ids, index=mod.dependent.index, columns=['var.clust'])
    res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
    assert_results_equal(res, res2, test_fit=False)
def areg(df, y_var, X_vars, absorb_var, add_intercept=True):
    """Replicate STATA's ``areg``: OLS absorbing one categorical variable.

    Parameters
    ----------
    df : pd.DataFrame
        The data for OLS.
    y_var : str
        The column name of the dependent variable.
    X_vars : list of str
        The list of explanatory variable names (column names in ``df``).
    absorb_var : str
        The name of the column (variable) to be absorbed.  The column should
        contain only categorical values — str, float or int codes such as a
        firm name, firm code or year.  Continuous values (temperature, stock
        returns, ...) will still run, but the model then has no economic
        meaning.  (The docstring previously referred to this parameter as
        ``g_var``, which does not exist.)
    add_intercept : bool, optional
        Whether to include an intercept column (default True).

    Returns
    -------
    res : object
        Fitted PanelOLS results.
    """
    new_df = df.copy()
    # Build a pseudo panel index: the absorbed variable becomes the entity and
    # a constant time index makes every observation a single "period".
    new_df['time_index'] = 1.0
    new_df['entity_index'] = new_df[absorb_var]
    new_df = new_df.set_index(['entity_index', 'time_index'])  # entity first, then time
    # Dependent variable
    y = new_df[y_var]
    # Explanatory variables, optionally with an explicit intercept column
    if add_intercept:
        new_df['intercept'] = 1.0
        X = new_df[['intercept'] + X_vars]
    else:
        X = new_df[X_vars]
    # entity_effects=True turns the absorbed variable into dummies that are
    # swept out; time_effects=False because the time index is constant.
    model = PanelOLS(dependent=y, exog=X, weights=None, entity_effects=True,
                     time_effects=False, singletons=False, drop_absorbed=True)
    res = model.fit()
    return res
def test_predict_exception(generated_data):
    """predict() raises on exog with the wrong index structure."""
    if np.any(np.isnan(generated_data.x)):
        pytest.skip("Cannot test with missing values")
    mod = PanelOLS(generated_data.y, generated_data.x, entity_effects=True)
    res = mod.fit()
    baseline = res.predict()
    from_exog = res.predict(generated_data.x)
    assert_allclose(baseline, from_exog, atol=1e-3)
    panel_x = PanelData(generated_data.x, copy=True)
    frame = panel_x.dataframe
    frame.index = np.arange(frame.shape[0])
    with pytest.raises(ValueError, match="exog does not have the correct number"):
        res.predict(frame)
def test_panel_entity_fwl(data):
    """FWL: partialling entity dummies out of y and x matches entity-effects estimation."""
    mod = PanelOLS(data.y, data.x, entity_effects=True)
    res = mod.fit(auto_df=False, count_effects=False, debiased=False)
    y = mod.dependent.dataframe
    x = mod.exog.dataframe
    if mod.has_constant:
        dummies = mod.dependent.dummies('entity', drop_first=True)
        ones = np.ones_like(y)
        demeaned = dummies.values - ones @ lstsq(ones, dummies.values)[0]
    else:
        dummies = mod.dependent.dummies('entity', drop_first=False)
        demeaned = dummies.values
    x = x - demeaned @ lstsq(demeaned, x)[0]
    y = y - demeaned @ lstsq(demeaned, y)[0]
    ols_mod = IV2SLS(y, x, None, None)
    assert_results_equal(res, ols_mod.fit(cov_type='unadjusted'), test_df=False)
    res = mod.fit(cov_type='robust', auto_df=False, count_effects=False,
                  debiased=False)
    assert_results_equal(res, ols_mod.fit(cov_type='robust'), test_df=False)
def test_singleton_removal():
    """Dropping singletons must leave the parameter estimates unchanged."""
    entities = []
    for i in range(6):
        entities.extend("entity.{j}".format(j=j) for j in range(6 - i))
    nobs = len(entities)
    times = np.arange(nobs) % 6
    idx = pd.MultiIndex.from_arrays((entities, times))
    cols = ["x{0}".format(i) for i in range(3)]
    x = pd.DataFrame(np.random.randn(nobs, 3), index=idx, columns=cols)
    y = pd.DataFrame(np.random.randn(nobs, 1), index=idx)
    res_drop = PanelOLS(y, x, singletons=False, entity_effects=True,
                        time_effects=True).fit()
    res_keep = PanelOLS(y, x, singletons=True, entity_effects=True,
                        time_effects=True).fit()
    assert_allclose(res_drop.params, res_keep.params)
def test_panel_effects_sanity(data):
    """y must decompose exactly into fitted values + residuals + estimated
    effects for every combination of weights and entity/time effects.

    The original body repeated the identical check four times; the specs are
    now driven by a loop.
    """
    configs = [
        dict(entity_effects=True),
        dict(entity_effects=True, time_effects=True),
        dict(weights=data.w, entity_effects=True),
        dict(weights=data.w, entity_effects=True, time_effects=True),
    ]
    for kwargs in configs:
        mod = PanelOLS(data.y, data.x, **kwargs)
        res = mod.fit(auto_df=False, count_effects=False)
        # y == X @ beta + residuals + effects, element-wise
        expected = mod.exog.values2d @ res.params.values[:, None]
        expected = expected + res.resids.values[:, None]
        expected = expected + res.estimated_effects.values
        assert_allclose(mod.dependent.values2d, expected)
def xtreg(df, y_var, other_X_vars, fix1, fix2=None, add_intercept=True):
    """Replicate STATA's ``xtreg`` for a linear fixed-effects model.

    At least one and at most two fixed-effect variables are supported.

    Parameters
    ----------
    df : pd.DataFrame
        The data for OLS.
    y_var : str
        The column name of the dependent variable.
    other_X_vars : list of str
        Explanatory variable names other than the fixed-effect variables.
    fix1 : str
        The column name of the first fixed-effect variable.
    fix2 : str or None
        The column name of the second fixed-effect variable (if there is one).
    add_intercept : bool, optional
        Whether to include an intercept column (default True).

    Returns
    -------
    res : object
        Fitted PanelOLS results.
    """
    new_df = df.copy()
    new_df = new_df.dropna()
    if fix2 is None:
        # Only one fixed effect: add a constant time index so PanelOLS gets a
        # two-level (entity, time) index, but do not absorb the time level.
        fix2 = 'time_index'
        fix2_effect = False
        new_df[fix2] = 1.0
    else:
        fix2_effect = True
    new_df = new_df.set_index([fix1, fix2])  # entity first, and then time
    y = new_df[y_var]
    if add_intercept:
        new_df['intercept'] = 1.0
        X = new_df[['intercept'] + other_X_vars]
    else:
        X = new_df[other_X_vars]
    # fix1 is absorbed via entity effects; fix2 via time effects when present.
    model = PanelOLS(dependent=y, exog=X, weights=None, entity_effects=True,
                     time_effects=fix2_effect, other_effects=None,
                     drop_absorbed=True)
    res = model.fit()
    return res
def test_valid_weight_shape(data):
    """Weights of every accepted shape are broadcast and mean-normalized."""
    missing = PanelData(data.y).isnull | PanelData(data.x).isnull

    def fitted_weights(weights):
        # Fit with the given weights and return the processed 2d weight array
        mod = PanelOLS(data.y, data.x, weights=weights)
        mod.fit()
        return mod.weights.values2d

    def normalize(flat):
        # Drop missing rows and scale to unit mean, matching the model
        kept = flat[~missing.squeeze()][:, None]
        return kept / kept.mean()

    # Same total size as y
    n = np.prod(data.y.shape)
    weights = 1 + np.random.random_sample(n)
    assert_equal(fitted_weights(weights), normalize(weights))
    # One weight per time period
    n = data.y.shape[0]
    weights = 1 + np.random.random_sample(n)
    expanded = (weights[:, None] @ np.ones((1, data.y.shape[1]))).T.ravel()
    assert_equal(fitted_weights(weights), normalize(expanded))
    # One weight per entity
    n = data.y.shape[1]
    weights = 1 + np.random.random_sample(n)
    expanded = (np.ones((data.y.shape[0], 1)) @ weights[None, :]).T.ravel()
    assert_equal(fitted_weights(weights), normalize(expanded))
    # Full 2d weight array
    weights = 1 + np.random.random_sample(data.y.shape)
    assert_equal(fitted_weights(weights), normalize(weights.T.ravel()))
def test_const_data_both(const_data):
    """Constant-only regression with two-way effects matches explicit dummies."""
    y, x = const_data.y, const_data.x
    mod = PanelOLS(y, x, entity_effects=True, time_effects=True)
    res = mod.fit(debiased=False)
    x = mod.exog.dataframe
    d1 = mod.dependent.dummies('entity', drop_first=True)
    d1.columns = ['d.entity.{0}'.format(i) for i in d1]
    d2 = mod.dependent.dummies('time', drop_first=True)
    d2.columns = ['d.time.{0}'.format(i) for i in d2]
    dummies = pd.DataFrame(
        np.c_[d1.values, d2.values],
        index=x.index,
        columns=list(d1.columns) + list(d2.columns),
    )
    # Partial the constant out of the dummy block
    dummies.iloc[:, :] = dummies.values - x.values @ lstsq(x.values, dummies.values)[0]
    xd = pd.DataFrame(
        np.c_[x.values, dummies.values],
        index=x.index,
        columns=list(x.columns) + list(dummies.columns),
    )
    res2 = IV2SLS(mod.dependent.dataframe, xd, None, None).fit()
    assert_allclose(res.params, res2.params.iloc[:1])
def test_fitted_effects_residuals(data, entity_eff, time_eff):
    """fitted values + idiosyncratic + estimated effects reconstruct y.

    BUG FIX: ``time_effects`` previously received ``entity_eff``, so the
    ``time_eff`` parametrization was silently ignored and the time-effects
    configurations were never exercised.
    """
    mod = PanelOLS(data.y, data.x, entity_effects=entity_eff,
                   time_effects=time_eff)
    res = mod.fit()
    expected = mod.exog.values2d @ res.params.values
    expected = pd.DataFrame(expected, index=mod.exog.index,
                            columns=['fitted_values'])
    assert_allclose(res.fitted_values, expected)
    assert_frame_similar(res.fitted_values, expected)
    expected.iloc[:, 0] = res.resids
    expected.columns = ['idiosyncratic']
    assert_allclose(res.idiosyncratic, expected)
    assert_frame_similar(res.idiosyncratic, expected)
    # Effects are whatever is left after fitted values and residuals
    fitted_error = res.fitted_values + res.idiosyncratic.values
    expected.iloc[:, 0] = mod.dependent.values2d - fitted_error
    expected.columns = ['estimated_effects']
    assert_allclose(res.estimated_effects, expected, atol=1e-8)
    assert_frame_similar(res.estimated_effects, expected)
def test_const_data_entity_weights(const_data):
    """Weighted constant-only regression with entity effects matches explicit dummies."""
    y, x = const_data.y, const_data.x
    mod = PanelOLS(y, x, entity_effects=True, weights=const_data.w)
    res = mod.fit(debiased=False)
    y = mod.dependent.dataframe
    w = mod.weights.dataframe
    x = mod.exog.dataframe
    d = mod.dependent.dummies('entity', drop_first=True)
    d_columns = list(d.columns)
    root_w = np.sqrt(w.values)
    z = np.ones_like(x)
    # Weighted projection of the dummies on the constant
    wd = root_w * d.values
    wz = root_w
    d = d - z @ lstsq(wz, wd)[0]
    xd = pd.DataFrame(
        np.c_[x.values, d.values],
        index=x.index,
        columns=list(x.columns) + d_columns,
    )
    res2 = IV2SLS(y, xd, None, None, weights=w).fit()
    assert_allclose(res.params, res2.params.iloc[:1])
def test_const_data_both_weights(const_data):
    """Weighted constant-only regression with two-way effects matches explicit dummies.

    BUG FIX: ``PanelOLS`` takes ``entity_effects``/``time_effects`` (plural);
    the singular keywords used previously raise a TypeError, so this test
    could never run.
    """
    y, x = const_data.y, const_data.x
    mod = PanelOLS(y, x, entity_effects=True, time_effects=True,
                   weights=const_data.w)
    res = mod.fit(debiased=False)
    w = mod.weights.dataframe
    x = mod.exog.dataframe
    d1 = mod.dependent.dummies('entity', drop_first=True)
    d2 = mod.dependent.dummies('time', drop_first=True)
    d = np.c_[d1.values, d2.values]
    root_w = np.sqrt(w.values)
    z = np.ones_like(x)
    # Weighted projection of the dummies on the constant; use the same lstsq
    # helper as the rest of the module for consistency.
    wd = root_w * d
    wz = root_w
    d = d - z @ lstsq(wz, wd)[0]
    d = pd.DataFrame(d, index=x.index, columns=list(d1.columns) + list(d2.columns))
    xd = np.c_[x.values, d.values]
    xd = pd.DataFrame(xd, index=x.index, columns=list(x.columns) + list(d.columns))
    res2 = IV2SLS(mod.dependent.dataframe, xd, None, None, weights=w).fit()
    assert_allclose(res.params, res2.params.iloc[:1])
def test_cov_equiv_cluster(data):
    """cluster_entity/cluster_time flags match explicit cluster assignment."""
    mod = PanelOLS(data.y, data.x, entity_effects=True)
    res = mod.fit(cov_type='clustered', cluster_entity=True, debiased=False)
    y = PanelData(data.y)
    entity_clusters = pd.DataFrame(y.entity_ids, index=y.index)
    res2 = mod.fit(cov_type='clustered', clusters=entity_clusters, debiased=False)
    assert_results_equal(res, res2)
    mod = PanelOLS(data.y, data.x, time_effects=True)
    res = mod.fit(cov_type='clustered', cluster_time=True, debiased=False)
    y = PanelData(data.y)
    time_clusters = pd.DataFrame(y.time_ids, index=y.index)
    res2 = mod.fit(cov_type='clustered', clusters=time_clusters, debiased=False)
    assert_results_equal(res, res2)
    # No clusters specified at all is equivalent to clusters=None
    res = mod.fit(cov_type='clustered', debiased=False)
    res2 = mod.fit(cov_type='clustered', clusters=None, debiased=False)
    assert_results_equal(res, res2)