def test_fama_macbeth(data):
    """Compare FamaMacBeth estimates against direct period-by-period OLS.

    The Fama-MacBeth point estimate is the mean of per-period OLS
    coefficients, and its covariance is the scaled outer product of the
    demeaned per-period estimates.
    """
    res = FamaMacBeth(data.y, data.x).fit(debiased=True)
    y = PanelData(data.y)
    x = PanelData(data.x)
    joint_missing = y.isnull | x.isnull
    y.drop(joint_missing)
    x.drop(joint_missing)
    y = y.dataframe
    x = x.dataframe
    # One cross-sectional regression per time period; periods with fewer
    # observations than regressors are skipped.
    period_params = []
    for period in y.index.levels[1]:
        y_t = y.xs(period, level=1)
        x_t = x.xs(period, level=1)
        if x_t.shape[0] < x_t.shape[1]:
            continue
        x_t = x_t.loc[y_t.index]
        period_params.append(lstsq(x_t.values, y_t.values, rcond=None)[0])
    all_params = np.array(period_params).squeeze()
    mean_params = all_params.mean(0)
    assert_allclose(mean_params.squeeze(), res.params)
    assert_allclose(all_params, res.all_params.dropna(how="all"))
    # Fama-MacBeth covariance from the demeaned per-period estimates.
    errors = all_params - mean_params[None, :]
    nperiods = errors.shape[0]
    cov = errors.T @ errors / nperiods / (nperiods - 1)
    assert_allclose(cov, np.asarray(res.cov))
    access_attributes(res)
def test_fama_macbeth(data):
    """Check FamaMacBeth against period-by-period OLS computed directly.

    The estimate is the average of per-period OLS coefficients; the
    covariance is the scaled outer product of the demeaned per-period
    estimates.
    """
    res = FamaMacBeth(data.y, data.x).fit(debiased=True)
    y = PanelData(data.y)
    x = PanelData(data.x)
    missing = y.isnull | x.isnull
    y.drop(missing)
    x.drop(missing)
    y = y.dataframe
    x = x.dataframe
    times = y.index.levels[1]
    params = []
    for t in times:
        _y = y.xs(t, level=1)
        _x = x.xs(t, level=1)
        # Skip periods with fewer observations than regressors.
        if _x.shape[0] < _x.shape[1]:
            continue
        _x = _x.loc[_y.index]
        # rcond=None selects the new default and avoids the FutureWarning
        # raised when np.linalg.lstsq is called without rcond.
        params.append(np.linalg.lstsq(_x.values, _y.values, rcond=None)[0])
    params = np.array(params).squeeze()
    all_params = params
    params = params.mean(0)
    assert_allclose(params.squeeze(), res.params)
    e_params = all_params - params[None, :]
    ntime = e_params.shape[0]
    cov = e_params.T @ e_params / ntime / (ntime - 1)
    assert_allclose(cov, res.cov.values)
    # Smoke-test every public attribute/method on the result object.
    d = dir(res)
    for key in d:
        if not key.startswith('_'):
            val = getattr(res, key)
            if callable(val):
                val()
def test_mean_weighted(data):
    """Weighted group means should match WLS projections on group dummies."""
    x = PanelData(data.x)
    w = PanelData(data.w)
    joint_missing = x.isnull | w.isnull
    x.drop(joint_missing)
    w.drop(joint_missing)
    sqrt_w = np.sqrt(w.values2d)
    weighted_x = sqrt_w * x.values2d

    # Entity means via a least-squares fit on entity dummies.
    entity_mean = x.mean("entity", weights=w)
    entity_labels = x.index.levels[0][get_codes(x.index)[0]]
    dummies = get_dummies(Categorical(entity_labels, ordered=True))
    dummies = dummies[entity_mean.index].values
    weighted_d = dummies * sqrt_w
    expected = lstsq(weighted_d, weighted_x, rcond=None)[0]
    assert_allclose(entity_mean, expected)

    # Time means via the pseudo-inverse of the weighted time dummies.
    time_mean = x.mean("time", weights=w)
    time_labels = x.index.levels[1][get_codes(x.index)[1]]
    dummies = get_dummies(Categorical(time_labels, ordered=True))
    dummies = dummies[list(time_mean.index)].values
    weighted_d = dummies * sqrt_w
    expected = pinv(weighted_d) @ weighted_x
    assert_allclose(time_mean, expected)
def test_mean_weighted(data):
    """Weighted group means must equal WLS projections onto group dummies."""
    x = PanelData(data.x)
    w = PanelData(data.w)
    missing = x.isnull | w.isnull
    x.drop(missing)
    w.drop(missing)
    entity_mean = x.mean('entity', weights=w)
    # MultiIndex.labels was removed in pandas 1.0; .codes is the replacement.
    c = x.index.levels[0][x.index.codes[0]]
    d = pd.get_dummies(pd.Categorical(c, ordered=True))
    d = d[entity_mean.index]
    d = d.values
    root_w = np.sqrt(w.values2d)
    wx = root_w * x.values2d
    wd = d * root_w
    # rcond=None selects the new default and avoids the FutureWarning.
    mu = np.linalg.lstsq(wd, wx, rcond=None)[0]
    assert_allclose(entity_mean, mu)
    time_mean = x.mean('time', weights=w)
    c = x.index.levels[1][x.index.codes[1]]
    d = pd.get_dummies(pd.Categorical(c, ordered=True))
    # get_indexer_for returns an array; take the scalar explicitly since
    # int() on a 1-element ndarray is deprecated in recent NumPy.
    ilocs = [int(d.columns.get_indexer_for([i])[0]) for i in time_mean.index]
    d = d.iloc[:, ilocs]
    # TODO: Restore when fixed in pandas
    # d = d[time_mean.index]
    d = d.values
    root_w = np.sqrt(w.values2d)
    wx = root_w * x.values2d
    wd = d * root_w
    mu = pinv(wd) @ wx
    assert_allclose(time_mean, mu)
def test_drop(mi_df):
    """Dropping a boolean selection removes exactly the selected rows."""
    dh = PanelData(mi_df)
    orig = dh.dataframe.copy()
    # np.bool was removed in NumPy 1.20; the builtin bool is the replacement.
    sel = np.zeros(orig.shape[0], dtype=bool)
    sel[::3] = True
    dh.drop(sel)
    assert dh.dataframe.shape[0] == len(sel) - sel.sum()
def test_demean_weighted(data):
    """Weighted demeaning equals residuals from a WLS dummy regression."""
    x = PanelData(data.x)
    w = PanelData(data.w)
    joint_missing = x.isnull | w.isnull
    x.drop(joint_missing)
    w.drop(joint_missing)
    sqrt_w = np.sqrt(w.values2d)
    weighted_x = sqrt_w * x.values2d
    for group, level in (("entity", 0), ("time", 1)):
        demeaned = x.demean(group, weights=w)
        dummies = get_dummies(Categorical(get_codes(x.index)[level])).values
        weighted_d = dummies * sqrt_w
        fitted = weighted_d @ lstsq(weighted_d, weighted_x, rcond=None)[0]
        resid = weighted_x - fitted
        # Offset by 1 so assert_allclose's relative tolerance is
        # meaningful for values near zero.
        assert_allclose(1 + np.abs(demeaned.values2d), 1 + np.abs(resid))
def test_demean_against_dummy_regression(data):
    """Demeaning must match residuals from regression on group dummies."""
    dh = PanelData(data.x)
    dh.drop(dh.isnull)
    df = dh.dataframe
    flat = df.reset_index()

    def dummy_residuals(dummies):
        # Residuals after LS projection of the data onto the dummy columns.
        return df.values - dummies @ lstsq(dummies, df.values, rcond=None)[0]

    # Entity effects: full set of entity dummies.
    entity_cat = Categorical(flat[df.index.levels[0].name])
    entity_d = get_dummies(entity_cat, drop_first=False).astype(np.float64)
    assert_allclose(
        1 + np.abs(dh.demean("entity").values2d),
        1 + np.abs(dummy_residuals(entity_d)),
    )
    # Time effects: full set of time dummies.
    time_cat = Categorical(flat[df.index.levels[1].name])
    time_d = get_dummies(time_cat, drop_first=False).astype(np.float64)
    assert_allclose(
        1 + np.abs(dh.demean("time").values2d),
        1 + np.abs(dummy_residuals(time_d)),
    )
    # Both: all entity dummies plus time dummies with one dropped to
    # avoid perfect collinearity.
    entity_cat = Categorical(flat[df.index.levels[0].name])
    full_entity = get_dummies(entity_cat, drop_first=False).astype(np.float64)
    time_cat = Categorical(flat[df.index.levels[1].name])
    reduced_time = get_dummies(time_cat, drop_first=True).astype(np.float64)
    combined = np.c_[full_entity.values, reduced_time.values]
    assert_allclose(
        1 + np.abs(dh.demean("both").values2d),
        1 + np.abs(dummy_residuals(combined)),
    )
def test_mean_weighted(data):
    """Weighted group means must equal WLS projections onto group dummies."""
    x = PanelData(data.x)
    w = PanelData(data.w)
    missing = x.isnull | w.isnull
    x.drop(missing)
    w.drop(missing)
    entity_mean = x.mean('entity', weights=w)
    # MultiIndex.labels was removed in pandas 1.0; .codes is the replacement.
    c = x.index.levels[0][x.index.codes[0]]
    d = pd.get_dummies(pd.Categorical(c, ordered=True))
    d = d[entity_mean.index]
    d = d.values
    root_w = np.sqrt(w.values2d)
    wx = root_w * x.values2d
    wd = d * root_w
    # rcond=None selects the new default and avoids the FutureWarning.
    mu = np.linalg.lstsq(wd, wx, rcond=None)[0]
    assert_allclose(entity_mean, mu)
    time_mean = x.mean('time', weights=w)
    c = x.index.levels[1][x.index.codes[1]]
    d = pd.get_dummies(pd.Categorical(c, ordered=True))
    d = d[time_mean.index]
    d = d.values
    root_w = np.sqrt(w.values2d)
    wx = root_w * x.values2d
    wd = d * root_w
    mu = pinv(wd) @ wx
    assert_allclose(time_mean, mu)
def test_demean_weighted(data):
    """Weighted demeaning must equal residuals from a WLS dummy regression."""
    x = PanelData(data.x)
    w = PanelData(data.w)
    missing = x.isnull | w.isnull
    x.drop(missing)
    w.drop(missing)
    entity_demean = x.demean('entity', weights=w)
    # MultiIndex.labels was removed in pandas 1.0; .codes is the replacement.
    d = pd.get_dummies(pd.Categorical(x.index.codes[0]))
    d = d.values
    root_w = np.sqrt(w.values2d)
    wx = root_w * x.values2d
    wd = d * root_w
    # rcond=None selects the new default and avoids the FutureWarning.
    mu = wd @ np.linalg.lstsq(wd, wx, rcond=None)[0]
    e = wx - mu
    # Offset by 1 so the relative tolerance is meaningful near zero.
    assert_allclose(1 + np.abs(entity_demean.values2d), 1 + np.abs(e))
    time_demean = x.demean('time', weights=w)
    d = pd.get_dummies(pd.Categorical(x.index.codes[1]))
    d = d.values
    root_w = np.sqrt(w.values2d)
    wx = root_w * x.values2d
    wd = d * root_w
    mu = wd @ np.linalg.lstsq(wd, wx, rcond=None)[0]
    e = wx - mu
    assert_allclose(1 + np.abs(time_demean.values2d), 1 + np.abs(e))
def test_all_missing(data):
    """Fitting after dropping jointly-missing rows must emit no warnings."""
    y = PanelData(data.y)
    x = PanelData(data.x)
    joint_missing = y.isnull | x.isnull
    y.drop(joint_missing)
    x.drop(joint_missing)
    import warnings

    with warnings.catch_warnings(record=True) as recorded:
        PanelOLS(y.dataframe, x.dataframe).fit()
    assert not recorded
def test_demean_missing_alt_types(data):
    """Demeaning must agree with a pandas groupby-transform demean."""
    xpd = PanelData(data.x)
    xpd.drop(xpd.isnull)

    def groupwise_demean(level):
        # Reference result: subtract each group's mean via groupby.
        return xpd.dataframe.groupby(level=level).transform(lambda s: s - s.mean())

    assert_frame_equal(xpd.demean('entity').dataframe, groupwise_demean(0))
    assert_frame_equal(xpd.demean('time').dataframe, groupwise_demean(1))
def test_demean_many_missing_dropped(panel):
    """Entity demeaning after dropping NaNs matches per-entity mean removal."""
    # NOTE(review): 3-D .iloc assignment requires the (long-deprecated)
    # pandas Panel fixture — confirm the fixture still provides one.
    panel.iloc[0, ::3, ::3] = np.nan
    data = PanelData(panel)
    data.drop(data.isnull)
    fe = data.demean('entity')
    expected = data.values2d.copy()
    entity_ids = data.entity_ids.ravel()
    for entity in np.unique(entity_ids):
        mask = entity_ids == entity
        expected[mask] = expected[mask] - np.nanmean(expected[mask], 0)
    assert_allclose(fe.values2d, expected)
def test_mean_missing(data):
    """Group means must match pandas groupby means in panel label order."""
    xpd = PanelData(data.x)
    xpd.drop(xpd.isnull)
    for dim, level, labels in (("entity", 0, xpd.entities), ("time", 1, xpd.time)):
        computed = xpd.mean(dim)
        expected = xpd.dataframe.groupby(level=level).mean().loc[labels]
        # The groupby result names its columns axis; the panel mean does not.
        expected.columns.name = None
        assert_frame_equal(computed, expected)
def test_demean_simple_weighted(data):
    """Demeaning with unit weights must equal unweighted demeaning."""
    x = PanelData(data.x)
    w = PanelData(data.w)
    joint_missing = x.isnull | w.isnull
    x.drop(joint_missing)
    w.drop(joint_missing)
    # Force every weight to one so the weighted path reduces to unweighted.
    w.dataframe.iloc[:, 0] = 1
    for group in ('entity', 'time'):
        plain = x.demean(group)
        weighted = x.demean(group, weights=w)
        assert_allclose(plain.dataframe, weighted.dataframe)
def test_demean_missing_alt_types(data):
    """Demeaning agrees with groupby-transform for alternative input types.

    Index/column dtype checks are only strict when the input is already a
    DataFrame or ndarray.
    """
    strict = isinstance(data.x, (DataFrame, np.ndarray))
    xpd = PanelData(data.x)
    xpd.drop(xpd.isnull)
    for group, level in (('entity', 0), ('time', 1)):
        result = xpd.demean(group)
        expected = xpd.dataframe.groupby(level=level).transform(
            lambda s: s - s.mean()
        )
        assert_frame_equal(
            result.dataframe,
            expected,
            check_index_type=strict,
            check_column_type=strict,
        )
def test_count(data):
    """Per-group observation counts must match pandas groupby counts."""
    xpd = PanelData(data.x)
    xpd.drop(xpd.isnull)
    for dim, level, labels in (("entity", 0, xpd.entities), ("time", 1, xpd.time)):
        counts = xpd.count(dim)
        expected = xpd.dataframe.groupby(level=level).count().loc[labels]
        # Align metadata and dtype with the panel count result.
        expected.columns.name = None
        expected = expected.astype(np.int64)
        assert_frame_equal(counts, expected)
def test_demean_many_missing_dropped(mi_df):
    """NaN-out rows on both index levels, drop them, check entity demeaning."""
    entities = mi_df.index.levels[0]
    times = mi_df.index.levels[1]
    column = mi_df.columns[0]
    # NaN-out every third entity ...
    for entity in entities[::3]:
        mi_df.loc[entity, column] = np.nan
    # ... and, with the levels swapped, every third time period.
    mi_df.index = mi_df.index.swaplevel()
    for time in times[::3]:
        mi_df.loc[time, column] = np.nan
    mi_df.index = mi_df.index.swaplevel()
    data = PanelData(mi_df)
    data.drop(data.isnull)
    fe = data.demean("entity")
    expected = data.values2d.copy()
    entity_ids = data.entity_ids.ravel()
    for eid in np.unique(entity_ids):
        mask = entity_ids == eid
        expected[mask] = expected[mask] - np.nanmean(expected[mask], 0)
    assert_allclose(fe.values2d, expected)
def test_entity_other(data):
    """Entity ids passed via other_effects must reproduce entity_effects."""
    y = PanelData(data.y)
    x = PanelData(data.x)
    c = PanelData(data.c).copy()
    joint_missing = y.isnull | x.isnull | c.isnull
    y.drop(joint_missing)
    x.drop(joint_missing)
    c.drop(joint_missing)
    # Replace the second other-effects column with the entity ids.
    effects = c.dataframe.copy()
    effects.iloc[:, 1] = y.entity_ids.squeeze()
    effects = effects.astype(np.int64)
    res = PanelOLS(y, x, other_effects=effects).fit(debiased=False)
    # Same model expressed with explicit entity effects plus the first
    # other-effects column only.
    first_only = PanelData(c.dataframe.iloc[:, [0]].astype(np.int64))
    res2 = PanelOLS(
        y, x, other_effects=first_only, entity_effects=True
    ).fit(debiased=False)
    assert_results_equal(res, res2)
y = PanelData(data.y) x = PanelData(data.x) w = PanelData(data.w) x.dataframe.iloc[:, 0] = 1 mod = PanelOLS(data.y, data.x, weights=data.w) mod.fit() mod = PanelOLS(y, x, weights=data.w, entity_effects=True) mod.fit() mod = PanelOLS(data.y, data.x, weights=data.w, time_effects=True) mod.fit() mod = PanelOLS(data.y, data.x, weights=data.w, time_effects=True, entity_effects=True) mod.fit() missing = y.isnull | x.isnull | w.isnull y.drop(missing) x.drop(missing) w.drop(missing) x.dataframe.iloc[:, 0] = 1 ydw = y.demean(weights=w) xdw = x.demean(weights=w) d = x.dummies('entity', drop_first=False) root_w = np.sqrt(w.values2d) wd = root_w * d wdx_direct = root_w * x.values2d - wd @ np.linalg.lstsq(wd, root_w * x.values2d)[0] print(np.abs(wdx_direct[0] - xdw.values2d[0]) > 1e-14) mux = (w.values2d * x.values2d).sum(0) / w.values2d.sum() muy = (w.values2d * y.values2d).sum(0) / w.values2d.sum() xx = xdw.values2d + root_w * mux