Exemplo n.º 1
0
def test_panel_no_effects(data):
    """Check that PanelOLS fitted without any effects matches PooledOLS."""
    panel_fit = PanelOLS(data.y, data.x).fit()
    pooled_fit = PooledOLS(data.y, data.x).fit()
    assert_results_equal(panel_fit, pooled_fit)
Exemplo n.º 2
0
def test_panel_other_lsdv(data):
    """Compare PanelOLS with ``other_effects`` to an explicit dummy-variable regression.

    The reference model is an IV2SLS fit with no endogenous variables or
    instruments, whose regressors are the model's exog plus one block of
    dummies per other-effect column.  The comparison is repeated for the
    unadjusted, robust and clustered covariance estimators.
    """
    mod = PanelOLS(data.y, data.x, other_effects=data.c)
    assert 'Num Other Effects: 2' in str(mod)
    # Options passed to every PanelOLS fit in this test
    panel_opts = dict(auto_df=False, count_effects=False, debiased=False)
    res = mod.fit(**panel_opts)

    y = mod.dependent.dataframe.copy()
    x = mod.exog.dataframe.copy()
    cats = mod._other_effect_cats.dataframe.copy()
    dummy_blocks = []
    dummy_names = []
    for pos, col in enumerate(cats):
        effect = cats[col].copy()
        # The first level is kept only for the first column of a model without a constant
        block = pd.get_dummies(effect.astype(np.int64),
                               drop_first=(mod.has_constant or pos > 0))
        block.columns = [effect.name + '_val_' + str(level) for level in block.columns]
        dummy_names.extend(list(block.columns))
        dummy_blocks.append(block.values)
    d = np.column_stack(dummy_blocks)

    if mod.has_constant:
        # Remove the projection of the dummies on a column of ones
        ones = np.ones_like(y)
        d = d - ones @ lstsq(ones, d)[0]

    xd = pd.DataFrame(np.c_[x.values, d],
                      index=x.index,
                      columns=list(x.columns) + list(dummy_names))

    ols_mod = IV2SLS(y, xd, None, None)
    res2 = ols_mod.fit(cov_type='unadjusted')
    assert_results_equal(res, res2, test_fit=False)

    # Requesting 'unadjusted' explicitly must reproduce the first fit
    res3 = mod.fit(cov_type='unadjusted', **panel_opts)
    assert_results_equal(res, res3)

    res = mod.fit(cov_type='robust', **panel_opts)
    res2 = ols_mod.fit(cov_type='robust')
    assert_results_equal(res, res2, test_fit=False)

    # User-supplied cluster variables
    for clusters in (data.vc1, data.vc2):
        ols_clusters = mod.reformat_clusters(clusters)
        res = mod.fit(cov_type='clustered', clusters=clusters, **panel_opts)
        res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
        assert_results_equal(res, res2, test_fit=False)

    # Clustering by time, then by entity, using the ids stored on the dependent data
    for flag, id_attr in (('cluster_time', 'time_ids'), ('cluster_entity', 'entity_ids')):
        res = mod.fit(cov_type='clustered', **dict(panel_opts, **{flag: True}))
        clusters = pd.DataFrame(getattr(mod.dependent, id_attr),
                                index=mod.dependent.index,
                                columns=['var.clust'])
        res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
        assert_results_equal(res, res2, test_fit=False)
Exemplo n.º 3
0
def test_lsdv_options(data):
    """Check that ``fit(use_lsdv=True)`` matches the default ``fit()``.

    The comparison is run for several combinations of weights, entity
    effects, time effects and other effects.
    """

    def check(**model_options):
        # Build one model and fit it both ways; the two results must agree
        mod = PanelOLS(data.y, data.x, **model_options)
        default_res = mod.fit()
        lsdv_res = mod.fit(use_lsdv=True)
        assert_results_equal(default_res, lsdv_res)

    check(weights=data.w)
    check(weights=data.w, entity_effects=True)
    check(time_effects=True)
    check(time_effects=True, entity_effects=True)

    # Only the first column of the other-effects data, kept as a one-column frame
    c1 = PanelData(data.c).dataframe.iloc[:, [0]]
    check(entity_effects=True, other_effects=c1)
    check(time_effects=True, other_effects=c1)
    check(weights=data.w, entity_effects=True, other_effects=c1)
    check(weights=data.w, time_effects=True, other_effects=c1)
    check(weights=data.w, other_effects=data.c)
Exemplo n.º 4
0
def test_panel_time_lsdv(large_data):
    """Compare PanelOLS with time effects to an explicit time-dummy regression.

    The reference model is an IV2SLS fit with no endogenous variables or
    instruments on the exog plus time dummies.  The comparison is repeated
    for the unadjusted, robust and clustered covariance estimators, and the
    inclusive R-squared is checked against the reference R-squared.
    """
    mod = PanelOLS(large_data.y, large_data.x, time_effects=True)
    # Options passed to every PanelOLS fit in this test
    panel_opts = dict(auto_df=False, count_effects=False, debiased=False)
    res = mod.fit(**panel_opts)

    y = mod.dependent.dataframe
    x = mod.exog.dataframe
    time_dummies = mod.dependent.dummies('time', drop_first=mod.has_constant)
    dummy_names = list(time_dummies.columns)
    d = time_dummies.values
    if mod.has_constant:
        # Remove the projection of the dummies on a column of ones
        ones = np.ones_like(y)
        d = d - ones @ lstsq(ones, d)[0]

    xd = pd.DataFrame(np.c_[x.values, d],
                      index=x.index,
                      columns=list(x.columns) + dummy_names)

    ols_mod = IV2SLS(y, xd, None, None)
    res2 = ols_mod.fit(cov_type='unadjusted')
    assert_results_equal(res, res2, test_fit=False)
    assert_allclose(res.rsquared_inclusive, res2.rsquared)

    res = mod.fit(cov_type='robust', **panel_opts)
    res2 = ols_mod.fit(cov_type='robust')
    assert_results_equal(res, res2, test_fit=False)

    # User-supplied cluster variables
    for clusters in (large_data.vc1, large_data.vc2):
        ols_clusters = mod.reformat_clusters(clusters)
        res = mod.fit(cov_type='clustered', clusters=clusters, **panel_opts)
        res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
        assert_results_equal(res, res2, test_fit=False)

    # Clustering by time, then by entity, using the ids stored on the dependent data
    for flag, id_attr in (('cluster_time', 'time_ids'), ('cluster_entity', 'entity_ids')):
        res = mod.fit(cov_type='clustered', **dict(panel_opts, **{flag: True}))
        clusters = pd.DataFrame(getattr(mod.dependent, id_attr),
                                index=mod.dependent.index,
                                columns=['var.clust'])
        res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
        assert_results_equal(res, res2, test_fit=False)
Exemplo n.º 5
0
def test_panel_both_lsdv_weighted(data):
    """Compare weighted PanelOLS with entity and time effects to a weighted dummy regression.

    The reference model is a weighted IV2SLS fit with no endogenous variables
    or instruments on the exog plus entity and time dummies.  The comparison
    is repeated for the unadjusted, robust and clustered covariance
    estimators, and the inclusive R-squared is checked against the reference
    R-squared.
    """
    mod = PanelOLS(data.y, data.x, entity_effects=True, time_effects=True, weights=data.w)
    # Options passed to every PanelOLS fit in this test
    panel_opts = dict(auto_df=False, count_effects=False, debiased=False)
    res = mod.fit(**panel_opts)

    y = mod.dependent.dataframe
    x = mod.exog.dataframe
    w = mod.weights.dataframe
    entity_dummies = mod.dependent.dummies('entity', drop_first=mod.has_constant)
    # The first time dummy is always dropped
    time_dummies = mod.dependent.dummies('time', drop_first=True)
    d = np.c_[entity_dummies.values, time_dummies.values]

    if mod.has_constant:
        # Subtract the fit from a weighted regression of the dummies on a constant
        ones = np.ones_like(y)
        root_w = np.sqrt(w.values)
        d = d - ones @ lstsq(root_w * ones, root_w * d)[0]

    all_names = list(x.columns) + list(entity_dummies.columns) + list(time_dummies.columns)
    xd = pd.DataFrame(np.c_[x.values, d], index=x.index, columns=all_names)

    ols_mod = IV2SLS(y, xd, None, None, weights=w)
    res2 = ols_mod.fit(cov_type='unadjusted')
    assert_results_equal(res, res2, test_fit=False)
    assert_allclose(res.rsquared_inclusive, res2.rsquared)

    res = mod.fit(cov_type='robust', **panel_opts)
    res2 = ols_mod.fit(cov_type='robust')
    assert_results_equal(res, res2, test_fit=False)

    # User-supplied cluster variables
    for clusters in (data.vc1, data.vc2):
        ols_clusters = mod.reformat_clusters(clusters)
        res = mod.fit(cov_type='clustered', clusters=clusters, **panel_opts)
        res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
        assert_results_equal(res, res2, test_fit=False)

    # Clustering by time, then by entity, using the ids stored on the dependent data
    for flag, id_attr in (('cluster_time', 'time_ids'), ('cluster_entity', 'entity_ids')):
        res = mod.fit(cov_type='clustered', **dict(panel_opts, **{flag: True}))
        clusters = pd.DataFrame(getattr(mod.dependent, id_attr),
                                index=mod.dependent.index,
                                columns=['var.clust'])
        res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
        assert_results_equal(res, res2, test_fit=False)
Exemplo n.º 6
0
def test_panel_no_effects_weighted(data):
    """Check that weighted PanelOLS fitted without any effects matches weighted PooledOLS."""
    panel_fit = PanelOLS(data.y, data.x, weights=data.w).fit()
    pooled_fit = PooledOLS(data.y, data.x, weights=data.w).fit()
    assert_results_equal(panel_fit, pooled_fit)
Exemplo n.º 7
0
def test_firstdifference_ols_weighted(data):
    """Compare weighted FirstDifferenceOLS to weighted OLS on hand-differenced data.

    The dependent and exog variables are differenced along the second axis of
    their 3-d value arrays and laid out on the model's observation index.
    The weight for a differenced observation is the inverse of the sum of the
    two inverse level weights, rescaled to have mean one.  The reference
    model is an IV2SLS fit with no endogenous variables or instruments, and
    the comparison covers the unadjusted, robust and clustered covariance
    estimators.
    """
    mod = FirstDifferenceOLS(data.y, data.x, weights=data.w)
    res = mod.fit(debiased=False)

    obs_index = mod.dependent.index
    diff_major = mod.dependent.panel.major_axis[1:]
    minor = mod.dependent.panel.minor_axis

    def _stack(values):
        # Lay a 2-d (major[1:], minor) block out on the model's observation index;
        # positions with no difference available come back as NaN
        frame = pd.DataFrame(values, index=diff_major, columns=minor)
        return frame.T.stack().reindex(obs_index)

    y = mod.dependent.values3d
    x = mod.exog.values3d
    dy = _stack(np.array(y[0, 1:] - y[0, :-1]))

    # One differenced column per exog variable, indexed directly by the
    # observation index instead of through a loop variable read after the loop
    dx = pd.DataFrame(index=obs_index)
    for name, block in zip(mod.exog.vars, x[:, 1:] - x[:, :-1]):
        dx[name] = _stack(block)
    dx = dx[mod.exog.vars]

    w = mod.weights.values3d
    w = 1.0 / w
    sw = _stack(w[0, 1:] + w[0, :-1])
    sw = 1.0 / sw
    sw = sw / sw.mean()

    # Drop every observation where the difference or its weight is undefined
    drop = dy.isnull() | dx.isnull().any(axis=1) | sw.isnull()
    dy = dy.loc[~drop]
    dx = dx.loc[~drop]
    sw = sw.loc[~drop]

    ols_mod = IV2SLS(dy, dx, None, None, weights=sw)
    ols_res = ols_mod.fit(cov_type="unadjusted")
    assert_results_equal(res, ols_res)

    res = mod.fit(cov_type="robust", debiased=False)
    ols_res = ols_mod.fit(cov_type="robust")
    assert_results_equal(res, ols_res)

    clusters = data.vc1
    ols_clusters = mod.reformat_clusters(data.vc1)
    # Restrict the reference clusters to the rows of the first-differenced data
    fd = mod.dependent.first_difference()
    ols_clusters = ols_clusters.dataframe.loc[fd.index]

    res = mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
    ols_res = ols_mod.fit(cov_type="clustered", clusters=ols_clusters)
    assert_results_equal(res, ols_res)
Exemplo n.º 8
0
def test_firstdifference_ols(data):
    """Compare FirstDifferenceOLS to OLS on hand-differenced data.

    The dependent and exog variables are differenced along the second axis of
    their 3-d value arrays and laid out on the model's observation index.
    The reference model is an IV2SLS fit with no endogenous variables or
    instruments.  The comparison covers the unadjusted and robust covariance
    estimators, and clustering by a user variable, by entity, and by both.
    """
    mod = FirstDifferenceOLS(data.y, data.x)
    res = mod.fit(debiased=False)

    obs_index = mod.dependent.index
    diff_major = mod.dependent.panel.major_axis[1:]
    minor = mod.dependent.panel.minor_axis

    def _stack(values):
        # Lay a 2-d (major[1:], minor) block out on the model's observation index;
        # positions with no difference available come back as NaN
        frame = pd.DataFrame(values, index=diff_major, columns=minor)
        return frame.T.stack().reindex(obs_index)

    y = mod.dependent.values3d
    x = mod.exog.values3d
    dy = _stack(np.array(y[0, 1:] - y[0, :-1]))

    # One differenced column per exog variable, indexed directly by the
    # observation index instead of through a loop variable read after the loop
    dx = pd.DataFrame(index=obs_index)
    for name, block in zip(mod.exog.vars, x[:, 1:] - x[:, :-1]):
        dx[name] = _stack(block)
    dx = dx[mod.exog.vars]

    # Drop every observation where a difference is undefined
    drop = dy.isnull() | dx.isnull().any(axis=1)
    dy = dy.loc[~drop]
    dx = dx.loc[~drop]

    ols_mod = IV2SLS(dy, dx, None, None)
    ols_res = ols_mod.fit(cov_type="unadjusted")
    assert_results_equal(res, ols_res)

    res = mod.fit(cov_type="robust", debiased=False)
    ols_res = ols_mod.fit(cov_type="robust")
    assert_results_equal(res, ols_res)

    clusters = data.vc1
    ols_clusters = mod.reformat_clusters(data.vc1)
    # Restrict the reference clusters to the rows of the first-differenced data
    fd = mod.dependent.first_difference()
    ols_clusters = ols_clusters.dataframe.loc[fd.index]
    res = mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
    ols_res = ols_mod.fit(cov_type="clustered", clusters=ols_clusters)
    assert_results_equal(res, ols_res)

    res = mod.fit(cov_type="clustered", cluster_entity=True, debiased=False)
    entity_clusters = mod.dependent.first_difference().entity_ids
    ols_res = ols_mod.fit(cov_type="clustered", clusters=entity_clusters)
    assert_results_equal(res, ols_res)

    # User clusters combined with entity clusters
    ols_clusters["entity.clusters"] = entity_clusters
    ols_clusters = ols_clusters.astype(np.int32)
    res = mod.fit(cov_type="clustered",
                  cluster_entity=True,
                  clusters=data.vc1,
                  debiased=False)
    ols_res = ols_mod.fit(cov_type="clustered", clusters=ols_clusters)
    assert_results_equal(res, ols_res)
Exemplo n.º 9
0
def test_panel_entity_lsdv(data):
    """Compare PanelOLS with entity effects to an explicit entity-dummy regression.

    The reference model is an IV2SLS fit with no endogenous variables or
    instruments on the exog plus entity dummies.  The comparison is repeated
    for the unadjusted, robust and clustered covariance estimators, and the
    inclusive R-squared is checked against the reference R-squared.
    """
    mod = PanelOLS(data.y, data.x, entity_effects=True)
    # Options passed to every PanelOLS fit in this test
    panel_opts = dict(auto_df=False, count_effects=False, debiased=False)
    res = mod.fit(**panel_opts)

    y = mod.dependent.dataframe
    x = mod.exog.dataframe
    if mod.has_constant:
        d = mod.dependent.dummies('entity', drop_first=True)
        z = np.ones_like(y)
        # Remove the projection of the dummies on a column of ones.  rcond is
        # passed explicitly because NumPy 1.14-1.x emits a FutureWarning when
        # it is omitted, which fails the test under warnings-as-errors.
        d_demean = d.values - z @ np.linalg.lstsq(z, d.values, rcond=None)[0]
    else:
        d = mod.dependent.dummies('entity', drop_first=False)
        d_demean = d.values

    xd = np.c_[x.values, d_demean]
    xd = pd.DataFrame(xd, index=x.index, columns=list(x.columns) + list(d.columns))

    ols_mod = IV2SLS(y, xd, None, None)
    res2 = ols_mod.fit(cov_type='unadjusted', debiased=False)
    assert_results_equal(res, res2, test_fit=False)
    assert_allclose(res.rsquared_inclusive, res2.rsquared)

    res = mod.fit(cov_type='robust', **panel_opts)
    res2 = ols_mod.fit(cov_type='robust')
    assert_results_equal(res, res2, test_fit=False)

    # User-supplied cluster variables
    for clusters in (data.vc1, data.vc2):
        ols_clusters = mod.reformat_clusters(clusters)
        res = mod.fit(cov_type='clustered', clusters=clusters, **panel_opts)
        res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
        assert_results_equal(res, res2, test_fit=False)

    # Clustering by time, then by entity, using the ids stored on the dependent data
    for flag, id_attr in (('cluster_time', 'time_ids'), ('cluster_entity', 'entity_ids')):
        res = mod.fit(cov_type='clustered', **dict(panel_opts, **{flag: True}))
        clusters = pd.DataFrame(getattr(mod.dependent, id_attr),
                                index=mod.dependent.index,
                                columns=['var.clust'])
        res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
        assert_results_equal(res, res2, test_fit=False)
Exemplo n.º 10
0
def test_absorbed_option(data):
    """Check that ``drop_absorbed=True`` and ``drop_absorbed=False`` give equal results here."""
    fit_opts = dict(auto_df=False, count_effects=False, debiased=False)

    dropping = PanelOLS(data.y, data.x, entity_effects=True, drop_absorbed=True)
    res_true = dropping.fit(**fit_opts)

    keeping = PanelOLS(data.y, data.x, entity_effects=True, drop_absorbed=False)
    res_false = keeping.fit(**fit_opts)

    assert_results_equal(res_true, res_false)