示例#1
0
def test_demean_many_missing(mi_df):
    """Check entity and time demeaning when whole groups are missing.

    For each column, every ``step``-th label of both index levels is set to
    NaN, so some rows and columns of each 2-d slice of ``values3d`` contain
    no finite observation at all.
    """
    first_level = mi_df.index.levels[0]
    second_level = mi_df.index.levels[1]
    for col, step in zip(mi_df, (3, 5, 2)):
        for label in first_level[::step]:
            mi_df.loc[label, col] = np.nan
        # Swap the levels so ``.loc`` selects on the second level.
        mi_df.index = mi_df.index.swaplevel()
        for label in second_level[::step]:
            mi_df.loc[label, col] = np.nan
        # Swap back to the original level order.
        mi_df.index = mi_df.index.swaplevel()
    data = PanelData(mi_df)

    fe = data.demean("entity")
    was_missing = np.isnan(data.values3d.ravel())
    still_missing = np.isnan(fe.values3d.ravel())
    # Every value that was NaN before demeaning must still be NaN.
    assert np.all(still_missing[was_missing])

    expected = data.values3d.copy()
    for i in range(3):
        plane = expected[i]
        n_cols = plane.shape[1]
        col_means = np.full(n_cols, np.nan)
        for j in range(n_cols):
            column = plane[:, j]
            # Leave NaN for columns with no finite value instead of calling
            # nanmean on an all-NaN slice.
            if np.isfinite(column).any():
                col_means[j] = np.nanmean(column)
        expected[i] -= col_means
    assert_allclose(fe.values3d, expected)

    te = data.demean("time")
    expected = data.values3d.copy()
    for i in range(3):
        plane = expected[i]
        n_rows = plane.shape[0]
        row_means = np.full((n_rows, 1), np.nan)
        for j in range(n_rows):
            row = plane[j]
            if np.isfinite(row).any():
                row_means[j, 0] = np.nanmean(row)
        expected[i] -= row_means
    assert_allclose(te.values3d, expected)
示例#2
0
def test_demean_against_dummy_regression(data):
    """Compare ``demean`` with residuals from least squares on group dummies.

    Covers "entity", "time" and "both".  Both sides are mapped through
    ``1 + abs(.)`` before comparison, so the sign is ignored and the relative
    tolerance of ``assert_allclose`` is applied to values of at least one.
    """
    dh = PanelData(data.x)
    dh.drop(dh.isnull)

    frame = dh.dataframe
    flat = frame.reset_index()
    level0_name = frame.index.levels[0].name
    level1_name = frame.index.levels[1].name

    def dummies_for(level_name, drop_first):
        # One float column per category of the requested index level.
        categories = Categorical(flat[level_name])
        return get_dummies(categories, drop_first=drop_first).astype(np.float64)

    def residuals(design):
        # Residual of regressing the data on the dummy design matrix.
        return frame.values - design @ lstsq(design, frame.values, rcond=None)[0]

    def assert_same_magnitude(actual, desired):
        assert_allclose(1 + np.abs(actual),
                        1 + np.abs(desired))

    expected = residuals(dummies_for(level0_name, False))
    entity_demean = dh.demean("entity")
    assert_same_magnitude(entity_demean.values2d, expected)

    expected = residuals(dummies_for(level1_name, False))
    time_demean = dh.demean("time")
    assert_same_magnitude(time_demean.values2d, expected)

    # Two-way case: full set of level-0 dummies plus level-1 dummies with the
    # first category dropped.
    d1 = dummies_for(level0_name, False)
    d2 = dummies_for(level1_name, True)
    expected = residuals(np.c_[d1.values, d2.values])
    both_demean = dh.demean("both")
    assert_same_magnitude(both_demean.values2d, expected)
示例#3
0
def test_demean_weighted(data):
    """Check weighted entity/time demeaning against weighted dummy regression.

    The reference value is the residual of regressing ``sqrt(w) * x`` on
    ``sqrt(w) * dummies``.  Both sides are compared through ``1 + abs(.)``.
    """
    x = PanelData(data.x)
    w = PanelData(data.w)
    missing = x.isnull | w.isnull
    x.drop(missing)
    w.drop(missing)

    def level_codes(index, level):
        # ``MultiIndex.labels`` was renamed to ``codes`` and later removed;
        # prefer the new attribute and fall back on older pandas versions.
        codes = index.codes if hasattr(index, "codes") else index.labels
        return codes[level]

    for effect, level in (('entity', 0), ('time', 1)):
        demeaned = x.demean(effect, weights=w)
        d = pd.get_dummies(pd.Categorical(level_codes(x.index, level)))
        d = d.values
        root_w = np.sqrt(w.values2d)
        wx = root_w * x.values2d
        wd = d * root_w
        # Passing rcond explicitly avoids the FutureWarning that NumPy
        # 1.14-1.x emits for the changed default.
        mu = wd @ np.linalg.lstsq(wd, wx, rcond=None)[0]
        e = wx - mu
        assert_allclose(1 + np.abs(demeaned.values2d),
                        1 + np.abs(e))
示例#4
0
def test_demean_many_missing(panel):
    """Check entity and time demeaning when whole rows/columns are missing.

    For each of the first three items of ``panel``, every ``step``-th slice
    along both remaining axes is set to NaN before wrapping in ``PanelData``.
    """
    for item, step in enumerate((3, 5, 2)):
        panel.iloc[item, ::step] = np.nan
        panel.iloc[item, :, ::step] = np.nan
    data = PanelData(panel)

    fe = data.demean('entity')
    was_missing = np.isnan(panel.values.ravel())
    still_missing = np.isnan(fe.values3d.ravel())
    # Every value that was NaN before demeaning must still be NaN.
    assert np.all(still_missing[was_missing])

    expected = panel.values.copy()
    for i in range(3):
        plane = expected[i]
        n_cols = plane.shape[1]
        col_means = np.full(n_cols, np.nan)
        for j in range(n_cols):
            column = plane[:, j]
            # Leave NaN for columns with no finite value instead of calling
            # nanmean on an all-NaN slice.
            if np.isfinite(column).any():
                col_means[j] = np.nanmean(column)
        expected[i] -= col_means
    assert_allclose(fe.values3d, expected)

    te = data.demean('time')
    expected = panel.values.copy()
    for i in range(3):
        plane = expected[i]
        n_rows = plane.shape[0]
        row_means = np.full((n_rows, 1), np.nan)
        for j in range(n_rows):
            row = plane[j]
            if np.isfinite(row).any():
                row_means[j, 0] = np.nanmean(row)
        expected[i] -= row_means
    assert_allclose(te.values3d, expected)
示例#5
0
def test_demean_weighted(data):
    """Check weighted entity/time demeaning against weighted dummy regression.

    The reference value is the residual of regressing ``sqrt(w) * x`` on
    ``sqrt(w) * dummies``.  Both sides are compared through ``1 + abs(.)``.
    """
    x = PanelData(data.x)
    w = PanelData(data.w)
    missing = x.isnull | w.isnull
    x.drop(missing)
    w.drop(missing)

    # (effect name passed to demean, position of the matching index level)
    for effect, level in (("entity", 0), ("time", 1)):
        demeaned = x.demean(effect, weights=w)
        dummies = get_dummies(Categorical(get_codes(x.index)[level])).values
        sqrt_w = np.sqrt(w.values2d)
        weighted_x = sqrt_w * x.values2d
        weighted_dummies = dummies * sqrt_w
        coefs = lstsq(weighted_dummies, weighted_x, rcond=None)[0]
        resid = weighted_x - weighted_dummies @ coefs
        assert_allclose(1 + np.abs(demeaned.values2d), 1 + np.abs(resid))
示例#6
0
def test_general_unit_weighted_demean_oneway(mi_df):
    """Check ``general_demean`` with all-one weights against other routes.

    With weights equal to one, ``general_demean`` must agree with ``demean``
    for entity and time groups, with the unweighted ``general_demean`` call,
    and with dummy-variable regression residuals for random groups.
    """
    y = PanelData(mi_df)

    builtin = y.demean("entity")
    groups = PanelData(DataFrame(y.entity_ids, index=y.index))
    # Build a weights object of matching layout and overwrite it with ones.
    weights = PanelData(groups).copy()
    weights.dataframe.iloc[:, :] = 1
    with_weights = y.general_demean(groups, weights)
    assert_allclose(builtin.values2d, with_weights.values2d)
    without_weights = y.general_demean(groups)
    assert_allclose(without_weights.values2d, with_weights.values2d)

    builtin = y.demean("time")
    groups = PanelData(DataFrame(y.time_ids, index=y.index))
    with_weights = y.general_demean(groups, weights)
    assert_allclose(builtin.values2d, with_weights.values2d)
    without_weights = y.general_demean(groups)
    assert_allclose(without_weights.values2d, with_weights.values2d)

    # Random group labels in [0, 10) with the shape of the previous groups.
    random_ids = np.random.randint(0, 10, groups.dataframe.shape)
    groups = PanelData(DataFrame(random_ids, index=y.index))
    with_weights = y.general_demean(groups, weights)
    without_weights = y.general_demean(groups)
    dummies = get_dummies(Categorical(groups.dataframe.iloc[:, 0]))
    coefs = lstsq(dummies, y.values2d, rcond=None)[0]
    regression_resid = y.values2d - dummies @ coefs
    assert_allclose(regression_resid, with_weights.values2d)
    assert_allclose(without_weights.values2d, with_weights.values2d)
示例#7
0
def test_general_weighted_demean_oneway(panel):
    """Check weighted ``general_demean`` against ``demean`` and regression.

    Weights are random chi-square(10) draws divided by 10.  For entity and
    time groups the result must match ``demean(..., weights=w)``; for random
    groups it must match the residual of a weighted dummy regression.
    """
    y = PanelData(panel)
    weights = pd.DataFrame(np.random.chisquare(10,
                                               (y.dataframe.shape[0], 1)) / 10,
                           index=y.index)
    w = PanelData(weights)

    dm1 = y.demean('entity', weights=w)
    g = PanelData(pd.DataFrame(y.entity_ids, index=y.index))
    dm2 = y.general_demean(g, w)
    assert_allclose(dm1.values2d, dm2.values2d)

    dm1 = y.demean('time', weights=w)
    g = PanelData(pd.DataFrame(y.time_ids, index=y.index))
    dm2 = y.general_demean(g, w)
    assert_allclose(dm1.values2d, dm2.values2d)

    # Random group labels in [0, 10) with the shape of the previous groups.
    g = PanelData(
        pd.DataFrame(np.random.randint(0, 10, g.dataframe.shape),
                     index=y.index))
    dm2 = y.general_demean(g, w)
    g = pd.Categorical(g.dataframe.iloc[:, 0])
    d = pd.get_dummies(g)
    root_w = np.sqrt(w.values2d)
    wd = root_w * d
    wy = root_w * y.values2d
    # Passing rcond explicitly avoids the FutureWarning that NumPy 1.14-1.x
    # emits for the changed default.
    dm1 = wy - wd @ np.linalg.lstsq(wd, wy, rcond=None)[0]
    assert_allclose(dm1, dm2.values2d, atol=1e-14)
示例#8
0
def test_general_weighted_demean_oneway(mi_df):
    """Check weighted ``general_demean`` against ``demean`` and regression.

    Weights are random chi-square(10) draws divided by 10.  For entity and
    time groups the result must match ``demean(..., weights=w)``; for random
    groups it must match the residual of a weighted dummy regression.
    """
    y = PanelData(mi_df)
    n_obs = y.dataframe.shape[0]
    raw_weights = np.random.chisquare(10, (n_obs, 1)) / 10
    w = PanelData(DataFrame(raw_weights, index=y.index))

    builtin = y.demean("entity", weights=w)
    groups = PanelData(DataFrame(y.entity_ids, index=y.index))
    general = y.general_demean(groups, w)
    assert_allclose(builtin.values2d, general.values2d)

    builtin = y.demean("time", weights=w)
    groups = PanelData(DataFrame(y.time_ids, index=y.index))
    general = y.general_demean(groups, w)
    assert_allclose(builtin.values2d, general.values2d)

    # Random group labels in [0, 10) with the shape of the previous groups.
    random_ids = np.random.randint(0, 10, groups.dataframe.shape)
    groups = PanelData(DataFrame(random_ids, index=y.index))
    general = y.general_demean(groups, w)
    dummies = get_dummies(Categorical(groups.dataframe.iloc[:, 0]))
    weighted_dummies = np.sqrt(w.values2d) * dummies
    weighted_y = np.sqrt(w.values2d) * y.values2d
    coefs = lstsq(weighted_dummies, weighted_y, rcond=None)[0]
    regression_resid = weighted_y - weighted_dummies @ coefs
    assert_allclose(regression_resid, general.values2d, atol=1e-14)
示例#9
0
def test_general_unit_weighted_demean_oneway(panel):
    """Check ``general_demean`` with all-one weights against other routes.

    With weights equal to one, ``general_demean`` must agree with ``demean``
    for entity and time groups, with the unweighted ``general_demean`` call,
    and with dummy-variable regression residuals for random groups.
    """
    y = PanelData(panel)
    dm1 = y.demean('entity')
    g = PanelData(pd.DataFrame(y.entity_ids, index=y.index))
    # Build a weights object of matching layout and overwrite it with ones.
    weights = PanelData(g).copy()
    weights.dataframe.iloc[:, :] = 1
    dm2 = y.general_demean(g, weights)
    assert_allclose(dm1.values2d, dm2.values2d)
    dm3 = y.general_demean(g)
    assert_allclose(dm3.values2d, dm2.values2d)

    dm1 = y.demean('time')
    g = PanelData(pd.DataFrame(y.time_ids, index=y.index))
    dm2 = y.general_demean(g, weights)
    assert_allclose(dm1.values2d, dm2.values2d)
    dm3 = y.general_demean(g)
    assert_allclose(dm3.values2d, dm2.values2d)

    # Random group labels in [0, 10) with the shape of the previous groups.
    g = PanelData(pd.DataFrame(np.random.randint(0, 10, g.dataframe.shape), index=y.index))
    dm2 = y.general_demean(g, weights)
    dm3 = y.general_demean(g)
    g = pd.Categorical(g.dataframe.iloc[:, 0])
    d = pd.get_dummies(g)
    # Passing rcond explicitly avoids the FutureWarning that NumPy 1.14-1.x
    # emits for the changed default.
    dm1 = y.values2d - d @ np.linalg.lstsq(d, y.values2d, rcond=None)[0]
    assert_allclose(dm1, dm2.values2d)
    assert_allclose(dm3.values2d, dm2.values2d)
示例#10
0
def test_demean_missing_alt_types(data):
    """Compare ``demean`` with a pandas group-wise mean subtraction.

    Rows flagged by ``isnull`` are dropped first; the demeaned frame must
    then equal ``groupby(level=...).transform`` on the same data.
    """
    xpd = PanelData(data.x)
    xpd.drop(xpd.isnull)

    def center(s):
        return s - s.mean()

    # (effect name passed to demean, index level used for the groupby)
    for effect, level in (('entity', 0), ('time', 1)):
        demeaned = xpd.demean(effect)
        expected = xpd.dataframe.groupby(level=level).transform(center)
        assert_frame_equal(demeaned.dataframe, expected)
示例#11
0
def test_demean(panel):
    """Check entity and time demeaning on complete data.

    The expected values subtract, from each of the first three 2-d slices of
    ``panel.values``, its column means ("entity") or row means ("time").
    """
    data = PanelData(panel)

    fe = data.demean('entity')
    expected = panel.values.copy()
    for i in range(3):
        plane = expected[i]
        plane -= plane.mean(axis=0)
    assert_allclose(fe.values3d, expected)

    te = data.demean('time')
    expected = panel.values.copy()
    for i in range(3):
        plane = expected[i]
        plane -= plane.mean(axis=1, keepdims=True)
    assert_allclose(te.values3d, expected)
示例#12
0
def test_demean(mi_df):
    """Check entity and time demeaning on complete data.

    The expected values subtract, from each of the first three 2-d slices of
    ``values3d``, its column means ("entity") or row means ("time").
    """
    data = PanelData(mi_df)

    fe = data.demean("entity")
    expected = data.values3d.copy()
    for i in range(3):
        plane = expected[i]
        plane -= plane.mean(axis=0)
    assert_allclose(fe.values3d, expected)

    te = data.demean("time")
    expected = data.values3d.copy()
    for i in range(3):
        plane = expected[i]
        plane -= plane.mean(axis=1, keepdims=True)
    assert_allclose(te.values3d, expected)
示例#13
0
def test_demean_against_groupby(data):
    """Compare ``demean`` with pandas group-wise mean subtraction."""
    dh = PanelData(data.x)
    frame = dh.dataframe

    # (index level used for the groupby, effect name passed to demean)
    for level, effect in ((0, "entity"), (1, "time")):
        expected = frame.groupby(level=level).transform(lambda x: x - x.mean())
        res = dh.demean(effect)
        assert_allclose(expected.values, res.values2d)
示例#14
0
def test_demean_missing(mi_df):
    """Check entity and time demeaning with scattered missing values.

    Every 13th element of the data (in row-major order) is set to NaN, and
    the result is compared against NaN-aware means of each 2-d slice.
    """
    # Writing through ``mi_df.values`` only reaches the frame when pandas
    # returns a writable view; with Copy-on-Write the array is read-only.
    # Modify an explicit copy and assign it back so the NaNs always land.
    values = np.array(mi_df.values)
    values.flat[::13] = np.nan
    mi_df.iloc[:, :] = values

    data = PanelData(mi_df)
    fe = data.demean("entity")
    expected = data.values3d.copy()
    for i in range(3):
        expected[i] -= np.nanmean(expected[i], 0)
    assert_allclose(fe.values3d, expected)

    te = data.demean("time")
    expected = data.values3d.copy()
    for i in range(3):
        expected[i] -= np.nanmean(expected[i], 1)[:, None]
    assert_allclose(te.values3d, expected)
示例#15
0
def test_demean_simple_weighted(data):
    """Check that weights of one give the same result as no weights."""
    x = PanelData(data.x)
    w = PanelData(data.w)
    missing = x.isnull | w.isnull
    x.drop(missing)
    w.drop(missing)
    # Overwrite the first weight column with ones.
    w.dataframe.iloc[:, 0] = 1

    for effect in ('entity', 'time'):
        unweighted = x.demean(effect)
        weighted = x.demean(effect, weights=w)
        assert_allclose(unweighted.dataframe, weighted.dataframe)
示例#16
0
def test_demean_missing(panel):
    """Check entity and time demeaning with scattered missing values.

    Every 13th element of ``panel.values`` is set to NaN, and the result is
    compared against NaN-aware means of each 2-d slice.
    """
    panel.values.flat[::13] = np.nan
    data = PanelData(panel)

    fe = data.demean('entity')
    expected = panel.values.copy()
    for i in range(3):
        plane = expected[i]
        plane -= np.nanmean(plane, axis=0)
    assert_allclose(fe.values3d, expected)

    te = data.demean('time')
    expected = panel.values.copy()
    for i in range(3):
        plane = expected[i]
        plane -= np.nanmean(plane, axis=1, keepdims=True)
    assert_allclose(te.values3d, expected)
示例#17
0
def test_demean_missing_alt_types(data):
    """Compare ``demean`` with a pandas group-wise mean subtraction.

    Index and column type checks in ``assert_frame_equal`` are only enabled
    when ``data.x`` is a ``DataFrame`` or ``ndarray``.
    """
    strict_types = isinstance(data.x, (DataFrame, np.ndarray))
    xpd = PanelData(data.x)
    xpd.drop(xpd.isnull)

    def center(s):
        return s - s.mean()

    # (effect name passed to demean, index level used for the groupby)
    for effect, level in (('entity', 0), ('time', 1)):
        demeaned = xpd.demean(effect)
        expected = xpd.dataframe.groupby(level=level).transform(center)
        assert_frame_equal(demeaned.dataframe, expected,
                           check_index_type=strict_types,
                           check_column_type=strict_types)
示例#18
0
def test_general_unit_weighted_demean_twoway(mi_df):
    """Check weighted two-way ``general_demean`` against other routes.

    With entity and time ids as the two group columns the result must match
    ``demean("both", weights=w)``; with two random group columns it must
    match the residual of a weighted dummy regression.  The random state is
    seeded so the draws are reproducible.
    """
    np.random.seed(12345)
    y = PanelData(mi_df)
    n_obs = y.dataframe.shape[0]
    raw_weights = np.random.chisquare(10, (n_obs, 1)) / 10
    w = PanelData(DataFrame(raw_weights, index=y.index))

    builtin = y.demean("both", weights=w)
    groups = DataFrame(y.entity_ids, index=y.index)
    groups["column2"] = Series(y.time_ids.squeeze(), index=y.index)
    general = y.general_demean(groups, weights=w)
    assert_allclose(builtin.values2d - general.values2d,
                    np.zeros_like(general.values2d),
                    atol=1e-7)

    # Two columns of random group labels in [0, 10).
    groups = DataFrame(np.random.randint(0, 10, groups.shape), index=y.index)
    general = y.general_demean(groups, weights=w)
    first_dummies = get_dummies(Categorical(groups.iloc[:, 0]))
    # Drop one category of the second grouping when building its dummies.
    second_dummies = get_dummies(Categorical(groups.iloc[:, 1]), drop_first=True)
    design = np.c_[first_dummies, second_dummies]
    weighted_design = np.sqrt(w.values2d) * design
    weighted_y = np.sqrt(w.values2d) * y.values2d
    coefs = lstsq(weighted_design, weighted_y, rcond=None)[0]
    regression_resid = weighted_y - weighted_design @ coefs
    assert_allclose(regression_resid - general.values2d,
                    np.zeros_like(general.values2d), atol=1e-7)
示例#19
0
def test_general_demean_oneway(panel):
    """Check one-way ``general_demean`` against ``demean`` and regression.

    Entity and time ids must reproduce ``demean``; random groups must match
    the residual of a regression on group dummies.
    """
    y = PanelData(panel)
    dm1 = y.demean('entity')
    g = pd.DataFrame(y.entity_ids, index=y.index)
    dm2 = y.general_demean(g)
    assert_allclose(dm1.values2d, dm2.values2d)

    dm1 = y.demean('time')
    g = pd.DataFrame(y.time_ids, index=y.index)
    dm2 = y.general_demean(g)
    assert_allclose(dm1.values2d, dm2.values2d)

    # Random group labels in [0, 10) with the shape of the previous groups.
    g = pd.DataFrame(np.random.randint(0, 10, g.shape), index=y.index)
    dm2 = y.general_demean(g)
    g = pd.Categorical(g.iloc[:, 0])
    d = pd.get_dummies(g)
    # Passing rcond explicitly avoids the FutureWarning that NumPy 1.14-1.x
    # emits for the changed default.
    dm1 = y.values2d - d @ np.linalg.lstsq(d, y.values2d, rcond=None)[0]
    assert_allclose(dm1, dm2.values2d)
示例#20
0
def test_general_demean_oneway(mi_df):
    """Check one-way ``general_demean`` against ``demean`` and regression.

    Entity and time ids must reproduce ``demean``; random groups must match
    the residual of a regression on group dummies.
    """
    y = PanelData(mi_df)

    builtin = y.demean("entity")
    groups = DataFrame(y.entity_ids, index=y.index)
    general = y.general_demean(groups)
    assert_allclose(builtin.values2d, general.values2d)

    builtin = y.demean("time")
    groups = DataFrame(y.time_ids, index=y.index)
    general = y.general_demean(groups)
    assert_allclose(builtin.values2d, general.values2d)

    # Random group labels in [0, 10) with the shape of the previous groups.
    groups = DataFrame(np.random.randint(0, 10, groups.shape), index=y.index)
    general = y.general_demean(groups)
    dummies = get_dummies(Categorical(groups.iloc[:, 0]))
    coefs = lstsq(dummies, y.values2d, rcond=None)[0]
    regression_resid = y.values2d - dummies @ coefs
    assert_allclose(regression_resid, general.values2d)
示例#21
0
def test_demean_many_missing_dropped(panel):
    """Check entity demeaning after rows with missing values are dropped.

    The expected values subtract, per entity id, the NaN-aware column means
    of that entity's rows in ``values2d``.
    """
    panel.iloc[0, ::3, ::3] = np.nan
    data = PanelData(panel)
    data.drop(data.isnull)
    fe = data.demean('entity')

    expected = data.values2d.copy()
    entity_of_row = data.entity_ids.ravel()
    for entity in np.unique(entity_of_row):
        rows = entity_of_row == entity
        block = expected[rows]
        expected[rows] = block - np.nanmean(block, axis=0)

    assert_allclose(fe.values2d, expected)
示例#22
0
def test_demean_both_large_t():
    """Check two-way demeaning on a single-item (1, 100, 10) random panel.

    The reference value is the residual from projecting the data onto the
    dummies of both index levels, computed with the pseudo-inverse.
    """
    draws = np.random.standard_normal((1, 100, 10))
    data = PanelData(pd.Panel(draws))
    demeaned = data.demean('both')

    frame = data.dataframe
    flat = frame.reset_index()
    level0 = pd.Categorical(flat[frame.index.levels[0].name])
    d1 = pd.get_dummies(level0, drop_first=False).astype(np.float64)
    level1 = pd.Categorical(flat[frame.index.levels[1].name])
    # Drop one category of the second level when building its dummies.
    d2 = pd.get_dummies(level1, drop_first=True).astype(np.float64)
    design = np.c_[d1.values, d2.values]
    projection = design @ pinv(design)
    dummy_demeaned = frame.values - projection @ frame.values
    assert_allclose(1 + np.abs(demeaned.values2d), 1 + np.abs(dummy_demeaned))
示例#23
0
def test_demean_many_missing(panel):
    """Check entity and time demeaning when whole rows/columns are missing.

    For each of the first three items of ``panel``, every ``step``-th slice
    along both remaining axes is set to NaN, so some rows and columns hold no
    finite value.  Means of such slices are set to NaN directly rather than
    via ``np.nanmean``, which would emit a "Mean of empty slice"
    RuntimeWarning.
    """
    panel.iloc[0, ::3] = np.nan
    panel.iloc[0, :, ::3] = np.nan
    panel.iloc[1, ::5] = np.nan
    panel.iloc[1, :, ::5] = np.nan
    panel.iloc[2, ::2] = np.nan
    panel.iloc[2, :, ::2] = np.nan

    def safe_nanmean(values, axis):
        # NaN-aware mean of a 2-d array along ``axis``; slices without any
        # finite value yield NaN without calling nanmean on them.
        has_data = np.isfinite(values).any(axis)
        means = np.full(values.shape[1 - axis], np.nan)
        if axis == 0:
            means[has_data] = np.nanmean(values[:, has_data], 0)
        else:
            means[has_data] = np.nanmean(values[has_data], 1)
        return means

    data = PanelData(panel)
    fe = data.demean('entity')
    orig_nan = np.isnan(panel.values.ravel())
    fe_nan = np.isnan(fe.values3d.ravel())
    # Every value that was NaN before demeaning must still be NaN.
    assert np.all(fe_nan[orig_nan])
    expected = panel.values.copy()
    for i in range(3):
        expected[i] -= safe_nanmean(expected[i], 0)
    assert_allclose(fe.values3d, expected)

    te = data.demean('time')
    expected = panel.values.copy()
    for i in range(3):
        expected[i] -= safe_nanmean(expected[i], 1)[:, None]
    assert_allclose(te.values3d, expected)
示例#24
0
def test_general_demean_twoway(mi_df):
    """Check two-way ``general_demean`` against ``demean`` and regression.

    With entity and time ids as the two group columns the result must match
    ``demean("both")``; with two random group columns it must match the
    residual of a regression on the combined dummies.
    """
    y = PanelData(mi_df)

    builtin = y.demean("both")
    groups = DataFrame(y.entity_ids, index=y.index)
    groups["column2"] = Series(y.time_ids.squeeze(), index=y.index)
    general = y.general_demean(groups)
    assert_allclose(builtin.values2d, general.values2d)

    # Two columns of random group labels in [0, 10).
    groups = DataFrame(np.random.randint(0, 10, groups.shape), index=y.index)
    general = y.general_demean(groups)
    first_dummies = get_dummies(Categorical(groups.iloc[:, 0]))
    # Drop one category of the second grouping when building its dummies.
    second_dummies = get_dummies(Categorical(groups.iloc[:, 1]), drop_first=True)
    design = np.c_[first_dummies, second_dummies]
    coefs = lstsq(design, y.values2d, rcond=None)[0]
    regression_resid = y.values2d - design @ coefs
    assert_allclose(regression_resid - general.values2d,
                    np.zeros_like(general.values2d), atol=1e-7)
示例#25
0
def test_demean_both_large_t():
    """Check two-way demeaning on a single-variable 100 x 10 random panel.

    The reference value is the residual from projecting the data onto the
    dummies of both index levels, computed with the pseudo-inverse.
    """
    draws = np.random.standard_normal((1, 100, 10))
    periods = date_range("1-1-2000", periods=100)
    entity_names = ["entity.{0}".format(i) for i in range(10)]
    frame_in = panel_to_frame(draws, ["x"], periods, entity_names, swap=True)
    data = PanelData(frame_in)
    demeaned = data.demean("both")

    frame = data.dataframe
    flat = frame.reset_index()
    level0 = Categorical(flat[frame.index.levels[0].name])
    d1 = get_dummies(level0, drop_first=False).astype(np.float64)
    level1 = Categorical(flat[frame.index.levels[1].name])
    # Drop one category of the second level when building its dummies.
    d2 = get_dummies(level1, drop_first=True).astype(np.float64)
    design = np.c_[d1.values, d2.values]
    projection = design @ pinv(design)
    dummy_demeaned = frame.values - projection @ frame.values
    assert_allclose(1 + np.abs(demeaned.values2d), 1 + np.abs(dummy_demeaned))
示例#26
0
def test_general_demean_twoway(panel):
    """Check two-way ``general_demean`` against ``demean`` and regression.

    With entity and time ids as the two group columns the result must match
    ``demean('both')``; with two random group columns it must match the
    residual of a regression on the combined dummies.
    """
    y = PanelData(panel)
    dm1 = y.demean('both')
    g = pd.DataFrame(y.entity_ids, index=y.index)
    g['column2'] = pd.Series(y.time_ids.squeeze(), index=y.index)
    dm2 = y.general_demean(g)
    assert_allclose(dm1.values2d, dm2.values2d)

    # Two columns of random group labels in [0, 10).
    g = pd.DataFrame(np.random.randint(0, 10, g.shape), index=y.index)
    dm2 = y.general_demean(g)
    g1 = pd.Categorical(g.iloc[:, 0])
    d1 = pd.get_dummies(g1)
    g2 = pd.Categorical(g.iloc[:, 1])
    # Drop one category of the second grouping when building its dummies.
    d2 = pd.get_dummies(g2, drop_first=True)
    d = np.c_[d1, d2]
    # Passing rcond explicitly avoids the FutureWarning that NumPy 1.14-1.x
    # emits for the changed default.
    dm1 = y.values2d - d @ np.linalg.lstsq(d, y.values2d, rcond=None)[0]
    assert_allclose(dm1 - dm2.values2d, np.zeros_like(dm2.values2d), atol=1e-7)
示例#27
0
def test_demean_many_missing_dropped(mi_df):
    """Check entity demeaning after rows with missing values are dropped.

    In the first column, every third label of both index levels is set to
    NaN.  The expected values subtract, per entity id, the NaN-aware column
    means of that entity's rows in ``values2d``.
    """
    first_level = mi_df.index.levels[0]
    second_level = mi_df.index.levels[1]
    target = mi_df.columns[0]
    for label in first_level[::3]:
        mi_df.loc[label, target] = np.nan
    # Swap the levels so ``.loc`` selects on the second level, then restore.
    mi_df.index = mi_df.index.swaplevel()
    for label in second_level[::3]:
        mi_df.loc[label, target] = np.nan
    mi_df.index = mi_df.index.swaplevel()

    data = PanelData(mi_df)
    data.drop(data.isnull)
    fe = data.demean("entity")

    expected = data.values2d.copy()
    entity_of_row = data.entity_ids.ravel()
    for entity in np.unique(entity_of_row):
        rows = entity_of_row == entity
        block = expected[rows]
        expected[rows] = block - np.nanmean(block, axis=0)

    assert_allclose(fe.values2d, expected)
示例#28
0
# Fit weighted PanelOLS with each combination of entity/time effects.  The
# fit results are not stored; the calls only run the estimation.
mod = PanelOLS(data.y, data.x, weights=data.w)
mod.fit()
# NOTE(review): this call passes ``y``/``x`` instead of ``data.y``/``data.x``;
# ``y``, ``x`` and ``w`` are defined outside this fragment - confirm intended.
mod = PanelOLS(y, x, weights=data.w, entity_effects=True)
mod.fit()
mod = PanelOLS(data.y, data.x, weights=data.w, time_effects=True)
mod.fit()
mod = PanelOLS(data.y, data.x, weights=data.w, time_effects=True, entity_effects=True)
mod.fit()

# Drop every observation that is missing in any of y, x or w.
missing = y.isnull | x.isnull | w.isnull
y.drop(missing)
x.drop(missing)
w.drop(missing)

# Overwrite the first column of x with ones.
x.dataframe.iloc[:, 0] = 1
ydw = y.demean(weights=w)
xdw = x.demean(weights=w)
d = x.dummies('entity', drop_first=False)
root_w = np.sqrt(w.values2d)
wd = root_w * d
# Weighted demeaning computed directly as the residual of regressing
# sqrt(w) * x on sqrt(w) * entity dummies.  rcond is passed explicitly to
# avoid the FutureWarning that NumPy 1.14-1.x emits for the changed default.
wdx_direct = root_w * x.values2d - wd @ np.linalg.lstsq(wd, root_w * x.values2d, rcond=None)[0]
# Element-wise flags: True where the first row differs by more than 1e-14.
print(np.abs(wdx_direct[0] - xdw.values2d[0]) > 1e-14)

# Weighted overall means, added back (scaled by sqrt(w)) to the demeaned data.
mux = (w.values2d * x.values2d).sum(0) / w.values2d.sum()
muy = (w.values2d * y.values2d).sum(0) / w.values2d.sum()
xx = xdw.values2d + root_w * mux
yy = ydw.values2d + root_w * muy.squeeze()
print(np.linalg.lstsq(xx, yy, rcond=None)[0])

# sqrt(w)-scaled copies of y and x; not used further within this fragment.
yyy = root_w * y.values2d
xxx = root_w * x.values2d
示例#29
0
def test_demean_invalid(mi_df):
    """Check that ``demean`` raises ``ValueError`` for an unknown group name."""
    panel_data = PanelData(mi_df)
    with pytest.raises(ValueError):
        panel_data.demean("unknown")
示例#30
0
def test_demean_invalid(panel):
    """Check that ``demean`` raises ``ValueError`` for an unknown group name."""
    panel_data = PanelData(panel)
    with pytest.raises(ValueError):
        panel_data.demean('unknown')