Example #1
import numpy as np
from numpy.linalg import lstsq, pinv
from numpy.testing import assert_allclose
from pandas import Categorical, get_dummies

from linearmodels.panel.data import PanelData


def get_codes(index):
    # Compatibility shim: newer pandas exposes MultiIndex.codes, while older
    # releases used MultiIndex.labels (compare Example #2 below).
    return index.codes if hasattr(index, "codes") else index.labels


def test_mean_weighted(data):
    # The weighted group mean of x equals the WLS projection of x on group dummies.
    x = PanelData(data.x)
    w = PanelData(data.w)
    missing = x.isnull | w.isnull
    x.drop(missing)
    w.drop(missing)
    entity_mean = x.mean("entity", weights=w)
    c = x.index.levels[0][get_codes(x.index)[0]]
    d = get_dummies(Categorical(c, ordered=True))
    d = d[entity_mean.index]
    d = d.values
    root_w = np.sqrt(w.values2d)
    wx = root_w * x.values2d
    wd = d * root_w
    mu = lstsq(wd, wx, rcond=None)[0]
    assert_allclose(entity_mean, mu)

    time_mean = x.mean("time", weights=w)
    c = x.index.levels[1][get_codes(x.index)[1]]
    d = get_dummies(Categorical(c, ordered=True))
    d = d[list(time_mean.index)]
    d = d.values
    root_w = np.sqrt(w.values2d)
    wx = root_w * x.values2d
    wd = d * root_w
    mu = pinv(wd) @ wx
    assert_allclose(time_mean, mu)
Example #2
import numpy as np
import pandas as pd
from numpy.linalg import pinv
from numpy.testing import assert_allclose

from linearmodels.panel.data import PanelData

# Note: this variant targets older pandas, where MultiIndex exposed .labels
# (renamed to .codes in pandas 0.24).


def test_mean_weighted(data):
    x = PanelData(data.x)
    w = PanelData(data.w)
    missing = x.isnull | w.isnull
    x.drop(missing)
    w.drop(missing)
    entity_mean = x.mean('entity', weights=w)
    c = x.index.levels[0][x.index.labels[0]]
    d = pd.get_dummies(pd.Categorical(c, ordered=True))
    d = d[entity_mean.index]
    d = d.values
    root_w = np.sqrt(w.values2d)
    wx = root_w * x.values2d
    wd = d * root_w
    mu = np.linalg.lstsq(wd, wx)[0]
    assert_allclose(entity_mean, mu)

    time_mean = x.mean('time', weights=w)
    c = x.index.levels[1][x.index.labels[1]]
    d = pd.get_dummies(pd.Categorical(c, ordered=True))
    d = d[time_mean.index]
    d = d.values
    root_w = np.sqrt(w.values2d)
    wx = root_w * x.values2d
    wd = d * root_w
    mu = pinv(wd) @ wx
    assert_allclose(time_mean, mu)
Example #3
import numpy as np
import pandas as pd
from numpy.linalg import pinv
from numpy.testing import assert_allclose

from linearmodels.panel.data import PanelData

# Note: like Example #2, this variant uses the pre-0.24 MultiIndex.labels
# attribute and works around a pandas column-selection issue (see the TODO).


def test_mean_weighted(data):
    x = PanelData(data.x)
    w = PanelData(data.w)
    missing = x.isnull | w.isnull
    x.drop(missing)
    w.drop(missing)
    entity_mean = x.mean('entity', weights=w)
    c = x.index.levels[0][x.index.labels[0]]
    d = pd.get_dummies(pd.Categorical(c, ordered=True))
    d = d[entity_mean.index]
    d = d.values
    root_w = np.sqrt(w.values2d)
    wx = root_w * x.values2d
    wd = d * root_w
    mu = np.linalg.lstsq(wd, wx)[0]
    assert_allclose(entity_mean, mu)

    time_mean = x.mean('time', weights=w)
    c = x.index.levels[1][x.index.labels[1]]
    d = pd.get_dummies(pd.Categorical(c, ordered=True))
    ilocs = [int(d.columns.get_indexer_for([i])) for i in time_mean.index]
    d = d.iloc[:, ilocs]
    # TODO: Restore when fixed in pandas
    # d = d[time_mean.index]
    d = d.values
    root_w = np.sqrt(w.values2d)
    wx = root_w * x.values2d
    wd = d * root_w
    mu = pinv(wd) @ wx
    assert_allclose(time_mean, mu)
Example #4
from pandas.testing import assert_frame_equal

from linearmodels.panel.data import PanelData


def test_mean_missing(data):
    xpd = PanelData(data.x)
    xpd.drop(xpd.isnull)
    entity_mean = xpd.mean("entity")
    expected = xpd.dataframe.groupby(level=0).mean()
    expected = expected.loc[xpd.entities]
    expected.columns.name = None
    assert_frame_equal(entity_mean, expected)

    time_mean = xpd.mean("time")
    expected = xpd.dataframe.groupby(level=1).mean()
    expected = expected.loc[xpd.time]
    expected.columns.name = None
    assert_frame_equal(time_mean, expected)
Example #5
dfp.count() == dfp.nobs

`all()` returns `True` for a column only when every element in it is `True`, so we can use it to check this.

(dfp.count() == dfp.nobs).all()

The parentheses `( )` mean that their contents are evaluated first, just as in mathematics. When there are many variables, applying `all()` twice is convenient because it evaluates the condition over all of the variables.

(dfp.count() == dfp.nobs).all().all()

Since the result is `False`, we have confirmed that this is unbalanced panel data.
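
A minimal sketch of this check, assuming a hypothetical toy panel (entity level `firm`, time level `year`) and that `PanelData` can be imported from `linearmodels.panel.data`; one row is dropped so that the panel is unbalanced:

import numpy as np
import pandas as pd
from linearmodels.panel.data import PanelData

# Hypothetical toy panel: 3 firms x 4 years, with one observation removed
# so that the panel is unbalanced.
idx = pd.MultiIndex.from_product([["a", "b", "c"], range(2000, 2004)],
                                 names=["firm", "year"])
df = pd.DataFrame({"x": np.arange(12.0), "y": np.arange(12.0) ** 2}, index=idx)
df = df.drop(index=[("c", 2003)])

dfp = PanelData(df)
print(dfp.count() == dfp.nobs)                # element-wise comparison per firm
print((dfp.count() == dfp.nobs).all())        # one True/False per variable
print((dfp.count() == dfp.nobs).all().all())  # False -> unbalanced panel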

---
Computing the mean of each variable for each observational unit (entity)

dfp.mean()
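
As a quick check of what `mean()` returns, here is a sketch on a hypothetical balanced toy panel (the `firm`/`year` names and the `PanelData` import path are assumptions); the entity means should match a plain pandas `groupby` on the entity level, which is exactly what the `test_mean_missing` example above verifies:

import numpy as np
import pandas as pd
from linearmodels.panel.data import PanelData

idx = pd.MultiIndex.from_product([["a", "b", "c"], range(2000, 2004)],
                                 names=["firm", "year"])
dfp = PanelData(pd.DataFrame({"x": np.arange(12.0)}, index=idx))

entity_mean = dfp.mean()                      # one row per firm
print(entity_mean)
print(dfp.dataframe.groupby(level=0).mean())  # same values via plain pandas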

---
Computing the mean of each variable for each time period

dfp.mean('time')
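
Analogously, a sketch for the time means under the same assumptions as above; they should agree with grouping on the time level of the index, possibly up to row order:

import numpy as np
import pandas as pd
from linearmodels.panel.data import PanelData

idx = pd.MultiIndex.from_product([["a", "b", "c"], range(2000, 2004)],
                                 names=["firm", "year"])
dfp = PanelData(pd.DataFrame({"x": np.arange(12.0)}, index=idx))

time_mean = dfp.mean("time")                  # one row per year
print(time_mean)
print(dfp.dataframe.groupby(level=1).mean())  # same values via plain pandas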

---
Deviation of each variable from its mean, $x-\bar{x}$, where $\bar{x}$ is the mean.

dfp.demean()
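
A sketch of demeaning under the same assumptions; `demean()` is expected to return another `PanelData` (an assumption about the return type), so `.dataframe` is used to inspect it, and the values should match subtracting the entity means computed with a pandas `groupby` transform:

import numpy as np
import pandas as pd
from linearmodels.panel.data import PanelData

idx = pd.MultiIndex.from_product([["a", "b", "c"], range(2000, 2004)],
                                 names=["firm", "year"])
dfp = PanelData(pd.DataFrame({"x": np.arange(12.0)}, index=idx))

demeaned = dfp.demean()  # x minus the entity (firm) mean of x
manual = dfp.dataframe - dfp.dataframe.groupby(level=0).transform("mean")
print(demeaned.dataframe.head())  # assumes demean() returns a PanelData
print(manual.head())              # should show the same deviations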

---
Computing the first difference of each variable, $x_t-x_{t-1}$

dfp.first_difference()
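
Finally, a sketch for the first difference with the same toy panel and assumptions; `first_difference()` is expected to return a `PanelData` in which each firm's first period is dropped, and its values should line up with a per-firm `diff()` in plain pandas:

import numpy as np
import pandas as pd
from linearmodels.panel.data import PanelData

idx = pd.MultiIndex.from_product([["a", "b", "c"], range(2000, 2004)],
                                 names=["firm", "year"])
dfp = PanelData(pd.DataFrame({"x": np.arange(12.0)}, index=idx))

fd = dfp.first_difference()  # x_t - x_{t-1} within each firm (return type assumed)
print(fd.dataframe)
print(dfp.dataframe.groupby(level=0).diff().dropna())  # same idea in plain pandas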