def test_empty_columns(df):
    """
    Check which statistics survive for a column that contains only NaN.

    The all-NaN column ``c`` is described twice: once as part of the whole
    frame and once on its own.  In both cases exactly two rows must remain
    after dropping missing results.
    """
    # Case 1: whole frame, then pick the all-NaN column from the result.
    df["c"] = np.nan
    frame_result = Description(df)
    surviving = frame_result.frame.c.dropna()
    assert len(surviving) == 2
    # ``in`` on the selected column is checked against its row labels.
    for label in ("missing", "nobs"):
        assert label in surviving

    # Case 2: the all-NaN column passed on its own.
    df["c"] = np.nan
    column_result = Description(df.c)
    surviving = column_result.frame.dropna()
    assert len(surviving) == 2
def test_extension_types(df):
    """
    Compare the results for a float column and a nullable ``Int64`` column.

    Column ``c`` is built from ``np.arange(100.0)`` and column ``d`` from
    ``np.arange(100)`` with ``pd.Int64Dtype()``.  Every second row (by
    position) is then blanked with ``np.nan`` and ``pd.NA`` respectively,
    and the two result columns must be numerically close.
    """
    every_other_row = df.index[::2]

    df["c"] = pd.Series(np.arange(100.0))
    df["d"] = pd.Series(np.arange(100), dtype=pd.Int64Dtype())
    df.loc[every_other_row, "c"] = np.nan
    df.loc[every_other_row, "d"] = pd.NA

    described = Description(df)
    float_stats = described.frame.c
    nullable_int_stats = described.frame.d
    np.testing.assert_allclose(float_stats, nullable_int_stats)
# Example #3
# 0
def test_std_err(df):
    """
    Check the ``std_err`` row for column ``a`` against ``scipy.stats.sem``.
    """
    reported = Description(df["a"]).frame.loc["std_err"]
    reference = scipy.stats.sem(df["a"])
    np.testing.assert_allclose(reported, reference)
# Example #4
# 0
def test_odd_percentiles(df):
    """
    Check the exact row labels produced for 13 unevenly-valued percentiles.

    The percentiles are ``np.linspace(7.0, 93.0, 13)``; the result index must
    equal the expected statistic labels followed by the percentile labels,
    in this order.
    """
    requested = np.linspace(7.0, 93.0, 13)
    described = Description(df, percentiles=requested)

    expected_statistics = [
        'nobs', 'missing', 'mean', 'std_err', 'upper_ci', 'lower_ci', 'std',
        'iqr', 'iqr_normal', 'mad', 'mad_normal', 'coef_var', 'range', 'max',
        'min', 'skew', 'kurtosis', 'jarque_bera', 'jarque_bera_pval', 'mode',
        'mode_freq', 'median', 'distinct', 'top_1', 'top_2', 'top_3', 'top_4',
        'top_5', 'freq_1', 'freq_2', 'freq_3', 'freq_4', 'freq_5',
    ]
    expected_percentiles = [
        '7.0%', '14.1%', '21.3%', '28.5%', '35.6%', '42.8%', '50.0%',
        '57.1%', '64.3%', '71.5%', '78.6%', '85.8%', '93.0%',
    ]

    actual_labels = described.frame.index.tolist()
    assert_equal(actual_labels, expected_statistics + expected_percentiles)
# Example #5
# 0
def test_description_exceptions():
    """
    Check that each invalid option set makes ``Description`` raise.

    A two-column categorical frame is described with one set of keyword
    arguments at a time; every call must raise ``ValueError``.
    """
    columns = {"a": np.empty(100), "b": pd.Series(np.arange(100) % 10)}
    df = pd.DataFrame(columns, dtype="category")

    # Tried in order; each entry on its own must be rejected.
    rejected_options = (
        {"stats": ["unknown"]},
        {"alpha": -0.3},
        {"percentiles": [0, 100]},
        {"percentiles": [10, 20, 30, 10]},
        {"ntop": -3},
        {"numeric": False, "categorical": False},
    )
    for options in rejected_options:
        with pytest.raises(ValueError):
            Description(df, **options)
def test_description_basic(df):
    """
    Check the result types for a whole frame and for columns ``a`` and ``b``.

    For each input, ``frame``, ``numeric`` and ``categorical`` must be
    DataFrames, ``summary()`` must be a ``SimpleTable`` that renders to
    ``str``, and ``str(res)`` must contain ``"Descriptive"``.
    """

    def check_result(res):
        # Same checks, in the same order, for every kind of input.
        for attribute in ("frame", "numeric", "categorical"):
            assert isinstance(getattr(res, attribute), pd.DataFrame)
        assert isinstance(res.summary(), SimpleTable)
        assert isinstance(res.summary().as_text(), str)
        assert "Descriptive" in str(res)

    check_result(Description(df))
    check_result(Description(df.a))
    check_result(Description(df.b))
def test_describe(df):
    """
    Check that ``describe(df)`` equals the ``frame`` of ``Description(df)``.
    """
    from_function = describe(df)
    from_class = Description(df).frame
    pd.testing.assert_frame_equal(from_function, from_class)
def test_special_stats(df, stat):
    """
    Check that leaving out one statistic also leaves out its result rows.

    ``stat[0]`` is removed from a copy of ``Description.default_statistics``
    and every label in ``stat[1]`` must then be absent from the result index.

    NOTE(review): ``stat`` is presumably supplied by a parametrization or
    fixture that is not visible here -- confirm.
    """
    requested = list(Description.default_statistics)
    requested.remove(stat[0])
    res = Description(df, stats=requested)

    unexpected = [label for label in stat[1] if label in res.frame.index]
    assert not unexpected
def test_use_t(df):
    """
    Check that ``use_t=True`` widens the interval reported for column ``a``.

    Compared with the default call, ``lower_ci`` must be strictly smaller
    and ``upper_ci`` strictly larger.
    """
    default_column = Description(df).frame["a"]
    t_column = Description(df, use_t=True).frame["a"]

    assert default_column["lower_ci"] > t_column["lower_ci"]
    assert default_column["upper_ci"] < t_column["upper_ci"]
def test_large_ntop(df):
    """
    Check that ``ntop=15`` produces a ``top_15`` row in the result.
    """
    row_labels = Description(df, ntop=15).frame.index
    assert "top_15" in row_labels
# Example #11
# 0
def test_odd_percentiles(df):
    """
    Check that unevenly-valued percentiles show up as rows of the result.

    The percentiles are ``np.linspace(7.0, 93.0, 13)``.  Each of them must be
    present in the result index under its expected label; other rows are not
    checked here.
    """
    percentiles = np.linspace(7.0, 93.0, 13)
    res = Description(df, percentiles=percentiles)

    # Expected row labels for the 13 requested percentiles.
    expected_labels = [
        '7.0%', '14.1%', '21.3%', '28.5%', '35.6%', '42.8%', '50.0%',
        '57.1%', '64.3%', '71.5%', '78.6%', '85.8%', '93.0%',
    ]
    row_labels = res.frame.index.tolist()
    # Assert instead of printing, so a wrong index fails the test.
    missing = [label for label in expected_labels if label not in row_labels]
    assert not missing, missing