示例#1
0
def test_string_join_non_key(str_data, num_cols, how, how_raise):
    """Merge frames carrying string columns on the non-string key ``"a"``.

    Two frames are built in lockstep: one through ``pd`` (its merge result is
    the expected value) and one through ``DataFrame``/``Series`` (its merge
    result is the value under test). Each holds ``num_cols`` string columns
    named ``0 .. num_cols - 1`` plus the key column ``"a"``, and each is merged
    with a copy of itself using the ``how`` join type.

    The merges and the comparison run inside the context manager returned by
    ``raise_builder([how_raise], NotImplementedError)``.
    NOTE(review): presumably this expects ``NotImplementedError`` when
    ``how_raise`` is truthy -- confirm against ``raise_builder``.
    """
    # Key values, truncated so the key column is no longer than the string data.
    key_values = [1, 2, 3, 4, 5][:len(str_data)]

    ref_frame = pd.DataFrame()
    sut_frame = DataFrame()
    for col in range(num_cols):
        ref_frame[col] = pd.Series(str_data, dtype="str")
        sut_frame[col] = Series(str_data, dtype="str")
    ref_frame["a"] = key_values
    sut_frame["a"] = key_values

    ref_other, sut_other = ref_frame.copy(), sut_frame.copy()

    with raise_builder([how_raise], NotImplementedError):
        expect = ref_frame.merge(ref_other, on=["a"], how=how)
        got = sut_frame.merge(sut_other, on=["a"], how=how)

        # For two empty results, normalise the expected index and align the
        # column order of the result under test before comparing.
        if not (len(expect) or len(got)):
            expect = expect.reset_index(drop=True)
            got = got[expect.columns]

        assert_eq(expect, got)
示例#2
0
def test_onehot_get_dummies_simple():
    """Check ``get_dummies`` on a single integer column ``x`` holding 0..9.

    Verifies two things:

    * the input frame is left unmodified by the call, and
    * the encoded columns after the first one form an identity matrix.
    """
    df = DataFrame({'x': np.arange(10)})
    original = df.copy()
    encoded = get_dummies(df, prefix='test')

    # The original df should be unchanged. ``==`` between frames is
    # elementwise (the final assertion relies on that), so reduce it
    # explicitly; asserting on the comparison frame itself would not check
    # the individual values.
    assert (df == original).all().all()

    # Skip the first encoded column.
    # NOTE(review): presumably that is the passed-through source column 'x'
    # -- confirm against get_dummies.
    cols = list(encoded.columns)[1:]
    # Pair each remaining column name with one row of the identity matrix.
    expected = DataFrame(dict(zip(cols, np.eye(len(cols)))))
    assert (encoded.loc[:, cols] == expected).all().all()
示例#3
0
def test_dataframe_empty_concat():
    """Concatenating two zero-row frames keeps both columns and no rows."""
    empty = DataFrame()
    for column_name in ('a', 'b'):
        empty[column_name] = []

    duplicate = empty.copy()
    combined = gd.concat([empty, duplicate])

    assert len(combined) == 0
    assert len(combined.columns) == 2
示例#4
0
def test_dataframe_copy_shallow():
    """Adding a column to a ``copy()`` must not show up in the source frame.

    The source frame's ``to_string()`` output is compared token by token with
    a rendering that contains only column ``a``.
    """
    expect = '''
     a
0    1
1    2
2    3
'''
    source = DataFrame()
    source['a'] = [1, 2, 3]

    # Mutate only the copy; the source frame is what gets rendered below.
    duplicate = source.copy()
    duplicate['b'] = [4, 2, 3]

    got = source.to_string()
    print(got)

    # Compare whitespace-separated tokens so spacing differences are ignored.
    assert got.split() == expect.split()
示例#5
0
def test_onehot_get_dummies_multicol(n_cols):
    """Check ``get_dummies`` on ``n_cols`` identical integer columns.

    The input frame has columns named ``'a'``, ``'b'``, ... (the first
    ``n_cols`` lowercase letters), each holding ``0 .. n_categories - 1``.
    Verifies that the input frame is left unmodified and that the encoded
    columns after the first ``n_cols`` match ``n_cols`` stacked identity
    matrices.

    Args:
        n_cols: number of input columns to build and encode.
    """
    from string import ascii_lowercase
    n_categories = 5
    df = DataFrame(
        dict(
            zip(ascii_lowercase,
                (np.arange(n_categories) for _ in range(n_cols)))))
    original = df.copy()
    encoded = get_dummies(df, prefix='test')

    # The original df should be unchanged. ``==`` between frames is
    # elementwise (the final assertion relies on that), so reduce it
    # explicitly; asserting on the comparison frame itself would not check
    # the individual values.
    assert (df == original).all().all()

    # Skip the first n_cols encoded columns.
    # NOTE(review): presumably those are the passed-through source columns
    # -- confirm against get_dummies.
    cols = list(encoded.columns)[n_cols:]
    # One identity block per input column, stacked row-wise; each row of the
    # stacked array becomes the expected values of one encoded column.
    stacked_identity = np.concatenate(
        [np.eye(n_categories) for _ in range(n_cols)])
    expected = DataFrame(dict(zip(cols, stacked_identity)))
    assert (encoded.loc[:, cols] == expected).all().all()