예제 #1
0
def test_unique_nan(df_factory):
    """Check `unique` on a column that contains several NaN entries.

    `df_factory` is called with a keyword column `x`; it is presumably a
    pytest fixture that builds a dataframe -- confirm against conftest.
    `dropnan` and `small_buffer` are helpers defined outside this block.
    """
    data = [np.nan, 0, 1, np.nan, 2, np.nan]
    df = df_factory(x=data)
    expected = {0, 1, 2}

    # With dropnan=True the result is compared directly, no filtering.
    without_nan = df.x.unique(dropnan=True)
    assert set(without_nan) == expected

    # Without it, the result goes through the dropnan helper with expect=1
    # (presumably "exactly one NaN was removed" -- verify in the helper).
    with_nan = df.x.unique()
    assert dropnan(set(with_nan), expect=1) == expected

    with small_buffer(df, 2):
        uniques, inverse = df.unique(df.x, return_inverse=True)
        # Indexing the unique values with the inverse indices should rebuild
        # the original column; NaN positions are excluded from the comparison
        # because NaN never compares equal to itself.
        rebuilt = np.array(uniques)[inverse]
        nan_mask = np.isnan(rebuilt)
        original = df.x.to_numpy()
        assert rebuilt[~nan_mask].tolist() == original[~nan_mask].tolist()
예제 #2
0
def test_set_float(repickle, nan, missing, nmaps):
    """Exercise `ordered_set_float64`: update, seal, key extraction, copy, pickle.

    Parameters (presumably pytest fixtures / parametrizations -- confirm):
        repickle: callable applied to the set; its result is checked like a copy.
        nan: when truthy, slot 1 of the input array is replaced by NaN.
        missing: when truthy, a mask flagging slot 2 is passed to `update`.
        nmaps: forwarded unchanged to the `ordered_set_float64` constructor.

    `dropnan` and `ordered_set_float64` are defined outside this block.
    """
    values = np.arange(4, dtype='f8')[::-1].copy()
    keys_expected = [3, 2, 1, 0]
    null_slot = 2
    identity = [0, 1, 2, 3]

    update_args = [values]
    if missing:
        update_args.append([0, 0, 1, 0])
        keys_expected[null_slot] = None
    if nan:
        values[1] = np.nan
        keys_expected[1] = np.nan

    oset = ordered_set_float64(nmaps)
    local_ordinals, map_index = oset.update(*update_args, return_values=True)
    out = np.empty(len(keys_expected), dtype='i8')
    flat_ordinals = oset.flatten_values(local_ordinals, map_index, out)
    unsealed_keys = oset.keys()
    taken = np.take(unsealed_keys, flat_ordinals).tolist()
    assert dropnan(taken) == dropnan(keys_expected)

    # plain object keys
    oset.seal()
    sealed_keys = oset.keys()
    expect_nan = 1 if nan else None
    found = dropnan(set(sealed_keys), expect=expect_nan)
    wanted = dropnan(set(keys_expected), expect=expect_nan)
    assert found == wanted
    assert oset.map_ordinal(sealed_keys).dtype.name == 'int8'

    # arrays
    array_keys = oset.key_array().tolist()
    if missing:
        array_keys[oset.null_value] = None
    found = dropnan(set(array_keys), expect=expect_nan)
    wanted = dropnan(set(keys_expected), expect=expect_nan)
    assert found == wanted
    if nan:
        assert np.isnan(array_keys[oset.nan_value])
    array_ordinals = oset.map_ordinal(array_keys).tolist()
    if missing:
        array_ordinals[oset.null_value] = oset.null_value
    assert array_ordinals == identity

    def check_copy(candidate):
        # Shared checks for a set derived from `oset`: same keys (ignoring
        # NaN) and identity ordinals. The null slot in the key list is taken
        # from the original set, the one in the ordinals from the candidate.
        copied_keys = candidate.key_array().tolist()
        if missing:
            copied_keys[oset.null_value] = None
        assert dropnan(set(copied_keys)) == dropnan(set(keys_expected))
        if nan:
            assert np.isnan(copied_keys[candidate.nan_value])
        copied_ordinals = candidate.map_ordinal(copied_keys).tolist()
        if missing:
            copied_ordinals[candidate.null_value] = candidate.null_value
        assert copied_ordinals == identity

    # tests extraction and constructor
    extracted = oset.key_array()
    rebuilt = ordered_set_float64(extracted, oset.null_value, oset.nan_count, oset.null_count, '')
    check_copy(rebuilt)

    # test pickle
    check_copy(repickle(oset))
예제 #3
0
def test_unique_f4(df_factory):
    """Check `unique` on a float32 column that contains several NaN entries.

    `df_factory` is called with a keyword column `x`; it is presumably a
    pytest fixture that builds a dataframe -- confirm against conftest.
    `dropnan` is a helper defined outside this block.
    """
    data = np.array([np.nan, 0, 1, np.nan, 2, np.nan], dtype='f4')
    df = df_factory(x=data)
    expected = {0, 1, 2}

    nan_free = set(df.x.unique(dropnan=True))
    assert dropnan(nan_free) == expected

    # expect=1 is forwarded to the helper (presumably "exactly one NaN was
    # removed" -- verify in the helper's definition).
    everything = set(df.x.unique())
    assert dropnan(everything, expect=1) == expected