def test_unique_nan(df_factory):
    """Check ``unique`` on a column that contains several NaN values.

    Covers three things: ``dropnan=True`` yields only the finite values, the
    default call yields the finite values once NaNs are filtered out by the
    ``dropnan`` helper, and the ``return_inverse`` indices rebuild the
    non-NaN part of the original column.
    """
    data = [np.nan, 0, 1, np.nan, 2, np.nan]
    df = df_factory(x=data)
    finite_values = {0, 1, 2}

    assert set(df.x.unique(dropnan=True)) == finite_values
    # NOTE(review): `dropnan` is defined elsewhere; `expect=1` presumably
    # asserts how many NaN entries it removes -- confirm against the helper.
    assert dropnan(set(df.x.unique()), expect=1) == finite_values

    # NOTE(review): `small_buffer(df, 2)` presumably forces evaluation in
    # chunks of two rows -- confirm against its definition.
    with small_buffer(df, 2):
        uniques, inverse = df.unique(df.x, return_inverse=True)
        rebuilt = np.array(uniques)[inverse]
        # NaN != NaN, so only the non-NaN positions are compared.
        not_nan = ~np.isnan(rebuilt)
        assert rebuilt[not_nan].tolist() == df.x.to_numpy()[not_nan].tolist()
def test_set_float(repickle, nan, missing, nmaps):
    """Exercise ``ordered_set_float64`` with optional NaN and masked input.

    The input array is ``[3, 2, 1, 0]`` as float64.  When ``nan`` is truthy,
    element 1 is replaced by NaN; when ``missing`` is truthy, a mask flagging
    element 2 is passed to ``update`` and the expected key there is ``None``.
    The test then checks, in order: the ordinals returned by ``update``, the
    keys after ``seal()``, the key array, a copy rebuilt through the
    constructor, and a copy produced by ``repickle``.
    """
    values = np.arange(4, dtype='f8')[::-1].copy()
    keys_expected = [3, 2, 1, 0]
    null_index = 2
    if missing:
        mask = [0, 0, 1, 0]
        keys_expected[null_index] = None
    if nan:
        values[1] = np.nan
        keys_expected[1] = np.nan

    oset = ordered_set_float64(nmaps)
    # The mask is only passed along when missing values are being tested.
    update_args = (values, mask) if missing else (values,)
    ordinals_local, map_index = oset.update(*update_args, return_values=True)
    flat_buffer = np.empty(len(keys_expected), dtype='i8')
    ordinals = oset.flatten_values(ordinals_local, map_index, flat_buffer)
    keys = oset.keys()
    assert dropnan(np.take(keys, ordinals).tolist()) == dropnan(keys_expected)

    # plain object keys
    oset.seal()
    sealed_keys = oset.keys()
    expect_nan = 1 if nan else None
    assert dropnan(set(sealed_keys), expect=expect_nan) == dropnan(set(keys_expected), expect=expect_nan)
    assert oset.map_ordinal(sealed_keys).dtype.name == 'int8'

    # arrays
    array_keys = oset.key_array().tolist()
    if missing:
        array_keys[oset.null_value] = None
    assert dropnan(set(array_keys), expect=expect_nan) == dropnan(set(keys_expected), expect=expect_nan)
    if nan:
        assert np.isnan(array_keys[oset.nan_value])
    array_ordinals = oset.map_ordinal(array_keys).tolist()
    if missing:
        array_ordinals[oset.null_value] = oset.null_value
    assert array_ordinals == list(range(4))

    def check_copy(set_copy):
        """Verify that ``set_copy`` exposes the same keys and ordinals."""
        copy_keys = set_copy.key_array().tolist()
        if missing:
            # The null slot is looked up on the original set here.
            copy_keys[oset.null_value] = None
        assert dropnan(set(copy_keys)) == dropnan(set(keys_expected))
        if nan:
            assert np.isnan(copy_keys[set_copy.nan_value])
        copy_ordinals = set_copy.map_ordinal(copy_keys).tolist()
        if missing:
            copy_ordinals[set_copy.null_value] = set_copy.null_value
        assert copy_ordinals == list(range(4))

    # tests extraction and constructor
    source_keys = oset.key_array()
    check_copy(ordered_set_float64(source_keys, oset.null_value, oset.nan_count, oset.null_count, ''))

    # test pickle
    check_copy(repickle(oset))
def test_unique_f4(df_factory):
    """Check ``unique`` on a float32 column that contains NaN values.

    Both the ``dropnan=True`` call and the default call must produce the
    finite values ``{0, 1, 2}`` once their results are passed through the
    ``dropnan`` helper.
    """
    data = np.array([np.nan, 0, 1, np.nan, 2, np.nan], dtype='f4')
    df = df_factory(x=data)
    finite_values = {0, 1, 2}

    uniques_without_nan = set(df.x.unique(dropnan=True))
    assert dropnan(uniques_without_nan) == finite_values

    # NOTE(review): `dropnan` is defined elsewhere; `expect=1` presumably
    # asserts how many NaN entries it removes -- confirm against the helper.
    uniques_with_nan = set(df.x.unique())
    assert dropnan(uniques_with_nan, expect=1) == finite_values