def test_no_reallocation_StringHashTable(N):
    """
    Check that a StringHashTable sized for N keys is not resized on insert.

    ``N`` is the number of distinct string keys ("0" .. str(N - 1)) that are
    inserted. The bucket count of a table constructed with ``N`` as size hint
    must be unchanged after inserting the keys, and must equal the bucket
    count a default-constructed table ends up with for the same keys.
    """
    # np.str_ rather than np.compat.unicode: np.compat is a Python 2
    # compatibility shim that NumPy has deprecated, and ``unicode`` there was
    # only an alias for ``str``, so the resulting keys are the same.
    keys = np.arange(N).astype(np.str_).astype(np.object_)
    preallocated_table = ht.StringHashTable(N)
    n_buckets_start = preallocated_table.get_state()["n_buckets"]
    preallocated_table.map_locations(keys)
    n_buckets_end = preallocated_table.get_state()["n_buckets"]
    # original number of buckets was enough:
    assert n_buckets_start == n_buckets_end
    # check with clean table (not too much preallocated)
    clean_table = ht.StringHashTable()
    clean_table.map_locations(keys)
    assert n_buckets_start == clean_table.get_state()["n_buckets"]
def unique1d(values):
    """
    Hash table-based unique.

    Picks a hash table class from ``values.dtype`` (checked in the order
    floating, datetime64, timedelta64, signed integer, unsigned integer,
    anything else) and returns whatever that table's ``unique`` produces:

    * floating input goes through ``Float64HashTable`` and the result is
      wrapped in a float64 ndarray;
    * datetime64 / timedelta64 input goes through ``Int64HashTable`` and the
      result is viewed as ``'M8[ns]'`` / ``'m8[ns]'`` respectively;
    * signed / unsigned integers use ``Int64HashTable`` / ``UInt64HashTable``;
    * everything else uses ``StringHashTable`` when ``lib.infer_dtype``
      reports ``'string'``, otherwise ``PyObjectHashTable``.
    """
    dtype = values.dtype

    if np.issubdtype(dtype, np.floating):
        float_table = htable.Float64HashTable(len(values))
        float_uniques = float_table.unique(_ensure_float64(values))
        return np.array(float_uniques, dtype=np.float64)

    # Time-like values are fed to the int64 table and viewed back afterwards.
    time_like_views = ((np.datetime64, 'M8[ns]'), (np.timedelta64, 'm8[ns]'))
    for time_kind, view_code in time_like_views:
        if np.issubdtype(dtype, time_kind):
            time_table = htable.Int64HashTable(len(values))
            return time_table.unique(_ensure_int64(values)).view(view_code)

    if np.issubdtype(dtype, np.signedinteger):
        signed_table = htable.Int64HashTable(len(values))
        return signed_table.unique(_ensure_int64(values))

    if np.issubdtype(dtype, np.unsignedinteger):
        unsigned_table = htable.UInt64HashTable(len(values))
        return unsigned_table.unique(_ensure_uint64(values))

    # it's cheaper to use a String Hash Table than Object
    holds_strings = lib.infer_dtype(values) == 'string'
    table_cls = (
        htable.StringHashTable if holds_strings else htable.PyObjectHashTable
    )
    object_table = table_cls(len(values))
    return object_table.unique(_ensure_object(values))
def test_tracemalloc_for_empty_StringHashTable():
    """
    An empty StringHashTable reports exactly the khash memory that is traced.

    While tracing is active, the allocation figure from
    ``get_allocated_khash_memory`` must match ``table.sizeof()``, and must be
    back at zero once the table has been deleted.
    """
    with activated_tracemalloc():
        empty_table = ht.StringHashTable()
        # traced allocation is read first, then compared to the table's own
        # size report
        assert get_allocated_khash_memory() == empty_table.sizeof()
        # after dropping the table no khash memory may remain traced
        del empty_table
        assert get_allocated_khash_memory() == 0
def test_tracemalloc_works_for_StringHashTable():
    """
    A populated StringHashTable reports exactly the khash memory traced.

    Inserts 1000 distinct string keys ("0" .. "999") while tracing is active,
    checks that the traced khash allocation equals ``table.sizeof()``, and
    that nothing remains traced after the table is deleted.
    """
    N = 1000
    # np.str_ rather than np.compat.unicode: np.compat is a Python 2
    # compatibility shim that NumPy has deprecated, and ``unicode`` there was
    # only an alias for ``str``, so the resulting keys are the same.
    keys = np.arange(N).astype(np.str_).astype(np.object_)
    with activated_tracemalloc():
        table = ht.StringHashTable()
        table.map_locations(keys)
        used = get_allocated_khash_memory()
        my_size = table.sizeof()
        assert used == my_size
        # after dropping the table no khash memory may remain traced
        del table
        assert get_allocated_khash_memory() == 0
def test_string_hashtable_set_item_signature(self):
    """
    StringHashTable item access accepts str keys and rejects other types.

    A string key round-trips through ``set_item`` / ``get_item``; a
    non-string key must raise ``TypeError`` from both methods.
    """
    # GH#30419 fix typing in StringHashTable.set_item to prevent segfault
    table = ht.StringHashTable()

    # happy path: a str key can be stored and read back
    table.set_item("key", 1)
    stored = table.get_item("key")
    assert stored == 1

    not_a_string = 4

    # key arg typed as string, not object
    with pytest.raises(TypeError, match="'key' has incorrect type"):
        table.set_item(not_a_string, 6)

    # the lookup rejects the non-string argument as well
    with pytest.raises(TypeError, match="'val' has incorrect type"):
        table.get_item(not_a_string)