import pickle
import random
from collections import Counter
from pathlib import Path
from unittest.mock import patch

import matplotlib.pyplot as plt
import pytest

from prehashed.prehashed import PrehashedDict

# rand_string, rand_len, find, getsize, and the file_cleanup fixture come from
# the project's shared test utilities (not shown in this file); hedged
# stand-ins for a few of them are sketched further down.


def test_init_calls_process_args():
    d = PrehashedDict({rand_string(): rand_string()})
    with patch('prehashed.prehashed.PrehashedDict._process_args') as mock:
        # Exercise the constructor with a dict, a tuple of pairs, an empty
        # tuple, and another PrehashedDict.
        arg = random.choice([
            {rand_string(): rand_string()},
            ((rand_string(), rand_string()),),
            (),
            d,
        ])
        kwargs = {rand_string(): rand_string() for _ in rand_len(0, 2)}
        d = PrehashedDict(arg, **kwargs)
        mock.assert_called_once_with(arg, **kwargs)
def test_copy_same_keys(d):
    keys = [rand_string() for _ in rand_len()]
    values = [rand_string() for _ in keys]
    d = PrehashedDict(zip(keys, values))
    g = d.copy()
    for k in keys:
        assert k in g
def test_copy_same_values(d):
    keys = [rand_string() for _ in rand_len()]
    values = [rand_string() for _ in keys]
    d = PrehashedDict(zip(keys, values))
    g = d.copy()
    for k, v in zip(keys, values):
        assert g[k] == v
def graph():
    """Plot how the memory used by PrehashedDict keys grows versus plain dict keys."""
    data_loc = Path('dm_stories_tokenized')
    d = PrehashedDict()
    dic = {}
    mine = []
    theirs = []
    for file_name in data_loc.glob("*.story"):
        with open(file_name, 'r') as f:
            data = f.read()
        d[data] = 1
        dic[data] = 1
        mine.append(getsize(d.keys()))
        theirs.append(getsize(dic.keys()))
    x = range(len(mine))
    plt.plot(x, mine)
    plt.plot(x, theirs)
    plt.show()
def test_copy_is_shallow(d):
    # copy() is shallow, so mutable values are shared between original and copy.
    keys = [rand_string() for _ in rand_len()]
    values = [rand_string() for _ in keys]
    d = PrehashedDict(zip(keys, values))
    d['12'] = [1, 2, 3]
    g = d.copy()
    g['12'].append(4)
    assert d['12'] == [1, 2, 3, 4]
def test_copy_delete_one_old(d):
    keys = [rand_string() for _ in rand_len()]
    values = [rand_string() for _ in keys]
    d = PrehashedDict(zip(keys, values))
    g = d.copy()
    del_key = random.choice(keys)
    del d[del_key]
    assert del_key in g
    assert del_key not in d
def main():
    """Compare key sizes: plain str keys vs. PrehashedDict keys (default sha1 and builtin hash)."""
    data_loc = Path('dm_stories_tokenized')
    pre_d = PrehashedDict()
    pre_d2 = PrehashedDict(hash_fn=hash)
    d = {}
    for file_name in data_loc.glob("*.story"):
        with open(file_name, "r") as f:
            data = f.read()
        counts = Counter(data.split())
        pre_d[data] = counts
        pre_d2[data] = counts
        d[data] = counts
    print(f"Len of normal: {len(d)}, sha1: {len(pre_d)}, hash: {len(pre_d2)}")
    print(f"Size of keys with strings: {getsize(d.keys())}")
    print(f"Size of my prehashed keys: {getsize(pre_d.keys())}")
    print(f"Size of my prehashed keys (builtin hash): {getsize(pre_d2.keys())}")
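# graph() and main() above measure the footprint of dict keys with a getsize
# helper from the shared utilities. A minimal stand-in (an assumption, not the
# project's actual implementation) just sums sys.getsizeof over the container
# and its elements:
import sys


def getsize(objs):
    """Hypothetical stand-in: rough in-memory size of a container plus its elements (shallow)."""
    return sys.getsizeof(objs) + sum(sys.getsizeof(o) for o in objs)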
def test_copy_change_one_old(d):
    keys = [rand_string() for _ in rand_len()]
    values = [rand_string() for _ in keys]
    d = PrehashedDict(zip(keys, values))
    g = d.copy()
    new_val = rand_string()
    changed_key = random.choice(keys)
    d[changed_key] = new_val
    assert d[changed_key] == new_val
    assert g[changed_key] != new_val
def test_initial_add():
    # initial_add stores the new value under a fresh, longer key derived from
    # the given key while leaving the existing entry untouched.
    key = rand_string()
    value = rand_string()
    new_value = rand_string()
    d = PrehashedDict(((key, value),))
    new_key = d.initial_add(key, new_value)
    assert d[new_key] == new_value
    assert d[key] == value
    assert key != new_key
    assert len(new_key) > len(key)
def test_copy_add_one_old(d):
    keys = [rand_string() for _ in rand_len()]
    values = [rand_string() for _ in keys]
    d = PrehashedDict(zip(keys, values))
    g = d.copy()
    new_key = rand_string()
    new_val = rand_string()
    d[new_key] = new_val
    assert d[new_key] == new_val
    assert new_key in d
    assert new_key not in g
def test_pickle(file_cleanup):
    # A PrehashedDict should survive a pickle round trip.
    file_name = file_cleanup
    keys = [rand_string() for _ in rand_len()]
    vals = [rand_string() for _ in keys]
    d = PrehashedDict({k: v for k, v in zip(keys, vals)})
    with open(file_name, 'wb') as f:
        pickle.dump(d, f)
    with open(file_name, 'rb') as f:
        d1 = pickle.load(f)
    for k in keys:
        assert k in d
        assert k in d1
    for k, v in zip(keys, vals):
        assert d1[k] == v
    assert d == d1
def test_process_args_prehashed_kwargs(d):
    # _process_args should yield the items from a PrehashedDict argument as
    # well as the items passed as keyword arguments.
    d1 = PrehashedDict({rand_string(): rand_string() for _ in rand_len()})
    kwargs = {rand_string(): rand_string() for _ in rand_len()}
    res = list(d._process_args(d1, **kwargs))
    assert find(d1.items(), res, hash_me=False)
    assert find(kwargs.items(), res)
def test_from_keys_value(d):
    keys = [rand_string() for _ in rand_len()]
    value = rand_string()
    d = PrehashedDict.fromkeys(keys, value)
    for k in keys:
        assert d[k] == value
def test_from_keys():
    keys = [rand_string() for _ in rand_len()]
    d = PrehashedDict.fromkeys(keys)
    for k in keys:
        assert d[k] is None
@pytest.fixture
def d():
    # Empty-dict fixture; the pytest.fixture decorator is assumed here because
    # the tests above request `d` as a fixture argument.
    return PrehashedDict()
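# The tests above also rely on rand_string and rand_len from the shared test
# helpers. Their real implementations are not shown in this file; the
# definitions below are hedged stand-ins inferred only from how they are
# called (rand_string() -> a random string, rand_len(low, high) -> an
# iterable of random length).
import string


def rand_string(length=10):
    """Hypothetical stand-in: a random lowercase ASCII string."""
    return ''.join(random.choice(string.ascii_lowercase) for _ in range(length))


def rand_len(low=1, high=10):
    """Hypothetical stand-in: a range of random length, used to size test data."""
    return range(random.randint(low, high))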