def test_nested_lists_same_hash2(self):
    """Assert that nested lists with the same items in reversed order hash equally."""
    forward = [1, 2, [3, [4, 5]]]
    backward = [[[5, 4], 3], 2, 1]

    forward_hashes = DeepHash(forward)
    backward_hashes = DeepHash(backward)

    # The top-level hash of each list is looked up by the list's id().
    self.assertEqual(
        forward_hashes[id(forward)],
        backward_hashes[id(backward)],
    )
def test_nested_lists_in_dictionary_same_hash(self):
    """Assert that lists of the same dicts in a different order hash equally."""
    ascending = [{"c": 4}, {"c": 3}]
    descending = [{"c": 3}, {"c": 4}]

    ascending_hashes = DeepHash(ascending)
    descending_hashes = DeepHash(descending)

    self.assertEqual(
        ascending_hashes[id(ascending)],
        descending_hashes[id(descending)],
    )
def test_same_sets_in_lists_same_hash(self):
    """Assert that lists holding an equal set and string hash equally in any order."""
    string_first = ["a", {1, 3, 2}]
    set_first = [{2, 3, 1}, "a"]

    string_first_hashes = DeepHash(string_first)
    set_first_hashes = DeepHash(set_first)

    self.assertEqual(
        string_first_hashes[id(string_first)],
        set_first_hashes[id(set_first)],
    )
def test_get_reserved_keyword(self):
    """Check ``DeepHash._getitem`` for an ordinary key and for the reserved key."""
    hashes = {UNPROCESSED_KEY: 'full item', 'key1': ('item', 'count')}

    ordinary = DeepHash._getitem(hashes, obj='key1')
    assert 'item' == ordinary

    # A reserved key is expected to yield the stored object as a whole
    # rather than one element picked out of a tuple.
    reserved = DeepHash._getitem(hashes, obj=UNPROCESSED_KEY)
    assert 'full item' == reserved
def test_list_of_sets(self):
    """Assert which keys DeepHash records for a list containing two sets."""
    first = {1}
    second = {2}
    container = [first, second]

    hashes = DeepHash(container)

    # Expected keys: the two integers plus get_id() of each set and of the list.
    expected_keys = {1, 2, get_id(first), get_id(second), get_id(container)}
    assert set(hashes.keys()) == expected_keys
def test_nested_lists_same_hash3(self):
    """Assert that dicts of nested lists hash equally when inner order is reversed."""
    original = [{1: [2, 3], 4: [5, [6, 7]]}]
    shuffled = [{4: [[7, 6], 5], 1: [3, 2]}]

    original_hashes = DeepHash(original)
    shuffled_hashes = DeepHash(shuffled)

    self.assertEqual(
        original_hashes[id(original)],
        shuffled_hashes[id(shuffled)],
    )
def test_same_sets_same_hash(self):
    """Assert that two equal sets written in a different literal order hash equally."""
    left = {1, 3, 2}
    right = {2, 3, 1}

    left_hashes = DeepHash(left)
    right_hashes = DeepHash(right)

    self.assertEqual(left_hashes[id(left)], right_hashes[id(right)])
def test_sha1_hash_not_sensitive_to_bytecode_vs_unicode(self):
    """Assert that ``str`` and ``bytes`` with the same content share a SHA-1 hash.

    Both objects are hashed with ``ignore_string_type_changes=True``.
    """
    text = 'hello'
    raw = b'hello'
    options = {
        'ignore_string_type_changes': True,
        'hasher': DeepHash.sha1hex,
    }

    text_hash = DeepHash(text, **options)[text]
    raw_hash = DeepHash(raw, **options)[raw]

    assert text_hash == raw_hash
def test_list1(self):
    """Compare the full SHA-1 hash table of a small mixed list to fixed values."""
    letter = "a"
    sequence = [letter, 10, 20]

    expected = {
        letter: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8',
        get_id(sequence): 'eac61cbd194e5e03c210a3dce67b9bfd6a7b7acb',
        # Integers are expected to hash as sha1 of their 'int:<value>' form.
        10: DeepHash.sha1hex('int:10'),
        20: DeepHash.sha1hex('int:20'),
    }

    actual = DeepHash(
        sequence,
        ignore_string_type_changes=True,
        hasher=DeepHash.sha1hex,
    )
    assert expected == actual
def test_datetime_truncate(self):
    """Assert that ``truncate_datetime`` makes close-enough datetimes hash equally.

    ``a`` and ``b`` share the same minute; ``a`` and ``c`` share the same second.
    """
    a = datetime.datetime(2020, 5, 17, 22, 15, 34, 913070)
    b = datetime.datetime(2020, 5, 17, 22, 15, 39, 296583)
    c = datetime.datetime(2020, 5, 17, 22, 15, 34, 500000)

    # Minute precision: seconds and microseconds must not matter.
    a_by_minute = DeepHash(a, truncate_datetime='minute')
    b_by_minute = DeepHash(b, truncate_datetime='minute')
    assert a_by_minute[a] == b_by_minute[b]

    # Second precision: microseconds must not matter.
    a_by_second = DeepHash(a, truncate_datetime='second')
    c_by_second = DeepHash(c, truncate_datetime='second')
    assert a_by_second[a] == c_by_second[c]
def test_hash_str_fail_if_mutable(self):
    """Guard against hash state leaking between DeepHash instances (SHA-1).

    If DeepHash shared a mutable hashes container across initialisations,
    the entry for the first object would still be present in the result
    computed for the second object.
    """
    first = "a"
    first_id = id(first)
    expected = {first_id: '48591f1d794734cabf55f96f5a5a72c084f13ac0'}

    first_result = DeepHash(first, hasher=DeepHash.sha1hex)
    self.assertEqual(first_result, expected)

    second = "b"
    second_result = DeepHash(second, hasher=DeepHash.sha1hex)
    # The earlier object's id must not show up in the new result.
    self.assertTrue(first_id not in second_result)
def test_hash_str_fail_if_mutable(self):
    """Guard against hash state leaking between DeepHash instances (default hasher).

    If DeepHash shared a mutable hashes container across initialisations,
    the entry for the first object would still be present in the result
    computed for the second object.
    """
    first = "a"
    first_id = id(first)
    # With no hasher given, the expected value is the built-in hash().
    expected = {first_id: hash(first)}

    first_result = DeepHash(first)
    self.assertEqual(first_result, expected)

    second = "b"
    second_result = DeepHash(second)
    # The earlier object's id must not show up in the new result.
    self.assertTrue(first_id not in second_result)
def test_prep_str_sha1_fail_if_mutable(self):
    """Guard against hash state leaking between DeepHash instances.

    The test fails when DeepHash reuses a mutable hashes container, because
    the result for the second string would then still contain the first one.
    """
    first = "a"
    expected = {first: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8'}

    first_result = DeepHash(
        first,
        ignore_string_type_changes=True,
        hasher=DeepHash.sha1hex,
    )
    assert expected == first_result

    second = "b"
    second_result = DeepHash(
        second,
        ignore_string_type_changes=True,
        hasher=DeepHash.sha1hex,
    )
    assert first not in second_result
def test_dict1(self):
    """Compare the full SHA-1 hash table of a small mixed dict to fixed values."""
    letter = "a"
    name = "key1"
    mapping = {name: letter, 1: 10, 2: 20}

    expected = {
        # Integer keys and values are expected to hash as sha1('int:<value>').
        1: DeepHash.sha1hex('int:1'),
        10: DeepHash.sha1hex('int:10'),
        2: DeepHash.sha1hex('int:2'),
        20: DeepHash.sha1hex('int:20'),
        name: '1073ab6cda4b991cd29f9e83a307f34004ae9327',
        letter: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8',
        get_id(mapping): '11e23f096df81b1ccab0c309cdf8b4ba5a0a6895',
    }

    actual = DeepHash(
        mapping,
        ignore_string_type_changes=True,
        hasher=DeepHash.sha1hex,
    )
    assert expected == actual
def get_hash_of_model_and_data(model, data_description):
    """Return a DeepHash value identifying a model together with its data.

    The hashed object is a list of *data_description*, the class name of
    *model* and the result of ``model.get_params()``.

    Args:
        model: Object exposing ``get_params()``.
        data_description: Description of the data; hashed as given.

    Returns:
        The DeepHash entry for the combined description list.
    """
    model_name = type(model).__name__
    model_params = model.get_params()
    hashable_description = [data_description, model_name, model_params]

    all_hashes = DeepHash(hashable_description)
    return all_hashes[hashable_description]
def test_deephash_items(self):
    """Assert that ``DeepHash.items()`` yields the single (object, hash) pair."""
    text = "a"
    pairs = list(DeepHash(text).items())

    expected_pairs = [
        ('a', '980410da9522db17c3ab8743541f192a5ab27772a6154dbc7795ee909e653a5c'),
    ]
    assert expected_pairs == pairs
def test_dict_count(self, obj, expected_count):
    """Check the value DeepHash stores at ``extract_index=1`` for *obj*.

    That value is presumably the number of objects that went into building
    the hash of *obj*; *obj* and *expected_count* are supplied by the caller.
    """
    hashes = DeepHash(obj)
    actual_count = hashes.get(obj, extract_index=1)
    assert expected_count == actual_count
def deephash(record):
    """
    Return the :class:`DeepHash` value of the given manifest *record*.

    The entry stored under ``PROVENANCE_KEY`` at the root of the record is
    excluded, so provenance information does not influence the result.
    """
    provenance_path = f"root['{PROVENANCE_KEY}']"
    hashes = DeepHash(record, exclude_paths={provenance_path})
    return hashes[record]
def test_str_sha256(self):
    """Assert the SHA-256 hash table produced for the string ``"a"``."""
    text = "a"
    expected = {
        text: 'ca978112ca1bbdcafac231b39a23dc4da786eff8147c4e72b9807785afee48bb',
    }

    actual = DeepHash(
        text,
        ignore_string_type_changes=True,
        hasher=DeepHash.sha256hex,
    )
    assert expected == actual
def test_repetition_by_default_does_not_effect(self):
    """Assert that, with default options, a repeated item does not change the hash.

    ``[3, 4]`` and ``[4, 3, 3]`` must hash equally, and so must the outer
    lists that contain them.
    """
    inner_a = [3, 4]
    inner_a_id = id(inner_a)
    outer_a = [1, 2, inner_a]
    outer_a_id = id(outer_a)

    inner_b = [4, 3, 3]
    inner_b_id = id(inner_b)
    outer_b = [inner_b, 2, 1]
    outer_b_id = id(outer_b)

    hashes_a = DeepHash(outer_a)
    hashes_b = DeepHash(outer_b)

    self.assertEqual(hashes_a[inner_a_id], hashes_b[inner_b_id])
    self.assertEqual(hashes_a[outer_a_id], hashes_b[outer_b_id])
def test_hash_str(self):
    """Assert the SHA-1 hash table produced for the string ``"a"``, keyed by id."""
    text = "a"
    expected = {
        id(text): 'str:48591f1d794734cabf55f96f5a5a72c084f13ac0',
    }

    actual = DeepHash(text, hasher=DeepHash.sha1hex)
    self.assertEqual(actual, expected)
def cli(
    *command,
    print_command: bool = True,
    code_block: bool = True,
    max_height: Optional[int] = None,
):
    """Run *command* and return its standard output as text, cached on disk.

    The cache file lives under ``CACHE_DIR`` and is named after the
    ``DeepHash`` of the command tuple. When that file already exists the
    command is not executed and the file's content is used instead.

    Args:
        *command: Program and arguments, handed to
            ``subprocess.check_output`` together with ``os_env_vars``.
        print_command: Prefix the output with a ``> <command>`` line.
        code_block: Wrap the output in a fenced ``console`` code block.
        max_height: When a positive number, wrap the output in a ``<div>``
            limited to that many pixels with scrolling overflow.

    Returns:
        The captured (or cached) output with the requested decorations.

    Raises:
        subprocess.CalledProcessError: The command exited with a non-zero
            status; its stdout/stderr attributes are printed first.
    """
    hash_str = DeepHash(command)[command]
    cache_file: Path = Path(CACHE_DIR) / hash_str

    if cache_file.is_file():
        stdout = cache_file.read_text()
    else:
        # Only the subprocess call can raise CalledProcessError, so it is the
        # only statement guarded by the try block.
        try:
            result = subprocess.check_output(command, env=os_env_vars)
        except subprocess.CalledProcessError as e:
            print("stdout:")
            print(e.stdout)
            print("stderr:")
            print(e.stderr)
            # Bare raise keeps the original traceback untouched.
            raise
        stdout = result.decode()
        # Create the cache directory on demand so that a fresh checkout does
        # not lose the output of a command that has already run.
        cache_file.parent.mkdir(parents=True, exist_ok=True)
        cache_file.write_text(stdout)

    if print_command:
        stdout = f"> {' '.join(command)}\n{stdout}"
    if code_block:
        stdout = "``` console\n" + stdout + "\n```\n"
    if max_height is not None and max_height > 0:
        stdout = f"<div style='max-height:{max_height}px;overflow:auto'>\n{stdout}\n</div>"

    return stdout
def test_prep_str_murmur3_128bit(self):
    """Assert the 128-bit murmur3 hash table produced for the string ``"a"``."""
    text = "a"
    expected = {text: 119173504597196970070553896747624927922}

    actual = DeepHash(
        text,
        ignore_string_type_changes=True,
        hasher=DeepHash.murmur3_128bit,
    )
    assert expected == actual
def test_prep_str_murmur3_64bit(self):
    """Assert the 64-bit murmur3 hash table produced for the string ``"a"``."""
    text = "a"
    expected = {text: 424475663186367154}

    actual = DeepHash(
        text,
        ignore_string_type_changes=True,
        hasher=DeepHash.murmur3_64bit,
    )
    assert expected == actual
def test_bytecode(self):
    """Assert the SHA-1 hash table produced for the bytes object ``b"a"``."""
    raw = b"a"
    expected = {raw: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8'}

    actual = DeepHash(
        raw,
        ignore_string_type_changes=True,
        hasher=DeepHash.sha1hex,
    )
    assert expected == actual
def changed_fields(orig_dict, new_dict, skip=None):
    """Tell whether two dicts differ once the *skip* keys are removed.

    Both inputs are deep-copied first, so the caller's dicts are never
    modified. A falsy input (``None`` or empty) is treated as an empty dict.

    Args:
        orig_dict: The earlier state.
        new_dict: The later state.
        skip: Optional iterable of top-level keys to drop from both sides
            before comparing.

    Returns:
        ``True`` when the DeepHash values of the two filtered copies differ.
    """
    ignored = skip if skip else []
    before = copy.deepcopy(orig_dict) if orig_dict else {}
    after = copy.deepcopy(new_dict) if new_dict else {}

    # Fields that are missing from new_dict are NOT stripped from the
    # original copy; they still take part in the hash.
    for key in ignored:
        before.pop(key, None)
        after.pop(key, None)

    return DeepHash(before)[before] != DeepHash(after)[after]
def test_list(self):
    """Assert the default hash table of a list holding a string and two ints."""
    letter = "a"
    sequence = [letter, 10, 20]

    letter_hash = hash(letter)
    expected = {
        id(letter): letter_hash,
        # The list entry is a readable summary string, not a digest.
        id(sequence): 'list:int:10,int:20,str:%s' % letter_hash,
    }

    actual = DeepHash(sequence)
    self.assertEqual(actual, expected)
def test_tuple(self):
    """Assert the default hash table of a tuple holding a string and two ints."""
    letter = "a"
    triple = (letter, 10, 20)

    letter_hash = hash(letter)
    expected = {
        id(letter): letter_hash,
        # The tuple entry is a readable summary string, not a digest.
        id(triple): 'tuple:int:10,int:20,str:%s' % letter_hash,
    }

    actual = DeepHash(triple)
    self.assertEqual(actual, expected)
async def create_version_hash(self, **vars: Any) -> str:
    """Return a hash identifying this package for the given variables.

    The variables are first expanded with ``self.calculate_full_vars``; the
    hash then covers those full variables, ``self.name`` and the id of
    ``self.bring_index``.
    """
    full_vars = await self.calculate_full_vars(**vars)

    id_dict: Dict[str, Any] = {
        "vars": full_vars,
        "pkg_name": self.name,
        "pkg_index": self.bring_index.id,
    }

    return DeepHash(id_dict)[id_dict]
def test_setting_repetition_off_unequal_hash(self):
    """Assert that ``ignore_repetition=False`` makes a repeated item change the hash.

    ``[3, 4]`` and ``[4, 3, 3]`` must then hash differently, as must the
    outer lists containing them.
    """
    inner_a = [3, 4]
    inner_a_id = id(inner_a)
    outer_a = [1, 2, inner_a]
    outer_a_id = id(outer_a)

    inner_b = [4, 3, 3]
    inner_b_id = id(inner_b)
    outer_b = [inner_b, 2, 1]
    outer_b_id = id(outer_b)

    hashes_a = DeepHash(outer_a, ignore_repetition=False)
    hashes_b = DeepHash(outer_b, ignore_repetition=False)

    self.assertNotEqual(hashes_a[inner_a_id], hashes_b[inner_b_id])
    self.assertNotEqual(hashes_a[outer_a_id], hashes_b[outer_b_id])

    # Rewriting the '3|1' fragment as '3|2' in the first inner hash must
    # reproduce the second inner hash exactly.
    self.assertEqual(
        hashes_a[inner_a_id].replace('3|1', '3|2'),
        hashes_b[inner_b_id],
    )