def test_objects_to_push_deduped(self, mock_dataset_with_manifest):
    """Check ``objects_to_push(remove_duplicates=True)`` on duplicate content.

    Three files are written with identical content plus one file with
    distinct content. After sweeping, the test expects exactly two objects
    to be reported for push, and expects a stray ``.DS_Store`` file placed
    in the push directory not to be counted.
    """
    ds, manifest, _ = mock_dataset_with_manifest
    iom = IOManager(ds, manifest)

    revision = manifest.dataset_revision
    cache_root = manifest.cache_mgr.cache_root
    os.makedirs(os.path.join(cache_root, revision, "other_dir"))

    # (relative path, content) pairs, written in this exact order.
    staged_files = (
        ("test1.txt", "test content dup"),
        ("test2.txt", "test content dup"),
        ("test3.txt", "test content dup"),
        ("other_dir/test4.txt", "test content 4"),
    )
    for relative_path, content in staged_files:
        helper_append_file(cache_root, revision, relative_path, content)
    manifest.sweep_all_changes()

    # Drop an empty .DS_Store file into the objects push dir to make sure
    # it gets skipped when collecting objects.
    ds_store_path = os.path.join(
        manifest.cache_mgr.cache_root, 'objects', '.push', '.DS_Store')
    with open(ds_store_path, 'wt') as handle:
        handle.write("")

    pending = iom.objects_to_push(remove_duplicates=True)

    expected_paths = ("other_dir/test4.txt", "test1.txt")
    assert len(pending) == 2
    for pushed, expected in zip(pending, expected_paths):
        assert pushed.dataset_path == expected

    assert iom.num_objects_to_push(remove_duplicates=True) == 2
def test_objects_to_push(self, mock_dataset_with_manifest):
    """Check ``objects_to_push()`` without duplicate removal.

    Three files are written and swept, then ``test2.txt`` is appended to and
    swept again. The test expects four objects to be reported for push, with
    ``test2.txt`` appearing twice under two different revisions.
    """
    ds, manifest, _ = mock_dataset_with_manifest
    iom = IOManager(ds, manifest)

    revision = manifest.dataset_revision
    cache_root = manifest.cache_mgr.cache_root
    os.makedirs(os.path.join(cache_root, revision, "other_dir"))

    # (relative path, content) pairs, written in this exact order.
    initial_files = (
        ("test1.txt", "test content 1"),
        ("test2.txt", "test content 2"),
        ("other_dir/test4.txt", "test content 4"),
    )
    for relative_path, content in initial_files:
        helper_append_file(cache_root, revision, relative_path, content)
    manifest.sweep_all_changes()

    # Modify a file so there are 2 objects with the same key. The cache root
    # and revision are read again here, after the first sweep, rather than
    # reusing the values captured above.
    helper_append_file(
        manifest.cache_mgr.cache_root,
        iom.manifest.dataset_revision,
        "test2.txt",
        "test content 22",
    )
    manifest.sweep_all_changes()

    pending = iom.objects_to_push()

    expected_paths = (
        "other_dir/test4.txt",
        "test1.txt",
        "test2.txt",
        "test2.txt",
    )
    assert len(pending) == 4
    for pushed, expected in zip(pending, expected_paths):
        assert pushed.dataset_path == expected

    # The two test2.txt entries must come from different revisions.
    first_version, second_version = pending[2], pending[3]
    assert first_version.revision != second_version.revision

    assert iom.num_objects_to_push() == 4