def _push_dataset_objects(self, dataset: Dataset, logged_in_username: str, feedback_callback: Callable, access_token, id_token) -> None:
    """Push the dataset's objects to its backend.

    Sets the backend credentials, builds the manifest/IO layers, uploads
    the objects (progress is reported through `feedback_callback`), and
    links the manifest revision afterwards.
    """
    # Credentials must be configured on the backend before any transfer runs.
    dataset.backend.set_default_configuration(logged_in_username, access_token, id_token)

    dataset_manifest = Manifest(dataset, logged_in_username)
    io_manager = IOManager(dataset, dataset_manifest)

    io_manager.push_objects(status_update_fn=feedback_callback)
    io_manager.manifest.link_revision()
def test_push_objects_with_failure(self, mock_dataset_with_manifest):
    """Push two objects where the second upload fails with HTTP 400;
    expect one success and one failure reported."""
    ds, manifest, working_dir = mock_dataset_with_manifest
    iom = IOManager(ds, manifest)
    revision = manifest.dataset_revision

    os.makedirs(os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test1.txt", "test content 1")
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test2.txt", "test content 2")
    manifest.sweep_all_changes()

    obj_to_push = iom.objects_to_push()
    assert len(obj_to_push) == 2
    _, obj1 = obj_to_push[0].object_path.rsplit('/', 1)
    _, obj2 = obj_to_push[1].object_path.rsplit('/', 1)

    with aioresponses() as mocked_responses:
        # Register the presigned-url handshake plus the upload mock for each
        # object. The second upload answers 400, forcing exactly one failure.
        for obj_id, upload_status in ((obj1, 200), (obj2, 400)):
            mocked_responses.put(
                f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id}',
                payload={
                    "presigned_url": f"https://dummyurl.com/{obj_id}?params=1",
                    "namespace": ds.namespace,
                    "key_id": "hghghg",
                    "obj_id": obj_id,
                    "dataset": ds.name
                },
                status=200)
            mocked_responses.put(f"https://dummyurl.com/{obj_id}?params=1",
                                 payload={},
                                 status=upload_status)

        assert len(glob.glob(f'{iom.push_dir}/*')) == 1
        iom.dataset.backend.set_default_configuration("test-user", "abcd", '1234')

        result = iom.push_objects()

        assert len(glob.glob(f'{iom.push_dir}/*')) == 1
        assert len(result.success) == 1
        assert len(result.failure) == 1
        assert result.success[0].object_path == obj_to_push[0].object_path
        assert result.failure[0].object_path == obj_to_push[1].object_path
def push_dataset_objects(objs: List[PushObject], logged_in_username: str, access_token: str, id_token: str,
                         dataset_owner: str, dataset_name: str, config_file: str = None) -> None:
    """Method to push a collection of objects to a dataset's backend

    Args:
        objs: List of PushObject instances describing the files to push
        logged_in_username: username for the currently logged in user
        access_token: bearer token
        id_token: identity token
        dataset_owner: Owner of the dataset containing the files to push
        dataset_name: Name of the dataset containing the files to push
        config_file: config file (used for test mocking)

    Returns:
        None
    """
    logger = LMLogger.get_logger()

    def progress_update_callback(completed_bytes: int) -> None:
        """Method to update the job's metadata and provide feedback to the UI"""
        current_job = get_current_job()
        if not current_job:
            return
        if 'completed_bytes' not in current_job.meta:
            current_job.meta['completed_bytes'] = 0

        # Accumulate transferred byte counts across callback invocations.
        current_job.meta['completed_bytes'] = int(current_job.meta['completed_bytes']) + completed_bytes
        current_job.save_meta()

    try:
        p = os.getpid()
        # NOTE: a separating space was added between the concatenated f-strings;
        # previously the log read "...username=x,dataset_owner=..." run together.
        logger.info(f"(Job {p}) Starting push_dataset_objects(logged_in_username={logged_in_username}, "
                    f"dataset_owner={dataset_owner}, dataset_name={dataset_name}")

        im = InventoryManager(config_file=config_file)
        ds = im.load_dataset(logged_in_username, dataset_owner, dataset_name)
        ds.namespace = dataset_owner
        ds.backend.set_default_configuration(logged_in_username, access_token, id_token)
        m = Manifest(ds, logged_in_username)
        iom = IOManager(ds, m)

        result = iom.push_objects(objs, progress_update_fn=progress_update_callback)

        # Surface failures and the summary message to the job queue metadata.
        job = get_current_job()
        if job:
            job.meta['failures'] = ",".join([f"{x.object_path}|{x.dataset_path}|{x.revision}"
                                             for x in result.failure])
            job.meta['message'] = result.message
            job.save_meta()

    except Exception as err:
        logger.exception(err)
        raise
def test_push_objects_deduplicate(self, mock_dataset_with_manifest, mock_dataset_head):
    """Two files with identical content share one object id; with
    `remove_duplicates=True` only the two unique objects are pushed."""
    ds, manifest, working_dir = mock_dataset_with_manifest
    iom = IOManager(ds, manifest)
    revision = manifest.dataset_revision

    os.makedirs(os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test1.txt", "test content 1")
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test2.txt", "test content dup")
    helper_append_file(manifest.cache_mgr.cache_root, revision, "test3.txt", "test content dup")
    manifest.sweep_all_changes()

    obj_to_push = iom.objects_to_push()
    assert len(obj_to_push) == 3
    _, obj1 = obj_to_push[0].object_path.rsplit('/', 1)
    _, obj2 = obj_to_push[1].object_path.rsplit('/', 1)
    _, obj3 = obj_to_push[2].object_path.rsplit('/', 1)

    # Identical content hashes to the same object id.
    assert obj1 != obj2
    assert obj2 == obj3

    with aioresponses() as mocked_responses:
        # Only the two unique objects should ever be uploaded, so only two
        # handshake/upload pairs are registered.
        for obj_id, etag in ((obj1, 'asdfasdf'), (obj2, '12341234')):
            mocked_responses.put(
                f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id}',
                payload={
                    "presigned_url": f"https://dummyurl.com/{obj_id}?params=1",
                    "namespace": ds.namespace,
                    "key_id": "hghghg",
                    "obj_id": obj_id,
                    "dataset": ds.name
                },
                status=200)
            mocked_responses.put(f"https://dummyurl.com/{obj_id}?params=1",
                                 headers={'Etag': etag},
                                 status=200)

        assert len(glob.glob(f'{iom.push_dir}/*')) == 1
        iom.dataset.backend.set_default_configuration("test-user", "abcd", '1234')

        obj_to_push = iom.objects_to_push(remove_duplicates=True)
        result = iom.push_objects(obj_to_push, chunk_update_callback)

        assert len(glob.glob(f'{iom.push_dir}/*')) == 1
        assert len(result.success) == 2
        assert len(result.failure) == 0
        assert isinstance(result, PushResult) is True
        assert isinstance(result.success[0], PushObject) is True
        assert result.success[0].object_path != result.success[1].object_path
        assert result.success[0].object_path in [obj_to_push[0].object_path, obj_to_push[1].object_path]
        assert result.success[1].object_path in [obj_to_push[0].object_path, obj_to_push[1].object_path]