def sync_repository(repository: Repository, username: str, override: MergeOverride,
                    remote: str = "origin", access_token: Optional[str] = None,
                    pull_only: bool = False, id_token: Optional[str] = None) -> int:
    p = os.getpid()
    logger = LMLogger.get_logger()
    logger.info(f"(Job {p}) Starting sync_repository({str(repository)})")

    def update_meta(msg):
        job = get_current_job()
        if not job:
            return
        if 'feedback' not in job.meta:
            job.meta['feedback'] = msg
        else:
            job.meta['feedback'] = job.meta['feedback'] + f'\n{msg}'
        job.save_meta()

    try:
        with repository.lock():
            if isinstance(repository, LabBook):
                wf = LabbookWorkflow(repository)
            else:
                wf = DatasetWorkflow(repository)  # type: ignore
            cnt = wf.sync(username=username, remote=remote, override=override,
                          feedback_callback=update_meta, access_token=access_token,
                          id_token=id_token, pull_only=pull_only)
        logger.info(f"(Job {p}) Completed sync_repository with cnt={cnt}")
        return cnt
    except Exception as e:
        logger.exception(f"(Job {p}) Error on sync_repository: {e}")
        raise
def publish_repository(repository: Repository, username: str, access_token: str,
                       remote: Optional[str] = None, public: bool = False,
                       id_token: Optional[str] = None) -> None:
    p = os.getpid()
    logger = LMLogger.get_logger()
    logger.info(f"(Job {p}) Starting publish_repository({str(repository)})")

    def update_meta(msg):
        job = get_current_job()
        if not job:
            return
        if 'feedback' not in job.meta:
            job.meta['feedback'] = msg
        else:
            job.meta['feedback'] = job.meta['feedback'] + f'\n{msg}'
        job.save_meta()

    try:
        with repository.lock():
            if isinstance(repository, LabBook):
                wf = LabbookWorkflow(repository)
            else:
                wf = DatasetWorkflow(repository)  # type: ignore
            wf.publish(username=username, access_token=access_token, remote=remote or "origin",
                       public=public, feedback_callback=update_meta, id_token=id_token)
    except Exception as e:
        logger.exception(f"(Job {p}) Error on publish_repository: {e}")
        raise
def sync_repository(repository: Repository, username: str, override: MergeOverride,
                    remote: str = "origin", access_token: Optional[str] = None,
                    pull_only: bool = False, id_token: Optional[str] = None) -> int:
    p = os.getpid()
    logger = LMLogger.get_logger()
    logger.info(f"(Job {p}) Starting sync_repository({str(repository)})")

    def update_feedback(msg: str, has_failures: Optional[bool] = None,
                        failure_detail: Optional[str] = None,
                        percent_complete: Optional[float] = None) -> None:
        """Method to update the job's metadata and provide feedback to the UI"""
        current_job = get_current_job()
        if not current_job:
            return
        if has_failures:
            current_job.meta['has_failures'] = has_failures
        if failure_detail:
            current_job.meta['failure_detail'] = failure_detail
        if percent_complete:
            current_job.meta['percent_complete'] = percent_complete
        current_job.meta['feedback'] = msg
        current_job.save_meta()

    try:
        update_feedback("Sync task in queue")
        with repository.lock():
            if isinstance(repository, LabBook):
                wf = LabbookWorkflow(repository)
            else:
                wf = DatasetWorkflow(repository)  # type: ignore
            cnt = wf.sync(username=username, remote=remote, override=override,
                          feedback_callback=update_feedback, access_token=access_token,
                          id_token=id_token, pull_only=pull_only)
        logger.info(f"(Job {p}) Completed sync_repository with cnt={cnt}")
        return cnt
    except MergeConflict as me:
        logger.exception(f"(Job {p}) Merge conflict: {me}")
        raise
    except IOError:
        raise
    except Exception as e:
        logger.exception(e)
        raise Exception("Could not sync - try to log out and log in again.")
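# --- Illustrative usage (not part of the job module) -------------------------
# A hedged sketch of how a caller might enqueue sync_repository and poll the
# feedback that update_feedback() writes into job.meta. The dispatch_task /
# query_task shapes mirror the Dispatcher mocks in the tests further below;
# the module paths, metadata key, and MergeOverride member are assumptions.
import time

from gtmcore.dispatcher import Dispatcher
import gtmcore.dispatcher.jobs as jobs               # assumed module for sync_repository
from gtmcore.workflows import MergeOverride          # assumed import path


def enqueue_sync(repository, username: str, access_token: str) -> None:
    d = Dispatcher()
    job_key = d.dispatch_task(jobs.sync_repository,
                              kwargs={'repository': repository,
                                      'username': username,
                                      'override': MergeOverride.ABORT,  # assumed member name
                                      'access_token': access_token},
                              metadata={'method': 'sync_repository'})   # assumed key

    # job.meta carries whatever update_feedback() last wrote
    while True:
        job = d.query_task(job_key)
        print(job.meta.get('feedback'), job.meta.get('percent_complete'))
        if job.status in ('finished', 'failed'):
            break
        time.sleep(1)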
def test_publish__dataset(self, mock_config_file):
    def update_feedback(msg: str, has_failures: Optional[bool] = None,
                        failure_detail: Optional[str] = None,
                        percent_complete: Optional[float] = None) -> None:
        """Feedback callback that asserts the publish never reports a failure"""
        assert has_failures is None or has_failures is False
        assert failure_detail is None

    def dispatch_query_mock(self, job_key):
        JobStatus = namedtuple("JobStatus", ['status', 'meta'])
        return JobStatus(status='finished', meta={'completed_bytes': '500'})

    def dispatch_mock(self, method_reference, kwargs, metadata, persist):
        return "afakejobkey"

    username = '******'
    im = InventoryManager(mock_config_file[0])
    ds = im.create_dataset(username, username, 'dataset-1', 'gigantum_object_v1')
    m = Manifest(ds, username)
    wf = DatasetWorkflow(ds)

    # Put a file into the dataset that needs to be pushed
    helper_append_file(m.cache_mgr.cache_root, m.dataset_revision, "test1.txt", "asdfadfsdf")
    m.sweep_all_changes()

    iom = IOManager(ds, m)
    assert len(glob.glob(f'{iom.push_dir}/*')) == 1

    with patch.object(Dispatcher, 'dispatch_task', dispatch_mock):
        with patch.object(Dispatcher, 'query_task', dispatch_query_mock):
            wf.publish(username=username, feedback_callback=update_feedback)
            assert os.path.exists(wf.remote)
            assert len(glob.glob(f'{iom.push_dir}/*')) == 0
def test_import_from_remote__dataset(self, mock_labbook_lfs_disabled, mock_config_file):
    """ test importing a published dataset """
    username = '******'
    lb = mock_labbook_lfs_disabled[2]
    im = InventoryManager(config_file=mock_labbook_lfs_disabled[0])
    ds = im.create_dataset(username, username, 'test-ds', storage_type='gigantum_object_v1')
    wf = DatasetWorkflow(ds)
    wf.publish(username=username)

    other_user = '******'
    wf_other = DatasetWorkflow.import_from_remote(wf.remote, username=other_user,
                                                  config_file=mock_config_file[0])

    # The remotes must be the same, cause it's the same remote repo
    assert wf_other.remote == wf.remote
    # The actual path on disk will be different, though
    assert wf_other.repository != wf.repository
    # Check imported into namespace of original owner (testuser)
    assert f'{other_user}/{username}/datasets/test-ds' in wf_other.repository.root_dir
def publish_repository(repository: Repository, username: str, access_token: str,
                       remote: Optional[str] = None, public: bool = False,
                       id_token: Optional[str] = None) -> None:
    p = os.getpid()
    logger = LMLogger.get_logger()
    logger.info(f"(Job {p}) Starting publish_repository({str(repository)})")

    def update_feedback(msg: str, has_failures: Optional[bool] = None,
                        failure_detail: Optional[str] = None,
                        percent_complete: Optional[float] = None) -> None:
        """Method to update the job's metadata and provide feedback to the UI"""
        current_job = get_current_job()
        if not current_job:
            return
        if has_failures:
            current_job.meta['has_failures'] = has_failures
        if failure_detail:
            current_job.meta['failure_detail'] = failure_detail
        if percent_complete:
            current_job.meta['percent_complete'] = percent_complete
        current_job.meta['feedback'] = msg
        current_job.save_meta()

    try:
        update_feedback("Publish task in queue")
        with repository.lock():
            if isinstance(repository, LabBook):
                wf = LabbookWorkflow(repository)
            else:
                wf = DatasetWorkflow(repository)  # type: ignore
            wf.publish(username=username, access_token=access_token, remote=remote or "origin",
                       public=public, feedback_callback=update_feedback, id_token=id_token)
    except IOError:
        raise
    except Exception as e:
        logger.exception(e)
        raise Exception("Could not publish - try to log out and log in again.")
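# --- Illustrative usage (not part of the job module) -------------------------
# A hedged sketch of reading the structured keys that update_feedback() stores
# in job.meta once the publish job completes. The .status/.meta shape matches
# the JobStatus namedtuple used by dispatch_query_mock in the test above, but
# treat it as an assumption about the real Dispatcher.query_task return value.
def report_publish_result(dispatcher, job_key) -> None:
    job = dispatcher.query_task(job_key)
    if job.meta.get('has_failures'):
        print(f"Publish failed: {job.meta.get('failure_detail')}")
    else:
        print(f"{job.meta.get('feedback')} ({job.meta.get('percent_complete', 0)}%)")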
def mutate_and_get_payload(cls, root, info, owner, dataset_name, remote_url, client_mutation_id=None):
    username = get_logged_in_username()
    logger.info(f"Importing remote dataset from {remote_url}")
    ds = Dataset(author=get_logged_in_author())

    default_remote = ds.client_config.config['git']['default_remote']
    admin_service = None
    for remote in ds.client_config.config['git']['remotes']:
        if default_remote == remote:
            admin_service = ds.client_config.config['git']['remotes'][remote]['admin_service']
            break

    # Extract valid Bearer token
    if hasattr(info.context, 'headers') and "HTTP_AUTHORIZATION" in info.context.headers.environ:
        token = parse_token(info.context.headers.environ["HTTP_AUTHORIZATION"])
    else:
        raise ValueError("Authorization header not provided. "
                         "Must have a valid session to import a remote dataset")

    gl_mgr = GitLabManager(default_remote, admin_service=admin_service, access_token=token)
    gl_mgr.configure_git_credentials(default_remote, username)

    wf = DatasetWorkflow.import_from_remote(remote_url, username=username)
    ds = wf.dataset
    import_owner = InventoryManager().query_owner(ds)

    # TODO: Fix cursor implementation, this currently doesn't make sense
    cursor = base64.b64encode(f"{0}".encode('utf-8'))
    dsedge = DatasetConnection.Edge(node=DatasetObject(owner=import_owner, name=ds.name),
                                    cursor=cursor)
    return ImportRemoteDataset(new_dataset_edge=dsedge)
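# --- Illustrative usage (not part of the mutation class) ---------------------
# A hedged sketch of the GraphQL document a client might send to invoke this
# mutation. Field names assume Graphene's default snake_case -> camelCase
# conversion and a Relay-style input object; the request must also carry the
# Bearer token in the Authorization header, as the handler above requires.
# Verify the names against the actual schema before relying on them.
IMPORT_REMOTE_DATASET = """
mutation ImportRemoteDataset($owner: String!, $datasetName: String!, $remoteUrl: String!) {
  importRemoteDataset(input: {owner: $owner, datasetName: $datasetName, remoteUrl: $remoteUrl}) {
    newDatasetEdge {
      node {
        owner
        name
      }
    }
  }
}
"""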
def test_checkout__linked_dataset(self, mock_labbook_lfs_disabled, mock_config_file):
    """ test checking out a branch in a project that pulls in a linked dataset"""
    def dispatcher_mock(self, function_ref, kwargs, metadata):
        assert kwargs['logged_in_username'] == 'other-test-user2'
        assert kwargs['dataset_owner'] == 'testuser'
        assert kwargs['dataset_name'] == 'test-ds'

        # Inject mocked config file
        kwargs['config_file'] = mock_config_file[0]

        # Stop patching so job gets scheduled for real
        dispatcher_patch.stop()

        # Call same method as in mutation
        d = Dispatcher()
        res = d.dispatch_task(gtmcore.dispatcher.dataset_jobs.check_and_import_dataset,
                              kwargs=kwargs, metadata=metadata)
        return res

    username = '******'
    lb = mock_labbook_lfs_disabled[2]
    im = InventoryManager(config_file=mock_labbook_lfs_disabled[0])
    ds = im.create_dataset(username, username, 'test-ds', storage_type='gigantum_object_v1')

    # Publish dataset
    dataset_wf = DatasetWorkflow(ds)
    dataset_wf.publish(username=username)

    # Publish project
    labbook_wf = LabbookWorkflow(lb)
    labbook_wf.publish(username=username)

    # Switch branches
    labbook_wf.labbook.checkout_branch(branch_name="dataset-branch", new=True)

    # Link to project
    im.link_dataset_to_labbook(dataset_wf.remote, username, username, labbook_wf.labbook)

    # Publish branch
    labbook_wf.sync(username=username)

    # Import project
    other_user = '******'
    wf_other = LabbookWorkflow.import_from_remote(labbook_wf.remote, username=other_user,
                                                  config_file=mock_config_file[0])

    # The remotes must be the same, cause it's the same remote repo
    assert wf_other.remote == labbook_wf.remote
    assert wf_other.repository != labbook_wf.repository
    assert f'{other_user}/{username}/labbooks/labbook1' in wf_other.repository.root_dir

    with pytest.raises(InventoryException):
        im_other_user = InventoryManager(config_file=mock_config_file[0])
        ds = im_other_user.load_dataset(other_user, username, 'test-ds')

    # Patch dispatch_task so you can inject the mocked config file
    dispatcher_patch = patch.object(Dispatcher, 'dispatch_task', dispatcher_mock)
    dispatcher_patch.start()

    # Checkout the branch
    assert wf_other.labbook.active_branch == "master"
    wf_other.checkout(username=other_user, branch_name="dataset-branch")

    cnt = 0
    while cnt < 20:
        try:
            im_other_user = InventoryManager(config_file=mock_config_file[0])
            ds = im_other_user.load_dataset(other_user, username, 'test-ds')
            break
        except InventoryException:
            cnt += 1
            time.sleep(1)

    assert cnt < 20
    assert ds.name == 'test-ds'
    assert ds.namespace == username
    assert mock_config_file[1] in ds.root_dir
    assert wf_other.labbook.active_branch == "dataset-branch"
def test_import_from_remote__linked_dataset(self, mock_labbook_lfs_disabled, mock_config_file):
    """ test importing a project with a linked dataset"""
    def dispatcher_mock(self, function_ref, kwargs, metadata):
        assert kwargs['logged_in_username'] == 'other-test-user2'
        assert kwargs['dataset_owner'] == 'testuser'
        assert kwargs['dataset_name'] == 'test-ds'

        # Inject mocked config file
        kwargs['config_file'] = mock_config_file[0]

        # Stop patching so job gets scheduled for real
        dispatcher_patch.stop()

        # Call same method as in mutation
        d = Dispatcher()
        res = d.dispatch_task(gtmcore.dispatcher.dataset_jobs.check_and_import_dataset,
                              kwargs=kwargs, metadata=metadata)
        return res

    username = '******'
    lb = mock_labbook_lfs_disabled[2]
    im = InventoryManager(config_file=mock_labbook_lfs_disabled[0])
    ds = im.create_dataset(username, username, 'test-ds', storage_type='gigantum_object_v1')

    # Publish dataset
    dataset_wf = DatasetWorkflow(ds)
    dataset_wf.publish(username=username)

    # Link to project
    im.link_dataset_to_labbook(dataset_wf.remote, username, username, lb)

    # Publish project
    labbook_wf = LabbookWorkflow(lb)
    labbook_wf.publish(username=username)

    # Patch dispatch_task so you can inject the mocked config file
    dispatcher_patch = patch.object(Dispatcher, 'dispatch_task', dispatcher_mock)
    dispatcher_patch.start()

    # Import project, triggering an auto-import of the dataset
    other_user = '******'
    wf_other = LabbookWorkflow.import_from_remote(labbook_wf.remote, username=other_user,
                                                  config_file=mock_config_file[0])

    # The remotes must be the same, cause it's the same remote repo
    assert wf_other.remote == labbook_wf.remote
    # The actual path on disk will be different, though
    assert wf_other.repository != labbook_wf.repository
    # Check imported into namespace of original owner (testuser)
    assert f'{other_user}/{username}/labbooks/labbook1' in wf_other.repository.root_dir

    cnt = 0
    while cnt < 20:
        try:
            im_other_user = InventoryManager(config_file=mock_config_file[0])
            ds = im_other_user.load_dataset(other_user, username, 'test-ds')
            break
        except InventoryException:
            cnt += 1
            time.sleep(1)

    assert cnt < 20
    assert ds.name == 'test-ds'
    assert ds.namespace == username
    assert mock_config_file[1] in ds.root_dir