Exemplo n.º 1
0
def create_remote_gitlab_repo(repository: Repository,
                              username: str,
                              visibility: str,
                              access_token: Optional[str] = None) -> None:
    """Create a new repository in GitLab,

    Note: It may make more sense to factor this out later on. """

    default_remote = repository.client_config.config['git']['default_remote']
    admin_service = None
    for remote in repository.client_config.config['git']['remotes']:
        if default_remote == remote:
            admin_service = repository.client_config.config['git']['remotes'][
                remote]['admin_service']
            break

    if not admin_service:
        raise ValueError('admin_service could not be found')

    try:
        # Add collaborator to remote service
        mgr = GitLabManager(default_remote,
                            admin_service,
                            access_token=access_token or 'invalid')
        mgr.configure_git_credentials(default_remote, username)
        mgr.create_labbook(
            namespace=InventoryManager().query_owner(repository),
            labbook_name=repository.name,
            visibility=visibility)
        repository.add_remote(
            "origin",
            f"https://{default_remote}/{username}/{repository.name}.git")
    except Exception as e:
        raise GitLabRemoteError(e)
Exemplo n.º 2
0
    def _configure_git(cls, lb, info) -> GitLabManager:
        # Extract valid Bearer token
        # TODO - This code is duplicated all over the place, must be refactored.
        token = None
        if hasattr(info.context.headers, 'environ'):
            if "HTTP_AUTHORIZATION" in info.context.headers.environ:
                token = parse_token(info.context.headers.environ["HTTP_AUTHORIZATION"])

        if not token:
            raise ValueError("Authorization header not provided. "
                             "Must have a valid session to query for collaborators")

        default_remote = lb.client_config.config['git']['default_remote']
        admin_service = None
        for remote in lb.client_config.config['git']['remotes']:
            if default_remote == remote:
                admin_service = lb.client_config.config['git']['remotes'][remote]['admin_service']
                break

        if not admin_service:
            raise ValueError('admin_service could not be found')

        # Configure git creds
        mgr = GitLabManager(default_remote, admin_service, access_token=token)
        mgr.configure_git_credentials(default_remote, get_logged_in_username())
        return mgr
Exemplo n.º 3
0
    def mutate_and_get_payload(cls,
                               root,
                               info,
                               owner,
                               labbook_name,
                               pull_only=False,
                               override_method="abort",
                               client_mutation_id=None):
        # Load LabBook
        username = get_logged_in_username()
        lb = InventoryManager().load_labbook(username,
                                             owner,
                                             labbook_name,
                                             author=get_logged_in_author())

        # Extract valid Bearer token
        token = None
        if hasattr(info.context.headers, 'environ'):
            if "HTTP_AUTHORIZATION" in info.context.headers.environ:
                token = parse_token(
                    info.context.headers.environ["HTTP_AUTHORIZATION"])

        if not token:
            raise ValueError(
                "Authorization header not provided. "
                "Must have a valid session to query for collaborators")

        default_remote = lb.client_config.config['git']['default_remote']
        admin_service = None
        for remote in lb.client_config.config['git']['remotes']:
            if default_remote == remote:
                admin_service = lb.client_config.config['git']['remotes'][
                    remote]['admin_service']
                break

        if not admin_service:
            raise ValueError('admin_service could not be found')

        # Configure git creds
        mgr = GitLabManager(default_remote, admin_service, access_token=token)
        mgr.configure_git_credentials(default_remote, username)

        override = MergeOverride(override_method)

        job_metadata = {'method': 'sync_labbook', 'labbook': lb.key}
        job_kwargs = {
            'repository': lb,
            'pull_only': pull_only,
            'username': username,
            'override': override
        }
        dispatcher = Dispatcher()
        job_key = dispatcher.dispatch_task(jobs.sync_repository,
                                           kwargs=job_kwargs,
                                           metadata=job_metadata)
        logger.info(
            f"Syncing LabBook {lb.root_dir} in background job with key {job_key.key_str}"
        )

        return SyncLabbook(job_key=job_key.key_str)
Exemplo n.º 4
0
    def mutate_and_get_payload(cls,
                               root,
                               info,
                               owner,
                               dataset_name,
                               visibility,
                               client_mutation_id=None):
        # Load Dataset
        username = get_logged_in_username()
        ds = InventoryManager().load_dataset(username,
                                             owner,
                                             dataset_name,
                                             author=get_logged_in_author())
        # Extract valid Bearer token
        token = None
        if hasattr(info.context.headers, 'environ'):
            if "HTTP_AUTHORIZATION" in info.context.headers.environ:
                token = parse_token(
                    info.context.headers.environ["HTTP_AUTHORIZATION"])

        if not token:
            raise ValueError(
                "Authorization header not provided. Must have a valid session to query for collaborators"
            )

        default_remote = ds.client_config.config['git']['default_remote']
        admin_service = None
        for remote in ds.client_config.config['git']['remotes']:
            if default_remote == remote:
                admin_service = ds.client_config.config['git']['remotes'][
                    remote]['admin_service']
                break

        if not admin_service:
            raise ValueError('admin_service could not be found')

        # Configure git creds
        mgr = GitLabManager(default_remote, admin_service, access_token=token)
        mgr.configure_git_credentials(default_remote, username)

        if visibility not in ['public', 'private']:
            raise ValueError(
                f'Visibility must be either "public" or "private";'
                f'("{visibility}" invalid)')
        with ds.lock():
            mgr.set_visibility(namespace=owner,
                               repository_name=dataset_name,
                               visibility=visibility)

        cursor = base64.b64encode(f"{0}".encode('utf-8'))
        dsedge = DatasetConnection.Edge(node=DatasetObject(owner=owner,
                                                           name=dataset_name),
                                        cursor=cursor)
        return SetDatasetVisibility(new_dataset_edge=dsedge)
Exemplo n.º 5
0
    def mutate_and_get_payload(cls,
                               root,
                               info,
                               owner,
                               labbook_name,
                               remote_url,
                               client_mutation_id=None):
        username = get_logged_in_username()
        logger.info(f"Importing remote labbook from {remote_url}")
        lb = LabBook(author=get_logged_in_author())
        default_remote = lb.client_config.config['git']['default_remote']
        admin_service = None
        for remote in lb.client_config.config['git']['remotes']:
            if default_remote == remote:
                admin_service = lb.client_config.config['git']['remotes'][
                    remote]['admin_service']
                break

        # Extract valid Bearer token
        if hasattr(info.context, 'headers'
                   ) and "HTTP_AUTHORIZATION" in info.context.headers.environ:
            token = parse_token(
                info.context.headers.environ["HTTP_AUTHORIZATION"])
        else:
            raise ValueError(
                "Authorization header not provided. Must have a valid session to query for collaborators"
            )

        gl_mgr = GitLabManager(default_remote,
                               admin_service=admin_service,
                               access_token=token)
        gl_mgr.configure_git_credentials(default_remote, username)

        job_metadata = {'method': 'import_labbook_from_remote'}
        job_kwargs = {'remote_url': remote_url, 'username': username}

        dispatcher = Dispatcher()
        job_key = dispatcher.dispatch_task(jobs.import_labbook_from_remote,
                                           metadata=job_metadata,
                                           kwargs=job_kwargs)
        logger.info(
            f"Dispatched import_labbook_from_remote({remote_url}) to Job {job_key}"
        )

        return ImportRemoteLabbook(job_key=job_key.key_str)
Exemplo n.º 6
0
    def mutate_and_get_payload(cls,
                               root,
                               info,
                               owner,
                               dataset_name,
                               remote_url,
                               client_mutation_id=None):
        username = get_logged_in_username()
        logger.info(f"Importing remote dataset from {remote_url}")
        ds = Dataset(author=get_logged_in_author())

        default_remote = ds.client_config.config['git']['default_remote']
        admin_service = None
        for remote in ds.client_config.config['git']['remotes']:
            if default_remote == remote:
                admin_service = ds.client_config.config['git']['remotes'][
                    remote]['admin_service']
                break

        # Extract valid Bearer token
        if hasattr(info.context, 'headers'
                   ) and "HTTP_AUTHORIZATION" in info.context.headers.environ:
            token = parse_token(
                info.context.headers.environ["HTTP_AUTHORIZATION"])
        else:
            raise ValueError(
                "Authorization header not provided. Must have a valid session to query for collaborators"
            )

        gl_mgr = GitLabManager(default_remote,
                               admin_service=admin_service,
                               access_token=token)
        gl_mgr.configure_git_credentials(default_remote, username)

        wf = DatasetWorkflow.import_from_remote(remote_url, username=username)
        ds = wf.dataset

        import_owner = InventoryManager().query_owner(ds)
        # TODO: Fix cursor implementation, this currently doesn't make sense
        cursor = base64.b64encode(f"{0}".encode('utf-8'))
        dsedge = DatasetConnection.Edge(node=DatasetObject(owner=import_owner,
                                                           name=ds.name),
                                        cursor=cursor)
        return ImportRemoteDataset(new_dataset_edge=dsedge)
Exemplo n.º 7
0
def check_and_import_dataset(logged_in_username: str,
                             dataset_owner: str,
                             dataset_name: str,
                             remote_url: str,
                             access_token: Optional[str] = None,
                             config_file: Optional[str] = None) -> None:
    """Job to check if a dataset exists in the user's working directory, and if not import it. This is primarily used
    when importing, syncing, or switching branches on a project with linked datasets

    Args:
        logged_in_username: username for the currently logged in user
        dataset_owner: Owner of the labbook if this dataset is linked
        dataset_name: Name of the labbook if this dataset is linked
        remote_url: URL of the dataset to import if needed
        access_token: The current user's access token, needed to initialize git credentials in certain situations
        config_file: config file (used for test mocking)

    Returns:
        None
    """
    logger = LMLogger.get_logger()

    p = os.getpid()
    try:
        logger.info(
            f"(Job {p}) Starting check_and_import_dataset(logged_in_username={logged_in_username},"
            f"dataset_owner={dataset_owner}, dataset_name={dataset_name}")

        im = InventoryManager(config_file=config_file)

        try:
            # Check for dataset already existing in the user's working directory
            im.load_dataset(logged_in_username, dataset_owner, dataset_name)
            logger.info(
                f"{logged_in_username}/{dataset_owner}/{dataset_name} exists. Skipping auto-import."
            )
            return
        except InventoryException:
            # Dataset not found, import it
            logger.info(
                f"{logged_in_username}/{dataset_owner}/{dataset_name} not found. "
                f"Auto-importing remote dataset from {remote_url}")
            config_obj = Configuration(config_file=config_file)

            if access_token:
                # If the access token is set, git creds should be configured
                remote_parts = urlsplit(remote_url)
                if remote_parts.netloc:
                    remote_target = f"{remote_parts.scheme}://{remote_parts.netloc}/"
                else:
                    remote_target = remote_parts.path

                admin_service = None
                for remote in config_obj.config['git']['remotes']:
                    if remote == remote_target:
                        admin_service = config_obj.config['git']['remotes'][
                            remote]['admin_service']
                        break

                if not admin_service:
                    raise ValueError(
                        f"Failed to configure admin service URL based on target remote: {remote_target}"
                    )

                gl_mgr = GitLabManager(remote_target,
                                       admin_service=admin_service,
                                       access_token=access_token)
                gl_mgr.configure_git_credentials(remote_target,
                                                 logged_in_username)

            gitworkflows_utils.clone_repo(
                remote_url=remote_url,
                username=logged_in_username,
                owner=dataset_owner,
                load_repository=im.load_dataset_from_directory,
                put_repository=im.put_dataset)
            logger.info(
                f"{logged_in_username}/{dataset_owner}/{dataset_name} auto-imported successfully"
            )

    except Exception as err:
        logger.error(f"(Job {p}) Error in check_and_import_dataset job")
        logger.exception(err)
        raise
Exemplo n.º 8
0
    def mutate_and_get_payload(cls,
                               root,
                               info,
                               labbook_owner,
                               labbook_name,
                               dataset_owner,
                               dataset_name,
                               action,
                               dataset_url=None,
                               client_mutation_id=None):
        logged_in_username = get_logged_in_username()
        im = InventoryManager()
        lb = im.load_labbook(logged_in_username,
                             labbook_owner,
                             labbook_name,
                             author=get_logged_in_author())

        with lb.lock():
            if action == 'link':
                if dataset_url:
                    remote_domain = cls._get_remote_domain(
                        dataset_url, dataset_owner, dataset_name)

                    if remote_domain:
                        # Make sure git creds are configured for the remote
                        admin_service = None
                        for remote in lb.client_config.config['git'][
                                'remotes']:
                            if remote_domain == remote:
                                admin_service = lb.client_config.config['git'][
                                    'remotes'][remote]['admin_service']
                                break
                        if "HTTP_AUTHORIZATION" in info.context.headers.environ:
                            token = parse_token(info.context.headers.
                                                environ["HTTP_AUTHORIZATION"])
                        else:
                            raise ValueError(
                                "Authorization header not provided."
                                " Must have a valid session to query for collaborators"
                            )
                        mgr = GitLabManager(remote_domain, admin_service,
                                            token)
                        mgr.configure_git_credentials(remote_domain,
                                                      logged_in_username)
                else:
                    # Link to local dataset
                    ds = im.load_dataset(logged_in_username, dataset_owner,
                                         dataset_name)
                    dataset_url = f"{ds.root_dir}/.git"

                # Link the dataset to the labbook
                ds = im.link_dataset_to_labbook(dataset_url, dataset_owner,
                                                dataset_name, lb)
                ds.namespace = dataset_owner

                # Preload the dataloader
                info.context.dataset_loader.prime(
                    f"{get_logged_in_username()}&{dataset_owner}&{dataset_name}",
                    ds)

                # Relink the revision
                m = Manifest(ds, logged_in_username)
                m.link_revision()
            elif action == 'unlink':
                im.unlink_dataset_from_labbook(dataset_owner, dataset_name, lb)
            elif action == 'update':
                ds = im.update_linked_dataset_reference(
                    dataset_owner, dataset_name, lb)
                m = Manifest(ds, logged_in_username)
                m.force_reload()

                info.context.dataset_loader.prime(
                    f"{get_logged_in_username()}&{dataset_owner}&{dataset_name}",
                    ds)
            else:
                raise ValueError(
                    "Unsupported action. Use `link`, `unlink`, or `update`")

            info.context.labbook_loader.prime(
                f"{get_logged_in_username()}&{labbook_owner}&{labbook_name}",
                lb)
            edge = LabbookConnection.Edge(node=Labbook(owner=labbook_owner,
                                                       name=labbook_name),
                                          cursor=base64.b64encode(
                                              f"{0}".encode('utf-8')))

        return ModifyDatasetLink(new_labbook_edge=edge)