Example #1
def integration_repo(headers, url_components):
    """With integration repo helper."""
    from renku.core.utils.contexts import chdir

    with chdir(integration_repo_path(headers, url_components)):
        repo = Repo('.')
        yield repo
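
All of these examples rely on the chdir context manager from
renku.core.utils.contexts. As a mental model, here is a minimal sketch of
such a helper, assuming it follows the usual save/restore pattern around
os.chdir (this is not renku's actual source):

import os
from contextlib import contextmanager


@contextmanager
def chdir(path):
    """Temporarily switch the working directory to ``path``."""
    cwd = os.getcwd()  # remember where we started
    os.chdir(path)
    try:
        yield
    finally:
        # Restore the original directory even if the body raised.
        os.chdir(cwd)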
Example #2
def dataset_add_remote_file(cache, user, user_job_id, project_id, create_dataset, commit_message, name, url):
    """Add a remote file to a specified dataset."""
    user = cache.ensure_user(user)
    worker_log.debug((f"executing dataset add remote " f"file job for {user.user_id}:{user.fullname}"))

    user_job = cache.get_job(user, user_job_id)
    user_job.in_progress()

    try:
        worker_log.debug(f"checking metadata for project {project_id}")
        project = cache.get_project(user, project_id)

        with chdir(project.abs_path):
            urls = url if isinstance(url, list) else [url]

            worker_log.debug(f"adding files {urls} to dataset {name}")
            add_file(urls, name, create=create_dataset, commit_message=commit_message)

            worker_log.debug("operation successful - syncing with remote")
            _, remote_branch = repo_sync(Repo(project.abs_path), remote="origin")
            user_job.update_extras("remote_branch", remote_branch)

            user_job.complete()
            worker_log.debug("job completed")
    # NOTE: BaseException subsumes the other entries in this tuple, so every
    # failure is recorded on the job and then re-raised.
    except (HTTPError, BaseException, GitCommandError, RenkuException) as exp:
        user_job.fail_job(str(exp))

        # Reraise exception, so we see trace in job metadata
        # and in metrics as failed job.
        raise exp
Example #3
def unlink_file_view(user_data, cache):
    """Unlink a file from a dataset."""
    ctx = DatasetUnlinkRequest().load(request.json)

    include = ctx.get("include_filter")
    exclude = ctx.get("exclude_filter")

    user = cache.ensure_user(user_data)
    project = cache.get_project(user, ctx["project_id"])

    if ctx.get("commit_message") is None:
        if include and exclude:
            filters = "-I {0} -X {1}".format(include, exclude)
        elif exclude:
            filters = "-X {0}".format(exclude)
        elif include:
            filters = "-I {0}".format(include)
        else:
            filters = ""

        ctx["commit_message"] = "service: unlink dataset {0} {1}".format(
            ctx["name"], filters)

    with chdir(project.abs_path):
        records = file_unlink(
            name=ctx["name"],
            include=ctx.get("include_filters"),
            exclude=ctx.get("exclude_filters"),
            yes=True,
            interactive=False,
            commit_message=ctx["commit_message"],
        )

        unlinked = [record.path for record in records]

    return result_response(DatasetUnlinkResponseRPC(), {"unlinked": unlinked})
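
The default commit message above is assembled from whichever filters were
supplied. Factored into a standalone helper (a hypothetical
default_unlink_message, not part of renku), the same branching can be
written as:

def default_unlink_message(name, include=None, exclude=None):
    """Build the default commit message for a dataset unlink."""
    parts = ["service: unlink dataset", name]
    if include:
        parts.append("-I {0}".format(include))
    if exclude:
        parts.append("-X {0}".format(exclude))
    return " ".join(parts)


# e.g. default_unlink_message("mydata", include="*.csv")
# returns 'service: unlink dataset mydata -I *.csv'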
Example #4
def dataset_import(
    cache, user, user_job_id, project_id, dataset_uri, name=None, extract=False, timeout=None,
):
    """Job for dataset import."""
    user = cache.ensure_user(user)
    worker_log.debug(f"executing dataset import job for {user.user_id}:{user.fullname}")

    user_job = cache.get_job(user, user_job_id)
    user_job.in_progress()

    try:
        worker_log.debug(f"retrieving metadata for project {project_id}")
        project = cache.get_project(user, project_id)
        with chdir(project.abs_path):
            worker_log.debug(f"project found in cache - importing dataset {dataset_uri}")
            import_dataset(
                dataset_uri,
                name,
                extract,
                commit_message=f"service: dataset import {dataset_uri}",
                progress=DatasetImportJobProcess(cache, user_job),
            )

            worker_log.debug("operation successful - syncing with remote")
            _, remote_branch = repo_sync(Repo(project.abs_path), remote="origin")
            user_job.update_extras("remote_branch", remote_branch)

            user_job.complete()
            worker_log.debug("job completed")
    except (HTTPError, ParameterError, RenkuException, GitCommandError) as exp:
        user_job.fail_job(str(exp))

        # Reraise exception, so we see trace in job metadata
        # and in metrics as failed job.
        raise exp
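
Examples 2 and 4 share the same job lifecycle: mark the job in progress, run
the work, mark it complete, and on failure record the error and re-raise. A
hypothetical decorator could factor that out; this is a sketch assuming the
cache/job API shown above (ensure_user, get_job, in_progress, complete,
fail_job), not an existing renku helper:

import functools


def with_user_job(func):
    """Run a worker function inside the in_progress/complete/fail lifecycle."""
    @functools.wraps(func)
    def wrapper(cache, user, user_job_id, *args, **kwargs):
        user = cache.ensure_user(user)
        user_job = cache.get_job(user, user_job_id)
        user_job.in_progress()
        try:
            # The wrapped function receives the resolved job, not its id.
            result = func(cache, user, user_job, *args, **kwargs)
            user_job.complete()
            return result
        except BaseException as exp:
            user_job.fail_job(str(exp))
            # Re-raise so the failure still shows up in job metadata/metrics.
            raise
    return wrapper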
Example #5
def unlink_file_view(user_data, cache):
    """Unlink a file from a dataset."""
    ctx = DatasetUnlinkRequest().load(request.json)

    include = ctx.get('include_filter')
    exclude = ctx.get('exclude_filter')

    user = cache.ensure_user(user_data)
    project = cache.get_project(user, ctx['project_id'])

    if ctx.get('commit_message') is None:
        if include and exclude:
            filters = '-I {0} -X {1}'.format(include, exclude)
        elif exclude:
            filters = '-X {0}'.format(exclude)
        elif include:
            filters = '-I {0}'.format(include)
        else:
            filters = ''

        ctx['commit_message'] = ('service: unlink dataset {0} {1}'.format(
            ctx['short_name'], filters))

    with chdir(project.abs_path):
        records = file_unlink(short_name=ctx['short_name'],
                              include=include,
                              exclude=exclude,
                              yes=True,
                              interactive=False,
                              commit_message=ctx['commit_message'])

        unlinked = [record.path for record in records]

    return result_response(DatasetUnlinkResponseRPC(), {'unlinked': unlinked})
Example #6
def edit_dataset_view(user_data, cache):
    """Edit dataset metadata."""
    ctx = DatasetEditRequest().load(request.json)

    user = cache.ensure_user(user_data)
    project = cache.get_project(user, ctx['project_id'])

    if ctx.get('commit_message') is None:
        ctx['commit_message'] = 'service: dataset edit {0}'.format(
            ctx['short_name'])

    with chdir(project.abs_path):
        edited, warnings = edit_dataset(ctx['short_name'],
                                        ctx.get('title'),
                                        ctx.get('description'),
                                        ctx.get('creators'),
                                        keywords=ctx.get('keywords'),
                                        commit_message=ctx['commit_message'])

    return result_response(
        DatasetEditResponseRPC(),
        {
            'edited': {field: ctx.get(field) for field in edited},
            'warnings': warnings,
        },
    )
Example #7
def create_dataset_view(user, cache):
    """Create a new dataset in a project."""
    ctx = DatasetCreateRequest().load(request.json)
    project = cache.get_project(user, ctx['project_id'])

    project_path = make_project_path(user, project)
    if not project_path:
        return jsonify(
            error={
                'code': INVALID_PARAMS_ERROR_CODE,
                'message': 'invalid project_id argument',
            })

    with chdir(project_path):
        create_dataset(
            ctx['dataset_name'],
            commit_message=ctx['commit_message'],
            creators=ctx.get('creators'),
            description=ctx.get('description'),
        )

    if not repo_sync(project_path):
        return jsonify(
            error={
                'code': INTERNAL_FAILURE_ERROR_CODE,
                'reason': 'push to remote failed silently - try again'
            })

    return jsonify(DatasetCreateResponseRPC().load(
        {'result': DatasetCreateResponse().load(ctx, unknown=EXCLUDE)}))
Example #8
def edit_dataset_view(user_data, cache):
    """Edit dataset metadata."""
    ctx = DatasetEditRequest().load(request.json)

    user = cache.ensure_user(user_data)
    project = cache.get_project(user, ctx["project_id"])

    if ctx.get("commit_message") is None:
        ctx["commit_message"] = "service: dataset edit {0}".format(ctx["name"])

    with chdir(project.abs_path):
        edited, warnings = edit_dataset(
            ctx["name"],
            ctx.get("title"),
            ctx.get("description"),
            ctx.get("creators"),
            keywords=ctx.get("keywords"),
            commit_message=ctx["commit_message"],
        )

    return result_response(
        DatasetEditResponseRPC(),
        {
            "edited": {field: ctx.get(field) for field in edited},
            "warnings": warnings,
        },
    )
Example #9
def add_file_to_dataset_view(user_data, cache):
    """Add the uploaded file to cloned repository."""
    ctx = DatasetAddRequest().load(request.json)
    user = cache.ensure_user(user_data)
    project = cache.get_project(user, ctx['project_id'])

    if not ctx['commit_message']:
        ctx['commit_message'] = 'service: dataset add {0}'.format(
            ctx['short_name'])

    local_paths = []
    for _file in ctx['files']:
        local_path = None

        if 'file_url' in _file:
            commit_message = '{0}{1}'.format(ctx['commit_message'],
                                             _file['file_url'])

            job = cache.make_job(user)
            _file['job_id'] = job.job_id

            with enqueue_retry(DATASETS_JOB_QUEUE) as queue:
                queue.enqueue(dataset_add_remote_file, user_data, job.job_id,
                              project.project_id, ctx['create_dataset'],
                              commit_message, ctx['short_name'],
                              _file['file_url'])
            continue

        if 'file_id' in _file:
            file = cache.get_file(user, _file['file_id'])
            local_path = file.abs_path

        elif 'file_path' in _file:
            local_path = project.abs_path / Path(_file['file_path'])

        if not local_path or not local_path.exists():
            return error_response(
                INVALID_PARAMS_ERROR_CODE,
                'invalid file reference: {0}'.format(json.dumps(_file)))

        ctx['commit_message'] += ' {0}'.format(local_path.name)
        local_paths.append(str(local_path))

    if local_paths:
        with chdir(project.abs_path):
            add_file(local_paths,
                     ctx['short_name'],
                     create=ctx['create_dataset'],
                     force=ctx['force'],
                     commit_message=ctx['commit_message'])

            try:
                _, ctx['remote_branch'] = repo_sync(Repo(project.abs_path),
                                                    remote='origin')
            except GitCommandError:
                return error_response(INTERNAL_FAILURE_ERROR_CODE,
                                      'repo sync failed')

    return result_response(DatasetAddResponseRPC(), ctx)
Example #10
def test_unlink_default(directory_tree, client):
    """Test unlink default behaviour."""
    with chdir(client.path):
        create_dataset("dataset")
        add_file([directory_tree.join("dir2").strpath], "dataset")

    with pytest.raises(ParameterError):
        file_unlink("dataset", (), ())
Example #11
def datapack_tar(directory_tree):
    """Returns dummy data folder as a tar archive."""
    from renku.core.utils.contexts import chdir
    workspace_dir = tempfile.TemporaryDirectory()
    with chdir(workspace_dir.name):
        shutil.make_archive('datapack', 'tar', str(directory_tree))

    yield Path(workspace_dir.name) / 'datapack.tar'
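
Note that the TemporaryDirectory created here is only removed when the object
is finalized, after the fixture generator is closed. For deterministic
cleanup the fixture can use the context-manager form instead; a sketch, not
renku's actual fixture:

import shutil
import tempfile
from pathlib import Path

import pytest


@pytest.fixture
def datapack_tar(directory_tree):
    """Dummy data folder as a tar archive, removed right after the test."""
    from renku.core.utils.contexts import chdir

    with tempfile.TemporaryDirectory() as workspace_dir:
        with chdir(workspace_dir):
            shutil.make_archive('datapack', 'tar', str(directory_tree))
        yield Path(workspace_dir) / 'datapack.tar'
    # The directory, including the archive, is deleted when the with exits.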
Example #12
def test_cli_initialization_no_err_help(cmd, runner):
    """Test allowed commands within non-renku repository."""
    from renku.core.utils.contexts import chdir
    sys.argv = cmd
    with tempfile.TemporaryDirectory() as tmpdir:
        with chdir(tmpdir):
            result = runner.invoke(cli, cmd)
            assert 0 == result.exit_code
Example #13
def datapack_zip(directory_tree):
    """Returns dummy data folder as a zip archive."""
    from renku.core.utils.contexts import chdir

    workspace_dir = tempfile.TemporaryDirectory()
    with chdir(workspace_dir.name):
        shutil.make_archive("datapack", "zip", str(directory_tree))

    yield Path(workspace_dir.name) / "datapack.zip"
Example #14
def list_datasets_view(user, cache):
    """List all datasets in project."""
    ctx = DatasetListRequest().load(request.args)
    project = cache.get_project(cache.ensure_user(user), ctx['project_id'])

    with chdir(project.abs_path):
        ctx['datasets'] = list_datasets()

    return result_response(DatasetListResponseRPC(), ctx)
Example #15
def list_dataset_files_view(user, cache):
    """List files in a dataset."""
    ctx = DatasetFilesListRequest().load(request.args)
    project = cache.get_project(cache.ensure_user(user), ctx['project_id'])

    with chdir(project.abs_path):
        ctx['files'] = list_files(datasets=[ctx['short_name']])

    return result_response(DatasetFilesListResponseRPC(), ctx)
Example #16
def add_file_to_dataset_view(user, cache):
    """Add the uploaded file to cloned repository."""
    ctx = DatasetAddRequest().load(request.json)
    project = cache.get_project(user, ctx['project_id'])
    project_path = make_project_path(user, project)

    if not project_path:
        return jsonify(
            error={
                'code': INVALID_PARAMS_ERROR_CODE,
                'message': 'invalid project_id: {0}'.format(ctx['project_id']),
            })

    if not ctx['commit_message']:
        ctx['commit_message'] = 'service: dataset add {0}'.format(
            ctx['dataset_name'])

    local_paths = []
    for _file in ctx['files']:
        local_path = None

        if 'file_id' in _file:
            file = cache.get_file(user, _file['file_id'])
            local_path = make_file_path(user, file)

        elif 'file_path' in _file:
            local_path = project_path / Path(_file['file_path'])

        if not local_path or not local_path.exists():
            return jsonify(
                error={
                    'code': INVALID_PARAMS_ERROR_CODE,
                    'message': 'invalid file reference: {0}'.format(
                        json.dumps(_file)),
                })

        ctx['commit_message'] += ' {0}'.format(local_path.name)
        local_paths.append(str(local_path))

    with chdir(project_path):
        add_file(local_paths,
                 ctx['dataset_name'],
                 create=ctx['create_dataset'],
                 commit_message=ctx['commit_message'])

        if not repo_sync(project_path):
            return jsonify(error={
                'code': INTERNAL_FAILURE_ERROR_CODE,
                'message': 'repo sync failed'
            })

    return jsonify(DatasetAddResponseRPC().load(
        {'result': DatasetAddResponse().load(ctx, unknown=EXCLUDE)}))
Example #17
def migration_check_project_view(user_data, cache):
    """Migrate specified project."""
    user = cache.ensure_user(user_data)
    project = cache.get_project(user, request.json['project_id'])

    with chdir(project.abs_path):
        migration_required, project_supported = migrations_check()

    return result_response(
        ProjectMigrationCheckResponseRPC(), {
            'migration_required': migration_required,
            'project_supported': project_supported
        })
Example #18
def test_cli_initialization_err(cmd, runner):
    """Test correct exception raise within non-renku repository."""
    from renku.core.utils.contexts import chdir

    with tempfile.TemporaryDirectory() as tmpdir:
        with chdir(tmpdir):
            result = runner.invoke(cli, ["--disable-version-check"] + cmd)
            assert 2 == result.exit_code

            expected_output = ("Error: `.` is not a renku repository.\n"
                               "To initialize this as a "
                               "renku repository use: `renku init`\n")
            assert expected_output == result.output
Example #19
def subdirectory(request):
    """Runs tests in root directory and a subdirectory."""
    from renku.core.utils.contexts import chdir

    if request.param != '.':
        path = Path(request.param) / '.gitkeep'
        path.parent.mkdir(parents=True, exist_ok=True)
        path.touch()
        Repo().git.add(str(path))
        Repo().index.commit('Create subdirectory')

    with chdir(request.param):
        yield
Example #20
def subdirectory(project, request):
    """Runs tests in root directory and a subdirectory."""
    from renku.core.utils.contexts import chdir

    if request.param != ".":
        path = Path(request.param) / ".gitkeep"
        path.parent.mkdir(parents=True, exist_ok=True)
        path.touch()
        Repo().git.add(str(path))
        Repo().index.commit("Create subdirectory", skip_hooks=True)

    with chdir(request.param):
        yield Path(request.param).resolve()
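
Both subdirectory fixtures read request.param, so they are evidently
parametrized fixtures. The wiring looks roughly like this (the params values
and names here are illustrative, not taken from renku's conftest):

import pytest


# Each test using this fixture runs once per entry in params; the current
# value is available inside the fixture as request.param.
@pytest.fixture(params=[".", "data/sub"])
def subdirectory_param(request):
    return request.param


def test_runs_twice(subdirectory_param):
    assert subdirectory_param in (".", "data/sub")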
Example #21
def list_dataset_files_view(user, cache):
    """List files in a dataset."""
    ctx = DatasetFilesListRequest().load(request.args)
    project = cache.get_project(cache.ensure_user(user), ctx['project_id'])

    if not project.abs_path.exists():
        return error_response(INVALID_PARAMS_ERROR_CODE,
                              'invalid project_id argument')

    with chdir(project.abs_path):
        ctx['files'] = list_files(datasets=[ctx['dataset_name']])

    return result_response(DatasetFilesListResponseRPC(), ctx)
Example #22
def test_run_from_non_root(runner, client, cwd):
    """Test CLI commands run from a subdirectory of the project."""
    path = client.path / cwd
    path.mkdir(parents=True, exist_ok=True)
    with chdir(path):
        result = runner.invoke(cli, ['dataset'])
        assert 0 == result.exit_code
        assert 'Run CLI commands only from project\'s root' in result.output

        result = runner.invoke(cli, ['help'])
        assert 0 == result.exit_code
        assert 'Run CLI commands only from project' not in result.output

    result = runner.invoke(cli, ['dataset'])
    assert 0 == result.exit_code
    assert 'Run CLI commands only from project\'s root' not in result.output
Example #23
def add_file_to_dataset_view(user, cache):
    """Add the uploaded file to cloned repository."""
    ctx = DatasetAddRequest().load(request.json)
    user = cache.ensure_user(user)
    project = cache.get_project(user, ctx['project_id'])

    if not project.abs_path.exists():
        return error_response(
            INVALID_PARAMS_ERROR_CODE,
            'invalid project_id: {0}'.format(ctx['project_id']))

    if not ctx['commit_message']:
        ctx['commit_message'] = 'service: dataset add {0}'.format(
            ctx['dataset_name'])

    local_paths = []
    for _file in ctx['files']:
        local_path = None

        if 'file_id' in _file:
            file = cache.get_file(user, _file['file_id'])
            local_path = file.abs_path

        elif 'file_path' in _file:
            local_path = project.abs_path / Path(_file['file_path'])

        if not local_path or not local_path.exists():
            return error_response(
                INVALID_PARAMS_ERROR_CODE,
                'invalid file reference: {0}'.format(json.dumps(_file)))

        ctx['commit_message'] += ' {0}'.format(local_path.name)
        local_paths.append(str(local_path))

    with chdir(project.abs_path):
        add_file(local_paths,
                 ctx['dataset_name'],
                 create=ctx['create_dataset'],
                 commit_message=ctx['commit_message'])

        if not repo_sync(project.abs_path):
            return error_response(INTERNAL_FAILURE_ERROR_CODE,
                                  'repo sync failed')

    return result_response(DatasetAddResponseRPC(), ctx)
Example #24
def migrate_project_view(user_data, cache):
    """Migrate specified project."""
    user = cache.ensure_user(user_data)
    project = cache.get_project(user, request.json['project_id'])

    messages = []

    def collect_message(msg):
        """Collect migration message."""
        messages.append(msg)

    with chdir(project.abs_path):
        was_migrated = migrate_project(progress_callback=collect_message)

    return result_response(ProjectMigrateResponseRPC(), {
        'messages': messages,
        'was_migrated': was_migrated
    })
Example #25
def remove_dataset_view(user, cache):
    """Remove a dataset from a project."""
    ctx = DatasetRemoveRequest().load(request.json)
    project = cache.get_project(cache.ensure_user(user), ctx["project_id"])

    if not project.abs_path.exists():
        return error_response(INVALID_PARAMS_ERROR_CODE,
                              "invalid project_id argument")

    with chdir(project.abs_path):
        dataset_remove([ctx["name"]], commit_message=ctx["commit_message"])

    try:
        _, ctx["remote_branch"] = repo_sync(Repo(project.abs_path),
                                            remote="origin")
    except GitCommandError:
        return error_response(INTERNAL_FAILURE_ERROR_CODE, "repo sync failed")

    return result_response(DatasetRemoveResponseRPC(), ctx)
Example #26
def test_renku_clone_with_config(tmpdir):
    """Test cloning of a Renku repo and existence of required settings."""
    REMOTE = 'https://dev.renku.ch/gitlab/virginiafriedrich/datasets-test.git'

    with chdir(tmpdir):
        renku_clone(
            REMOTE,
            config={
                'user.name': 'sam',
                'user.email': '[email protected]',
                'filter.lfs.custom': '0'
            }
        )

        repo = git.Repo('datasets-test')
        reader = repo.config_reader()
        reader.values()

        lfs_config = dict(reader.items('filter.lfs'))
        assert '0' == lfs_config.get('custom')
Example #27
def create_dataset_view(user, cache):
    """Create a new dataset in a project."""
    ctx = DatasetCreateRequest().load(request.json)
    project = cache.get_project(cache.ensure_user(user), ctx['project_id'])

    with chdir(project.abs_path):
        create_dataset(ctx['short_name'],
                       title=ctx.get('name'),
                       creators=ctx.get('creator'),
                       description=ctx.get('description'),
                       keywords=ctx.get('keywords'),
                       commit_message=ctx['commit_message'])

    try:
        _, ctx['remote_branch'] = repo_sync(Repo(project.abs_path),
                                            remote='origin')
    except GitCommandError:
        return error_response(INTERNAL_FAILURE_ERROR_CODE,
                              'push to remote failed silently - try again')

    return result_response(DatasetCreateResponseRPC(), ctx)
Example #28
def create_dataset_view(user, cache):
    """Create a new dataset in a project."""
    ctx = DatasetCreateRequest().load(request.json)
    project = cache.get_project(cache.ensure_user(user), ctx["project_id"])

    with chdir(project.abs_path):
        create_dataset(
            ctx["name"],
            title=ctx.get("title"),
            creators=ctx.get("creators"),
            description=ctx.get("description"),
            keywords=ctx.get("keywords"),
            commit_message=ctx["commit_message"],
        )

    try:
        _, ctx["remote_branch"] = repo_sync(Repo(project.abs_path),
                                            remote="origin")
    except GitCommandError:
        return error_response(INTERNAL_FAILURE_ERROR_CODE, "repo sync failed")

    return result_response(DatasetCreateResponseRPC(), ctx)
Example #29
def create_dataset_view(user, cache):
    """Create a new dataset in a project."""
    ctx = DatasetCreateRequest().load(request.json)
    project = cache.get_project(cache.ensure_user(user), ctx['project_id'])

    if not project.abs_path.exists():
        return error_response(INVALID_PARAMS_ERROR_CODE,
                              'invalid project_id argument')

    with chdir(project.abs_path):
        create_dataset(
            ctx['dataset_name'],
            commit_message=ctx['commit_message'],
            creators=ctx.get('creators'),
            description=ctx.get('description'),
        )

    if not repo_sync(project.abs_path):
        return error_response(INTERNAL_FAILURE_ERROR_CODE,
                              'push to remote failed silently - try again')

    return result_response(DatasetCreateResponseRPC(), ctx)
Example #30
def list_datasets_view(user, cache):
    """List all datasets in project."""
    req = DatasetListRequest().load(request.args)
    project = cache.get_project(user, req['project_id'])
    project_path = make_project_path(user, project)

    if not project_path:
        return jsonify(
            error={
                'code': INVALID_PARAMS_ERROR_CODE,
                'reason': 'invalid project_id argument',
            })

    with chdir(project_path):
        datasets = [
            DatasetDetails().load(ds, unknown=EXCLUDE)
            # TODO: fix core interface to address this issue (add ticket ref)
            for ds in json.loads(dataset_parent(None, 'data', 'json-ld'))
        ]

    response = DatasetListResponse().load({'datasets': datasets})
    return jsonify(DatasetListResponseRPC().load({'result': response}))