def dataset_add_remote_file(cache, user, user_job_id, project_id, create_dataset, commit_message, name, url):
    """Add a remote file to a specified dataset.

    Runs as a background worker job: marks the cached job record
    in-progress, adds the URL(s) to the dataset inside the cloned
    project, syncs the repository with its ``origin`` remote and
    records the resulting remote branch on the job record. Any
    failure marks the job failed and re-raises.

    :param cache: service cache used to resolve user, job and project.
    :param user: raw user data; normalized via ``cache.ensure_user``.
    :param user_job_id: identifier of the job record to update.
    :param project_id: identifier of the cached (cloned) project.
    :param create_dataset: whether to create the dataset if missing.
    :param commit_message: commit message for the dataset add.
    :param name: dataset name.
    :param url: a single URL or a list of URLs to add.
    """
    user = cache.ensure_user(user)
    worker_log.debug(f"executing dataset add remote file job for {user.user_id}:{user.fullname}")

    user_job = cache.get_job(user, user_job_id)
    user_job.in_progress()

    try:
        worker_log.debug(f"checking metadata for project {project_id}")
        project = cache.get_project(user, project_id)

        with chdir(project.abs_path):
            # A single URL may arrive bare or already wrapped in a list.
            urls = url if isinstance(url, list) else [url]

            worker_log.debug(f"adding files {urls} to dataset {name}")
            add_file(urls, name, create=create_dataset, commit_message=commit_message)

            worker_log.debug("operation successful - syncing with remote")
            _, remote_branch = repo_sync(Repo(project.abs_path), remote="origin")
            user_job.update_extras("remote_branch", remote_branch)

            user_job.complete()
            worker_log.debug("job completed")
    except BaseException as exp:
        # FIX: the former clause ``(HTTPError, BaseException,
        # GitCommandError, RenkuException)`` was redundant —
        # BaseException already subsumes every listed type.
        user_job.fail_job(str(exp))

        # Reraise exception (bare ``raise`` preserves the original
        # traceback), so we see trace in job metadata
        # and in metrics as failed job.
        raise
def add_file_to_dataset_view(user_data, cache):
    """Add the uploaded file to cloned repository."""
    ctx = DatasetAddRequest().load(request.json)
    user = cache.ensure_user(user_data)
    project = cache.get_project(user, ctx['project_id'])

    # Fall back to a default commit message if the client sent none.
    if not ctx['commit_message']:
        ctx['commit_message'] = 'service: dataset add {0}'.format(
            ctx['short_name'])

    local_paths = []
    for _file in ctx['files']:
        local_path = None

        if 'file_url' in _file:
            # Remote URLs are handled asynchronously: a cache job record is
            # created and the actual add is enqueued on the datasets queue.
            # NOTE(review): message and URL are concatenated without a
            # separator — presumably intentional, but worth confirming.
            commit_message = '{0}{1}'.format(ctx['commit_message'],
                                             _file['file_url'])

            job = cache.make_job(user)
            # Expose the job id so the client can poll for completion.
            _file['job_id'] = job.job_id

            with enqueue_retry(DATASETS_JOB_QUEUE) as queue:
                queue.enqueue(dataset_add_remote_file, user_data, job.job_id,
                              project.project_id, ctx['create_dataset'],
                              commit_message, ctx['short_name'],
                              _file['file_url'])
            continue

        if 'file_id' in _file:
            # File previously uploaded into the service cache.
            file = cache.get_file(user, _file['file_id'])
            local_path = file.abs_path
        elif 'file_path' in _file:
            # Path given relative to the cloned project checkout.
            local_path = project.abs_path / Path(_file['file_path'])

        if not local_path or not local_path.exists():
            return error_response(
                INVALID_PARAMS_ERROR_CODE,
                'invalid file reference: {0}'.format(json.dumps(_file)))

        ctx['commit_message'] += ' {0}'.format(local_path.name)
        local_paths.append(str(local_path))

    if local_paths:
        # Only locally available files are added synchronously; remote
        # URLs were already dispatched to the job queue above.
        with chdir(project.abs_path):
            add_file(local_paths,
                     ctx['short_name'],
                     create=ctx['create_dataset'],
                     force=ctx['force'],
                     commit_message=ctx['commit_message'])

        try:
            _, ctx['remote_branch'] = repo_sync(
                Repo(project.abs_path), remote='origin')
        except GitCommandError:
            return error_response(INTERNAL_FAILURE_ERROR_CODE,
                                  'repo sync failed')

    return result_response(DatasetAddResponseRPC(), ctx)
def test_unlink_default(directory_tree, client):
    """Unlinking without include/exclude filters raises ParameterError."""
    source_path = directory_tree.join("dir2").strpath

    with chdir(client.path):
        create_dataset("dataset")
        add_file([source_path], "dataset")

        with pytest.raises(ParameterError):
            file_unlink("dataset", (), ())
def add_file_to_dataset_view(user, cache):
    """Add the uploaded file to cloned repository.

    Resolves each requested file (either a cached upload via ``file_id``
    or a project-relative ``file_path``), adds them to the dataset and
    syncs the repository; returns a JSON-RPC style response or error.
    """
    ctx = DatasetAddRequest().load(request.json)
    project = cache.get_project(user, ctx['project_id'])
    project_path = make_project_path(user, project)

    if not project_path:
        return jsonify(
            error={
                'code': INVALID_PARAMS_ERROR_CODE,
                'message': 'invalid project_id: {0}'.format(ctx['project_id']),
            })

    # Fall back to a default commit message if the client sent none.
    if not ctx['commit_message']:
        ctx['commit_message'] = 'service: dataset add {0}'.format(
            ctx['dataset_name'])

    local_paths = []
    for _file in ctx['files']:
        local_path = None

        if 'file_id' in _file:
            # File previously uploaded into the service cache.
            file = cache.get_file(user, _file['file_id'])
            local_path = make_file_path(user, file)
        elif 'file_path' in _file:
            # Path given relative to the cloned project checkout.
            local_path = project_path / Path(_file['file_path'])

        if not local_path or not local_path.exists():
            # BUG FIX: the previous message formatting called
            # ``local_path.relative_to(project_path)``, which raised
            # AttributeError when ``local_path`` was None (an entry with
            # neither 'file_id' nor 'file_path') and ValueError for cached
            # files outside the project tree. Report the raw request entry
            # instead, which is always available.
            return jsonify(
                error={
                    'code': INVALID_PARAMS_ERROR_CODE,
                    'message': 'invalid file reference: {0}'.format(_file)
                })

        ctx['commit_message'] += ' {0}'.format(local_path.name)
        local_paths.append(str(local_path))

    with chdir(project_path):
        add_file(local_paths,
                 ctx['dataset_name'],
                 create=ctx['create_dataset'],
                 commit_message=ctx['commit_message'])

    # A falsy return value from repo_sync is treated as a sync failure.
    if not repo_sync(project_path):
        return jsonify(error={
            'code': INTERNAL_FAILURE_ERROR_CODE,
            'message': 'repo sync failed'
        })

    return jsonify(DatasetAddResponseRPC().load(
        {'result': DatasetAddResponse().load(ctx, unknown=EXCLUDE)}))
def test_list_files_default(project, tmpdir):
    """Test a default file listing."""
    dataset_name = "ds1"
    create_dataset(
        dataset_name,
        title="",
        description="",
        creators=[],
        commit_message="my awesome dataset",
    )

    data_file = tmpdir / Path("somefile")
    data_file.write_text("1,2,3", encoding="utf-8")
    add_file([str(data_file)], dataset_name)

    listed = list_files(datasets=[dataset_name])
    assert isinstance(listed, list)

    listed_names = [entry.name for entry in listed]
    assert "somefile" in listed_names
def add(name, urls, link, force, create, sources, destination, ref):
    """Add data to a dataset."""
    # Progress-bar context shown while URLs are being processed.
    url_progress = partial(progressbar, label='Adding data to dataset')
    add_file(
        urls=urls,
        name=name,
        link=link,
        force=force,
        create=create,
        sources=sources,
        destination=destination,
        ref=ref,
        urlscontext=url_progress,
    )
def test_list_files_default(project, tmpdir):
    """Test a default file listing."""
    create_dataset(
        'ds1',
        title='',
        description='',
        creators=[],
        commit_message='my awesome dataset',
    )

    # Create a small data file and add it to the dataset.
    data_file = tmpdir / Path('somefile')
    data_file.write_text('1,2,3', encoding='utf-8')
    add_file([str(data_file)], 'ds1')

    listed = list_files(datasets=['ds1'])
    assert isinstance(listed, list)

    names = [entry.name for entry in listed]
    assert 'somefile' in names
def add_file_to_dataset_view(user, cache):
    """Add the uploaded file to cloned repository."""
    ctx = DatasetAddRequest().load(request.json)
    user = cache.ensure_user(user)
    project = cache.get_project(user, ctx['project_id'])

    # The project must already be cloned into the cache on disk.
    if not project.abs_path.exists():
        return error_response(
            INVALID_PARAMS_ERROR_CODE,
            'invalid project_id: {0}'.format(ctx['project_id']))

    # Fall back to a default commit message if the client sent none.
    if not ctx['commit_message']:
        ctx['commit_message'] = 'service: dataset add {0}'.format(
            ctx['dataset_name'])

    local_paths = []
    for _file in ctx['files']:
        local_path = None

        if 'file_id' in _file:
            # File previously uploaded into the service cache.
            file = cache.get_file(user, _file['file_id'])
            local_path = file.abs_path
        elif 'file_path' in _file:
            # Path given relative to the cloned project checkout.
            local_path = project.abs_path / Path(_file['file_path'])

        if not local_path or not local_path.exists():
            return error_response(
                INVALID_PARAMS_ERROR_CODE,
                'invalid file reference: {0}'.format(json.dumps(_file)))

        ctx['commit_message'] += ' {0}'.format(local_path.name)
        local_paths.append(str(local_path))

    with chdir(project.abs_path):
        add_file(local_paths,
                 ctx['dataset_name'],
                 create=ctx['create_dataset'],
                 commit_message=ctx['commit_message'])

    # A falsy return value from repo_sync is treated as a sync failure.
    if not repo_sync(project.abs_path):
        return error_response(INTERNAL_FAILURE_ERROR_CODE,
                              'repo sync failed')

    return result_response(DatasetAddResponseRPC(), ctx)
def add(name, urls, external, force, overwrite, create, sources, destination, ref):
    """Add data to a dataset."""
    # Progress-bar context shown while URLs are being processed.
    urls_progress = partial(progressbar, label="Adding data to dataset")

    add_file(
        urls=urls,
        name=name,
        external=external,
        force=force,
        overwrite=overwrite,
        create=create,
        sources=sources,
        destination=destination,
        ref=ref,
        urlscontext=urls_progress,
        progress=_DownloadProgressbar,
        interactive=True,
    )

    click.secho("OK", fg="green")
def add(
    short_name, urls, link, external, force, create, sources, destination, ref
):
    """Add data to a dataset."""
    # Progress-bar context shown while URLs are being processed.
    urls_progress = partial(progressbar, label='Adding data to dataset')

    add_file(
        urls=urls,
        short_name=short_name,
        link=link,
        external=external,
        force=force,
        create=create,
        sources=sources,
        destination=destination,
        ref=ref,
        urlscontext=urls_progress,
        progress=_DownloadProgressbar,
        interactive=True,
    )

    click.secho('OK', fg='green')
def dataset_add_remote_file(cache, user, user_job_id, project_id, create_dataset, commit_message, short_name, url):
    """Add a remote file to a specified dataset.

    Runs as a background job: marks the cached job record in-progress,
    adds the URL(s) to the dataset inside the cloned project, syncs the
    repository with its ``origin`` remote and records the remote branch
    on the job record. Any failure marks the job as failed instead of
    propagating.

    :param cache: service cache used to resolve user, job and project.
    :param user: raw user data; normalized via ``cache.ensure_user``.
    :param user_job_id: identifier of the job record to update.
    :param project_id: identifier of the cached (cloned) project.
    :param create_dataset: whether to create the dataset if missing.
    :param commit_message: commit message for the dataset add.
    :param short_name: dataset short name.
    :param url: a single URL or a list of URLs to add.
    """
    user = cache.ensure_user(user)
    user_job = cache.get_job(user, user_job_id)
    project = cache.get_project(user, project_id)

    try:
        user_job.in_progress()

        with chdir(project.abs_path):
            # A single URL may arrive bare or already wrapped in a list.
            urls = url if isinstance(url, list) else [url]

            add_file(urls, short_name, create=create_dataset, commit_message=commit_message)

            _, remote_branch = repo_sync(Repo(project.abs_path), remote='origin')
            user_job.update_extras('remote_branch', remote_branch)

            user_job.complete()
    except BaseException as e:
        # FIX: the former clause ``(HTTPError, BaseException,
        # GitCommandError)`` was redundant — BaseException already
        # subsumes the other two. Behaviour is unchanged: every
        # failure marks the job as failed and is swallowed.
        user_job.fail_job(str(e))
def add_file_to_dataset_view(user_data, cache):
    """Add the uploaded file to cloned repository."""
    ctx = DatasetAddRequest().load(request.json)
    user = cache.ensure_user(user_data)
    project = cache.get_project(user, ctx["project_id"])

    # Fall back to a default commit message if the client sent none.
    if not ctx["commit_message"]:
        ctx["commit_message"] = "service: dataset add {0}".format(ctx["name"])

    local_paths = []
    for _file in ctx["files"]:
        local_path = None

        if "file_url" in _file:
            # Remote URLs are handled asynchronously: a cache job record is
            # created and the actual add is enqueued on the datasets queue.
            # NOTE(review): message and URL are concatenated without a
            # separator — presumably intentional, but worth confirming.
            commit_message = "{0}{1}".format(ctx["commit_message"],
                                            _file["file_url"])

            job = cache.make_job(
                user,
                project=project,
                job_data={
                    "renku_op": "dataset_add_remote_file",
                    "client_extras": ctx.get("client_extras")
                },
            )
            # Expose the job id so the client can poll for completion.
            _file["job_id"] = job.job_id

            with enqueue_retry(DATASETS_JOB_QUEUE) as queue:
                queue.enqueue(
                    dataset_add_remote_file,
                    user_data,
                    job.job_id,
                    project.project_id,
                    ctx["create_dataset"],
                    commit_message,
                    ctx["name"],
                    _file["file_url"],
                )
            continue

        if "file_id" in _file:
            # File previously uploaded into the service cache.
            file = cache.get_file(user, _file["file_id"])
            local_path = file.abs_path
        elif "file_path" in _file:
            # Path given relative to the cloned project checkout.
            local_path = project.abs_path / Path(_file["file_path"])

        if not local_path or not local_path.exists():
            return error_response(
                INVALID_PARAMS_ERROR_CODE,
                "invalid file reference: {0}".format(json.dumps(_file)))

        ctx["commit_message"] += " {0}".format(local_path.name)
        local_paths.append(str(local_path))

    if local_paths:
        # Only locally available files are added synchronously; remote
        # URLs were already dispatched to the job queue above.
        with chdir(project.abs_path):
            add_file(
                local_paths,
                ctx["name"],
                create=ctx["create_dataset"],
                force=ctx["force"],
                commit_message=ctx["commit_message"],
            )

        try:
            _, ctx["remote_branch"] = repo_sync(
                Repo(project.abs_path), remote="origin")
        except GitCommandError:
            return error_response(INTERNAL_FAILURE_ERROR_CODE,
                                  "repo sync failed")

    return result_response(DatasetAddResponseRPC(), ctx)