def test_dataset_import_twice_job(doi, svc_client_with_repo):
    """Test that importing the same dataset twice fails."""
    svc_client, headers, project_id, url_components = svc_client_with_repo
    user = {'user_id': headers['Renku-User-Id']}

    payload = {
        'project_id': project_id,
        'dataset_uri': doi,
    }
    response = svc_client.post(
        '/datasets.import',
        data=json.dumps(payload),
        headers=headers,
    )

    assert response
    assert_rpc_response(response)
    assert {'job_id', 'created_at'} == set(response.json['result'].keys())

    dest = make_project_path(
        user, {
            'owner': url_components.owner,
            'name': url_components.name
        }
    )

    old_commit = Repo(dest).head.commit
    job_id = response.json['result']['job_id']

    dataset_import(
        user,
        job_id,
        project_id,
        doi,
    )

    new_commit = Repo(dest).head.commit
    assert old_commit.hexsha != new_commit.hexsha

    # A second import of the same dataset must fail and leave HEAD untouched.
    with pytest.raises(DatasetExistsError):
        dataset_import(
            user,
            job_id,
            project_id,
            doi,
        )

    new_commit2 = Repo(dest).head.commit
    assert new_commit.hexsha == new_commit2.hexsha

    response = svc_client.get(
        f'/jobs/{job_id}',
        data=json.dumps(payload),
        headers=headers,
    )

    assert_rpc_response(response)
    extras = response.json['result']['extras']

    assert 'error' in extras
    assert 'Dataset exists' in extras['error']
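# Hedged sketch (an assumption, not from the source): the assert_rpc_response
# helper used throughout these tests presumably verifies the service's RPC
# envelope along these lines:
def assert_rpc_response(response, with_key='result'):
    """Check that the response is a well-formed RPC envelope (assumed helper)."""
    assert response and 200 == response.status_code
    assert with_key in response.json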
def test_dataset_import_junk_job(doi, expected_err, svc_client_with_repo):
    """Test dataset import with an invalid URI."""
    svc_client, headers, project_id, url_components = svc_client_with_repo
    user = {"user_id": headers["Renku-User-Id"]}

    payload = {
        "project_id": project_id,
        "dataset_uri": doi,
    }
    response = svc_client.post("/datasets.import", data=json.dumps(payload), headers=headers,)

    assert response
    assert_rpc_response(response)
    assert {"job_id", "created_at"} == set(response.json["result"].keys())

    dest = make_project_path(user, {"owner": url_components.owner, "name": url_components.name})

    old_commit = Repo(dest).head.commit
    job_id = response.json["result"]["job_id"]

    with pytest.raises(ParameterError):
        dataset_import(
            user, job_id, project_id, doi,
        )

    new_commit = Repo(dest).head.commit
    assert old_commit.hexsha == new_commit.hexsha

    response = svc_client.get(f"/jobs/{job_id}", data=json.dumps(payload), headers=headers,)

    assert_rpc_response(response)
    extras = response.json["result"]["extras"]

    assert "error" in extras
    assert expected_err in extras["error"]
def test_dataset_import_job(doi, svc_client_with_repo):
    """Test dataset import via doi."""
    svc_client, headers, project_id, url_components = svc_client_with_repo
    user = {"user_id": headers["Renku-User-Id"]}

    payload = {
        "project_id": project_id,
        "dataset_uri": doi,
    }
    response = svc_client.post("/datasets.import", data=json.dumps(payload), headers=headers,)

    assert response
    assert_rpc_response(response)
    assert {"job_id", "created_at"} == set(response.json["result"].keys())

    dest = make_project_path(user, {"owner": url_components.owner, "name": url_components.name})

    old_commit = Repo(dest).head.commit
    job_id = response.json["result"]["job_id"]

    dataset_import(
        user, job_id, project_id, doi,
    )

    new_commit = Repo(dest).head.commit
    assert old_commit.hexsha != new_commit.hexsha
    assert f"service: dataset import {doi}" == new_commit.message

    response = svc_client.get(f"/jobs/{job_id}", headers=headers,)

    assert response
    assert_rpc_response(response)
    assert "COMPLETED" == response.json["result"]["state"]
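# The doi/url arguments in these tests are presumably supplied via pytest
# parametrization, e.g. (the value below is illustrative, not from the source):
#
#     @pytest.mark.parametrize('doi', ['10.5281/zenodo.3363060'])
#     def test_dataset_import_job(doi, svc_client_with_repo):
#         ...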
def _project_clone(cache, user_data, project_data):
    """Clones the project for a given user."""
    local_path = make_project_path(user_data, project_data)
    user = cache.ensure_user(user_data)

    if local_path.exists():
        shutil.rmtree(str(local_path))

        for project in cache.get_projects(user):
            if project.git_url == project_data["git_url"]:
                project.delete()

    local_path.mkdir(parents=True, exist_ok=True)
    repo = project_clone(
        project_data["url_with_auth"],
        local_path,
        depth=project_data["depth"] if project_data["depth"] != 0 else None,
        raise_git_except=True,
        config={
            "user.name": project_data["fullname"],
            "user.email": project_data["email"],
        },
        checkout_rev=project_data["ref"],
    )

    service_log.debug(f"project successfully cloned: {repo}")
    service_log.debug(f"project folder exists: {local_path.exists()}")

    project = cache.make_project(user, project_data)
    return project
def create_dataset_view(user, cache):
    """Create a new dataset in a project."""
    ctx = DatasetCreateRequest().load(request.json)
    project = cache.get_project(user, ctx['project_id'])
    project_path = make_project_path(user, project)

    if not project_path:
        return jsonify(
            error={
                'code': INVALID_PARAMS_ERROR_CODE,
                'message': 'invalid project_id argument',
            })

    with chdir(project_path):
        create_dataset(
            ctx['dataset_name'],
            commit_message=ctx['commit_message'],
            creators=ctx.get('creators'),
            description=ctx.get('description'),
        )

    if not repo_sync(project_path):
        return jsonify(
            error={
                'code': INTERNAL_FAILURE_ERROR_CODE,
                'reason': 'push to remote failed silently - try again'
            })

    return jsonify(DatasetCreateResponseRPC().load(
        {'result': DatasetCreateResponse().load(ctx, unknown=EXCLUDE)}))
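# Hedged sketch (assumed behavior, not the source implementation): repo_sync
# presumably pushes the service commit back to the project's remote and
# reports success, roughly:
from git import Repo

def repo_sync(repo_path, remote='origin'):
    """Push local commits to the remote; return True on success (assumed)."""
    try:
        Repo(str(repo_path)).remote(remote).push()
        return True
    except Exception:
        return False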
def project_clone(user, cache):
    """Clone a remote repository."""
    # Merge the request payload with the user identity; user keys take
    # precedence, mirroring the original in-place dict update.
    ctx = ProjectCloneContext().load(
        {**request.json, **user},
        unknown=EXCLUDE,
    )

    local_path = make_project_path(user, ctx)
    user = cache.ensure_user(user)

    if local_path.exists():
        shutil.rmtree(str(local_path))

        for project in cache.get_projects(user):
            if project.git_url == ctx['git_url']:
                project.delete()

    local_path.mkdir(parents=True, exist_ok=True)
    renku_clone(
        ctx['url_with_auth'],
        local_path,
        depth=ctx['depth'],
        raise_git_except=True,
        config={
            'user.name': ctx['fullname'],
            'user.email': ctx['email'],
        })

    project = cache.make_project(user, ctx)
    return result_response(ProjectCloneResponseRPC(), project)
def test_dataset_project_lock(doi, svc_client_with_repo):
    """Test dataset project lock."""
    svc_client, headers, project_id, url_components = svc_client_with_repo
    user = {'user_id': headers['Renku-User-Id']}

    payload = {
        'project_id': project_id,
        'dataset_uri': doi,
    }
    response = svc_client.post(
        '/datasets.import',
        data=json.dumps(payload),
        headers=headers,
    )

    assert response
    assert_rpc_response(response)
    assert {'job_id', 'created_at'} == set(response.json['result'].keys())

    dest = make_project_path(user, {
        'owner': url_components.owner,
        'name': url_components.name
    })

    old_commit = Repo(dest).head.commit

    cache_project_cleanup()

    new_commit = Repo(dest).head.commit
    assert old_commit.hexsha == new_commit.hexsha
    assert dest.exists() and [file for file in dest.glob('*')]
def add_file_to_dataset_view(user, cache):
    """Add the uploaded file to cloned repository."""
    ctx = DatasetAddRequest().load(request.json)
    project = cache.get_project(user, ctx['project_id'])
    project_path = make_project_path(user, project)

    if not project_path:
        return jsonify(
            error={
                'code': INVALID_PARAMS_ERROR_CODE,
                'message': 'invalid project_id: {0}'.format(ctx['project_id']),
            })

    if not ctx['commit_message']:
        ctx['commit_message'] = 'service: dataset add {0}'.format(
            ctx['dataset_name'])

    local_paths = []
    for _file in ctx['files']:
        local_path = None

        if 'file_id' in _file:
            file = cache.get_file(user, _file['file_id'])
            local_path = make_file_path(user, file)
        elif 'file_path' in _file:
            local_path = project_path / Path(_file['file_path'])

        if not local_path or not local_path.exists():
            # Guard: local_path is None when a file entry carries neither
            # file_id nor file_path, so fall back to reporting the raw entry.
            return jsonify(
                error={
                    'code': INVALID_PARAMS_ERROR_CODE,
                    'message': 'invalid file reference: {0}'.format(
                        local_path.relative_to(project_path)
                        if local_path else _file)
                })

        ctx['commit_message'] += ' {0}'.format(local_path.name)
        local_paths.append(str(local_path))

    with chdir(project_path):
        add_file(
            local_paths,
            ctx['dataset_name'],
            create=ctx['create_dataset'],
            commit_message=ctx['commit_message'])

    if not repo_sync(project_path):
        return jsonify(
            error={
                'code': INTERNAL_FAILURE_ERROR_CODE,
                'message': 'repo sync failed'
            })

    return jsonify(DatasetAddResponseRPC().load(
        {'result': DatasetAddResponse().load(ctx, unknown=EXCLUDE)}))
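# Hedged sketch: chdir is assumed to be a plain working-directory context
# manager, since the wrapped renku-core commands operate on the current
# directory:
import contextlib
import os

@contextlib.contextmanager
def chdir(path):
    """Temporarily switch the working directory (assumed helper)."""
    cwd = os.getcwd()
    os.chdir(str(path))
    try:
        yield
    finally:
        os.chdir(cwd)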
def test_dataset_url_import_job(url, svc_client_with_repo):
    """Test dataset import via url."""
    svc_client, headers, project_id, url_components = svc_client_with_repo
    user = {'user_id': headers['Renku-User-Id']}

    payload = {
        'project_id': project_id,
        'dataset_uri': url,
    }
    response = svc_client.post(
        '/datasets.import',
        data=json.dumps(payload),
        headers=headers,
    )

    assert response
    assert_rpc_response(response)
    assert {'job_id', 'created_at'} == set(response.json['result'].keys())

    dest = make_project_path(
        user, {
            'owner': url_components.owner,
            'name': url_components.name
        }
    )

    old_commit = Repo(dest).head.commit
    job_id = response.json['result']['job_id']

    dataset_import(
        user,
        job_id,
        project_id,
        url,
    )

    new_commit = Repo(dest).head.commit
    assert old_commit.hexsha != new_commit.hexsha
    assert f'service: dataset import {url}' == new_commit.message

    response = svc_client.get(
        f'/jobs/{job_id}',
        headers=headers,
    )

    assert response
    assert_rpc_response(response)
    assert 'COMPLETED' == response.json['result']['state']
def list_datasets_view(user, cache):
    """List all datasets in project."""
    req = DatasetListRequest().load(request.args)
    project = cache.get_project(user, req['project_id'])
    project_path = make_project_path(user, project)

    if not project_path:
        return jsonify(
            error={
                'code': INVALID_PARAMS_ERROR_CODE,
                'reason': 'invalid project_id argument',
            })

    with chdir(project_path):
        datasets = [
            DatasetDetails().load(ds, unknown=EXCLUDE)
            # TODO: fix core interface to address this issue (add ticket ref)
            for ds in json.loads(dataset_parent(None, 'data', 'json-ld'))
        ]

    response = DatasetListResponse().load({'datasets': datasets})
    return jsonify(DatasetListResponseRPC().load({'result': response}))
def test_dataset_add_remote_file(url, svc_client_with_repo):
    """Test adding a remote file to a dataset."""
    svc_client, headers, project_id, url_components = svc_client_with_repo
    user = {'user_id': headers['Renku-User-Id']}

    payload = {
        'project_id': project_id,
        'short_name': uuid.uuid4().hex,
        'create_dataset': True,
        'files': [{
            'file_url': url
        }]
    }
    response = svc_client.post(
        '/datasets.add',
        data=json.dumps(payload),
        headers=headers,
    )

    assert response
    assert_rpc_response(response)
    assert {'files', 'short_name',
            'project_id'} == set(response.json['result'].keys())

    dest = make_project_path(user, {
        'owner': url_components.owner,
        'name': url_components.name
    })

    old_commit = Repo(dest).head.commit
    job_id = response.json['result']['files'][0]['job_id']

    commit_message = 'service: dataset add remote file'
    dataset_add_remote_file(
        user, job_id, project_id, True, commit_message,
        payload['short_name'], url
    )

    new_commit = Repo(dest).head.commit
    assert old_commit.hexsha != new_commit.hexsha
    assert commit_message == new_commit.message
def test_dataset_add_remote_file(url, svc_client_with_repo):
    """Test adding a remote file to a dataset."""
    svc_client, headers, project_id, url_components = svc_client_with_repo
    user = {"user_id": headers["Renku-User-Id"]}

    payload = {
        "project_id": project_id,
        "name": uuid.uuid4().hex,
        "create_dataset": True,
        "files": [{"file_url": url}],
    }
    response = svc_client.post("/datasets.add", data=json.dumps(payload), headers=headers,)

    assert response
    assert_rpc_response(response)
    assert {"files", "name", "project_id"} == set(response.json["result"].keys())

    dest = make_project_path(user, {"owner": url_components.owner, "name": url_components.name})

    old_commit = Repo(dest).head.commit
    job_id = response.json["result"]["files"][0]["job_id"]

    commit_message = "service: dataset add remote file"
    dataset_add_remote_file(user, job_id, project_id, True, commit_message, payload["name"], url)

    new_commit = Repo(dest).head.commit
    assert old_commit.hexsha != new_commit.hexsha
    assert commit_message == new_commit.message
def list_dataset_files_view(user, cache):
    """List files in a dataset."""
    ctx = DatasetFilesListRequest().load(request.args)
    project = cache.get_project(user, ctx['project_id'])
    project_path = make_project_path(user, project)

    if not project_path:
        return jsonify(
            error={
                'code': INVALID_PARAMS_ERROR_CODE,
                'reason': 'invalid project_id argument',
            })

    with chdir(project_path):
        dataset_files = json.loads(
            # TODO: fix core interface to address this issue (add ticket ref)
            list_files(ctx['dataset_name'], None, None, None, 'json-ld'))

    ctx['files'] = [
        DatasetFileDetails().load(ds, unknown=EXCLUDE)
        for ds in dataset_files
    ]

    response = DatasetFilesListResponse().load(ctx, unknown=EXCLUDE)
    return jsonify(DatasetFilesListResponseRPC().load({'result': response}))
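# Hedged sketch: make_project_path presumably maps user and project metadata
# to the on-disk cache location; the root and layout below are assumptions,
# not the source implementation:
from pathlib import Path

CACHE_PROJECTS_PATH = Path('/tmp/renku/projects')  # illustrative root

def make_project_path(user, project):
    """Compose the cached project path, or None if metadata is missing (assumed)."""
    try:
        return (
            CACHE_PROJECTS_PATH / user['user_id'] /
            project['owner'] / project['name']
        )
    except (KeyError, TypeError):
        return None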