def add_file_to_dataset_view(user_data, cache):
    """Add the uploaded file to cloned repository."""
    ctx = DatasetAddRequest().load(request.json)
    user = cache.ensure_user(user_data)
    project = cache.get_project(user, ctx['project_id'])

    if not ctx['commit_message']:
        ctx['commit_message'] = 'service: dataset add {0}'.format(
            ctx['short_name'])

    local_paths = []
    for _file in ctx['files']:
        local_path = None

        if 'file_url' in _file:
            commit_message = '{0}{1}'.format(ctx['commit_message'],
                                             _file['file_url'])

            job = cache.make_job(user)
            _file['job_id'] = job.job_id

            with enqueue_retry(DATASETS_JOB_QUEUE) as queue:
                queue.enqueue(dataset_add_remote_file, user_data, job.job_id,
                              project.project_id, ctx['create_dataset'],
                              commit_message, ctx['short_name'],
                              _file['file_url'])
            continue

        if 'file_id' in _file:
            file = cache.get_file(user, _file['file_id'])
            local_path = file.abs_path

        elif 'file_path' in _file:
            local_path = project.abs_path / Path(_file['file_path'])

        if not local_path or not local_path.exists():
            return error_response(
                INVALID_PARAMS_ERROR_CODE,
                'invalid file reference: {0}'.format(json.dumps(_file)))

        ctx['commit_message'] += ' {0}'.format(local_path.name)
        local_paths.append(str(local_path))

    if local_paths:
        with chdir(project.abs_path):
            add_file(local_paths,
                     ctx['short_name'],
                     create=ctx['create_dataset'],
                     force=ctx['force'],
                     commit_message=ctx['commit_message'])

        try:
            _, ctx['remote_branch'] = repo_sync(Repo(project.abs_path),
                                                remote='origin')
        except GitCommandError:
            return error_response(INTERNAL_FAILURE_ERROR_CODE,
                                  'repo sync failed')

    return result_response(DatasetAddResponseRPC(), ctx)
def decorated_function(*args, **kwargs):
    """Represents decorated function."""
    try:
        return f(*args, **kwargs)
    except HTTPException as e:  # handle general werkzeug exception
        return error_response(e.code, e.description)
    except (Exception, BaseException, OSError, IOError) as e:
        internal_error = 'internal error'
        if hasattr(e, 'stderr'):
            internal_error += ': {0}'.format(' '.join(
                e.stderr.strip().split('\n')))

        return error_response(INTERNAL_FAILURE_ERROR_CODE, internal_error)
def import_dataset_view(user_data, cache):
    """Import a dataset view."""
    user = cache.ensure_user(user_data)
    ctx = DatasetImportRequest().load(request.json)
    project = cache.get_project(user, ctx['project_id'])

    if project is None or project.abs_path is False:
        return error_response(
            INVALID_PARAMS_ERROR_CODE,
            'invalid project_id: {0}'.format(ctx['project_id']))

    user_job = {
        'job_id': uuid.uuid4().hex,
        'state': USER_JOB_STATE_ENQUEUED,
    }
    job = cache.make_job(user, user_job)

    with enqueue_retry(DATASETS_JOB_QUEUE) as queue:
        queue.enqueue(
            dataset_import,
            user_data,
            user_job['job_id'],
            ctx['project_id'],
            ctx['dataset_uri'],
            short_name=ctx.get('short_name'),
            extract=ctx.get('extract', False),
            timeout=int(os.getenv('WORKER_DATASET_JOBS_TIMEOUT', 1800)),
            result_ttl=int(os.getenv('WORKER_DATASET_JOBS_RESULT_TTL', 500)))

    return result_response(DatasetImportResponseRPC(), job)
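For reference, a hypothetical JSON payload for this view, built only from the fields the handler and its enqueue call read above (`project_id`, `dataset_uri`, and the optional `short_name` and `extract`); all values are placeholders, not confirmed by the source:

payload = {
    'project_id': '<project-id-from-an-earlier-clone>',
    'dataset_uri': '10.5281/zenodo.123456',  # placeholder DOI/URI
    'short_name': 'my-dataset',              # optional
    'extract': False,                        # optional, defaults to False
}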
def add_file_to_dataset_view(user, cache):
    """Add the uploaded file to cloned repository."""
    ctx = DatasetAddRequest().load(request.json)
    user = cache.ensure_user(user)
    project = cache.get_project(user, ctx['project_id'])

    if not project.abs_path.exists():
        return error_response(
            INVALID_PARAMS_ERROR_CODE,
            'invalid project_id: {0}'.format(ctx['project_id']))

    if not ctx['commit_message']:
        ctx['commit_message'] = 'service: dataset add {0}'.format(
            ctx['dataset_name'])

    local_paths = []
    for _file in ctx['files']:
        local_path = None

        if 'file_id' in _file:
            file = cache.get_file(user, _file['file_id'])
            local_path = file.abs_path

        elif 'file_path' in _file:
            local_path = project.abs_path / Path(_file['file_path'])

        if not local_path or not local_path.exists():
            return error_response(
                INVALID_PARAMS_ERROR_CODE,
                'invalid file reference: {0}'.format(json.dumps(_file)))

        ctx['commit_message'] += ' {0}'.format(local_path.name)
        local_paths.append(str(local_path))

    with chdir(project.abs_path):
        add_file(local_paths,
                 ctx['dataset_name'],
                 create=ctx['create_dataset'],
                 commit_message=ctx['commit_message'])

    if not repo_sync(project.abs_path):
        return error_response(INTERNAL_FAILURE_ERROR_CODE,
                              'repo sync failed')

    return result_response(DatasetAddResponseRPC(), ctx)
def decorated_function(*args, **kwargs):
    """Represents decorated function."""
    try:
        return f(*args, **kwargs)
    except HTTPException as e:  # handle general werkzeug exception
        capture_exception(e)
        return error_response(e.code, e.description)
    except (Exception, BaseException, OSError, IOError) as e:
        capture_exception(e)

        internal_error = "internal error"
        if hasattr(e, "stderr"):
            internal_error += ": {0}".format(" ".join(
                e.stderr.strip().split("\n")))

        return error_response(INTERNAL_FAILURE_ERROR_CODE, internal_error)
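Both `decorated_function` variants above are inner closures; the enclosing decorator that binds `f` is not shown. A minimal sketch of that wrapper, assuming only the standard library (`handle_base_except` is an illustrative name, not confirmed by the source):

import functools


def handle_base_except(f):
    """Wrap ``f`` so view errors become JSON error responses (sketch)."""
    @functools.wraps(f)  # preserve the wrapped view's name and docstring
    def decorated_function(*args, **kwargs):
        ...  # exception-handling body exactly as shown above
    return decorated_function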
def remove_dataset_view(user, cache):
    """Remove a dataset from a project."""
    ctx = DatasetRemoveRequest().load(request.json)
    project = cache.get_project(cache.ensure_user(user), ctx["project_id"])

    if not project.abs_path.exists():
        return error_response(INVALID_PARAMS_ERROR_CODE,
                              "invalid project_id argument")

    with chdir(project.abs_path):
        dataset_remove([ctx["name"]], commit_message=ctx["commit_message"])

    try:
        _, ctx["remote_branch"] = repo_sync(Repo(project.abs_path),
                                            remote="origin")
    except GitCommandError:
        return error_response(INTERNAL_FAILURE_ERROR_CODE, "repo sync failed")

    return result_response(DatasetRemoveResponseRPC(), ctx)
def test_error_response(svc_client):
    """Test error response utility."""
    err_code = 0
    err_reason = 'test error'
    response = error_response(err_code, err_reason).json

    assert response
    assert {'error'} == set(response.keys())
    assert {'code', 'reason'} == set(response['error'].keys())
    assert err_code == response['error']['code']
    assert err_reason == response['error']['reason']
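The test pins down the envelope `error_response` must return: a response whose JSON body is `{'error': {'code': ..., 'reason': ...}}`. A minimal Flask-style sketch consistent with those assertions (the use of `jsonify` is an assumption; only the payload shape is confirmed by the test):

from flask import jsonify


def error_response(code, reason):
    """Build the error envelope asserted by the test above (sketch)."""
    return jsonify({'error': {'code': code, 'reason': reason}})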
def list_dataset_files_view(user, cache):
    """List files in a dataset."""
    ctx = DatasetFilesListRequest().load(request.args)
    project = cache.get_project(cache.ensure_user(user), ctx['project_id'])

    if not project.abs_path.exists():
        return error_response(INVALID_PARAMS_ERROR_CODE,
                              'invalid project_id argument')

    with chdir(project.abs_path):
        ctx['files'] = list_files(datasets=[ctx['dataset_name']])

    return result_response(DatasetFilesListResponseRPC(), ctx)
def create_dataset_view(user, cache):
    """Create a new dataset in a project."""
    ctx = DatasetCreateRequest().load(request.json)
    project = cache.get_project(cache.ensure_user(user), ctx['project_id'])

    if not project.abs_path.exists():
        return error_response(INVALID_PARAMS_ERROR_CODE,
                              'invalid project_id argument')

    with chdir(project.abs_path):
        create_dataset(
            ctx['dataset_name'],
            commit_message=ctx['commit_message'],
            creators=ctx.get('creators'),
            description=ctx.get('description'),
        )

    if not repo_sync(project.abs_path):
        return error_response(INTERNAL_FAILURE_ERROR_CODE,
                              'push to remote failed silently - try again')

    return result_response(DatasetCreateResponseRPC(), ctx)
def create_dataset_view(user, cache):
    """Create a new dataset in a project."""
    ctx = DatasetCreateRequest().load(request.json)
    project = cache.get_project(cache.ensure_user(user), ctx['project_id'])

    with chdir(project.abs_path):
        create_dataset(ctx['short_name'],
                       title=ctx.get('name'),
                       creators=ctx.get('creator'),
                       description=ctx.get('description'),
                       keywords=ctx.get('keywords'),
                       commit_message=ctx['commit_message'])

    try:
        _, ctx['remote_branch'] = repo_sync(Repo(project.abs_path),
                                            remote='origin')
    except GitCommandError:
        return error_response(INTERNAL_FAILURE_ERROR_CODE,
                              'push to remote failed silently - try again')

    return result_response(DatasetCreateResponseRPC(), ctx)
def create_dataset_view(user, cache):
    """Create a new dataset in a project."""
    ctx = DatasetCreateRequest().load(request.json)
    project = cache.get_project(cache.ensure_user(user), ctx["project_id"])

    with chdir(project.abs_path):
        create_dataset(
            ctx["name"],
            title=ctx.get("title"),
            creators=ctx.get("creators"),
            description=ctx.get("description"),
            keywords=ctx.get("keywords"),
            commit_message=ctx["commit_message"],
        )

    try:
        _, ctx["remote_branch"] = repo_sync(Repo(project.abs_path),
                                            remote="origin")
    except GitCommandError:
        return error_response(INTERNAL_FAILURE_ERROR_CODE, "repo sync failed")

    return result_response(DatasetCreateResponseRPC(), ctx)
def add_file_to_dataset_view(user_data, cache):
    """Add the uploaded file to cloned repository."""
    ctx = DatasetAddRequest().load(request.json)
    user = cache.ensure_user(user_data)
    project = cache.get_project(user, ctx["project_id"])

    if not ctx["commit_message"]:
        ctx["commit_message"] = "service: dataset add {0}".format(ctx["name"])

    local_paths = []
    for _file in ctx["files"]:
        local_path = None

        if "file_url" in _file:
            commit_message = "{0}{1}".format(ctx["commit_message"],
                                             _file["file_url"])

            job = cache.make_job(
                user,
                project=project,
                job_data={
                    "renku_op": "dataset_add_remote_file",
                    "client_extras": ctx.get("client_extras")
                },
            )
            _file["job_id"] = job.job_id

            with enqueue_retry(DATASETS_JOB_QUEUE) as queue:
                queue.enqueue(
                    dataset_add_remote_file,
                    user_data,
                    job.job_id,
                    project.project_id,
                    ctx["create_dataset"],
                    commit_message,
                    ctx["name"],
                    _file["file_url"],
                )
            continue

        if "file_id" in _file:
            file = cache.get_file(user, _file["file_id"])
            local_path = file.abs_path

        elif "file_path" in _file:
            local_path = project.abs_path / Path(_file["file_path"])

        if not local_path or not local_path.exists():
            return error_response(
                INVALID_PARAMS_ERROR_CODE,
                "invalid file reference: {0}".format(json.dumps(_file)))

        ctx["commit_message"] += " {0}".format(local_path.name)
        local_paths.append(str(local_path))

    if local_paths:
        with chdir(project.abs_path):
            add_file(
                local_paths,
                ctx["name"],
                create=ctx["create_dataset"],
                force=ctx["force"],
                commit_message=ctx["commit_message"],
            )

        try:
            _, ctx["remote_branch"] = repo_sync(Repo(project.abs_path),
                                                remote="origin")
        except GitCommandError:
            return error_response(INTERNAL_FAILURE_ERROR_CODE,
                                  "repo sync failed")

    return result_response(DatasetAddResponseRPC(), ctx)
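The loop above accepts three mutually exclusive file references per entry. A hypothetical request body covering all three (values are placeholders; `file_url` entries are handled asynchronously via the enqueued job):

payload = {
    "project_id": "<cached-project-id>",
    "name": "my-dataset",
    "create_dataset": True,
    "force": False,
    "commit_message": "",  # empty -> the default message above is generated
    "files": [
        {"file_id": "<id-of-a-previously-uploaded-file>"},
        {"file_path": "data/local-file.csv"},  # resolved against the project root
        {"file_url": "https://example.com/remote-file.csv"},
    ],
}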
def create_project_from_template(user, cache):
    """Create a new project starting from a target template."""
    ctx = ProjectTemplateRequest().load({
        **user,
        **request.json,
    }, unknown=EXCLUDE)

    # Clone the templates repository and find the target template.
    template_project = _project_clone(user, ctx)
    templates = read_template_manifest(template_project.abs_path)
    template = next(
        (template for template in templates
         if template["folder"] == ctx["identifier"]), None)
    if template is None:
        return error_response(INVALID_PARAMS_ERROR_CODE,
                              "invalid identifier for target repository")

    # Verify that no required template parameters are missing.
    template_parameters = template.get("variables", {})
    provided_parameters = {p["key"]: p["value"] for p in ctx["parameters"]}
    missing_keys = list(template_parameters.keys() -
                        provided_parameters.keys())
    if len(missing_keys) > 0:
        return error_response(INVALID_PARAMS_ERROR_CODE,
                              f"missing parameter: {missing_keys[0]}")

    # Create the new project path.
    new_project_path = make_new_project_path(user, ctx)
    if new_project_path.exists():
        shutil.rmtree(str(new_project_path))
    new_project_path.mkdir(parents=True, exist_ok=True)

    default_metadata = {
        "__template_source__": ctx["git_url"],
        "__template_ref__": ctx["ref"],
        "__template_id__": ctx["identifier"],
        "__namespace__": ctx["project_namespace"],
        "__repository__": ctx["project_repository"],
        "__sanitized_project_name__": ctx["project_name_stripped"],
        "__project_slug__": ctx["project_slug"],
    }

    # Prepare data and initialize the new project.
    source_path = template_project.abs_path / ctx["identifier"]
    git_user = {"email": user["email"], "name": user["fullname"]}
    with chdir(new_project_path):
        create_from_template_local(
            source_path,
            ctx["project_name"],
            provided_parameters,
            default_metadata,
            git_user,
            ctx["url"],
            ctx["ref"],
            "service",
        )
    new_repo_push(new_project_path, ctx["new_project_url_with_auth"])

    resp = {
        "url": ctx["new_project_url"],
        "namespace": ctx["project_namespace"],
        "name": ctx["project_name_stripped"],
    }
    return result_response(ProjectTemplateResponseRPC(), resp)
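A hypothetical input payload for `create_project_from_template`, restricted to fields the handler reads directly; derived values such as `project_name_stripped` and `new_project_url` are presumably computed by `ProjectTemplateRequest` rather than sent by the client (an assumption), and all values below are placeholders:

payload = {
    "identifier": "python-minimal",  # must match a "folder" in the manifest
    "git_url": "<templates-repository-url>",
    "ref": "master",
    "project_name": "My New Project",
    "project_namespace": "<namespace>",
    "project_repository": "<repository-url>",
    "parameters": [  # must cover every key in the template's "variables"
        {"key": "description", "value": "A demo project"},
    ],
}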