def _create_and_associate_reana_user(sender, token=None, response=None,
                                     account_info=None):
    """Create a REANA user and associate it with the given OAuth account info."""
    try:
        user_email = account_info["user"]["email"]
        user_fullname = account_info["user"]["profile"]["full_name"]
        username = account_info["user"]["profile"]["username"]
        search_criteria = dict()
        search_criteria["email"] = user_email
        users = Session.query(User).filter_by(**search_criteria).all()
        if users:
            user = users[0]
        else:
            user_parameters = dict(email=user_email,
                                   full_name=user_fullname,
                                   username=username)
            user = User(**user_parameters)
            Session.add(user)
            Session.commit()
    except (InvalidRequestError, IntegrityError):
        Session.rollback()
        raise ValueError("Could not create user, "
                         "possible constraint violation")
    except Exception:
        raise ValueError("Could not create user")
    return user

def _update_job_cache(msg):
    """Update caching information for finished job."""
    cached_job = Session.query(JobCache).filter_by(
        job_id=msg['caching_info'].get('job_id')).first()
    input_files = []
    if cached_job:
        file_access_times = calculate_file_access_time(
            msg['caching_info'].get('workflow_workspace'))
        for filename in cached_job.access_times:
            if filename in file_access_times:
                input_files.append(filename)
    else:
        return
    cmd = msg['caching_info']['job_spec']['cmd']
    # removes cd to workspace, to be refactored
    clean_cmd = ';'.join(cmd.split(';')[1:])
    msg['caching_info']['job_spec']['cmd'] = clean_cmd
    if 'workflow_workspace' in msg['caching_info']['job_spec']:
        del msg['caching_info']['job_spec']['workflow_workspace']
    input_hash = calculate_job_input_hash(msg['caching_info']['job_spec'],
                                          msg['caching_info']['workflow_json'])
    workspace_hash = calculate_hash_of_dir(
        msg['caching_info'].get('workflow_workspace'), input_files)
    if workspace_hash == -1:
        return
    cached_job.parameters = input_hash
    cached_job.result_path = msg['caching_info'].get('result_path')
    cached_job.workspace_hash = workspace_hash
    Session.add(cached_job)

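# A hedged sketch of the message shape `_update_job_cache` consumes, inferred
# from the key accesses above; all concrete values below are hypothetical.
example_caching_msg = {
    "caching_info": {
        "job_id": "cdcf48b1-c2f3-4693-8230-b066e088c6ac",
        "workflow_workspace": "/var/reana/users/u1/workflows/w1",
        "workflow_json": {"steps": []},
        "job_spec": {
            # the first `;`-separated segment is the `cd <workspace>` prefix
            # that the handler strips before computing the input hash
            "cmd": "cd /var/reana/users/u1/workflows/w1; python analysis.py",
            "workflow_workspace": "/var/reana/users/u1/workflows/w1",
        },
        "result_path": "/var/reana/users/u1/workflows/w1/results",
    }
}
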
def users_create_default(email, password, id_):
    """Create default user.

    This user has the administrator role and can retrieve other user
    information as well as create new users.
    """
    reana_user_characteristics = {
        "id_": id_,
        "email": email,
    }
    try:
        user = User.query.filter_by(**reana_user_characteristics).first()
        if not user:
            reana_user_characteristics["access_token"] = \
                secrets.token_urlsafe(16)
            user = User(**reana_user_characteristics)
            create_user_workspace(user.get_user_workspace())
            Session.add(user)
            Session.commit()
            # create invenio user, passing `confirmed_at` to mark it as confirmed
            register_user(email=email, password=password,
                          confirmed_at=datetime.datetime.now())
        # echo the token from the user object so that this also works
        # when the default user already exists
        click.echo(user.access_token)
    except Exception as e:
        click.echo("Something went wrong: {0}".format(e))
        sys.exit(1)

def _create_and_associate_reana_user(sender, token=None, response=None,
                                     account_info=None):
    """Create a REANA user with a fresh access token from OAuth account info."""
    try:
        user_email = account_info['user']['email']
        user_fullname = account_info['user']['profile']['full_name']
        username = account_info['user']['profile']['username']
        search_criteria = dict()
        search_criteria['email'] = user_email
        users = Session.query(User).filter_by(**search_criteria).all()
        if users:
            user = users[0]
        else:
            user_access_token = secrets.token_urlsafe(16)
            user_parameters = dict(access_token=user_access_token)
            user_parameters['email'] = user_email
            user_parameters['full_name'] = user_fullname
            user_parameters['username'] = username
            user = User(**user_parameters)
            Session.add(user)
            Session.commit()
    except (InvalidRequestError, IntegrityError):
        Session.rollback()
        raise ValueError('Could not create user, '
                         'possible constraint violation')
    except Exception:
        raise ValueError('Could not create user')
    return user

def _update_run_progress(workflow_uuid, msg):
    """Register succeeded Jobs to DB."""
    workflow = Session.query(Workflow).filter_by(id_=workflow_uuid).\
        one_or_none()
    cached_jobs = None
    job_progress = workflow.job_progress
    if "cached" in msg['progress']:
        cached_jobs = msg['progress']['cached']
    for status in PROGRESS_STATUSES:
        if status in msg['progress']:
            previous_status = workflow.job_progress.get(status)
            previous_total = 0
            if previous_status:
                previous_total = previous_status.get('total') or 0
            if status == 'total':
                if previous_total > 0:
                    continue
                else:
                    job_progress['total'] = msg['progress']['total']
            else:
                if previous_status:
                    new_job_ids = \
                        set(previous_status.get('job_ids') or set()) | \
                        set(msg['progress'][status]['job_ids'])
                else:
                    new_job_ids = set(msg['progress'][status]['job_ids'])
                job_progress[status] = {
                    'total': len(new_job_ids),
                    'job_ids': list(new_job_ids)
                }
    workflow.job_progress = job_progress
    flag_modified(workflow, 'job_progress')
    Session.add(workflow)

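# A hedged sketch of the msg["progress"] payload implied by the handler above:
# each status entry carries a running total plus the contributing job ids, and
# the special "total" entry is only written once. All values are hypothetical.
example_progress_msg = {
    "progress": {
        "total": {"total": 3},
        "running": {"total": 1, "job_ids": ["job-2"]},
        "finished": {"total": 1, "job_ids": ["job-1"]},
        "cached": {"total": 0, "job_ids": []},
    }
}
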
def cache_job(self):
    """Cache a job."""
    workflow = Session.query(Workflow).filter_by(
        id_=self.workflow_uuid).one_or_none()
    access_times = calculate_file_access_time(workflow.workspace_path)
    prepared_job_cache = JobCache()
    prepared_job_cache.job_id = self.job_id
    prepared_job_cache.access_times = access_times
    Session.add(prepared_job_cache)
    Session.commit()

def stop_workflow(workflow):
    """Stop a given workflow."""
    if workflow.status == RunStatus.running:
        kwrm = KubernetesWorkflowRunManager(workflow)
        kwrm.stop_batch_workflow_run()
        workflow.status = RunStatus.stopped
        Session.add(workflow)
        Session.commit()
    else:
        message = "Workflow {id_} is not running.".format(id_=workflow.id_)
        raise REANAWorkflowControllerError(message)

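# A minimal usage sketch for `stop_workflow`, assuming a reana_db session is
# available; the workflow uuid is hypothetical and the error import path is
# assumed from the surrounding codebase.
import logging

from reana_db.database import Session
from reana_db.models import Workflow
from reana_workflow_controller.errors import REANAWorkflowControllerError

workflow = Session.query(Workflow).filter_by(
    id_="cdcf48b1-c2f3-4693-8230-b066e088c6ac").one_or_none()
if workflow:
    try:
        stop_workflow(workflow)
    except REANAWorkflowControllerError as error:
        # e.g. the workflow had already finished or was still queued
        logging.warning(str(error))
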
def set_quota_limit(ctx, admin_access_token, emails, resource_name, limit):
    """Set quota limits to the given users per resource."""
    try:
        for email in emails:
            error_msg = None
            user = _get_user_by_criteria(None, email)
            resource = Resource.query.filter_by(
                name=resource_name).one_or_none()
            if not user:
                error_msg = f"ERROR: Provided user {email} does not exist."
            elif not resource:
                error_msg = (
                    "ERROR: Provided resource name does not exist. Available "
                    f"resources are {[resource.name for resource in Resource.query]}."
                )
            if error_msg:
                click.secho(
                    error_msg,
                    fg="red",
                    err=True,
                )
                sys.exit(1)
            user_resource = UserResource.query.filter_by(
                user=user, resource=resource).one_or_none()
            if user_resource:
                user_resource.quota_limit = limit
                Session.add(user_resource)
            else:
                # Create user resource in case there isn't one. Useful for old users.
                user.resources.append(
                    UserResource(
                        user_id=user.id_,
                        resource_id=resource.id_,
                        quota_limit=limit,
                        quota_used=0,
                    ))
        Session.commit()
        click.secho(
            f"Quota limit {limit} for '{resource.name}' successfully set to users {emails}.",
            fg="green",
        )
    except Exception as e:
        logging.debug(traceback.format_exc())
        logging.debug(str(e))
        click.echo(
            click.style("Quota could not be set: \n{}".format(str(e)), fg="red"),
            err=True,
        )

def store_workflow_disk_quota(workflow, bytes_to_sum=None):
    """Update or create disk workflow resource.

    :param workflow: Workflow whose disk resource usage must be calculated.
    :param bytes_to_sum: Amount of bytes to sum to workflow disk quota,
        if None, `du` will be used to recalculate it.
    :type workflow: reana_db.models.Workflow
    :type bytes_to_sum: int
    """
    from reana_commons.errors import REANAMissingWorkspaceError
    from reana_commons.utils import get_disk_usage
    from reana_db.database import Session
    from reana_db.models import ResourceType, WorkflowResource

    def _get_disk_usage_or_zero(workflow):
        """Get disk usage for a workflow if the workspace exists, zero if not."""
        try:
            disk_bytes = get_disk_usage(workflow.workspace_path, summarize=True)
            return int(disk_bytes[0]["size"]["raw"])
        except REANAMissingWorkspaceError:
            return 0

    disk_resource = get_default_quota_resource(ResourceType.disk.name)
    workflow_resource = (
        Session.query(WorkflowResource)
        .filter_by(workflow_id=workflow.id_, resource_id=disk_resource.id_)
        .one_or_none()
    )
    if workflow_resource:
        if bytes_to_sum:
            workflow_resource.quota_used += bytes_to_sum
        else:
            workflow_resource.quota_used = _get_disk_usage_or_zero(workflow)
        Session.commit()
    elif inspect(workflow).persistent:
        workflow_resource = WorkflowResource(
            workflow_id=workflow.id_,
            resource_id=disk_resource.id_,
            quota_used=_get_disk_usage_or_zero(workflow),
        )
        Session.add(workflow_resource)
        Session.commit()
    return workflow_resource

def _import_users(admin_access_token, users_csv_file):
    """Import list of users to database.

    :param admin_access_token: Admin access token.
    :type admin_access_token: str
    :param users_csv_file: CSV file object containing a list of users.
    :type users_csv_file: _io.TextIOWrapper
    """
    admin = User.query.filter_by(id_=ADMIN_USER_ID).one_or_none()
    if admin_access_token != admin.access_token:
        raise ValueError('Admin access token invalid.')
    csv_reader = csv.reader(users_csv_file)
    for row in csv_reader:
        user = User(id_=row[0], email=row[1], access_token=row[2])
        Session.add(user)
    Session.commit()
    Session.remove()

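# A hedged sketch of the CSV layout `_import_users` appears to expect: one
# user per row with columns id, email, access token and no header line.
# The uuids, emails, and tokens below are hypothetical.
import io

users_csv_file = io.StringIO(
    "00000000-0000-0000-0000-000000000001,alice@example.org,token-alice\n"
    "00000000-0000-0000-0000-000000000002,bob@example.org,token-bob\n"
)
_import_users(admin_access_token="admin-token", users_csv_file=users_csv_file)
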
def create_job_in_db(self, backend_job_id):
    """Create job in db."""
    job_db_entry = JobTable(backend_job_id=backend_job_id,
                            workflow_uuid=self.workflow_uuid,
                            status=JobStatus.created.name,
                            compute_backend=self.compute_backend,
                            cvmfs_mounts=self.cvmfs_mounts or '',
                            shared_file_system=self.shared_file_system or False,
                            docker_img=self.docker_img,
                            cmd=json.dumps(self.cmd),
                            env_vars=json.dumps(self.env_vars),
                            deleted=False,
                            job_name=self.job_name,
                            prettified_cmd=self.prettified_cmd)
    Session.add(job_db_entry)
    Session.commit()
    self.job_id = str(job_db_entry.id_)

def _create_user(email, user_access_token, admin_access_token):
    """Create user with provided credentials."""
    try:
        admin = Session.query(User).filter_by(id_=ADMIN_USER_ID).one_or_none()
        if admin_access_token != admin.access_token:
            raise ValueError('Admin access token invalid.')
        if not user_access_token:
            user_access_token = secrets.token_urlsafe(16)
        user_parameters = dict(access_token=user_access_token)
        user_parameters['email'] = email
        user = User(**user_parameters)
        Session.add(user)
        Session.commit()
    except (InvalidRequestError, IntegrityError):
        Session.rollback()
        raise ValueError('Could not create user, '
                         'possible constraint violation')
    return user

def _create_and_associate_reana_user(email, fullname, username):
    """Create a REANA user for the given account details."""
    try:
        search_criteria = dict()
        search_criteria["email"] = email
        users = Session.query(User).filter_by(**search_criteria).all()
        if users:
            user = users[0]
        else:
            user_parameters = dict(email=email,
                                   full_name=fullname,
                                   username=username)
            user = User(**user_parameters)
            Session.add(user)
            Session.commit()
    except (InvalidRequestError, IntegrityError):
        Session.rollback()
        raise ValueError(
            "Could not create user, possible constraint violation")
    except Exception:
        raise ValueError("Could not create user")
    return user

def update_workflow_cpu_quota(workflow) -> int:
    """Update workflow CPU quota based on started and finished/stopped times.

    :return: Workflow running time in milliseconds if workflow has terminated,
        else 0.
    """
    from reana_db.database import Session
    from reana_db.models import (
        ResourceType,
        UserResource,
        WorkflowResource,
    )

    terminated_at = workflow.run_finished_at or workflow.run_stopped_at
    if workflow.run_started_at and terminated_at:
        cpu_time = terminated_at - workflow.run_started_at
        cpu_milliseconds = int(cpu_time.total_seconds() * 1000)
        cpu_resource = get_default_quota_resource(ResourceType.cpu.name)
        # WorkflowResource might exist already if the cluster
        # follows a combined termination + periodic policy (eg. created
        # by the status listener, revisited by the cronjob)
        workflow_resource = WorkflowResource.query.filter_by(
            workflow_id=workflow.id_,
            resource_id=cpu_resource.id_).one_or_none()
        if workflow_resource:
            workflow_resource.quota_used = cpu_milliseconds
        else:
            workflow_resource = WorkflowResource(
                workflow_id=workflow.id_,
                resource_id=cpu_resource.id_,
                quota_used=cpu_milliseconds,
            )
            user_resource_quota = UserResource.query.filter_by(
                user_id=workflow.owner_id,
                resource_id=cpu_resource.id_).first()
            user_resource_quota.quota_used += cpu_milliseconds
            Session.add(workflow_resource)
        Session.commit()
        return cpu_milliseconds
    return 0

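# A worked example of the CPU accounting above: a run that starts at 10:00:00
# and finishes at 10:02:30 is billed 150 seconds, stored as milliseconds.
import datetime

run_started_at = datetime.datetime(2023, 1, 1, 10, 0, 0)
run_finished_at = datetime.datetime(2023, 1, 1, 10, 2, 30)
cpu_milliseconds = int(
    (run_finished_at - run_started_at).total_seconds() * 1000)
assert cpu_milliseconds == 150000
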
def store_workflow_disk_quota(workflow, bytes_to_sum: Optional[int] = None):
    """Update or create disk workflow resource.

    :param workflow: Workflow whose disk resource usage must be calculated.
    :param bytes_to_sum: Amount of bytes to sum to workflow disk quota,
        if None, `du` will be used to recalculate it.
    :type workflow: reana_db.models.Workflow
    :type bytes_to_sum: int
    """
    from reana_db.database import Session
    from reana_db.models import ResourceType, WorkflowResource

    if (ResourceType.disk.name not in WORKFLOW_TERMINATION_QUOTA_UPDATE_POLICY
            and not PERIODIC_RESOURCE_QUOTA_UPDATE_POLICY):
        return

    disk_resource = get_default_quota_resource(ResourceType.disk.name)
    workflow_resource = (Session.query(WorkflowResource).filter_by(
        workflow_id=workflow.id_,
        resource_id=disk_resource.id_).one_or_none())
    if workflow_resource:
        if bytes_to_sum:
            workflow_resource.quota_used += bytes_to_sum
        else:
            workflow_resource.quota_used = get_disk_usage_or_zero(
                workflow.workspace_path)
        Session.commit()
    elif inspect(workflow).persistent:
        workflow_resource = WorkflowResource(
            workflow_id=workflow.id_,
            resource_id=disk_resource.id_,
            quota_used=get_disk_usage_or_zero(workflow.workspace_path),
        )
        Session.add(workflow_resource)
        Session.commit()
    return workflow_resource

def clone_workflow(workflow, reana_spec, restart_type):
    """Create a copy of workflow in DB for restarting."""
    try:
        cloned_workflow = Workflow(
            id_=str(uuid4()),
            name=workflow.name,
            owner_id=workflow.owner_id,
            reana_specification=reana_spec or workflow.reana_specification,
            type_=restart_type or workflow.type_,
            logs="",
            workspace_path=workflow.workspace_path,
            restart=True,
            run_number=workflow.run_number,
        )
        Session.add(cloned_workflow)
        Session.object_session(cloned_workflow).commit()
        return cloned_workflow
    except SQLAlchemyError as e:
        message = "Database connection failed, please retry."
        logging.error(
            f"Error while creating {cloned_workflow.id_}: {message}\n{e}",
            exc_info=True)

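# A minimal usage sketch for `clone_workflow`, assuming `workflow` is an
# existing Workflow row; passing None for both optional arguments keeps the
# original specification and engine type for the restarted run.
cloned = clone_workflow(workflow, reana_spec=None, restart_type=None)
if cloned:
    print(f"Restarted workflow {cloned.name} as {cloned.id_}")
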
def users_create_default(email, id_):
    """Create default user.

    This user has the administrator role and can retrieve other user
    information as well as create new users.
    """
    user_characteristics = {"id_": id_,
                            "email": email,
                            }
    try:
        user = User.query.filter_by(**user_characteristics).first()
        if not user:
            user_characteristics['access_token'] = secrets.token_urlsafe()
            user = User(**user_characteristics)
            create_user_workspace(user.get_user_workspace())
            Session.add(user)
            Session.commit()
            click.echo('Created 1st user with access_token: {}'.
                       format(user_characteristics['access_token']))
    except Exception as e:
        click.echo('Something went wrong: {0}'.format(e))
        sys.exit(1)

def set_quota_limit(ctx, emails, resource_type, resource_name, limit):
    """Set quota limits to the given users per resource."""
    try:
        for email in emails:
            error_msg = None
            resource = None
            user = _get_user_by_criteria(None, email)
            if resource_name:
                resource = Resource.query.filter_by(
                    name=resource_name).one_or_none()
            elif resource_type in ResourceType._member_names_:
                resources = Resource.query.filter_by(type_=resource_type).all()
                if resources and len(resources) > 1:
                    click.secho(
                        f"ERROR: There are more than one `{resource_type}` resource. "
                        "Please provide resource name with `--resource-name` "
                        "option to specify the exact resource.",
                        fg="red",
                        err=True,
                    )
                    sys.exit(1)
                elif resources:
                    # guard against an empty result so that a missing resource
                    # falls through to the error message below
                    resource = resources[0]
            if not user:
                error_msg = f"ERROR: Provided user {email} does not exist."
            elif not resource:
                resources = [
                    f"{resource.type_.name} ({resource.name})"
                    for resource in Resource.query
                ]
                error_msg = (
                    f"ERROR: Provided resource `{resource_name or resource_type}` does not exist. "
                    if resource_name or resource_type
                    else "ERROR: Please provide a resource. ")
                error_msg += f"Available resources are: {', '.join(resources)}."
            if error_msg:
                click.secho(
                    error_msg,
                    fg="red",
                    err=True,
                )
                sys.exit(1)
            user_resource = UserResource.query.filter_by(
                user=user, resource=resource).one_or_none()
            if user_resource:
                user_resource.quota_limit = limit
                Session.add(user_resource)
            else:
                # Create user resource in case there isn't one. Useful for old users.
                user.resources.append(
                    UserResource(
                        user_id=user.id_,
                        resource_id=resource.id_,
                        quota_limit=limit,
                        quota_used=0,
                    ))
        Session.commit()
        click.secho(
            f"Quota limit {limit} for '{resource.type_.name} ({resource.name})' "
            f"successfully set to users {emails}.",
            fg="green",
        )
    except Exception as e:
        logging.debug(traceback.format_exc())
        logging.debug(str(e))
        click.echo(
            click.style("Quota could not be set: \n{}".format(str(e)), fg="red"),
            err=True,
        )

def create_workflow():  # noqa
    r"""Create workflow and its workspace.

    ---
    post:
      summary: Create workflow and its workspace.
      description: >-
        This resource expects all necessary data to represent a workflow so
        it is stored in database and its workspace is created.
      operationId: create_workflow
      produces:
        - application/json
      parameters:
        - name: user
          in: query
          description: Required. UUID of workflow owner.
          required: true
          type: string
        - name: workflow
          in: body
          description: >-
            JSON object including workflow parameters and workflow
            specification in JSON format (`yadageschemas.load()` output)
            with necessary data to instantiate a yadage workflow.
          required: true
          schema:
            type: object
            properties:
              operational_options:
                type: object
                description: Operational options.
              reana_specification:
                type: object
                description: >-
                  Workflow specification in JSON format.
              workflow_name:
                type: string
                description: Workflow name. If empty name will be generated.
              git_data:
                type: object
                description: >-
                  GitLab data.
            required: [reana_specification, workflow_name,
                       operational_options]
      responses:
        201:
          description: >-
            Request succeeded. The workflow has been created along
            with its workspace
          schema:
            type: object
            properties:
              message:
                type: string
              workflow_id:
                type: string
              workflow_name:
                type: string
          examples:
            application/json:
              {
                "message": "Workflow workspace has been created.",
                "workflow_id": "cdcf48b1-c2f3-4693-8230-b066e088c6ac",
                "workflow_name": "mytest-1"
              }
        400:
          description: >-
            Request failed. The incoming data specification seems malformed
        404:
          description: >-
            Request failed. User does not exist.
          examples:
            application/json:
              {
                "message": "User 00000000-0000-0000-0000-000000000000 does not exist"
              }
    """
    try:
        user_uuid = request.args["user"]
        user = User.query.filter(User.id_ == user_uuid).first()
        if not user:
            return (
                jsonify({
                    "message": "User with id:{} does not exist".format(user_uuid)
                }),
                404,
            )
        workflow_uuid = str(uuid4())
        # Use name prefix user specified or use default name prefix.
        # Actual name is prefix + autoincremented run_number.
        workflow_name = request.json.get("workflow_name", "")
        if workflow_name == "":
            workflow_name = DEFAULT_NAME_FOR_WORKFLOWS
        else:
            try:
                workflow_name.encode("ascii")
            except UnicodeEncodeError:
                # `workflow_name` contains something else than just ASCII.
                raise REANAWorkflowNameError(
                    "Workflow name {} is not valid.".format(workflow_name))
        git_ref = ""
        git_repo = ""
        if "git_data" in request.json:
            git_data = request.json["git_data"]
            git_ref = git_data["git_commit_sha"]
            git_repo = git_data["git_url"]
        # add spec and params to DB as JSON
        workflow = Workflow(
            id_=workflow_uuid,
            name=workflow_name,
            owner_id=request.args["user"],
            reana_specification=request.json["reana_specification"],
            operational_options=request.json.get("operational_options", {}),
            type_=request.json["reana_specification"]["workflow"]["type"],
            logs="",
            git_ref=git_ref,
            git_repo=git_repo,
        )
        Session.add(workflow)
        Session.object_session(workflow).commit()
        if git_ref:
            create_workflow_workspace(
                workflow.workspace_path,
                user_id=user.id_,
                git_url=git_data["git_url"],
                git_branch=git_data["git_branch"],
                git_ref=git_ref,
            )
        else:
            create_workflow_workspace(workflow.workspace_path)
        return (
            jsonify({
                "message": "Workflow workspace created",
                "workflow_id": workflow.id_,
                "workflow_name": get_workflow_name(workflow),
            }),
            201,
        )
    except (REANAWorkflowNameError, KeyError) as e:
        return jsonify({"message": str(e)}), 400
    except Exception as e:
        return jsonify({"message": str(e)}), 500

def create_job():  # noqa
    r"""Create a new job.

    ---
    post:
      summary: Creates a new job.
      description: >-
        This resource is expecting JSON data with all the necessary
        information of a new job.
      operationId: create_job
      consumes:
        - application/json
      produces:
        - application/json
      parameters:
        - name: job
          in: body
          description: Information needed to instantiate a Job
          required: true
          schema:
            $ref: '#/definitions/JobRequest'
      responses:
        201:
          description: Request succeeded. The job has been launched.
          schema:
            type: object
            properties:
              job_id:
                type: string
          examples:
            application/json:
              {
                "job_id": "cdcf48b1-c2f3-4693-8230-b066e088c6ac"
              }
        400:
          description: >-
            Request failed. The incoming data specification seems malformed.
        500:
          description: >-
            Request failed. Internal controller error. The job could probably
            not have been allocated.
    """
    json_data = request.get_json()
    if not json_data:
        return jsonify({'message': 'Empty request'}), 400
    # Validate and deserialize input
    job_request, errors = job_request_schema.load(json_data)
    if errors:
        return jsonify(errors), 400
    job_parameters = dict(
        job_id=str(job_request['job_id']),
        workflow_workspace=str(job_request['workflow_workspace']),
        docker_img=job_request['docker_img'],
        cmd=job_request['cmd'],
        cvmfs_mounts=job_request['cvmfs_mounts'],
        env_vars=job_request['env_vars'],
        shared_file_system=job_request['shared_file_system'],
        job_type=job_request.get('job_type'))
    job_obj = k8s_instantiate_job(**job_parameters)
    if job_obj:
        job = copy.deepcopy(job_request)
        job['status'] = 'started'
        job['restart_count'] = 0
        job['max_restart_count'] = 3
        job['deleted'] = False
        job['obj'] = job_obj
        JOB_DB[str(job['job_id'])] = job
        job_db_entry = JobTable(
            id_=job['job_id'],
            # The workflow_uuid is populated by the workflow-controller
            workflow_uuid=None,
            status=job['status'],
            job_type=job_request.get('job_type'),
            cvmfs_mounts=job_request['cvmfs_mounts'],
            shared_file_system=job_request['shared_file_system'],
            docker_img=job_request['docker_img'],
            experiment=job_request['experiment'],
            cmd=job_request['cmd'],
            env_vars=json.dumps(job_request['env_vars']),
            restart_count=job['restart_count'],
            max_restart_count=job['max_restart_count'],
            deleted=job['deleted'],
            name=job_request['job_name'],
            prettified_cmd=job_request['prettified_cmd'])
        Session.add(job_db_entry)
        Session.commit()
        access_times = calculate_file_access_time(
            json_data['workflow_workspace'])
        prepared_job_cache = JobCache()
        prepared_job_cache.job_id = job['job_id']
        prepared_job_cache.access_times = access_times
        Session.add(prepared_job_cache)
        Session.commit()
        return jsonify({'job_id': job['job_id']}), 201
    else:
        return jsonify({'job': 'Could not be allocated'}), 500