def send_started_email(job_id):
    """Email the user that their job has started running.

    Renders the pipeline_started.html template with the job's
    arguments, pipeline name, and description, then mails it to the
    job's owner.

    Args:
        job_id (str): a valid job ID

    Raises:
        NepheleError.NepheleEmailError: on failure
    """
    owner = DBUtils.get_user_by_job_id(job_id)
    subject = DBUtils.make_email_subject(job_id)
    # OrderedDict preserves the stored argument order for display
    ordered_args = collections.OrderedDict(DBUtils.get_job_arguments(job_id))
    pipe_details = DBUtils.get_job_type_details(job_id)
    body = _apply_template(
        'pipeline_started',
        job_args=ordered_args,
        pipe_name=pipe_details['name'],
        job_id=job_id,
        job_desc=DBUtils.get_job_description(job_id),
        neph_version=config.NEPHELE_VERSION,
        logUrl=tfvars.SERVER_ADDR + "/view_log/" + job_id)
    return _send(owner.email_address, subject, body)
def create_user(email, fname, lname, affiliation, affiliation_category,
                ref, subscribe, analysis=None):
    """Register a new user by delegating to DBUtils.create_user.

    Args:
        email: user's email address
        fname: first name
        lname: last name
        affiliation: user's institution/affiliation
        affiliation_category: category of the affiliation
        ref: referral source  # NOTE(review): presumed meaning — confirm
        subscribe: mailing-list opt-in flag
        analysis: optional analysis of interest
    """
    DBUtils.create_user(email, fname, lname, affiliation,
                        affiliation_category, ref, subscribe,
                        analysis=analysis)
def confirm_user(user_email, is_confirmed):
    """Set the email-confirmation flag for the given user.

    Args:
        user_email: address identifying the user
        is_confirmed: new confirmation state
    """
    DBUtils.set_confirm_email(user_email, is_confirmed)
def notify_user_job_completion(job_id):
    """Email the user the final status of a completed job.

    Sends a success email when the job Succeeded; otherwise retrieves
    any error message stored for the job and sends a failure email.

    Args:
        job_id (str): the unique identifier for the completed job

    Raises:
        Exception: any exception raised by either DBUtils or aws_utils
    """
    owner = DBUtils.get_user_by_job_id(job_id)
    data_type = DBUtils.get_job_type_details(job_id).get('data_type')
    subject = DBUtils.make_email_subject(job_id)
    # `X and X == 'Succeeded'` in the original is equivalent to the
    # plain equality test: a falsy status can never equal 'Succeeded'.
    if DBUtils.get_job_status(job_id) == 'Succeeded':
        body = _apply_template('pipeline_completed',
                               base=tfvars.SERVER_ADDR,
                               job_id=job_id,
                               job_type=data_type,
                               faq_url=FAQ_URL)
    else:
        body = _apply_template('pipeline_error',
                               base=tfvars.SERVER_ADDR,
                               job_id=job_id,
                               job_type=data_type,
                               err_msg=DBUtils.get_error_msg(job_id))
    return _send(owner.email_address, subject, body)
def manage_pending_q():
    """Poll the pending queue and try to launch each due job.

    For every job received from the pending SQS queue:
      * attempt to start an EC2 worker for it;
      * on ClientError, re-queue the job with a 30-minute backoff,
        email the user once (``user_notified`` guards repeat mails),
        notify the admin, and move on to the next job;
      * on success, record the machine details in the DB and send the
        "job started" email.

    Bug fix: the ``except`` branch previously fell through to the
    ``if instance:`` check.  On the first failing job ``instance`` was
    unbound (NameError); on a later failing job it reused the PREVIOUS
    job's instance, recording wrong machine info and emailing the
    wrong user.  ``continue`` now skips to the next job.
    """
    try:
        jobs = sqs.receive_pending()
        for job in jobs:
            try:
                instance = Serverd._launch_job(job.job_id)
            except ClientError as err:
                backoff = datetime.now() + timedelta(minutes=30)
                if not job.user_notified:
                    email.notify_user_job_queued(job.job_id)
                sqs.add_to_pending(job.job_id,
                                   user_notified='true',
                                   backoff=backoff)
                N2Manager.notify_admin_neph_exception(err)
                continue  # don't fall through with a stale/unbound instance
            if instance:
                DBUtils.set_machine_info(job.job_id,
                                         instance.id,
                                         instance.image_id,
                                         instance.instance_type)
                email.send_started_email(job.job_id)
    except ClientError as err:
        N2Manager.notify_admin_neph_exception(err)
def init_job(user_id):
    """Create and register a new job for a user.

    Generates a unique job ID (which is also the name of the EFS
    upload directory created for the job's data) and records the job
    in the DB.

    TODO: check that user_id is valid.

    Returns:
        str: the new job ID
    """
    new_job_id = fs_utils.gen_uniq_dname(config.UPLOAD_PATH)
    DBUtils.create_job(user_id, new_job_id)
    return new_job_id
def rm_get_old_unsubmitted_jobs():
    """DAEMONIZED ON SERVER MACHINE.

    Removes old unsubmitted jobs from the DB.  (The original note
    says this is currently not doing anything.)
    """
    print('Running rm_get_old_unsubmitted_jobs()')
    for stale_id in DBUtils.get_old_unsubmitted_jobs():
        DBUtils.delete_job(stale_id)
def notify_user_job_queued(job_id):
    """Tell the user their job is queued due to high usage.

    Sent when a job cannot be run immediately; renders the
    job_queued template and mails it to the job's owner.

    Args:
        job_id: ID of the queued job
    """
    owner = DBUtils.get_user_by_job_id(job_id)
    subject = DBUtils.make_email_subject(job_id)
    body = _apply_template('job_queued',
                           base=tfvars.SERVER_ADDR,
                           job_id=job_id)
    _send(owner.email_address, subject, body)
def send_infra_failure_email(msg, job_id=None):
    """Mail an infrastructure-failure report to the infra address.

    If a job_id is given and the DB has a stack trace stored for it,
    the trace is appended to the message body.

    Args:
        msg: description of the failure
        job_id: optional job the failure relates to
    """
    subject = "Infrastructure failure -- {env}".format(
        env=tfvars.environment)
    if job_id:
        trace = DBUtils.get_stack_trace(job_id)
        if trace:
            msg = msg + "\n" + trace
    body = _apply_template('infra_failure', jobid=job_id, stack=str(msg))
    return _send(config.INFRA_MAIL, subject, body)
def lambda_handler(event, _):
    """Handle SES bounce/complaint notifications delivered via SNS.

    Parses each SNS record in the lambda event and marks every
    bounced or complained-about address as bad in the DB.
    """
    # notificationType -> (message section, recipient-list key)
    recipient_keys = {
        "Bounce": ('bounce', 'bouncedRecipients'),
        "Complaint": ('complaint', 'complainedRecipients'),
    }
    for record in event.get('Records', []):
        if 'Sns' not in record or 'Message' not in record['Sns']:
            continue
        message = json.loads(record['Sns']['Message'])
        ntype = message['notificationType']
        if ntype in recipient_keys:
            section, key = recipient_keys[ntype]
            for bad_addr in message[section][key]:
                DBUtils.set_bad_email(bad_addr['emailAddress'])
def job_description(self):
    """Return the job-description string entered on the web form.

    Returns:
        str or None: the stored description; an 'Unable to find job'
        message if the DB lookup (or import) fails; None implicitly
        when this is not a Nephele job.
    """
    if self._is_nephele_job:
        try:
            from nephele2.rds.db_utils import DBUtils
            return DBUtils.get_job_description(self.job_id)
        # narrowed from a bare `except:`, which would also have
        # swallowed SystemExit and KeyboardInterrupt
        except Exception:
            return 'Unable to find job: {}'.format(self.job_id)
def log_to_db(self, job_id, stack=None, msg=None):
    """Record a job failure, in the DB or on stdout.

    For a real Nephele job the failure (status, stack trace, user
    message) is written to the DB; otherwise the details are simply
    printed (e.g. local runs).

    Args:
        job_id (`PipeBase.job_id`): run in Nephele
        stack (str): stack/error trace like from
            :py:func:`traceback.format_exc`
        msg (str): message to send the user in email
    """
    if not self._is_nephele_job:
        print('Emailing:')
        print('Stack : ')
        print(stack)
        print('Message : ')
        print(msg)
        return
    from nephele2.rds.db_utils import DBUtils
    DBUtils.set_job_status(job_id, 'Failed', stack_trace=stack, error=msg)
def _launch_job(job_id):
    """Start a worker EC2 instance for a job and mark it Pre-Processing.

    Args:
        job_id (str): a valid job ID

    Returns:
        the started EC2 instance; None when the job does not exist
        (after notifying the admin) or when the job is already
        'Pre-Processing', i.e. is already being handled.
    """
    job = DBUtils.get_job(job_id)
    if not job:
        # fixed message: it previously referred to a stale name,
        # "_start_ec2", which misled anyone reading the admin alert
        err = ("Failure at _launch_job: no such job {}".format(job_id))
        N2Manager.notify_admin_neph_exception(err)
        return None
    if job.status == 'Pre-Processing':
        # implies that the job has already started being processed
        # below.
        return None
    job_details = DBUtils.get_job_type_details(job_id)
    instance = ec2.start_worker_EC2(job_id,
                                    job_details['ami_id'],
                                    job_details['default_instance_type'])
    # ONLY set to pre Processing once we have an instance.
    # otherwise jobs put back into pending are ignored.
    DBUtils.set_job_status(job_id, 'Pre-Processing')
    return instance
def _job_expired_in_db(job_id):
    """Return True when the job's data should be expired.

    A job is expired when it has sat in 'Initializing' for more than
    two hours, or finished ('Failed'/'Succeeded') more than 24 hours
    ago.  Returns False (after printing a notice) when no DB entry
    exists for the ID.
    """
    job = DBUtils.get_job(job_id)
    if not job:
        print('Unable to find DB Entry for dir: ' + job_id)
        return False
    now = datetime.now()
    stuck_initializing = (job.status == "Initializing"
                          and job.created < now - timedelta(hours=2))
    finished_day_ago = (job.status in ['Failed', 'Succeeded']
                        and job.completed < now - timedelta(hours=24))
    return stuck_initializing or finished_day_ago
def submit_job(job_id, job_name, job_desc, job_args):
    # form.instance_type <- FIXME: we need to pass this in and do something
    # with it
    """Record a job's details in the DB and queue it for execution.

    Stores the type, description, and arguments, marks the job
    'Pending', and puts the ID on the pending queue, which triggers
    job start.  Raises an exception on error.

    Args:
        job_id (str): a valid job ID
        job_name (str): a valid job type
        job_desc (str): user-entered job description
        job_args (dict or str): the job arguments as a dict or JSON str
    """
    # normalise the arguments to a JSON string before storing
    if isinstance(job_args, dict):
        job_args = json.dumps(job_args)
    DBUtils.set_job_type(job_id, job_name)
    DBUtils.set_job_description(job_id, job_desc)
    DBUtils.set_job_arguments(job_id, job_args)
    DBUtils.set_job_status(job_id, "Pending")
    sqs.add_to_pending(job_id)
def __init__(self, email_addr):
    """Build a user-status snapshot from the DB for email_addr.

    Sets registration, bad-address, confirmation, and compute flags.
    The user is 'active' only when registered and confirmed with a
    good address and compute remaining.
    """
    self._email_addr = email_addr
    # default everything to False; flipped below if the DB says so
    self._is_registered = False
    self._is_bad = False
    self._is_confirmed = False
    self._is_active = False
    self._has_compute = False
    (user, user_address) = DBUtils.get_user_and_email_by_email(email_addr)
    if user and user_address:
        self._is_registered = True
        self._is_bad = bool(user_address.is_bad)
        self._is_confirmed = bool(user_address.is_confirmed)
        self._has_compute = user.compute_remaining > 0
        # registered is already True on this branch
        self._is_active = (self._is_confirmed
                           and not self._is_bad
                           and self._has_compute)
def load_job_type_table():
    """Populate the job-type table via DBUtils.load_job_types."""
    DBUtils.load_job_types()
def get_user(email):
    """Look up and return the user for the given email address."""
    return DBUtils.get_user_by_email(email)
def process_job_on_worker(job_id):
    """
    DAEMONIZED ON WORKER
    sets job to Running in DB
    gets job arguments from DB (& converts them to --arg_name format)
    starts job and waits for result
    marks that job result in DB (Succeeded || Failed)
    runs compress_results
    transfer data to S3
    rm results
    Note, if an exception is thrown by local_exec, it will be logged in the DB
    """
    job = Job(job_id)
    try:
        # learn script name, args, mark job as Running
        DBUtils.set_job_status(job_id, 'Running')
        args = DBUtils.get_job_arguments(job_id)
        script_name = DBUtils.get_script_name(job_id)
        # TODO this shouldn't be here - move to job?
        # If the args request an FTP fetch, pull the listed files into
        # the job's input dir first; 'ftp' is then removed so it is not
        # passed through to the pipeline script as a CLI arg.
        if 'ftp' in args.keys():
            files = get_file_list(args['map_file'])
            fs_utils.ftp_get_files(args['ftp'], files, job.inputs)
            del args['ftp']
        job.transfer_inputs()
        # convert the stored arguments into --arg_name CLI form
        cli_args = _argify_dict(args)
        script_path = config.PIPELINES_LOC_ON_WRKR + script_name
        _local_exec(job_id, script_path, cli_args)
        # mark job as Succeeded. If fails to transfer, overwrite status.
        DBUtils.set_job_status(job_id, "Succeeded")
    except Exception as excpn:
        # any failure above (including _local_exec) marks the job
        # Failed and alerts infra; cleanup still runs in finally
        DBUtils.set_job_status(job_id, "Failed", stack_trace=str(excpn))
        email.send_infra_failure_email(str(excpn), job_id=job_id)
    finally:
        # best-effort: compress and push results to S3 regardless of
        # pipeline outcome; a transfer failure overwrites the status
        try:
            job.compress_results()
            job.transfer_results()
            DBUtils.set_data_transferred(job_id, True)
        except Exception as excpn:
            DBUtils.set_job_status(job_id, "Failed", stack_trace=str(excpn))
            email.send_infra_failure_email(str(excpn), job_id=job_id)
        # removal failure is reported but does not change job status
        try:
            job.remove_efs_dir()
        except Exception as excpn:
            email.send_infra_failure_email(str(excpn), job_id=job_id)
        # user is notified with whatever final status ended up in the DB
        email.notify_user_job_completion(job_id)