Example #1
def process_datachecked_db(self, dc_job_id, spec):
    """ Task to wait until DCs finish and then respond e.g.
    * submit copy if DC succeed
    * send error email if not
    """
    # allow infinite retries
    self.max_retries = None
    src_uri = spec['src_uri']
    progress_msg = 'Datachecks in progress, please see: %sjobs/%s' % (cfg.dc_uri, dc_job_id)
    log_and_publish(make_report('INFO', progress_msg, spec, src_uri))
    try:
        result = dc_client.retrieve_job(dc_job_id)
    except Exception as e:
        err_msg = 'Handover failed, cannot retrieve datacheck job'
        log_and_publish(make_report('ERROR', err_msg, spec, src_uri))
        raise ValueError('Handover failed, cannot retrieve datacheck job %s' % e) from e
    if result['status'] in ['incomplete', 'running', 'submitted']:
        log_and_publish(make_report('DEBUG', 'Datacheck Job incomplete, checking again later', spec, src_uri))
        raise self.retry()
    # check results
    elif result['status'] == 'failed':
        prob_msg = 'Datachecks found problems, you can download the output here: %sdownload_datacheck_outputs/%s' % (cfg.dc_uri, dc_job_id)
        log_and_publish(make_report('INFO', prob_msg, spec, src_uri))
        msg = """
Running datachecks on %s completed but found problems.
You can download the output here %s
""" % (src_uri, cfg.dc_uri + "download_datacheck_outputs/" + str(dc_job_id))
        send_email(to_address=spec['contact'], subject='Datacheck found problems', body=msg, smtp_server=cfg.smtp_server)
    else:
        log_and_publish(make_report('INFO', 'Datachecks successful, starting copy', spec, src_uri))
        spec['progress_complete'] = 1
        submit_copy(spec)
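These handover tasks set self.max_retries and raise self.retry(), which is the pattern used by Celery bound tasks. A minimal registration sketch, assuming a module-level Celery app (the app name, broker URL and retry delay below are illustrative assumptions, not taken from the example):

from celery import Celery

# Hypothetical Celery application; the broker URL is an assumption.
app = Celery('handover', broker='pyamqp://guest@localhost//')

@app.task(bind=True, default_retry_delay=60)
def process_datachecked_db(self, dc_job_id, spec):
    # bind=True exposes the task instance as `self`, which is what allows
    # the body above to set self.max_retries and call self.retry().
    ...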
Example #2
def process_copied_db(self, copy_job_id, spec):
    """Wait for copy to complete and then respond accordingly:
    * if success, submit to metadata database
    * if failure, flag error using email"""
    # allow infinite retries
    self.max_retries = None
    src_uri = spec['src_uri']
    copy_in_progress_msg = 'Copying in progress, please see: %s%s' % (cfg.copy_web_uri, copy_job_id)
    log_and_publish(make_report('INFO', copy_in_progress_msg, spec, src_uri))
    try:
        result = db_copy_client.retrieve_job(copy_job_id)
    except Exception as e:
        log_and_publish(make_report('ERROR', 'Handover failed, cannot retrieve copy job', spec, src_uri))
        raise ValueError('Handover failed, cannot retrieve copy job %s' % e) from e
    if result['status'] in ['incomplete', 'running', 'submitted']:
        log_and_publish(make_report('DEBUG', 'Database copy job incomplete, checking again later', spec, src_uri))
        raise self.retry()
    if result['status'] == 'failed':
        copy_failed_msg = 'Copy failed, please see: %s%s' % (cfg.copy_web_uri, copy_job_id)
        log_and_publish(make_report('INFO', copy_failed_msg, spec, src_uri))
        msg = """
Copying %s to %s failed.
Please see %s
""" % (src_uri, spec['tgt_uri'], cfg.copy_web_uri + str(copy_job_id))
        send_email(to_address=spec['contact'], subject='Database copy failed', body=msg, smtp_server=cfg.smtp_server)
        return
    elif 'GRCh37' in spec:
        log_and_publish(make_report('INFO', 'Copying complete, Handover successful', spec, src_uri))
        spec['progress_complete'] = 2
    else:
        log_and_publish(make_report('INFO', 'Copying complete, submitting metadata job', spec, src_uri))
        spec['progress_complete'] = 2
        submit_metadata_update(spec)
def process_db_metadata(self, metadata_job_id, spec):
    """Wait for metadata update to complete and then respond accordingly:
    * if success, submit event to event handler for further processing
    * if failure, flag error using email"""
    reporting.set_logger_context(get_logger(), spec['tgt_uri'], spec)
    # allow infinite retries
    self.max_retries = None
    get_logger().info("Loading into metadata database, please see: " +
                      cfg.meta_uri + "jobs/" + str(metadata_job_id))
    try:
        result = metadata_client.retrieve_job(metadata_job_id)
    except Exception as e:
        get_logger().error("Handover failed, Cannot retrieve metadata job")
        raise ValueError(
            "Handover failed, Cannot retrieve metadata job {}".format(e))
    if result['status'] in ['incomplete', 'running', 'submitted']:
        get_logger().debug(
            "Metadata load Job incomplete, checking again later")
        raise self.retry()
    if result['status'] == 'failed':
        get_logger().info("Metadata load failed, please see " + cfg.meta_uri +
                          'jobs/' + str(metadata_job_id) + '?format=failures')
        msg = """
Metadata load of %s failed.
Please see %s
""" % (spec['tgt_uri'],
        cfg.meta_uri + 'jobs/' + str(metadata_job_id) + '?format=failures')
        send_email(to_address=spec['contact'],
                   subject='Metadata load failed, please see: ' +
                   cfg.meta_uri + 'jobs/' + str(metadata_job_id) +
                   '?format=failures',
                   body=msg,
                   smtp_server=cfg.smtp_server)
        return
    else:
        # Cleaning up old assembly or old genebuild databases for Wormbase when database suffix has changed
        if 'events' in result['output'] and result['output']['events']:
            for event in result['output']['events']:
                details = json.loads(event['details'])
                if 'current_database_list' in details:
                    drop_current_databases(details['current_database_list'],
                                           spec['staging_uri'],
                                           spec['tgt_uri'])
                if event['genome'] in blat_species and event['type'] == 'new_assembly':
                    send_email(
                        to_address=cfg.production_email,
                        subject='BLAT species list needs updating in FTP Dumps config',
                        body='The following species ' + event['genome'] +
                        ' has a new assembly, please update the port number for this species here and communicate to Web: https://github.com/Ensembl/ensembl-production/blob/master/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpCore_conf.pm#L107'
                    )
        get_logger().info("Metadata load complete, Handover successful")
        spec['progress_complete'] = 3
        # get_logger().info("Metadata load complete, submitting event")
        # submit_event(spec, result)
        # qrp job submit
        submit_qrp_event(spec, result)
    return
Example #4
def email(self, address, subject, body):
    """ Simple task to send an email as specified
    Arguments:
      address
      subject
      body
    """
    send_email(smtp_server=smtp_server,
               from_email_address=from_email_address,
               address=address,
               subject=subject,
               body=body)
def process_checked_db(self, hc_job_id, spec):
    """ Task to wait until HCs finish and then respond e.g.
    * submit copy if HCs succeed
    * send error email if not
    """
    reporting.set_logger_context(get_logger(), spec['src_uri'], spec)
    # allow infinite retries
    self.max_retries = None
    get_logger().info("HCs in progress, please see: " + cfg.hc_web_uri +
                      str(hc_job_id))
    try:
        result = hc_client.retrieve_job(hc_job_id)
    except Exception as e:
        get_logger().error("Handover failed, cannot retrieve hc job")
        raise ValueError(
            "Handover failed, cannot retrieve hc job {}".format(e))
    if result['status'] in ['incomplete', 'running', 'submitted']:
        get_logger().debug("HC Job incomplete, checking again later")
        raise self.retry()
    # check results
    if result['status'] == 'failed':
        get_logger().info("HCs failed to run, please see: " + cfg.hc_web_uri +
                          str(hc_job_id))
        msg = """
Running healthchecks on %s failed to execute.
Please see %s
""" % (spec['src_uri'], cfg.hc_web_uri + str(hc_job_id))
        send_email(to_address=spec['contact'],
                   subject='HC failed to run',
                   body=msg,
                   smtp_server=cfg.smtp_server)
        return
    elif result['output']['status'] == 'failed':
        get_logger().info("HCs found problems, please see: " + cfg.hc_web_uri +
                          str(hc_job_id))
        msg = """
Running healthchecks on %s completed but found failures.
Please see %s
""" % (spec['src_uri'], cfg.hc_web_uri + str(hc_job_id))
        send_email(to_address=spec['contact'],
                   subject='HC ran but failed',
                   body=msg,
                   smtp_server=cfg.smtp_server)
        return
    else:
        get_logger().info("HCs fine, starting copy")
        spec['progress_complete'] = 1
        submit_copy(spec)
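The snippets rely on module-level configuration (cfg, smtp_server, from_email_address, retry_wait) that is not shown. A minimal stand-in sketch, with every value a placeholder assumption:

from types import SimpleNamespace

# Hypothetical stand-in for the cfg object and module-level settings referenced
# in the examples; all URIs and addresses are placeholder assumptions.
cfg = SimpleNamespace(
    hc_web_uri='http://localhost/hc/',
    copy_web_uri='http://localhost/copy/',
    dc_uri='http://localhost/dc/',
    meta_uri='http://localhost/meta/',
    smtp_server='localhost',
    production_email='production@example.org',
)
smtp_server = cfg.smtp_server
from_email_address = 'noreply@example.org'
retry_wait = 60  # seconds to wait before re-checking a job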
Example #6
def process_db_metadata(self, metadata_job_id, spec):
    """Wait for metadata update to complete and then respond accordingly:
    * if success, submit event to event handler for further processing
    * if failure, flag error using email"""
    # allow infinite retries
    self.max_retries = None
    tgt_uri = spec['tgt_uri']
    loading_msg = 'Loading into metadata database, please see: %sjobs/%s' % (cfg.meta_uri, metadata_job_id)
    log_and_publish(make_report('INFO', loading_msg, spec, tgt_uri))
    try:
        result = metadata_client.retrieve_job(metadata_job_id)
    except Exception as e:
        err_msg = 'Handover failed, Cannot retrieve metadata job'
        log_and_publish(make_report('ERROR', err_msg, spec, tgt_uri))
        raise ValueError('Handover failed, Cannot retrieve metadata job %s' % e) from e
    if result['status'] in ['incomplete', 'running', 'submitted']:
        incomplete_msg = 'Metadata load Job incomplete, checking again later'
        log_and_publish(make_report('DEBUG', incomplete_msg, spec, tgt_uri))
        raise self.retry()
    if result['status'] == 'failed':
        drop_msg = 'Dropping %s' % tgt_uri
        log_and_publish(make_report('INFO', drop_msg, spec, tgt_uri))
        drop_database(spec['tgt_uri'])
        failed_msg = 'Metadata load failed, please see %sjobs/%s?format=failures' % (cfg.meta_uri, metadata_job_id)
        log_and_publish(make_report('INFO', failed_msg, spec, tgt_uri))
        msg = """
Metadata load of %s failed.
Please see %s
""" % (tgt_uri, cfg.meta_uri + 'jobs/' + str(metadata_job_id) + '?format=failures')
        send_email(to_address=spec['contact'], subject='Metadata load failed, please see: ' + cfg.meta_uri + 'jobs/' + str(metadata_job_id) + '?format=failures', body=msg, smtp_server=cfg.smtp_server)
    else:
        # Cleaning up old assembly or old genebuild databases for Wormbase when database suffix has changed
        if 'events' in result['output'] and result['output']['events']:
            for event in result['output']['events']:
                details = json.loads(event['details'])
                if 'current_database_list' in details:
                    drop_current_databases(details['current_database_list'], spec)
                if event['genome'] in blat_species and event['type'] == 'new_assembly':
                    msg = 'The following species %s has a new assembly, please update the port number for this species here and communicate to Web: https://github.com/Ensembl/ensembl-production/blob/master/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpCore_conf.pm#L107' % event['genome']
                    send_email(to_address=cfg.production_email,
                               subject='BLAT species list needs updating in FTP Dumps config',
                               body=msg)
        log_and_publish(make_report('INFO', 'Metadata load complete, Handover successful', spec, tgt_uri))
        spec['progress_complete'] = 3
Example #7
def email_when_complete(self, url, address):
    """ Task to check a URL and send an email once the result has a non-incomplete status
    Used for periodically checking whether a hive job has finished. If status is not complete,
    the task is retried
    Arguments:
      url - URL to check for job completion. Must return JSON containing status, subject and body fields
      address - address to send email
    """
    # allow infinite retries
    self.max_retries = None
    try:
        with http_session() as session:
            response = session.get(url)
        result = response.json()
    except requests.RequestException as e:
        logger.error('RequestsException: %s', e)
        raise self.retry(countdown=retry_wait, max_retries=120)
    except json.JSONDecodeError:
        err = 'Invalid response. Status: {} URL: {}'.format(
            response.status_code, response.url)
        logger.error('%s Body: %s', err, response.text)
        raise self.retry(countdown=retry_wait, max_retries=120)

    try:
        status = result['status']
        if status in ('incomplete', 'running', 'submitted'):
            # job incomplete so retry task after waiting
            raise self.retry(countdown=retry_wait)
        subject = result['subject']
        body = result['body']
    except KeyError as e:
        err = 'Invalid response. Missing parameter "{}". URL: {}'.format(
            str(e), response.url)
        logger.error('%s Body: %s', err, response.text)
        raise Reject(err, requeue=False)
    # job complete so send email and complete task
    send_email(smtp_server=smtp_server,
               from_email_address=from_email_address,
               to_address=address,
               subject=subject,
               body=body)
    return result
def process_copied_db(self, copy_job_id, spec):
    """Wait for copy to complete and then respond accordingly:
    * if success, submit to metadata database
    * if failure, flag error using email"""
    reporting.set_logger_context(get_logger(), spec['src_uri'], spec)
    # allow infinite retries
    self.max_retries = None
    get_logger().info("Copying in progress, please see: " + cfg.copy_web_uri +
                      str(copy_job_id))
    try:
        result = db_copy_client.retrieve_job(copy_job_id)
    except Exception as e:
        get_logger().error("Handover failed, cannot retrieve copy job")
        raise ValueError(
            "Handover failed, cannot retrieve copy job {}".format(e))
    if result['status'] in ['incomplete', 'running', 'submitted']:
        get_logger().debug(
            "Database copy job incomplete, checking again later")
        raise self.retry()
    if result['status'] == 'failed':
        get_logger().info("Copy failed, please see: " + cfg.copy_web_uri +
                          str(copy_job_id))
        msg = """
Copying %s to %s failed.
Please see %s
""" % (spec['src_uri'], spec['tgt_uri'], cfg.copy_web_uri + str(copy_job_id))
        send_email(to_address=spec['contact'],
                   subject='Database copy failed',
                   body=msg,
                   smtp_server=cfg.smtp_server)
        return
    elif 'GRCh37' in spec:
        get_logger().info("Copying complete, Handover successful")
        spec['progress_complete'] = 2
    else:
        get_logger().info("Copying complete, submitting metadata job")
        spec['progress_complete'] = 2
        submit_metadata_update(spec)
def process_datachecked_db(self, dc_job_id, spec):
    """ Task to wait until DCs finish and then respond e.g.
    * submit copy if DC succeed
    * send error email if not
    """
    reporting.set_logger_context(get_logger(), spec['src_uri'], spec)
    # allow infinite retries
    self.max_retries = None
    get_logger().info("Datachecks in progress, please see: " + cfg.dc_uri +
                      "jobs/" + str(dc_job_id))
    try:
        result = dc_client.retrieve_job(dc_job_id)
    except Exception as e:
        get_logger().error("Handover failed, cannot retrieve datacheck job")
        raise ValueError(
            "Handover failed, cannot retrieve datacheck job {}".format(e))
    if result['status'] in ['incomplete', 'running', 'submitted']:
        get_logger().debug("Datacheck Job incomplete, checking again later")
        raise self.retry()
    # check results
    if result['status'] == 'failed':
        get_logger().info(
            "Datachecks found problems, you can download the output here: " +
            cfg.dc_uri + "download_datacheck_outputs/" + str(dc_job_id))
        msg = """
Running datachecks on %s completed but found problems.
You can download the output here %s
""" % (spec['src_uri'],
        cfg.dc_uri + "download_datacheck_outputs/" + str(dc_job_id))
        send_email(to_address=spec['contact'],
                   subject='Datachecks found problems',
                   body=msg,
                   smtp_server=cfg.smtp_server)
        return
    else:
        get_logger().info("Datachecks successful, starting copy")
        spec['progress_complete'] = 1
        submit_copy(spec)
def email_when_complete(self, url, address):
    """ Task to check a URL and send an email once the result has a non-incomplete status
    Used for periodically checking whether a hive job has finished. If status is not complete,
    the task is retried
    Arguments:
      url - URL to check for job completion. Must return JSON containing status, subject and body fields
      address - address to send email
    """
    # allow infinite retries
    self.max_retries = None
    result = requests.get(url).json()
    if result['status'] in ('incomplete', 'running', 'submitted'):
        # job incomplete so retry task after waiting
        raise self.retry(countdown=retry_wait)
    # job complete so send email and complete task
    send_email(smtp_server=smtp_server,
               from_email_address=from_email_address,
               to_address=address,
               subject=result['subject'],
               body=result['body'])
    return result
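Being Celery tasks, these functions are normally queued rather than called directly. A brief usage sketch (the URL and address are placeholders):

# Queue the task; Celery keeps retrying it until the job at the URL reports
# a status other than incomplete/running/submitted, then sends the email.
async_result = email_when_complete.delay(
    'http://localhost/hive/jobs/42?format=json',  # placeholder URL
    'someone@example.org',                        # placeholder address
)
result = async_result.get(timeout=3600)  # optionally block for the returned payload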