Example #1
def add_instance_record(cfg, conn):
    log.debug('Adding instance record')

    instance_record = {}
    if cfg['proc_node_type'] != 'CLOUD':
        instance_record['instance_id'] = socket.gethostname()
        instance_record['instance_type'] = 'on-prem'
    else:
        instance_record['instance_id'] = get_instance_id()
        instance_record['instance_type'] = get_instance_type()
    instance_record['local_queue_id'] = cfg['id']
    cfg['instance_record'] = instance_record

    try:
        instance_record_sql = '''
            insert into instance_records (instance_id, local_queue_id, start_time, instance_type)
            values (%(instance_id)s, %(local_queue_id)s, current_timestamp, %(instance_type)s)
        '''
        query_database(conn=conn,
                       query=instance_record_sql,
                       params=instance_record,
                       commit=True)

    except Exception:
        log.exception("Instance record could not be inserted")
    else:
        log.info("Instance record of instance %s and job %s inserted",
                 instance_record['instance_id'],
                 instance_record['local_queue_id'])
Example #2
def hyp3_process(cfg, n):
    try:
        log.info('Processing hello_world')
        if not cfg['skip_processing']:
            log.info(f'Process starting at {datetime.now()}')
            launch_dir = os.getcwd()
            os.chdir(cfg['workdir'])

            hyp3proclib.process(cfg, 'proc_ci', ["--hello-world"])

            os.chdir(launch_dir)
        else:
            log.info('Processing skipped!')
            cfg['log'] += "(debug mode)"

        cfg['success'] = True
        hyp3proclib.update_completed_time(cfg)

        product_dir = os.path.join(cfg['workdir'], 'PRODUCT')
        if not os.path.isdir(product_dir):
            log.info(f'PRODUCT directory not found: {product_dir}')
            log.error('Processing failed')
            raise Exception('Processing failed: PRODUCT directory not found')

        # TODO: final product cleanup and upload to HyP3 DB

    except Exception as e:
        log.exception('ci processing failed!')
        log.info('Notifying user')
        hyp3proclib.failure(cfg, str(e))

    hyp3proclib.file_system.cleanup_workdir(cfg)

    log.info('ci done')
Example #3
def send_email(
        to_address, subject, body, from_address="no-reply@asf-hyp3", retries=0,
        maximum_retries=0, mime_type="plain"):
    """Send an email and return whether the email was successfully sent.

    We also retry sending the email if something went wrong the first
    time, with the maximum number of retries configurable in the
    arguments. This method only supports sending plain text emails.
    """
    if retries > maximum_retries:
        log.critical(
            "Notification failed permanently (maximum retries reached)",
        )
        return False, None
    if retries == 0:
        log.info("Sending email")
    else:
        log.info("Retrying email")

    smtp = smtplib.SMTP("localhost")

    msg = MIMEMultipart('related')
    msg["Subject"] = subject
    msg["From"] = from_address
    msg["To"] = to_address
    msg.preamble = 'This is a multi-part message in MIME format.'

    msgAlt = MIMEMultipart('alternative')
    msg.attach(msgAlt)

    msgText = MIMEText('HyP3 product notification email')
    msgAlt.attach(msgText)

    msgText = MIMEText(body)
    msgText.replace_header('Content-Type', 'text/html')
    msgAlt.attach(msgText)

    log.debug("Sending email from {0} to {1}".format(from_address, to_address))

    bcc_address = []
    bcc = get_config('general', 'bcc', default='')
    if len(bcc) > 0:
        bcc_address += bcc.split(',')
        log.debug("Bcc: " + str(bcc_address))

    try:
        smtp.sendmail(from_address, [to_address] + bcc_address, msg.as_string())
    except smtplib.SMTPException as e:
        msg = str(e)
        log.error("Failed to notify user: "******"Notification failed permanently (maximum retries reached)")
            return False, msg

        return send_email(to_address, subject, body, from_address, retries + 1, maximum_retries)

    smtp.quit()
    return True, None
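A minimal usage sketch for send_email above; the recipient address, subject, and body are made-up values, and the caller simply inspects the (success, error) tuple the function returns.

# Hypothetical caller; send_email and its logging come from the module above.
ok, error = send_email(
    to_address='user@example.com',
    subject='[HyP3] New product available',
    body='<p>Your product is ready.</p>',
    maximum_retries=2,  # allow up to two resends on smtplib.SMTPException
)
if not ok:
    # error holds the SMTP error text, or None if the retry limit was already exceeded
    print('Email could not be delivered:', error)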
Example #4
def check_stopfile(cfg, stopfile):
    if os.path.isfile(stopfile):
        log.info('Found stopfile: ' + stopfile)
        log.debug('Removing stopfile: ' + stopfile)
        os.remove(stopfile)
        log.info('Stopping')
        cleanup_lockfile(cfg)
        sys.exit(0)
Example #5
    def _process_all(self, total):
        for n in range(total):
            found = self._process_one(n)

            log.info('Processed {0}/{1} products.'.format(n + 1, total))

            if self.sleep_time > 0:
                time.sleep(self.sleep_time)

            if not found and self.stop_if_none:
                break
Example #6
def find_in_dir(dir_, all_strs, any_strs=("",)):
    if not os.path.isdir(dir_):
        return None

    for subdir, dirs, files in os.walk(dir_):
        for file in files:
            filepath = os.path.join(subdir, file)
            if any(s in filepath for s in any_strs) and all(s in filepath for s in all_strs):
                log.info('Found: ' + filepath)
                return filepath

    return None
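A small, illustrative call to find_in_dir; the directory and substrings are hypothetical. Every string in all_strs must occur somewhere in a file path, and at least one string from any_strs must occur as well, otherwise None is returned.

# Hypothetical search for a browse image inside a product directory.
phase_png = find_in_dir('/tmp/work/PRODUCT', all_strs=['.png'], any_strs=['phase', 'color'])
if phase_png is None:
    print('No matching browse image found')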
Example #7
def cleanup_workdir(cfg):
    if 'workdir' in cfg:
        if os.path.isdir(cfg['workdir']):
            if cfg['keep']:
                log.info('Not removing working directory: ' + cfg['workdir'])
            else:
                log.info('Cleaning up working directory: ' + cfg['workdir'])
                shutil.rmtree(cfg['workdir'])
        else:
            log.warn('Could not clean the workdir, not found: ' + cfg['workdir'])

    cleanup_env(cfg)
Example #8
    def run(self):
        self.cfg = setup(self.proc_name,
                         cli_args=self.cli_args,
                         sci_version=self.sci_version)

        with manage_instance_and_lockfile(self.cfg):
            total = self.cfg['num_to_process']

            log.info('Starting')
            log.debug('Processing {0} products.'.format(total))

            self._process_all(total)

            log.info('Done')
Example #9
def cleanup_lockfile(cfg):
    if 'lock_file' not in cfg:
        log.warn('No lock_file set!')
        return

    lock_file = cfg['lock_file']

    if lock_file is None or len(lock_file) == 0:
        log.info('Internal error: no lock file set.')
        return

    if os.path.isfile(lock_file):
        log.info('Removing lock file ' + lock_file)
        os.unlink(lock_file)
    else:
        log.warn('Lock file not found: ' + lock_file)
Example #10
def log_instance_shutdown_in_hyp3_db(cfg):
    if cfg['proc_node_type'] != 'CLOUD':
        return

    instance = get_instance_info(cfg)
    try:
        with get_db_connection('hyp3-db') as hyp3db_conn:
            sql = "update instances set shutdown_time = current_timestamp where id = (%(instance_id)s)"
            query_database(hyp3db_conn, sql, instance, commit=True)
    except Exception as e:
        log.error("Instance %s could not be updated with shutdown time",
                  instance["instance_id"])
        log.error("Error was: %s", str(e))
    else:
        log.info("Instance %s was updated with shutdown time",
                 instance["instance_id"])
Example #11
def notify_user_failure(cfg, conn, msg):
    if cfg['notify_fail'] is False:
        log.info('Notifications for failures not turned on.')
        return

    log.debug('Preparing to notify user of processing failure')

    username, email, wants_email, subscription_name, process_name = get_user_info(cfg, conn)

    if wants_email:
        log.debug('Notifying {0}...'.format(username))
        message = "Hi, {0}\n\n".format(username)

        if cfg['sub_id'] > 0:
            message += "Your subscription '{0}' attempted to process a product but failed.\n\n".format(subscription_name)
            subject = "[{0}] Failed processing for subscription '{1}'".format(cfg['subject_prefix'], subscription_name)
        else:
            message += "Your one-time '{0}' processing request failed.\n\n".format(process_name)
            subject = "[{0}] Failed one-time processing for '{1}'".format(cfg['subject_prefix'], process_name)

        # if len(msg.strip())>0:
        #    message += "\n" + "Captured error message:\n" + msg + "\n\n"

        if 'granule_url' in cfg and len(cfg['granule_url']) > 0:
            message += "You can download the original data here:<br>" + cfg['granule_url'] + "<br>"
            if cfg['other_granule_urls'] is not None:
                for url in cfg['other_granule_urls']:
                    message += url + "<br>"

        if "email_text" in cfg and len(cfg["email_text"]) > 0:
            message += "\n" + cfg["email_text"] + "\n\n"
        else:
            message += "\n"

        if cfg['sub_id'] > 0:
            param = str(cfg['sub_id'])
            id_, hashval = create_one_time_hash(conn, 'disable_subscription', cfg['user_id'], param)
            message += "Disable this subscription:\n" \
                "https://api.hyp3.asf.alaska.edu/onetime/disable_subscription?id="+str(id_)+"&key="+hashval+"\n\n"

        # message += "Captured processing info:\n\n" + cfg['log']

        queue_email(conn, cfg['id'], email, subject, message)
    else:
        log.info("Email will not be sent to user {0} due to user preference".format(username))
Example #12
def add_instance_to_hyp3_db(cfg):
    if cfg['proc_node_type'] != 'CLOUD':
        return
    instance = get_instance_info(cfg)
    try:
        with get_db_connection('hyp3-db') as hyp3db_conn:
            sql = 'insert into instances (id, start_time, process_id) values (%(instance_id)s, current_timestamp, %(process_id)s);'
            query_database(conn=hyp3db_conn,
                           query=sql,
                           params=instance,
                           commit=True)
    except Exception as e:
        log.error("Instance %s could not be inserted into instances",
                  instance['instance_id'])
        log.error("Error was: %s", str(e))
    else:
        log.info("Instance %s was inserted into instances",
                 instance['instance_id'])
Example #13
def get_db_connection(s, tries=0):
    connection_string =\
        "host='" + get_config(s, 'host') + "' " + \
        "dbname='" + get_config(s, 'db') + "' " + \
        "user='******'user') + "' " + \
        "password='******'pass') + "'"
    log.info("Connected to db: {0}".format(get_config(s, 'host')))
    try:
        conn = psycopg2.connect(connection_string)
    except Exception as e:
        if (tries > 4):
            log.exception('DB connection problem: ' + str(e))
            raise
        else:
            log.warning("Problem connecting to DB: " + str(e))
            log.info("Retrying in {0} seconds...".format(30 * (tries + 1)))
            time.sleep(30 * (tries + 1))
            return get_db_connection(s, tries=tries + 1)

    return conn
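A short usage sketch matching how the other examples consume this helper; 'hyp3-db' is the config section name used throughout, and query_database is the companion helper seen in the other examples.

# The with-block wraps a transaction on the psycopg2 connection:
# commit on normal exit, rollback if an exception escapes the block.
with get_db_connection('hyp3-db') as conn:
    rows = query_database(conn, "select id from email_queue where status = 'QUEUED'")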
Example #14
def update_instance_with_specific_gamma_id(cfg):
    if cfg['proc_node_type'] != 'CLOUD':
        return

    instance = get_instance_info(cfg)
    try:
        with get_db_connection('hyp3-db') as hyp3db_conn:
            sql = 'update instances set process_id = %(process_id)s where id = %(instance_id)s'
            query_database(conn=hyp3db_conn,
                           query=sql,
                           params=instance,
                           commit=True)
    except Exception as e:
        log.error(
            "any_gamma instance %s could not be updated with specific gamma process id, %s",
            instance['instance_id'], cfg['proc_name'])
        log.error("Error was: %s", str(e))
    else:
        log.info(
            "any_gamma instance %s was update with specific gamma process id, %s",
            instance['instance_id'], cfg['proc_name'])
Example #15
def update_instance_record(cfg, conn):
    if 'instance_record' in cfg:
        instance_record = cfg['instance_record']
        try:
            instance_record_sql = 'update instance_records set end_time=current_timestamp where (instance_id=%(instance_id)s and local_queue_id=%(local_queue_id)s);'
            query_database(conn=conn,
                           query=instance_record_sql,
                           params=instance_record,
                           commit=True)
        except Exception:
            log.exception(
                "Instance record for instance %s and job %s could not be updated with job completion time",
                instance_record['instance_id'],
                instance_record['local_queue_id'])
        else:
            log.info(
                "Instance record for instance %s and job %s had end_time updated with job completion time",
                instance_record['instance_id'],
                instance_record['local_queue_id'])
    else:
        log.debug('No instance record found to update')
Example #16
def setup_workdir(cfg):
    if cfg['user_workdir'] and len(cfg['workdir']) > 0:
        wd = cfg['workdir']
        log.info('Using previous working directory (will not process)')
        log.info('Directory is: ' + wd)
    else:
        s = cfg['proc_name'] + '_' + str(os.getpid()) + '_' + random_string(6)

        # Hack to avoid creating a bunch of pointless directories
        if 'notify' in s:
            return

        wd = os.path.join(cfg['workdir'], s)
        cfg['workdir'] = wd

        log.debug('Workdir is: ' + wd)
        if os.path.isdir(wd):
            log.warn('Working directory already exists!  Removing...')
            shutil.rmtree(wd)

        log.info('Creating work directory: ' + wd)
        os.mkdir(wd)

    # Some of the processes are location dependent!
    os.chdir(wd)
Example #17
def send_queued_emails():
    with get_db_connection('hyp3-db') as conn:
        sql = '''
            select id, local_queue_id, recipients, subject, message, attachment_filename, attachment, mime_type
            from email_queue where status = 'QUEUED'
        '''
        recs = query_database(conn, sql)
        if len(recs) == 0:
            log.info('No emails to send')

        for r in recs:
            if r and r[0] and r[2] and len(r[2]) > 0:
                id_ = int(r[0])
                lqid = None
                if r[1] is not None:
                    lqid = int(r[1])
                to = r[2]
                subject = r[3]
                body = r[4]

                mime_type = "plain"
                if r[7] is not None:
                    mime_type = r[7]
                if mime_type == "text":
                    mime_type = "plain"

                log.info('Emailing ' + to + ' for lqid: ' + str(lqid))
                log.debug('Subject: ' + subject)

                ok, msg = send_email(to, subject, body, mime_type=mime_type)
                if ok:
                    status = 'SENT'
                else:
                    status = 'FAILED'

                log.debug('Updating status to ' + status)
                sql = "update email_queue set status = %(status)s, system_message = %(msg)s, processed_time = current_timestamp where id = %(id)s"
                query_database(conn, sql, {'status': status, 'msg': msg, 'id': id_}, commit=True)
Example #18
def check_lockfile_pid(lock_file):
    pid = str(os.getpid())
    with open(lock_file, 'r') as lock:
        lock_file_pid = lock.read()
        if str(lock_file_pid) != str(pid):
            log.info('Lock file does not contain process PID')
            log.info('Process PID: "{}"   File PID: "{}"'.format(pid, lock_file_pid))
            log.info('Exiting without cleaning')
            sys.exit(0)
Example #19
def check_lockfile(cfg):
    lock_file = os.path.join(cfg['lock_dir'], cfg['proc_name'] + '.lock')
    cfg['lock_file'] = lock_file

    if os.path.isfile(lock_file):
        log.info('Lock file exists: ' + lock_file)
        log.info('Exiting -- already running.')
        sys.exit(0)

    # We use os.open with O_CREAT | O_EXCL so that if two ingests both pass the
    # check above and then both try to create the lock file, only one of them
    # can succeed; the other will fail here.
    try:
        o = os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_EXCL)
        fd = os.fdopen(o, 'w')
    except Exception as e:
        log.warning('Failed to open lock file: ' + str(e))
        fd = None

    if not fd:
        log.error('Could not open lock file: ' + lock_file)
        sys.exit(1)

    pid = str(os.getpid())
    fd.write(pid)
    fd.close()

    # Now check the file just in case...
    with open(lock_file, 'r') as fd:
        s = fd.read()

    if s != pid:
        log.error('Failed to correctly initialize lock file')
        sys.exit(1)
    else:
        log.info('Acquired lock file, PID is ' + pid)
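Taken together with check_lockfile_pid (Example #18) and cleanup_lockfile (Example #9), the lock file brackets a single processing run. The wiring below is an assumed sketch of that lifecycle, not code from the library.

# Assumed lifecycle: acquire the lock, do the work, verify the stored PID
# still belongs to this process, then remove the lock for the next run.
def run_locked(cfg, do_work):
    check_lockfile(cfg)                       # exits if another run holds the lock
    try:
        do_work(cfg)
    finally:
        check_lockfile_pid(cfg['lock_file'])  # exits without cleanup if the PID differs
        cleanup_lockfile(cfg)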
Example #20
def process_insar(cfg, n):
    try:
        log.info('Processing ISCE InSAR pair "{0}" for "{1}"'.format(cfg['sub_name'], cfg['username']))

        g1, g2 = earlier_granule_first(cfg['granule'], cfg['other_granules'][0])

        list_file = 'list.csv'
        write_list_file(os.path.join(cfg['workdir'], list_file), g1, g2)

        d1 = g1[17:25]
        d2 = g2[17:25]
        delta = (datetime.datetime.strptime(d2, '%Y%m%d')-datetime.datetime.strptime(d1, '%Y%m%d')).days
        ifm_dir = d1 + '_' + d2
        cfg['ifm'] = ifm_dir
        log.debug('IFM dir is: ' + ifm_dir)

        sd1 = d1[0:4]+'-'+d1[4:6]+'-'+d1[6:8]
        sd2 = d2[0:4]+'-'+d2[4:6]+'-'+d2[6:8]
        cfg["email_text"] = "This is a {0}-day InSAR pair from {1} to {2}.".format(delta, sd1, sd2)

        subswath = get_extra_arg(cfg, "subswath", "0")
        if subswath == "0":
            process(cfg, 'procAllS1StackISCE.py', ["-90", "90", "-180", "180", "-f", list_file, "-d"])
        else:
            process(cfg, 'procS1StackISCE.py', ["-f", list_file, "-d", "-s", subswath])

        subdir = os.path.join(cfg['workdir'], 'PRODUCT')
        if not os.path.isdir(subdir):
            log.info('PRODUCT directory not found: ' + subdir)
            log.error('Processing failed')
            raise Exception("Processing failed: PRODUCT directory not found")
        else:
            looks = get_looks(subdir)
            out_name = build_output_name_pair(g1, g2, cfg['workdir'], looks + "-iw" + subswath + cfg['suffix'])
            log.info('Output name: ' + out_name)

            out_path = os.path.join(cfg['workdir'], out_name)
            zip_file = out_path + '.zip'
            if os.path.isdir(out_path):
                shutil.rmtree(out_path)
            if os.path.isfile(zip_file):
                os.unlink(zip_file)
            cfg['out_path'] = out_path

            # clip_tiffs_to_roi(cfg, conn, product)

            log.debug('Renaming '+subdir+' to '+out_path)
            os.rename(subdir, out_path)

            find_browses(cfg, out_path)

            cfg['attachment'] = find_phase_png(out_path)
            add_esa_citation(g1, out_path)
            zip_dir(out_path, zip_file)

            cfg['final_product_size'] = [os.stat(zip_file).st_size, ]
            cfg['original_product_size'] = 0

            with get_db_connection('hyp3-db') as conn:
                record_metrics(cfg, conn)
                upload_product(zip_file, cfg, conn)
                success(conn, cfg)

    except Exception as e:
        log.exception('Processing failed')
        log.info('Notifying user')
        failure(cfg, str(e))

    cleanup_workdir(cfg)

    log.info('Done')
Example #21
def notify_user(product_url, queue_id, cfg, conn):
    """Email a user notifying them of a finished product.

    Takes the name of a finished product, download link for the finished
    product, subscription ID for the product, the configuration
    parameters, and a database connection, and emails the user to notify
    them that the product has been finished and provide them with the
    download links for both the finished product and the original
    granule.

    This function returns True upon success and False upon failure.
    """
    username, email, wants_email, subscription_name, process_name = get_user_info(cfg, conn)

    if cfg['sub_id'] > 0:
        title = "A new '{0}' product for your subscription '{1}' is ready.".format(process_name, subscription_name)
        subject = "[{0}] New product for subscription '{1}'".format(cfg['subject_prefix'], subscription_name)
    else:
        title = "A new product for your '{0}' one-time processing request has been generated.".format(process_name)
        subject = "[{0}] New {1} product available".format(cfg['subject_prefix'], process_name)

    message = get_email_header(title)

    message += "<p>Hello HyP3-User!"
    message += "<p>" + title + "\n"

    if 'description' in cfg and cfg['description'] and len(cfg['description']) > 0:
        message += "<p>" + escape(cfg['description'], quote=False).replace('\n', '<br>') + "<br>\n"

    if process_name != "Notify Only":
        message += '<p>You can download it here:<br><a href="{0}">{1}</a><br><br>\n'.format(product_url, cfg['filename'])

        if 'browse_url' in cfg and cfg['browse_url'] is not None and len(cfg['browse_url']) > 0:
            message += '<center><a href="{0}"><img src="{1}" width="80%" border="0"/></a></center><br>\n'.format(cfg['browse_url'], cfg['browse_url'])

        if 'final_product_size' in cfg:
            sz = cfg['final_product_size'][0]
            mb = float(sz)/1024.0/1024.0
            message += "<p>Size: %.2f MB<br><br>\n" % mb

        message += "You can find all of your products at the HyP3 website:<br>{0}/products<br>\n".format(cfg['hyp3_product_url'])

        if 'granule_url' in cfg and len(str(cfg['granule_url'])) > 0 and 'Subscription: ' not in str(cfg['granule_url']):
            message += "<p>You can download the original data from the ASF datapool here:<br>" + urlify(cfg['granule_url']) + "<br>\n"
            if 'other_granule_urls' in cfg and cfg['other_granule_urls'] is not None:
                for url in cfg['other_granule_urls']:
                    message += urlify(url) + "<br>\n"

        if 'SLC' in cfg['granule']:
            message += '<p>View this stack in the ASF baseline tool:<br>'
            message += 'http://baseline.asf.alaska.edu/#baseline?granule={0}\n'.format(cfg['granule'])
    else:
        message += "<p>You can download it here:<br>" + urlify(product_url) + "<br>"

    if "email_text" in cfg and len(cfg["email_text"]) > 0:
        message += "<p>" + cfg["email_text"] + "<br>"
    if 'process_time' in cfg:
        message += process_name + " processing time: " + str(datetime.timedelta(seconds=int(cfg['process_time']))) + "<br>\n"

    if cfg['sub_id'] > 0:
        param = str(cfg['sub_id'])
        id_, hashval = create_one_time_hash(conn, 'disable_subscription', cfg['user_id'], param)
        message += "<p>Done with this subscription?  Disable it with this link:<br>" \
                   "https://api.hyp3.asf.alaska.edu/onetime/disable_subscription?id="+str(id_)+"&key="+hashval+"<br><br>\n"

    message += get_email_footer()

    # message += "Hostname: " + socket.gethostname() + "\n"
    if wants_email:
        log.info('Emailing: ' + email)
        queue_email(conn, cfg['id'], email, subject, usr(message, username))
    else:
        log.info("Email will not be sent to user {0} due to user preference".format(username))

        bcc = get_config('general', 'bcc', default='')
        if len(bcc) > 0:
            # We only have to do the first one, the rest will be bcc'ed :)
            addr = bcc.split(',')[0]
            log.debug('Queueing email for BCC user: ' + addr)
            queue_email(conn, cfg['id'], addr, subject, usr(message, username))
Example #22
def find_rtc_zip(dir_, orbit):
    log.debug('Orbit: ' + orbit)
    rtc_zip = find_in_dir(dir_, [".zip", "AP_", orbit])
    log.info("Found RTC zip: " + rtc_zip)
    return rtc_zip
Example #23
def check_lockfile_exists(lock_file):
    if not os.path.isfile(lock_file):
        log.info('Lock file does not exist')
        log.info('Stopping')
        sys.exit(0)