示例#1
0
def pfw_dynam_load_class(pfw_dbh, wcl, parent_tid, attempt_task_id,
                         label, classname, extra_info):
    """ Dynamically load a class save timing info in task table """

    #task_id = -1
    #if pfw_dbh is not None:
    #    task_id = pfw_dbh.create_task(name='dynclass',
    #                                  info_table=None,
    #                                  parent_task_id=parent_tid,
    #                                  root_task_id=attempt_task_id,
    #                                  label=label,
    #                                  do_begin=True,
    #                                  do_commit=True)

    the_class_obj = None
    try:
        the_class = miscutils.dynamically_load_class(classname)
        valdict = {}
        try:
            valdict = miscutils.get_config_vals(extra_info, wcl, the_class.requested_config_vals())
        except AttributeError: # in case the_class doesn't have requested_config_vals
            pass
        the_class_obj = the_class(valdict, wcl)
    except:
        (extype, exvalue, _) = sys.exc_info()
        msg = f"Error: creating {label} object - {extype} - {exvalue}"
        print(f"\n{msg}")
        if pfw_dbh is not None:
            Messaging.pfw_message(pfw_dbh, wcl['pfw_attempt_id'], parent_tid, msg, pfwdefs.PFWDB_MSG_ERROR)
        raise

    #if pfw_dbh is not None:
    #    pfw_dbh.end_task(task_id, pfwdefs.PF_EXIT_SUCCESS, True)

    return the_class_obj
示例#2
0
def begrun(argv):
    """TODO: short summary.

    Performs steps executed on submit machine at beginning of processing
    attempt.
    """
    pfw_dbh = None
    try:
        configfile = argv[0]
        config = pfwconfig.PfwConfig({'wclfile': configfile})

        if miscutils.fwdebug_check(6, 'BEGRUN_DEBUG'):
            miscutils.fwdebug_print(
                'use_home_archive_output = %s' %
                config.getfull(pfwdefs.USE_HOME_ARCHIVE_OUTPUT))

        if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
            import processingfw.pfwdb as pfwdb
            pfw_dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                                  config.getfull('submit_des_db_section'))
            pfw_dbh.begin_task(config['task_id']['attempt'], True)

        # the three wcl files to copy to the home archive
        origwcl = config['origwcl']
        expwcl = config['expwcl']
        fullwcl = config['fullwcl']

        # if not a dryrun and using a home archive for output
        if (config.getfull(pfwdefs.USE_HOME_ARCHIVE_OUTPUT) != 'never'
                and 'submit_files_mvmt' in config and
            (pfwdefs.PF_DRYRUN not in config
             or not miscutils.convertBool(config.getfull(pfwdefs.PF_DRYRUN)))):

            # get home archive info
            home_archive = config.getfull('home_archive')
            archive_info = config[pfwdefs.SW_ARCHIVESECT][home_archive]

            # load filemgmt class
            attempt_tid = config['task_id']['attempt']
            filemgmt = pfwutils.pfw_dynam_load_class(pfw_dbh, config,
                                                     attempt_tid, attempt_tid,
                                                     "filemgmt",
                                                     archive_info['filemgmt'],
                                                     archive_info)

            # save file information
            filemgmt.register_file_data('wcl', [origwcl, expwcl, fullwcl],
                                        config['pfw_attempt_id'], attempt_tid,
                                        False, None, None)
            copy_files_home(config, archive_info, filemgmt)
            filemgmt.commit()

        if pfw_dbh is not None:
            print("Saving attempt's archive path into PFW tables...", end=' ')
            pfw_dbh.update_attempt_archive_path(config)
            pfw_dbh.commit()
    except Exception as exc:
        msg = "begrun: %s: %s" % (exc.__class__.__name__, str(exc))
        if pfw_dbh is not None:
            Messaging.pfw_message(pfw_dbh, config['pfw_attempt_id'],
                                  config['task_id']['attempt'], msg,
                                  pfw_utils.PFW_DB_WARN, 'begrun.out', 0)
        send_failed_email(config, msg)
        raise
    except SystemExit as exc:
        msg = "begrun: SysExit=%s" % str(exc)
        if pfw_dbh is not None:
            Messaging.pfw_message(pfw_dbh, config['pfw_attempt_id'],
                                  config['task_id']['attempt'], msg,
                                  pfw_utils.PFW_DB_WARN, 'begrun.out', 0)
        send_failed_email(config, msg)
        raise
示例#3
0
def blockpost(argv=None):
    """Program entry point.
    """
    if argv is None:
        argv = sys.argv

    # open file to catch error messages about command line
    debugfh = open('blockpost.out', 'w')
    sys.stdout = debugfh
    sys.stderr = debugfh

    print(' '.join(argv))  # print command line for debugging

    print("running on %s" % (socket.gethostname()))

    if len(argv) != 3:
        print('Usage: blockpost.py configfile retval')
        debugfh.close()
        return pfwdefs.PF_EXIT_FAILURE

    configfile = argv[1]
    retval = int(argv[2])

    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("configfile = %s" % configfile)
    miscutils.fwdebug_print("retval = %s" % retval)

    # read sysinfo file
    config = pfwconfig.PfwConfig({'wclfile': configfile})
    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("done reading config file")
    blockname = config.getfull('blockname')
    blkdir = config.getfull('block_dir')

    # now that have more information, can rename output file
    miscutils.fwdebug_print("getting new_log_name")
    new_log_name = config.get_filename(
        'block',
        {pfwdefs.PF_CURRVALS: {
            'flabel': 'blockpost',
            'fsuffix': 'out'
        }})
    new_log_name = "%s/%s" % (blkdir, new_log_name)
    miscutils.fwdebug_print("new_log_name = %s" % new_log_name)

    debugfh.close()
    os.chmod('blockpost.out', 0o666)
    os.rename('blockpost.out', new_log_name)
    debugfh = open(new_log_name, 'a+')
    sys.stdout = debugfh
    sys.stderr = debugfh

    os.chdir(blkdir)

    log_pfw_event(config, blockname, 'blockpost', 'j', ['posttask', retval])

    dryrun = config.getfull(pfwdefs.PF_DRYRUN)
    run = config.getfull('run')
    attid = config['pfw_attempt_id']
    reqnum = config.getfull(pfwdefs.REQNUM)
    unitname = config.getfull(pfwdefs.UNITNAME)
    attnum = config.getfull(pfwdefs.ATTNUM)
    blknum = int(config.getfull(pfwdefs.PF_BLKNUM))
    blktid = None

    msg2 = ""
    dbh = None
    job_byblk = {}
    wrap_byjob = {}
    wrap_bymod = {}
    wrapinfo = {}
    jobinfo = {}
    failedwraps = {}
    whyfailwraps = {}  # mod failures for other modname, shouldn't happen
    usedb = miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT))
    verify_files = miscutils.convertBool(config.getfull('verify_files'))
    verify_status = 0
    if verify_files and not usedb:
        print('Skipping file verification due to lack of database connection')
    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        sem = None
        try:
            miscutils.fwdebug_print("Connecting to DB")
            dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                              config.getfull('submit_des_db_section'))
            if verify_files:
                curs = dbh.cursor()
                curs.execute("select root from ops_archive where name='%s'" %
                             (config.getfull('home_archive')))
                rows = curs.fetchall()
                if rows is None or len(rows) != 1:
                    raise Exception(
                        "Invalid archive name (%s).   Found %s rows in ops_archive"
                        % (config.getfull('home_archive'), len(rows)))
                root = rows[0][0]
                if not os.path.isdir(root):
                    print(
                        "Cannot read archive root directory:%s This program must be run on an NCSA machine with access to the archive storage system."
                        % (config.getfull('home_archive')))
                sem = dbsem.DBSemaphore(
                    'verify_files_10', None,
                    config.getfull('submit_des_services'),
                    config.getfull('submit_des_db_section'))
                print(
                    "\n\nVerifying archive file sizes on disk (0 is success)")
                verify_status = cu.compare(
                    dbh=dbh,
                    archive=config.getfull('home_archive'),
                    pfwid=attid,
                    filesize=True,
                    md5sum=False,
                    quick=True,
                    debug=False,
                    script=False,
                    verbose=False,
                    silent=True)
                if sem is not None:
                    del sem
                print("  Verification of files returned status %i" %
                      (verify_status))
                if verify_status != 0:
                    print(
                        "  This indicates that one or more files do not have the correct file size (based on DB entries). Run"
                    )
                    print(
                        "\n    compare_db.py --des_services %s --section %s --archive %s --pfwid %i --filesize --verbose"
                        % (config.getfull('submit_des_services'),
                           config.getfull('submit_des_db_section'),
                           config.getfull('home_archive'), int(attid)))
                    print("\n  to see the details.")

            if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_QCF)):
                import qcframework.qcfdb as qcfdb
                qdbh = qcfdb.QCFDB(config.getfull('submit_des_services'),
                                   config.getfull('submit_des_db_section'))

            print("\n\nChecking non-job block task status from task table in DB (%s is success)" % \
                  pfwdefs.PF_EXIT_SUCCESS)
            num_bltasks_failed = 0
            bltasks = {}
            blktid = None
            if ('block' in config['task_id']
                    and str(blknum) in config['task_id']['block']):
                blktid = int(config['task_id']['block'][str(blknum)])
                miscutils.fwdebug_print("Getting block task info from DB")
                start_time = time.time()
                bltasks = dbh.get_block_task_info(blktid)
                end_time = time.time()
                miscutils.fwdebug_print(
                    "Done getting block task info from DB (%s secs)" %
                    (end_time - start_time))
                for bltdict in list(bltasks.values()):
                    print("Block status = ", bltdict['status'])
                    if bltdict['status'] == pfwdefs.PF_EXIT_DRYRUN:
                        print("setting return value to dryrun")
                        retval = bltdict['status']
                    elif bltdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                        num_bltasks_failed += 1
                        msg2 += "\t%s" % (bltdict['name'])
                        if bltdict['label'] is not None:
                            msg2 += " - %s" % (bltdict['label'])
                        msg2 += " failed\n"

                        if bltdict['name'] == 'begblock':
                            # try to read the begblock.out and begblock.err files
                            print(
                                "Trying to get begblock.out and begblock.err")
                            msg2 += get_subblock_output("begblock")

                            # try to get QCF messages (especially from query codes)
                            begblock_tid = int(config['task_id']['begblock'])
                            sql = "select id from task where parent_task_id=%i and status!=0" % (
                                begblock_tid)
                            curs = dbh.cursor()
                            curs.execute(sql)
                            res = curs.fetchall()
                            msg2 += "\n===== QCF Messages =====\n"
                            msg2 += "\n begblock\n"
                            wrapids = [blktid, begblock_tid]
                            for r in res:
                                wrapids.append(r[0])

                            wrapmsg = {}
                            if qdbh is not None:
                                miscutils.fwdebug_print(
                                    "Querying QCF messages")
                                start_time = time.time()
                                wrapmsg = qdbh.get_qcf_messages_for_wrappers(
                                    wrapids)
                                end_time = time.time()
                                miscutils.fwdebug_print(
                                    "Done querying QCF messages (%s secs)" %
                                    (end_time - start_time))
                                miscutils.fwdebug_print("wrapmsg = %s" %
                                                        wrapmsg)
                            if len(wrapmsg) == 0:
                                msg2 += "    No QCF messages\n"
                            else:
                                for msgs in list(wrapmsg.values()):
                                    for m in msgs:
                                        msg2 += "    " + m['message'] + "\n"

                        retval = pfwdefs.PF_EXIT_FAILURE

                if retval != pfwdefs.PF_EXIT_DRYRUN:
                    print("\n\nChecking job status from pfw_job table in DB (%s is success)" % \
                        pfwdefs.PF_EXIT_SUCCESS)

                    miscutils.fwdebug_print("Getting job info from DB")
                    start_time = time.time()
                    jobinfo = dbh.get_job_info({'pfw_block_task_id': blktid})
                    end_time = time.time()
                    miscutils.fwdebug_print(
                        "Done getting job info from DB (%s secs)" %
                        (end_time - start_time))

                    miscutils.fwdebug_print("Getting wrapper info from DB")
                    start_time = time.time()
                    wrapinfo = dbh.get_wrapper_info(pfw_attempt_id=attid,
                                                    pfw_block_task_id=blktid)
                    end_time = time.time()
                    miscutils.fwdebug_print(
                        "Done getting wrapper info from DB (%s secs)" %
                        (end_time - start_time))
            else:
                msg = "Could not find task id for block %s in config.des" % blockname
                print("Error:", msg)
                if 'attempt' in config['task_id']:
                    miscutils.fwdebug_print("Saving pfw message")
                    start_time = time.time()
                    Messaging.pfw_message(dbh, attid,
                                          config['task_id']['attempt'], msg,
                                          pfw_utils.PFW_DB_INFO,
                                          'blockpost.out', 0)
                    end_time = time.time()
                    miscutils.fwdebug_print(
                        "Done saving pfw message (%s secs)" %
                        (end_time - start_time))
                print("all the task ids:", config['task_id'])

            archive = None
            if pfwdefs.HOME_ARCHIVE in config:
                archive = config.getfull(pfwdefs.HOME_ARCHIVE)
            logfullnames = dbh.get_fail_log_fullnames(attid, archive)
            dbh.close()
            print("len(jobinfo) = ", len(jobinfo))
            print("len(wrapinfo) = ", len(wrapinfo))
            job_byblk = pfwutils.index_job_info(jobinfo)
            print("blktid: ", blktid)
            print("job_byblk:", job_byblk)

            if blktid not in job_byblk:
                print("Warn: could not find jobs for block %s" % blknum)
                print("      This is ok if attempt died before jobs ran")
                print("      block task_ids in job_byblk:" %
                      list(job_byblk.keys()))
            else:
                wrap_byjob, wrap_bymod = pfwutils.index_wrapper_info(wrapinfo)
                #print "wrap_byjob:", wrap_byjob
                #print "wrap_bymod:", wrap_bymod
                for jobtid, jobdict in sorted(job_byblk[blktid].items()):
                    failedwraps[jobtid] = []
                    whyfailwraps[jobtid] = []

                    jobkeys = ""

                    # don't print out successful wrappers
                    if jobtid in wrap_byjob and jobdict[
                            'status'] == pfwdefs.PF_EXIT_SUCCESS:
                        continue

                    if jobdict['jobkeys'] is not None:
                        jobkeys = jobdict['jobkeys']
                        #print "jobkeys = ", jobkeys, type(jobkeys)

                    submit_job_path = "%s/B%02d-%s/%04d" % (
                        config.getfull('work_dir'),
                        int(config.getfull('blknum')),
                        config.getfull('blockname'), int(jobdict['jobnum']))
                    msg2 += "\n\t%s (%s) " % (pfwutils.pad_jobnum(
                        jobdict['jobnum']), jobkeys)

                    if jobtid not in wrap_byjob:
                        msg2 += "\tNo wrapper instances"
                    else:
                        #print "wrapnum in job =", wrap_byjob[jobtid].keys()
                        maxwrap = max(wrap_byjob[jobtid].keys())
                        #print "maxwrap =", maxwrap
                        modname = wrap_byjob[jobtid][maxwrap]['modname']
                        #print "modname =", modname

                        msg2 += "%d/%s  %s" % (len(
                            wrap_byjob[jobtid]), jobdict['expect_num_wrap'],
                                               modname)

                        # determine wrappers for this job without success exit
                        for wrapnum, wdict in list(wrap_byjob[jobtid].items()):
                            if wdict['status'] is None or wdict[
                                    'status'] != pfwdefs.PF_EXIT_SUCCESS:
                                if wdict['modname'] == modname:
                                    failedwraps[jobtid].append(wrapnum)
                                else:
                                    whyfailwraps[jobtid].append(wrapnum)

                    if jobdict['status'] == pfwdefs.PF_EXIT_EUPS_FAILURE:
                        msg2 += " - FAIL - EUPS setup failure"
                        retval = jobdict['status']
                    elif jobdict['status'] == pfwdefs.PF_EXIT_CONDOR:
                        msg2 += " - FAIL - Condor/Globus failure"
                        retval = jobdict['status']
                    elif jobdict['status'] is None:
                        msg2 += " - FAIL - NULL status"
                        retval = pfwdefs.PF_EXIT_FAILURE
                    elif jobdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                        msg2 += " - FAIL - Non-zero status"
                        retval = jobdict['status']

                    if jobdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                        msg2 += "\n\t\t%s/runjob.out " % (submit_job_path)

                    msg2 += '\n'

                    # print pfw_messages
                    if 'message' in jobdict:
                        print(jobdict['message'])
                        for msgdict in sorted(jobdict['message'],
                                              key=lambda k: k['message_time']):
                            level = int(msgdict['message_lvl'])
                            levelstr = 'info'
                            if level == pfwdefs.PFWDB_MSG_WARN:
                                levelstr = 'WARN'
                            elif level == pfwdefs.PFWDB_MSG_ERROR:
                                levelstr = 'ERROR'

                            msg2 += "\t\t%s - %s\n" % (
                                levelstr, msgdict['message'].replace(
                                    '\n', '\n\t\t\t'))

                    if jobtid in wrap_byjob:
                        # print log file name for failed/unfinished wrappers
                        for wrapnum in failedwraps[jobtid]:
                            wrapdict = wrap_byjob[jobtid][wrapnum]
                            if wrapdict['log'] in logfullnames:
                                msg2 += "\t\t%s - %s\n" % (
                                    wrapnum, logfullnames[wrapdict['log']])
                            else:
                                msg2 += "\t\t%s - Could not find log in archive (%s)\n" % (
                                    wrapnum, wrapdict['log'])
                            wrapmsg = get_qcf_messages(qdbh, config,
                                                       [wrapdict['task_id']])
                            msg2 = print_qcf_messages(config, wrapdict,
                                                      wrapmsg, msg2)

                        msg2 += '\n'

                        # If weirdness happened in run, print a message
                        if len(whyfailwraps[jobtid]) > 0:
                            msg2 += "\n*** Contact framework developers.   Wrappers ran after at least 1 wrapper from a previous module that doesn't have success status.\n"
                            msg2 += "\t%s\n" % ','.join(whyfailwraps[jobtid])

        except Exception as exc:
            if sem is not None:
                del sem
            msg2 += "\n\nEncountered error trying to gather status information for email."
            msg2 += "\nCheck output for blockpost for further details."
            print(
                "\n\nEncountered error trying to gather status information for email"
            )
            print("%s: %s" % (exc.__class__.__name__, str(exc)))
            (extype, exvalue, trback) = sys.exc_info()
            traceback.print_exception(extype, exvalue, trback, file=sys.stdout)
            retval = pfwdefs.PF_EXIT_FAILURE
    retval = int(retval) + verify_status
    print("before email retval =", retval)

    when_to_email = 'run'
    if 'when_to_email' in config:
        when_to_email = config.getfull('when_to_email').lower()

    if miscutils.convertBool(dryrun):
        if when_to_email != 'never':
            print("dryrun = ", dryrun)
            print("Sending dryrun email")
            if retval == pfwdefs.PF_EXIT_DRYRUN:
                msg1 = "%s:  In dryrun mode, block %s has finished successfully." % (
                    run, blockname)
            else:
                msg1 = "%s:  In dryrun mode, block %s has failed." % (
                    run, blockname)

            send_email(config, blockname, retval, "", msg1, msg2)
        else:
            print("Not sending dryrun email")
            print("retval = ", retval)
        retval = pfwdefs.PF_EXIT_DRYRUN
    elif retval:
        if when_to_email != 'never':
            print("Sending block failed email\n")
            msg1 = "%s:  block %s has failed." % (run, blockname)
            send_email(config, blockname, retval, "", msg1, msg2)
        else:
            print("Not sending failed email")
            print("retval = ", retval)
    elif retval == pfwdefs.PF_EXIT_SUCCESS:
        if when_to_email == 'block':
            msg1 = "%s:  block %s has finished successfully." % (run,
                                                                 blockname)
            msg2 = ""
            print("Sending success email\n")
            send_email(config, blockname, retval, "", msg1, msg2)
        elif when_to_email == 'run':
            numblocks = len(
                miscutils.fwsplit(config[pfwdefs.SW_BLOCKLIST], ','))
            if int(config[pfwdefs.PF_BLKNUM]) == numblocks:
                msg1 = "%s:  run has finished successfully." % (run)
                msg2 = ""
                print("Sending success email\n")
                send_email(config, blockname, retval, "", msg1, msg2)
            else:
                print("Not sending run email because not last block")
                print("retval = ", retval)
        else:
            print("Not sending success email")
            print("retval = ", retval)
    else:
        print("Not sending email")
        print("retval = ", retval)

    # Store values in DB and hist file
    dbh = None
    if miscutils.convertBool(config[pfwdefs.PF_USE_DB_OUT]):
        dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                          config.getfull('submit_des_db_section'))
        if blktid is not None:
            print("Updating end of block task", blktid)
            dbh.end_task(blktid, retval, True)
        else:
            print("Could not update end of block task without block task id")
        if retval != pfwdefs.PF_EXIT_SUCCESS:
            print("Updating end of attempt", config['task_id']['attempt'])
            dbh.end_task(config['task_id']['attempt'], retval, True)
        dbh.commit()
        dbh.close()

    print("before next block retval = ", retval)
    if retval == pfwdefs.PF_EXIT_SUCCESS:
        # Get ready for next block
        config.inc_blknum()
        with open(configfile, 'w') as cfgfh:
            config.write(cfgfh)
        print("new blknum = ", config[pfwdefs.PF_BLKNUM])
        print("number of blocks = ",
              len(miscutils.fwsplit(config[pfwdefs.SW_BLOCKLIST], ',')))

    miscutils.fwdebug_print("Returning retval = %s (%s)" %
                            (retval, type(retval)))
    miscutils.fwdebug_print("END")
    debugfh.close()
    return int(retval)
示例#4
0
def jobpre(argv=None):
    """ Program entry point """
    if argv is None:
        argv = sys.argv

    #debugfh = tempfile.NamedTemporaryFile(prefix='jobpre_', dir='.', delete=False)
    default_log = f"jobpre_{random.randint(1,10000000):08d}.out"
    debugfh = open(default_log, 'w')

    tmpfn = debugfh.name
    outorig = sys.stdout
    errorig = sys.stderr
    sys.stdout = debugfh
    sys.stderr = debugfh

    print(' '.join(argv)) # command line for debugging
    print(os.getcwd())

    if len(argv) < 3:
        print("Usage: jobpre configfile jobnum")
        debugfh.close()
        return pfwdefs.PF_EXIT_FAILURE

    configfile = sys.argv[1]
    jobnum = sys.argv[2]    # could also be uberctrl

    # read wcl file
    config = pfwconfig.PfwConfig({'wclfile': configfile})
    blockname = config.getfull('blockname')
    blkdir = config.get('block_dir')
    tjpad = pfwutils.pad_jobnum(jobnum)

    # now that have more information, can rename output file
    miscutils.fwdebug_print("getting new_log_name")
    new_log_name = config.get_filename('job', {pfwdefs.PF_CURRVALS: {pfwdefs.PF_JOBNUM:jobnum,
                                                                     'flabel': 'jobpre',
                                                                     'fsuffix':'out'}})
    new_log_name = f"{blkdir}/{tjpad}/{new_log_name}"
    miscutils.fwdebug_print(f"new_log_name = {new_log_name}")

    debugfh.close()
    sys.stdout = outorig
    sys.stderr = errorig
    os.chmod(tmpfn, 0o666)
    os.rename(tmpfn, new_log_name)

    dbh = None
    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        if config.dbh is None:
            dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                              config.getfull('submit_des_db_section'))
        else:
            dbh = config.dbh

    if 'use_qcf' in config and config['use_qcf']:
        debugfh = Messaging.Messaging(new_log_name, 'jobpre.py', config['pfw_attempt_id'], dbh=dbh, mode='a+', usedb=dbh is not None)
    else:
        debugfh = open(new_log_name, 'a+')

    sys.stdout = debugfh
    sys.stderr = debugfh

    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        ctstr = dbh.get_current_timestamp_str()
        dbh.update_job_info(config, tjpad, {'condor_submit_time': ctstr,
                                            'target_submit_time': ctstr})

    log_pfw_event(config, blockname, tjpad, 'j', ['pretask'])

    miscutils.fwdebug_print("jobpre done")
    debugfh.close()
    sys.stdout = outorig
    sys.stderr = errorig
    return pfwdefs.PF_EXIT_SUCCESS
示例#5
0
def parse_job_output(config, jobnum, dbh=None, retval=None):
    """TODO: short summary.

    Search stdout/stderr for timing stats as well as eups setup or
    DB connection error messages and insert them into db.
    """
    jobbase = config.get_filename('job',
                                  {pfwdefs.PF_CURRVALS: {pfwdefs.PF_JOBNUM: jobnum,
                                                         'flabel': 'runjob',
                                                         'fsuffix': ''}})

    tjobinfo = {}
    tjobinfo_task = {}
    for jobfile in ['%sout'%jobbase, '%serr'%jobbase]:
        if os.path.exists(jobfile):
            with open(jobfile, 'r') as jobfh:
                for no, line in enumerate(jobfh):
                    line = line.strip()
                    if line.startswith('PFW:'):
                        parts = line.split()
                        if parts[1] == 'batchid':
                            if parts[2] == '=':   # older pfwrunjob.py
                                tjobinfo['target_job_id'] = parts[3]
                            else:
                                tjobinfo['target_job_id'] = parts[2]
                        elif parts[1] == 'condorid':
                            tjobinfo['condor_job_id'] = parts[2]
                        elif parts[1] == 'job_shell_script':
                            print("parts[2]", parts[2])
                            print("parts[3]", parts[3])
                            if parts[2] == 'exechost:':
                                #tjobinfo['target_exec_host'] = parts[3]
                                tjobinfo_task['exec_host'] = parts[3]
                            elif parts[2] == 'starttime:':
                                tjobinfo_task['start_time'] = datetime.fromtimestamp(float(parts[3]))
                            elif parts[2] == 'endtime:':
                                #tjobinfo['target_end_time'] = datetime.fromtimestamp(float(parts[3]))
                                tjobinfo_task['end_time'] = datetime.fromtimestamp(float(parts[3]))
                            elif parts[2] == 'exit_status:':
                                tjobinfo_task['status'] = parts[3]
                    elif 'ORA-' in line:
                        print("Found:", line)
                        if not 'DBD' in line:
                            print("Setting retval to failure")
                            tjobinfo_task['status'] = pfwdefs.PF_EXIT_FAILURE
                        else:
                            print(" Ignoring QCF perl error message.")
                        if dbh:
                            Messaging.pfw_message(dbh, config['pfw_attempt_id'],
                                                  config['task_id']['job'][jobnum],
                                                  line, pfwdefs.PFWDB_MSG_ERROR, jobfile, no)
                    elif 'No such file or directory: ' in line and \
                            config.getfull('target_des_services') in line:
                        print("Found:", line)
                        if dbh:
                            Messaging.pfw_message(dbh, config['pfw_attempt_id'],
                                                  config['task_id']['job'][jobnum],
                                                  line, pfwdefs.PFWDB_MSG_ERROR, jobfile, no)
                    elif 'Error: eups setup' in line:
                        print("Found:", line)
                        print("Setting retval to failure")
                        tjobinfo_task['status'] = pfwdefs.PF_EXIT_EUPS_FAILURE
                        if dbh:
                            Messaging.pfw_message(dbh, config['pfw_attempt_id'],
                                                  config['task_id']['job'][jobnum],
                                                  line, pfwdefs.PFWDB_MSG_ERROR, jobfile, no)
                    elif 'Exiting with status' in line:
                        lmatch = re.search(r'Exiting with status (\d+)', line)
                        if lmatch:
                            if int(lmatch.group(1)) != 0 and retval == 0:
                                print("Found:", line)
                                msg = "Info:  Job exit status was %s, but retval was %s." % \
                                      (lmatch.group(1), retval)
                                msg += "Setting retval to failure."
                                print(msg)
                                tjobinfo['status'] = pfwdefs.PF_EXIT_FAILURE
                                if dbh:
                                    Messaging.pfw_message(dbh, config['pfw_attempt_id'],
                                                          config['task_id']['job'][jobnum],
                                                          msg, pfwdefs.PFWDB_MSG_ERROR, jobfile, no)
                    elif 'Could not connect to database'in line:
                        print("Found:", line)
                        if dbh:
                            Messaging.pfw_message(dbh, config['pfw_attempt_id'],
                                                  config['task_id']['job'][jobnum],
                                                  line, pfwdefs.PFWDB_MSG_INFO, jobfile, no)

    return tjobinfo, tjobinfo_task
示例#6
0
def jobpost(argv=None):
    """Performs steps needed after a pipeline job.
    """
    condor2db = {'jobid': 'condor_job_id',
                 'csubmittime': 'condor_submit_time',
                 'gsubmittime': 'target_submit_time',
                 'starttime': 'condor_start_time',
                 'endtime': 'condor_end_time'}

    if argv is None:
        argv = sys.argv

    debugfh = tempfile.NamedTemporaryFile(mode='w+', prefix='jobpost_', dir='.', delete=False)
    tmpfn = debugfh.name
    sys.stdout = debugfh
    sys.stderr = debugfh

    miscutils.fwdebug_print("temp log name = %s" % tmpfn)
    print('cmd>', ' '.join(argv))  # print command line for debugging

    if len(argv) < 7:
        # open file to catch error messages about command line
        print('Usage: jobpost.py configfile block jobnum inputtar outputtar retval')
        debugfh.close()
        return pfwdefs.PF_EXIT_FAILURE

    configfile = argv[1]
    blockname = argv[2]
    jobnum = argv[3]
    inputtar = argv[4]
    outputtar = argv[5]
    retval = pfwdefs.PF_EXIT_FAILURE
    if len(argv) == 7:
        retval = int(sys.argv[6])

    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("configfile = %s" % configfile)
        miscutils.fwdebug_print("block = %s" % blockname)
        miscutils.fwdebug_print("jobnum = %s" % jobnum)
        miscutils.fwdebug_print("inputtar = %s" % inputtar)
        miscutils.fwdebug_print("outputtar = %s" % outputtar)
        miscutils.fwdebug_print("retval = %s" % retval)

    # read sysinfo file
    config = pfwconfig.PfwConfig({'wclfile': configfile})
    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("done reading config file")

    # now that have more information, rename output file
    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("before get_filename")
    blockname = config.getfull('blockname')
    blkdir = config.getfull('block_dir')
    tjpad = pfwutils.pad_jobnum(jobnum)

    os.chdir("%s/%s" % (blkdir, tjpad))
    new_log_name = config.get_filename('job', {pfwdefs.PF_CURRVALS: {pfwdefs.PF_JOBNUM: jobnum,
                                                                     'flabel': 'jobpost',
                                                                     'fsuffix': 'out'}})
    new_log_name = "%s" % (new_log_name)
    miscutils.fwdebug_print("new_log_name = %s" % new_log_name)

    debugfh.close()
    os.chmod(tmpfn, 0o666)
    os.rename(tmpfn, new_log_name)
    debugfh = open(new_log_name, 'a+')
    sys.stdout = debugfh
    sys.stderr = debugfh

    dbh = None
    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                          config.getfull('submit_des_db_section'))

        # get job information from the job stdout if exists
        (tjobinfo, tjobinfo_task) = parse_job_output(config, jobnum, dbh, retval)

        if dbh and len(tjobinfo) > 0:
            print("tjobinfo: ", tjobinfo)
            dbh.update_tjob_info(config['task_id']['job'][jobnum], tjobinfo)

        # get job information from the condor job log
        logfilename = 'runjob.log'
        if os.path.exists(logfilename) and os.path.getsize(logfilename) > 0:  # if made it to submitting/running jobs
            try:
                # update job info in DB from condor log
                print("Updating job info in DB from condor log")
                condorjobinfo = pfwcondor.parse_condor_user_log(logfilename)
                if len(list(condorjobinfo.keys())) > 1:
                    print("More than single job in job log")
                j = list(condorjobinfo.keys())[0]
                cjobinfo = condorjobinfo[j]
                djobinfo = {}
                for ckey, dkey in list(condor2db.items()):
                    if ckey in cjobinfo:
                        djobinfo[dkey] = cjobinfo[ckey]
                print(djobinfo)
                dbh.update_job_info(config, cjobinfo['jobname'], djobinfo)

                if 'holdreason' in cjobinfo and cjobinfo['holdreason'] is not None:
                    msg = "Condor HoldReason: %s" % cjobinfo['holdreason']
                    print(msg)
                    if dbh:
                        Messaging.pfw_message(dbh, config['pfw_attempt_id'],
                                              config['task_id']['job'][jobnum],
                                              msg, pfwdefs.PFWDB_MSG_WARN)

                if 'abortreason' in cjobinfo and cjobinfo['abortreason'] is not None:
                    tjobinfo_task['start_time'] = cjobinfo['starttime']
                    tjobinfo_task['end_time'] = cjobinfo['endtime']
                    if 'condor_rm' in cjobinfo['abortreason']:
                        tjobinfo_task['status'] = pfwdefs.PF_EXIT_OPDELETE
                    else:
                        tjobinfo_task['status'] = pfwdefs.PF_EXIT_CONDOR
                else:
                    pass
            except Exception:
                (extype, exvalue, trback) = sys.exc_info()
                traceback.print_exception(extype, exvalue, trback, file=sys.stdout)
        else:
            print("Warning:  no job condor log file")

        if dbh:
            # update job task
            if 'status' not in tjobinfo_task:
                tjobinfo_task['status'] = pfwdefs.PF_EXIT_CONDOR
            if 'end_time' not in tjobinfo_task:
                tjobinfo_task['end_time'] = datetime.now()
            wherevals = {'id': config['task_id']['job'][jobnum]}
            dbh.basic_update_row('task', tjobinfo_task, wherevals)
            dbh.commit()

    log_pfw_event(config, blockname, jobnum, 'j', ['posttask', retval])

    # input wcl should already exist in untar form
    if os.path.exists(inputtar):
        print("found inputtar: %s" % inputtar)
        os.unlink(inputtar)
    else:
        print("Could not find inputtar: %s" % inputtar)

    # untar output wcl tar and delete tar
    if os.path.exists(outputtar):
        print("Size of output wcl tar:", os.path.getsize(outputtar))
        if os.path.getsize(outputtar) > 0:
            print("found outputtar: %s" % outputtar)
            pfwutils.untar_dir(outputtar, '..')
            os.unlink(outputtar)
        else:
            msg = "Warn: outputwcl tarball (%s) is 0 bytes." % outputtar
            print(msg)
            if dbh:
                Messaging.pfw_message(dbh, config['pfw_attempt_id'],
                                      config['task_id']['job'][jobnum],
                                      msg, pfwdefs.PFWDB_MSG_WARN)
    else:
        msg = "Warn: outputwcl tarball (%s) does not exist." % outputtar
        print(msg)
        if dbh:
            Messaging.pfw_message(dbh, config['pfw_attempt_id'],
                                  config['task_id']['job'][jobnum],
                                  msg, pfwdefs.PFWDB_MSG_WARN)

    if retval != pfwdefs.PF_EXIT_SUCCESS:
        miscutils.fwdebug_print("Setting failure retval")
        retval = pfwdefs.PF_EXIT_FAILURE

    miscutils.fwdebug_print("Returning retval = %s" % retval)
    miscutils.fwdebug_print("jobpost done")
    debugfh.close()
    return int(retval)
示例#7
0
def run_cmd_qcf(cmd,
                logfilename,
                wid,
                execnames,
                use_qcf=False,
                dbh=None,
                pfwattid=0,
                patterns={}):
    """Execute the command piping stdout/stderr to log and QCF.
    """
    bufsize = 1024 * 10

    if miscutils.fwdebug_check(3, "PFWUTILS_DEBUG"):
        miscutils.fwdebug_print("BEG")
        miscutils.fwdebug_print("working dir = %s" % (os.getcwd()))
        miscutils.fwdebug_print("cmd = %s" % cmd)
        miscutils.fwdebug_print("logfilename = %s" % logfilename)
        miscutils.fwdebug_print("wid = %s" % wid)
        miscutils.fwdebug_print("execnames = %s" % execnames)
        miscutils.fwdebug_print("use_qcf = %s" % use_qcf)

    use_qcf = miscutils.convertBool(use_qcf)

    sys.stdout.flush()
    try:
        messaging = Messaging.Messaging(logfilename,
                                        execnames,
                                        pfwattid=pfwattid,
                                        taskid=wid,
                                        dbh=dbh,
                                        usedb=use_qcf,
                                        qcf_patterns=patterns)
        process_wrap = subprocess.Popen(shlex.split(cmd),
                                        shell=False,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.STDOUT)
    except:
        (extype, exvalue, _) = sys.exc_info()
        print("********************")
        print("Unexpected error: %s - %s" % (extype, exvalue))
        print("cmd> %s" % cmd)
        print("Probably could not find %s in path" % cmd.split()[0])
        print("Check for mispelled execname in submit wcl or")
        print(
            "    make sure that the corresponding eups package is in the metapackage "
        )
        print("    and it sets up the path correctly")
        raise

    try:
        buf = os.read(process_wrap.stdout.fileno(), bufsize).decode()
        while process_wrap.poll() == None or len(buf) != 0:
            messaging.write(buf)
            buf = os.read(process_wrap.stdout.fileno(), bufsize).decode()

    except IOError as exc:
        print("\tI/O error({0}): {1}".format(exc.errno, exc.strerror))

    except:
        (extype, exvalue, _) = sys.exc_info()
        print("\tError: Unexpected error: %s - %s" % (extype, exvalue))
        raise

    sys.stdout.flush()
    if miscutils.fwdebug_check(3, "PFWUTILS_DEBUG"):
        if process_wrap.returncode != 0:
            miscutils.fwdebug_print(
                "\tInfo: cmd exited with non-zero exit code = %s" %
                process_wrap.returncode)
            miscutils.fwdebug_print("\tInfo: failed cmd = %s" % cmd)
        else:
            miscutils.fwdebug_print("\tInfo: cmd exited with exit code = 0")

    if miscutils.fwdebug_check(3, "PFWUTILS_DEBUG"):
        miscutils.fwdebug_print("END")
    return process_wrap.returncode
示例#8
0
def run_cmd_qcf(cmd, logfilename, wid, execnames, use_qcf=False, dbh=None, pfwattid=0, patterns={}, threaded=False):
    """ Execute the command piping stdout/stderr to log and QCF """
    bufsize = 1024 * 10
    lasttime = time.time()
    if miscutils.fwdebug_check(3, "PFWUTILS_DEBUG"):
        miscutils.fwdebug_print("BEG")
        miscutils.fwdebug_print(f"working dir = {os.getcwd()}")
        miscutils.fwdebug_print(f"cmd = {cmd}")
        miscutils.fwdebug_print(f"logfilename = {logfilename}")
        miscutils.fwdebug_print(f"wid = {wid}")
        miscutils.fwdebug_print(f"execnames = {execnames}")
        miscutils.fwdebug_print(f"use_qcf = {use_qcf}")

    use_qcf = miscutils.convertBool(use_qcf)

    sys.stdout.flush()
    try:
        messaging = Messaging.Messaging(logfilename, execnames, pfwattid=pfwattid, taskid=wid,
                                        dbh=dbh, usedb=use_qcf, qcf_patterns=patterns, threaded=threaded)
        process_wrap = subprocess.Popen(shlex.split(cmd),
                                        shell=False,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.STDOUT)
    except:
        (extype, exvalue, _) = sys.exc_info()
        print("********************")
        print(f"Unexpected error: {extype} - {exvalue}")
        print(f"cmd> {cmd}")
        print(f"Probably could not find {cmd.split()[0]} in path")
        print("Check for mispelled execname in submit wcl or")
        print("    make sure that the corresponding eups package is in the metapackage ")
        print("    and it sets up the path correctly")
        raise

    try:
        buf = os.read(process_wrap.stdout.fileno(), bufsize)
        while process_wrap.poll() is None or buf:
            if dbh is not None:
                now = time.time()
                if now - lasttime > 30.*60.:
                    if not dbh.ping():
                        dbh.reconnect()
                    lasttime = now
            messaging.write(buf)
            #print buf
            buf = os.read(process_wrap.stdout.fileno(), bufsize)
            # brief sleep
            if process_wrap.poll() is None:
                time.sleep(0.1)

    except IOError as exc:
        print(f"\tI/O error({exc.errno}): {exc.strerror}")

    except:
        (extype, exvalue, _) = sys.exc_info()
        print(f"\tError: Unexpected error: {extype} - {exvalue}")
        raise

    sys.stdout.flush()
    if miscutils.fwdebug_check(3, "PFWUTILS_DEBUG"):
        if process_wrap.returncode != 0:
            miscutils.fwdebug_print(f"\tInfo: cmd exited with non-zero exit code = {process_wrap.returncode}")
            miscutils.fwdebug_print(f"\tInfo: failed cmd = {cmd}")
        else:
            miscutils.fwdebug_print("\tInfo: cmd exited with exit code = 0")


    if miscutils.fwdebug_check(3, "PFWUTILS_DEBUG"):
        miscutils.fwdebug_print("END")
    return process_wrap.returncode
示例#9
0
def logpre(argv=None):
    """ Program entry point """
    if argv is None:
        argv = sys.argv

    default_log = 'logpre.out'
    debugfh = open(default_log, 'w')
    outorig = sys.stdout
    errorig = sys.stderr
    sys.stdout = debugfh
    sys.stderr = debugfh

    print(' '.join(sys.argv)) # command line for debugging

    if len(argv) < 5:
        print("Usage: logpre configfile block subblocktype subblock")
        debugfh.close()
        return pfwdefs.PF_EXIT_FAILURE

    configfile = argv[1]
    blockname = argv[2]    # could also be uberctrl
    subblocktype = argv[3]
    subblock = argv[4]

    # read sysinfo file
    config = pfwconfig.PfwConfig({'wclfile': configfile})

    # now that have more information, can rename output file
    miscutils.fwdebug_print("getting new_log_name")
    blockname = config.getfull('blockname')
    blkdir = config.getfull('block_dir')
    new_log_name = config.get_filename('block',
                                       {pfwdefs.PF_CURRVALS: {'subblock': subblock,
                                                              'flabel': '${subblock}_logpre',
                                                              'fsuffix':'out'}})
    new_log_name = f"{blkdir}/{new_log_name}"
    miscutils.fwdebug_print(f"new_log_name = {new_log_name}")
    debugfh.close()

    os.chmod(default_log, 0o666)
    os.rename(default_log, new_log_name)

    #debugfh.close()
    sys.stdout = outorig
    sys.stderr = errorig

    if 'use_qcf' in config and config['use_qcf']:
        if config.dbh is None:
            if 'submit_des_services' in config:
                os.environ['DES_SERVICES'] = config.getfull('submit_des_services')
            os.environ['DES_DB_SECTION'] = config.getfull('submit_des_db_section')

            debugfh = Messaging.Messaging(new_log_name, 'logpre.py', config['pfw_attempt_id'], mode='a+')
        else:
            debugfh = Messaging.Messaging(new_log_name, 'logpre.py', config['pfw_attempt_id'], dbh=config.dbh, mode='a+')

    else:
        debugfh = open(new_log_name, 'a+')
    sys.stdout = debugfh
    sys.stderr = debugfh


    log_pfw_event(config, blockname, subblock, subblocktype, ['pretask'])

    print("logpre done")
    debugfh.close()
    sys.stdout = outorig
    sys.stderr = errorig
    return pfwdefs.PF_EXIT_SUCCESS
示例#10
0
def parse_job_output(config, jobnum, dbh=None, retval=None):
    """ Search stdout/stderr for timing stats as well as eups setup
        or DB connection error messages and insert them into db """
    jobbase = config.get_filename(
        'job', {
            pfwdefs.PF_CURRVALS: {
                pfwdefs.PF_JOBNUM: jobnum,
                'flabel': 'runjob',
                'fsuffix': ''
            }
        })
    tjobinfo = {}
    tjobinfo_task = {}
    for jobfile in [f"{jobbase}out", f"{jobbase}err"]:
        if os.path.exists(jobfile):
            with open(jobfile, 'r') as jobfh:
                for no, line in enumerate(jobfh):
                    line = line.strip()
                    if line.startswith('PFW:'):
                        parts = line.split()
                        if parts[1] == 'batchid':
                            if parts[2] == '=':  # older pfwrunjob.py
                                tjobinfo['target_job_id'] = parts[3]
                            else:
                                tjobinfo['target_job_id'] = parts[2]
                        elif parts[1] == 'condorid':
                            tjobinfo['condor_job_id'] = parts[2]
                        elif parts[1] == 'job_shell_script':
                            #print("parts[2]", parts[2])
                            #print("parts[3]", parts[3])
                            if parts[2] == 'exechost:':
                                #tjobinfo['target_exec_host'] = parts[3]
                                tjobinfo_task['exec_host'] = parts[3]
                            elif parts[2] == 'starttime:':
                                tjobinfo_task[
                                    'start_time'] = datetime.fromtimestamp(
                                        float(parts[3]))
                            elif parts[2] == 'endtime:':
                                #tjobinfo['target_end_time'] = datetime.fromtimestamp(float(parts[3]))
                                tjobinfo_task[
                                    'end_time'] = datetime.fromtimestamp(
                                        float(parts[3]))
                            elif parts[2] == 'exit_status:':
                                tjobinfo_task['status'] = parts[3]
                    # skip ORA messages as they are caught by the QCF earlier, and not all are fatal
                    #elif 'ORA-' in line:
                    #print "Found:", line
                    #if not 'DBD' in line:
                    #    print "Setting retval to failure"
                    #    tjobinfo_task['status'] = pfwdefs.PF_EXIT_FAILURE
                    #else:
                    #    print " Ignoring QCF perl error message."
                    #    if dbh:
                    #        Messaging.pfw_message(dbh, config['pfw_attempt_id'],
                    #                              config['task_id']['job'][jobnum],
                    #                              line, pfwdefs.PFWDB_MSG_ERROR, jobfile, no)
                    elif "No such file or directory:" in line and \
                          config.getfull('target_des_services') in line:
                        #print "Found:", line
                        if dbh:
                            Messaging.pfw_message(
                                dbh, config['pfw_attempt_id'],
                                config['task_id']['job'][jobnum], line,
                                pfwdefs.PFWDB_MSG_ERROR, jobfile, no)
                    elif "Error: eups setup" in line:
                        #print "Found:", line
                        print("Setting retval to failure")
                        tjobinfo_task['status'] = pfwdefs.PF_EXIT_EUPS_FAILURE
                        if dbh:
                            Messaging.pfw_message(
                                dbh, config['pfw_attempt_id'],
                                config['task_id']['job'][jobnum], line,
                                pfwdefs.PFWDB_MSG_ERROR, jobfile, no)
                    elif "Exiting with status" in line:
                        lmatch = re.search(r'Exiting with status (\d+)', line)
                        if lmatch:
                            if int(lmatch.group(1)) != 0 and retval == 0:
                                #print "Found:", line
                                msg = f"Info:  Job exit status was {lmatch.group(1)}, but retval was {retval}."
                                msg += "Setting retval to failure."
                                #print msg
                                tjobinfo['status'] = pfwdefs.PF_EXIT_FAILURE
                                if dbh:
                                    Messaging.pfw_message(
                                        dbh, config['pfw_attempt_id'],
                                        config['task_id']['job'][jobnum], msg,
                                        pfwdefs.PFWDB_MSG_ERROR, jobfile, no)
                    elif "Could not connect to database" in line:
                        #print "Found:", line
                        if dbh:
                            Messaging.pfw_message(
                                dbh, config['pfw_attempt_id'],
                                config['task_id']['job'][jobnum], line,
                                pfwdefs.PFWDB_MSG_INFO, jobfile, no)

    return tjobinfo, tjobinfo_task
示例#11
0
def logpost(argv=None):
    """ Program entry point """
    if argv is None:
        argv = sys.argv

    # open file to catch error messages about command line
    debugfh = open('logpost.out', 'w')
    outorig = sys.stdout
    errorig = sys.stderr
    sys.stdout = debugfh
    sys.stderr = debugfh

    print(' '.join(argv))  # print command line for debugging

    if len(argv) < 5:
        print("Usage: logpost configfile block subblocktype subblock retval")
        debugfh.close()
        return pfwdefs.PF_EXIT_FAILURE

    configfile = argv[1]
    blockname = argv[2]
    subblocktype = argv[3]
    subblock = argv[4]
    retval = pfwdefs.PF_EXIT_FAILURE
    if len(argv) == 6:
        retval = int(sys.argv[5])

    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print(f"configfile = {configfile}")
        miscutils.fwdebug_print(f"block = {blockname}")
        miscutils.fwdebug_print(f"subblock = {subblock}")
        miscutils.fwdebug_print(f"retval = {retval}")

    # read sysinfo file
    config = pfwconfig.PfwConfig({'wclfile': configfile})
    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("done reading config file")

    # now that have more information, rename output file
    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("before get_filename")
    blockname = config.getfull('blockname')
    blkdir = config.getfull('block_dir')
    new_log_name = config.get_filename(
        'block', {
            pfwdefs.PF_CURRVALS: {
                'flabel': '${subblock}_logpost',
                'subblock': subblock,
                'fsuffix': 'out'
            }
        })
    new_log_name = f"{blkdir}/{new_log_name}"
    miscutils.fwdebug_print(f"new_log_name = {new_log_name}")

    debugfh.close()
    sys.stdout = outorig
    sys.stderr = errorig

    os.chmod('logpost.out', 0o666)
    os.rename('logpost.out', new_log_name)
    if 'use_qcf' in config and config['use_qcf']:
        if config.dbh is None:
            if 'submit_des_services' in config:
                os.environ['DES_SERVICES'] = config.getfull(
                    'submit_des_services')
            os.environ['DES_DB_SECTION'] = config.getfull(
                'submit_des_db_section')

            debugfh = Messaging.Messaging(new_log_name,
                                          'logpost.py',
                                          config['pfw_attempt_id'],
                                          mode='a+')
        else:
            debugfh = Messaging.Messaging(new_log_name,
                                          'logpost.py',
                                          config['pfw_attempt_id'],
                                          dbh=config.dbh,
                                          mode='a+')

    else:
        debugfh = open(new_log_name, 'a+')

    sys.stdout = debugfh
    sys.stderr = debugfh

    log_pfw_event(config, blockname, subblock, subblocktype,
                  ['posttask', retval])

    # In order to continue, make pipelines dagman jobs exit with success status
    #if 'pipelinesmngr' not in subblock:
    #    retval = pfwdefs.PF_EXIT_SUCCESS

    #    # If error at non-manager level, send failure email
    #    if retval != pfwdefs.PF_EXIT_SUCCESS and \
    #        'mngr' not in subblock:
    #        send_subblock_email(config, blockname, subblock, retval)

    if subblock != 'begblock' and retval != pfwdefs.PF_EXIT_SUCCESS:
        miscutils.fwdebug_print("Setting failure retval")
        retval = pfwdefs.PF_EXIT_FAILURE

    miscutils.fwdebug_print(f"returning retval = {retval}")
    miscutils.fwdebug_print("logpost done")
    debugfh.close()
    sys.stdout = outorig
    sys.stderr = errorig
    miscutils.fwdebug_print(f"Exiting with = {retval}")
    return int(retval)
示例#12
0
    def update_job_target_info(self,
                               wcl,
                               submit_condor_id=None,
                               target_batch_id=None,
                               exechost=None):
        """Save information about target job from pfwrunjob.
        """
        params = {}
        setvals = []
        if submit_condor_id is not None:
            setvals.append('condor_job_id=%s' %
                           self.get_named_bind_string('condor_job_id'))
            params['condor_job_id'] = float(submit_condor_id)

        if target_batch_id is not None:
            setvals.append('target_job_id=%s' %
                           self.get_named_bind_string('target_job_id'))
            params['target_job_id'] = target_batch_id

        if 'jobroot' in wcl:
            setvals.append('jobroot=%s' %
                           self.get_named_bind_string('jobroot'))
            params['jobroot'] = wcl['jobroot']

        if len(setvals) > 0:
            params['task_id'] = wcl['task_id']['job']

            sql = "update pfw_job set %s where task_id=%s and condor_job_id is NULL" % (
                ','.join(setvals), self.get_named_bind_string('task_id'))

            if miscutils.fwdebug_check(3, 'PFWDB_DEBUG'):
                miscutils.fwdebug_print("sql> %s" % sql)
            if miscutils.fwdebug_check(3, 'PFWDB_DEBUG'):
                miscutils.fwdebug_print("params> %s" % params)
            curs = self.cursor()
            try:
                curs.execute(sql, params)
            except:
                (type, value, _) = sys.exc_info()
                print("******************************")
                print("Error:", type, value)
                print("sql> %s\n" % (sql))
                print("params> %s\n" % params)
                raise

            if curs.rowcount == 0:
                Messaging.pfw_message(self, wcl['pfw_attempt_id'],
                                      wcl['task_id']['job'],
                                      "Job attempted to run more than once",
                                      pfw_utils.PFWDB_MSG_ERROR)

                print("******************************")
                print("Error:  This job has already been run before.")
                print("pfw_attempt_id = ", wcl['pfw_attempt_id'])
                print("reqnum = ", wcl[pfwdefs.REQNUM])
                print("unitname = ", wcl[pfwdefs.UNITNAME])
                print("attnum = ", wcl[pfwdefs.ATTNUM])
                print("blknum = ", wcl[pfwdefs.PF_BLKNUM])
                print("jobnum = ", wcl[pfwdefs.PF_JOBNUM])
                print("job task_id = ", wcl['task_id']['job'])

                print("\nThe 1st job information:")
                curs2 = self.cursor()
                sql = "select * from pfw_job, task where pfw_job.task_id=task.id and pfw_job.task_id=%s" % (
                    self.get_named_bind_string('task_id'))
                curs2.execute(sql, {'task_id': wcl['task_id']['job']})
                desc = [d[0].lower() for d in curs2.description]
                for row in curs2:
                    d = dict(list(zip(desc, row)))
                    for k, v in list(d.items()):
                        print(k, v)
                    print("\n")

                print("\nThe 2nd job information:")
                print("submit_condor_id = ", submit_condor_id)
                print("target_batch_id = ", target_batch_id)
                print("exechost = ", exechost)
                print("current time = ", str(datetime.now()))

                print("\nupdate statement information")
                print("sql> %s\n" % sql)
                print("params> %s\n" % params)

                raise Exception("Error: job attempted to run more than once")

        if exechost is not None:
            sql = "update task set exec_host='%s'" % (exechost)

            if 'PFW_JOB_START_EPOCH' in os.environ:
                # doing conversion on DB to avoid any timezone issues
                sql += ", start_time = (from_tz(to_timestamp('1970-01-01','YYYY-MM-DD') + numtodsinterval(%s,'SECOND'), 'UTC') at time zone 'US/Central')" % (
                    os.environ['PFW_JOB_START_EPOCH'])

            sql += ' where id=%s' % (wcl['task_id']['job'])
            curs = self.cursor()
            curs.execute(sql)
            self.commit()
示例#13
0
def blockpost(argv=None):
    """ Program entry point """
    realstdout = sys.stdout
    realstderr = sys.stderr

    if argv is None:
        argv = sys.argv

    # open file to catch error messages about command line
    debugfh = open('blockpost.out', 'w')
    sys.stdout = debugfh
    sys.stderr = debugfh
    print(' '.join(argv))  # print command line for debugging

    print(f"running on {socket.gethostname()}")

    if len(argv) != 3:
        print('Usage: blockpost.py configfile retval')
        debugfh.close()
        return pfwdefs.PF_EXIT_FAILURE

    configfile = argv[1]
    retval = int(argv[2])

    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print(f"configfile = {configfile}")
    miscutils.fwdebug_print(f"retval = {retval}")

    # read sysinfo file
    config = pfwconfig.PfwConfig({'wclfile': configfile})
    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("done reading config file")
    blockname = config.getfull('blockname')
    blkdir = config.getfull('block_dir')

    # now that have more information, can rename output file
    miscutils.fwdebug_print("getting new_log_name")
    new_log_name = config.get_filename('block',
                                       {pfwdefs.PF_CURRVALS: {'flabel': 'blockpost',
                                                              'fsuffix':'out'}})
    new_log_name = f"{blkdir}/{new_log_name}"
    miscutils.fwdebug_print(f"new_log_name = {new_log_name}")

    debugfh.close()
    os.chmod('blockpost.out', 0o666)
    os.rename('blockpost.out', new_log_name)
    debugfh = open(new_log_name, 'a+')
    sys.stdout = debugfh
    sys.stderr = debugfh

    os.chdir(blkdir)

    log_pfw_event(config, blockname, 'blockpost', 'j', ['posttask', retval])

    dryrun = config.getfull(pfwdefs.PF_DRYRUN)
    run = config.getfull('run')
    attid = config['pfw_attempt_id']
    blknum = int(config.getfull(pfwdefs.PF_BLKNUM))
    blktid = None

    msg2 = ""
    dbh = None
    qdbh = None
    job_byblk = {}
    wrap_byjob = {}
    wrapinfo = {}
    jobinfo = {}
    failedwraps = {}
    whyfailwraps = {}   # mod failures for other modname, shouldn't happen
    usedb = miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT))
    verify_files = miscutils.convertBool(config.getfull('verify_files'))
    verify_status = 0
    sem = None
    if verify_files and not usedb:
        print('Skipping file verification due to lack of database connection')
    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        try:
            miscutils.fwdebug_print("Connecting to DB")
            if config.dbh is None:
                dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                                  config.getfull('submit_des_db_section'))
            else:
                dbh = config.dbh
            if verify_files:
                curs = dbh.cursor()
                curs.execute(f"select root from ops_archive where name='{config.getfull('home_archive')}'")
                rows = curs.fetchall()
                if rows is None or len(rows) != 1:
                    raise Exception(f"Invalid archive name ({config.getfull('home_archive')}).   Found {len(rows)} rows in ops_archive")
                root = rows[0][0]
                if not os.path.isdir(root):
                    print(f"Cannot read archive root directory:{config.getfull('home_archive')} This program must be run on an NCSA machine with access to the archive storage system.")
                sem = dbsem.DBSemaphore('verify_files_10', None, config.getfull('submit_des_services'), config.getfull('submit_des_db_section'), connection=dbh)
                print("\n\nVerifying archive file sizes on disk (0 is success)")
                verify_status = cu.compare(dbh=dbh, archive=config.getfull('home_archive'), pfwid=attid, md5sum=False, debug=False, script=False, verbose=False, silent=True)
                if sem is not None:
                    del sem
                    sem = None
                print(f"  Verification of files returned status {verify_status:d}")
                if verify_status != 0:
                    print("  This indicates that one or more files do not have the correct file size (based on DB entries). Run")
                    print(f"\n    compare_db.py --des_services {config.getfull('submit_des_services')} --section {config.getfull('submit_des_db_section')} --archive {config.getfull('home_archive')} --pfwid {int(attid):d} --verbose")
                    print("\n  to see the details.")

            if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_QCF)):
                import qcframework.qcfdb as qcfdb
                #qdbh = qcfdb.QCFDB(config.getfull('submit_des_services'),
                #                   config.getfull('submit_des_db_section'))
                qdbh = qcfdb.QCFDB(connection=dbh)

            print(f"\n\nChecking non-job block task status from task table in DB ({pfwdefs.PF_EXIT_SUCCESS} is success)")
            num_bltasks_failed = 0
            bltasks = {}
            blktid = None
            if ('block' in config['task_id'] and
                    str(blknum) in config['task_id']['block']):
                blktid = int(config['task_id']['block'][str(blknum)])
                miscutils.fwdebug_print("Getting block task info from DB")
                start_time = time.time()
                bltasks = dbh.get_block_task_info(blktid)
                end_time = time.time()
                miscutils.fwdebug_print(f"Done getting block task info from DB ({end_time - start_time} secs)")
                for bltdict in bltasks.values():
                    print("Block status = ", bltdict['status'])
                    if bltdict['status'] == pfwdefs.PF_EXIT_DRYRUN:
                        print("setting return value to dryrun")
                        retval = bltdict['status']
                    elif bltdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                        num_bltasks_failed += 1
                        msg2 += f"\t{bltdict['name']}"
                        if bltdict['label'] is not None:
                            msg2 += f" - {bltdict['label']}"
                        msg2 += " failed\n"

                        if bltdict['name'] == 'begblock':
                            # try to read the begblock.out and begblock.err files
                            print("Trying to get begblock.out and begblock.err")
                            msg2 += get_subblock_output("begblock")

                            # try to get QCF messages (especially from query codes)
                            begblock_tid = int(config['task_id']['begblock'])
                            sql = f"select id from task where parent_task_id={begblock_tid:d} and status!=0"
                            curs = dbh.cursor()
                            curs.execute(sql)
                            res = curs.fetchall()
                            msg2 += "\n===== QCF Messages =====\n"
                            msg2 += "\n begblock\n"
                            wrapids = [blktid, begblock_tid]
                            for r in res:
                                wrapids.append(r[0])

                            wrapmsg = {}
                            if qdbh is not None:
                                miscutils.fwdebug_print("Querying QCF messages")
                                start_time = time.time()
                                wrapmsg = qdbh.get_qcf_messages_for_wrappers(wrapids)
                                end_time = time.time()
                                miscutils.fwdebug_print(f"Done querying QCF messages ({end_time-start_time} secs)")
                                miscutils.fwdebug_print(f"wrapmsg = {wrapmsg}")
                            if not wrapmsg:
                                msg2 += "    No QCF messages\n"
                            else:
                                for msgs in wrapmsg.values():
                                    for m in msgs:
                                        msg2 += "    " + m['message'] + "\n"

                        retval = pfwdefs.PF_EXIT_FAILURE

                if retval != pfwdefs.PF_EXIT_DRYRUN:
                    print(f"\n\nChecking job status from pfw_job table in DB ({pfwdefs.PF_EXIT_SUCCESS} is success)")

                    miscutils.fwdebug_print("Getting job info from DB")
                    start_time = time.time()
                    jobinfo = dbh.get_job_info({'pfw_block_task_id': blktid})
                    end_time = time.time()
                    miscutils.fwdebug_print(f"Done getting job info from DB ({end_time - start_time} secs)")
                    miscutils.fwdebug_print("Getting wrapper info from DB")
                    start_time = time.time()
                    wrapinfo = dbh.get_wrapper_info(pfw_attempt_id=attid, pfw_block_task_id=blktid)
                    if retval != pfwdefs.PF_EXIT_SUCCESS:
                        jobwrap = dbh.get_jobwrapper_info(id=attid)
                    else:
                        jobwrap = {}
                    end_time = time.time()
                    miscutils.fwdebug_print(f"Done getting wrapper info from DB ({end_time - start_time} secs)")
            else:
                msg = f"Could not find task id for block {blockname} in config.des"
                print("Error:", msg)
                if 'attempt' in config['task_id']:
                    miscutils.fwdebug_print("Saving pfw message")
                    start_time = time.time()
                    Messaging.pfw_message(dbh, attid, config['task_id']['attempt'],
                                          msg, pfwdefs.PFWDB_MSG_INFO, 'blockpost.out', 0)
                    end_time = time.time()
                    miscutils.fwdebug_print(f"Done saving pfw message ({end_time - start_time} secs)")
                print("all the task ids:", config['task_id'])


            archive = None
            if pfwdefs.HOME_ARCHIVE in config:
                archive = config.getfull(pfwdefs.HOME_ARCHIVE)
            logfullnames = dbh.get_log_fullnames(attid, archive)
            #dbh.close()
            print("len(jobinfo) = ", len(jobinfo))
            print("len(wrapinfo) = ", len(wrapinfo))
            job_byblk = pfwutils.index_job_info(jobinfo)
            print("blktid: ", blktid)
            print("job_byblk:", job_byblk)

            if blktid not in job_byblk:
                print(f"Warn: could not find jobs for block {blknum}")
                print("      This is ok if attempt died before jobs ran")
                print("      block task_ids in job_byblk:", list(job_byblk.keys()))
            else:
                wrap_byjob, _ = pfwutils.index_wrapper_info(wrapinfo)
                #for wid,jwr in jobwrap.iteritems():
                    #print wid,jwr

                # in case the post wrapper stuff failed, internally mark the task
                # as failed to retrieve the info later
                for wrapb in wrap_byjob.values():
                    for wrapper in wrapb.values():
                        if wrapper['parent_task_id'] in jobwrap and jobwrap[wrapper['parent_task_id']]['status'] is not None \
                           and wrapper['status'] is not None and jobwrap[wrapper['parent_task_id']]['status'] > wrapper['status']:
                            wrapper['status'] = jobwrap[wrapper['parent_task_id']]['status']

                #print "wrap_bymod:", wrap_bymod
                jobtid = ''
                jobdict = {}
                for jobtid, jobdict in sorted(job_byblk[blktid].items()):
                    failedwraps[jobtid] = []
                    whyfailwraps[jobtid] = []

                    jobkeys = ""

                    # don't print out successful wrappers
                    if jobtid in wrap_byjob and jobdict['status'] == pfwdefs.PF_EXIT_SUCCESS:
                        continue

                    if jobdict['jobkeys'] is not None:
                        jobkeys = jobdict['jobkeys']
                        #print "jobkeys = ", jobkeys, type(jobkeys)

                    submit_job_path = f"{config.getfull('work_dir')}/B{int(config.getfull('blknum')):02d}-{config.getfull('blockname'):s}/{int(jobdict['jobnum']):04d}"
                    msg2 += f"\n\t{pfwutils.pad_jobnum(jobdict['jobnum'])} ({jobkeys}) "


                    if jobtid not in wrap_byjob:
                        msg2 += "\tNo wrapper instances"
                    else:
                        #print "wrapnum in job =", wrap_byjob[jobtid].keys()
                        maxwrap = max(wrap_byjob[jobtid])
                        #print "maxwrap =", maxwrap
                        modname = wrap_byjob[jobtid][maxwrap]['modname']
                        #print "modname =", modname

                        msg2 += f"{len(wrap_byjob[jobtid]):d}/{jobdict['expect_num_wrap']}  {modname}"

                        # determine wrappers for this job without success exit
                        for wrapnum, wdict in wrap_byjob[jobtid].items():
                            if wdict['status'] is None or wdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                                if wdict['modname'] == modname:
                                    failedwraps[jobtid].append(wrapnum)
                                else:
                                    whyfailwraps[jobtid].append(wrapnum)

                    if jobdict['status'] == pfwdefs.PF_EXIT_EUPS_FAILURE:
                        msg2 += " - FAIL - EUPS setup failure"
                        retval = jobdict['status']
                    elif jobdict['status'] == pfwdefs.PF_EXIT_CONDOR:
                        msg2 += " - FAIL - Condor/Globus failure"
                        retval = jobdict['status']
                    elif jobdict['status'] is None:
                        msg2 += " - FAIL - NULL status"
                        retval = pfwdefs.PF_EXIT_FAILURE
                    elif jobdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                        msg2 += " - FAIL - Non-zero status"
                        retval = jobdict['status']

                if jobdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                    msg2 += f"\n\t\t{submit_job_path}/runjob.out "

                msg2 += '\n'

                # print pfw_messages
                if 'message' in jobdict:
                    print('\nmessages: ', jobdict['message'])
                    for msgdict in sorted(jobdict['message'], key=lambda k: k['message_time']):
                        level = int(msgdict['message_lvl'])
                        levelstr = 'info'
                        if level == pfwdefs.PFWDB_MSG_WARN:
                            levelstr = 'WARN'
                        elif level == pfwdefs.PFWDB_MSG_ERROR:
                            levelstr = 'ERROR'

                        msg2 += "\t\t{} - {}\n".format(levelstr, msgdict['message'].replace('\n', '\n\t\t\t'))

                if jobtid in wrap_byjob:
                    # print log file name for failed/unfinished wrappers
                    for wrapnum in failedwraps[jobtid]:
                        wrapdict = wrap_byjob[jobtid][wrapnum]
                        if wrapdict['log'] in logfullnames:
                            msg2 += f"\t\t{wrapnum} - {logfullnames[wrapdict['log']]}\n"
                        else:
                            msg2 += f"\t\t{wrapnum} - Could not find log in archive {wrapdict['log']})\n"
                        wrapmsg = get_qcf_messages(qdbh, [wrapdict['task_id']])
                        msg2 += print_qcf_messages(wrapdict, wrapmsg)

                    msg2 += '\n'

                    # If weirdness happened in run, print a message
                    if whyfailwraps[jobtid]:
                        msg2 += "\n*** Contact framework developers.   Wrappers ran after at least 1 wrapper from a previous module that doesn't have success status.\n"
                        msg2 += f"\t{','.join(whyfailwraps[jobtid])}\n"

        except Exception as exc:
            if sem is not None:
                del sem
            msg2 += "\n\nEncountered error trying to gather status information for email."
            msg2 += "\nCheck output for blockpost for further details."
            print("\n\nEncountered error trying to gather status information for email")
            print(f"{exc.__class__.__name__}: {str(exc)}")
            (extype, exvalue, trback) = sys.exc_info()
            traceback.print_exception(extype, exvalue, trback, file=sys.stdout)
            retval = pfwdefs.PF_EXIT_FAILURE
    retval = int(retval) + verify_status
    print("before email retval =", retval)

    when_to_email = 'run'
    if 'when_to_email' in config:
        when_to_email = config.getfull('when_to_email').lower()

    if miscutils.convertBool(dryrun):
        if when_to_email != 'never':
            print("dryrun = ", dryrun)
            print("Sending dryrun email")
            if retval == pfwdefs.PF_EXIT_DRYRUN:
                msg1 = f"{run}:  In dryrun mode, block {blockname} has finished successfully."
            else:
                msg1 = f"{run}:  In dryrun mode, block {blockname} has failed."

            send_email(config, blockname, retval, "", msg1, msg2)
        else:
            print("Not sending dryrun email")
            print("retval = ", retval)
        retval = pfwdefs.PF_EXIT_DRYRUN
    elif retval:
        if when_to_email != 'never':
            print("Sending block failed email\n")
            msg1 = f"{run}:  block {blockname} has failed."
            send_email(config, blockname, retval, "", msg1, msg2)
        else:
            print("Not sending failed email")
            print("retval = ", retval)
    elif retval == pfwdefs.PF_EXIT_SUCCESS:
        if when_to_email == 'block':
            msg1 = f"{run}:  block {blockname} has finished successfully."
            msg2 = ""
            print("Sending success email\n")
            send_email(config, blockname, retval, "", msg1, msg2)
        elif when_to_email == 'run':
            numblocks = len(miscutils.fwsplit(config[pfwdefs.SW_BLOCKLIST], ','))
            if int(config[pfwdefs.PF_BLKNUM]) == numblocks:
                msg1 = f"{run}:  run has finished successfully."
                msg2 = ""
                print("Sending success email\n")
                send_email(config, blockname, retval, "", msg1, msg2)
            else:
                print("Not sending run email because not last block")
                print("retval = ", retval)
        else:
            print("Not sending success email")
            print("retval = ", retval)
    else:
        print("Not sending email")
        print("retval = ", retval)

    # Store values in DB and hist file
    #dbh = None
    if miscutils.convertBool(config[pfwdefs.PF_USE_DB_OUT]):
        if dbh is None:
            dbh = pfwdb.PFWDB(config.getfull('submit_des_services'), config.getfull('submit_des_db_section'))
        if blktid is not None:
            print("Updating end of block task", blktid)
            dbh.end_task(blktid, retval, True)
        else:
            print("Could not update end of block task without block task id")
        if retval != pfwdefs.PF_EXIT_SUCCESS:
            print("Updating end of attempt", config['task_id']['attempt'])
            dbh.end_task(config['task_id']['attempt'], retval, True)
        dbh.commit()
        #dbh.close()

    print("before next block retval = ", retval)
    if retval == pfwdefs.PF_EXIT_SUCCESS:
        # Get ready for next block
        config.inc_blknum()
        with open(configfile, 'w') as cfgfh:
            config.write(cfgfh)
        print("new blknum = ", config[pfwdefs.PF_BLKNUM])
        print("number of blocks = ", len(miscutils.fwsplit(config[pfwdefs.SW_BLOCKLIST], ',')))
        if int(config[pfwdefs.PF_BLKNUM]) > len(miscutils.fwsplit(config[pfwdefs.SW_BLOCKLIST], ',')) and  miscutils.convertBool(config[pfwdefs.PF_USE_DB_OUT]):
            #dbh = pfwdb.PFWDB(config.getfull('submit_des_services'), config.getfull('submit_des_db_section'))
            updatevals = {'PROCESSING_STATE': 'PASS'}
            wherevals = {'PFW_ATTEMPT_ID': attid}
            dbh.basic_update_row('ATTEMPT_STATE', updatevals, wherevals)
            dbh.commit()
            #dbh.close()
    elif miscutils.convertBool(config[pfwdefs.PF_USE_DB_OUT]):
        #dbh = pfwdb.PFWDB(config.getfull('submit_des_services'), config.getfull('submit_des_db_section'))
        updatevals = {'PROCESSING_STATE': 'FAIL'}
        wherevals = {'PFW_ATTEMPT_ID': attid}
        dbh.basic_update_row('ATTEMPT_STATE', updatevals, wherevals)
        dbh.commit()
        #dbh.close()
    if dbh is not None:
        dbh.close()
    miscutils.fwdebug_print(f"Returning retval = {retval} ({type(retval)})")
    miscutils.fwdebug_print("END")
    debugfh.close()
    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print(f"Exiting with = {exitcode}")
        miscutils.fwdebug_print(f"type of exitcode = {type(exitcode)}")

    sys.stdout = realstdout
    sys.stderr = realstderr

    return int(retval)