Example #1
def test():
    logging.config.dictConfig(get_yamlconfig(LOGGING_CONFIG))
    logger = logging.getLogger("testworkflowmonitLogger")

    if not os.path.isdir(LOGDIR):
        os.makedirs(LOGDIR)

    cred = get_yamlconfig(CRED_FILE_PATH)
    recipients = get_yamlconfig(CONFIG_FILE_PATH).get('alert_recipients', [])

    try:

        wfpacks = prepareWorkflows(CONFIG_FILE_PATH, test=True)
        totaldocs = []
        for pack in wfpacks:
            docs = buildDoc(pack, doconcurrent=True)
            totaldocs.extend(docs)

        # predictions
        logger.info("Making predicions for {} workflows..".format(
            len(totaldocs)))
        makingPredictionsWithML(totaldocs)
        # labeling
        qcmd = "SELECT NAME FROM CMS_UNIFIED_ADMIN.WORKFLOW WHERE WM_STATUS LIKE '%archived'"
        archivedwfs = get_workflow_from_db(CONFIG_FILE_PATH, qcmd)
        _wfnames = [w.name for w in archivedwfs]
        logger.info("Passing {} workflows for label making..".format(
            len(_wfnames)))
        updateLabelArchives(_wfnames)

    except Exception:
        logger.exception(
            f"Exception encountered, sending emails to {str(recipients)}")
Example #2
def prepareWorkflows(configpath, minfailurerate=0., test=False, batchsize=15):
    """
    extract workflows from unified db, filter out those need to query,
    stratified with batchsize.

    :param str configpath: path to config file
    :param float minfailurerate: input to pack for jobs
    :param bool test: for debug
    :param int batchsize: number of workflows per batch
    :returns: list of list of (:py:class:`Workflow`, `minfailurerate`, `configpath`),
     grouped per `batchsize`.
    :rtype: list
    """

    DB_QUERY_CMD = "SELECT NAME FROM CMS_UNIFIED_ADMIN.WORKFLOW WHERE WM_STATUS LIKE 'running%'"

    _wkfs = []
    try:
        _wkfs = get_workflow_from_db(configpath,
                                     DB_QUERY_CMD)  # list of `Workflow`
    except Exception as e:
        logger.error(
            "Failed to get running workflows from the UNIFIED DB!\nMsg: {}".format(
                str(e)))
        raise
    msg = 'Number of workflows fetched from db: {}'.format(len(_wkfs))
    logger.info(msg)
    if test:
        _wkfs = _wkfs[-10:]

    completedWfs = getCompletedWorkflowsFromDb(configpath)
    wkfs = [w for w in _wkfs if w.name not in completedWfs]

    msg = 'Number of workflows to query: {}'.format(len(wkfs))
    logger.info(msg)

    wkfs = [(w, minfailurerate, configpath) for w in wkfs]

    # slice them according to batch size
    res = [wkfs[x:x + batchsize] for x in range(0, len(wkfs), batchsize)]
    msg = 'Divided into {0} batches with batchsize {1}.'.format(
        len(res), batchsize)
    logger.info(msg)
    return res
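
A minimal sketch of the batching idiom used at the end of prepareWorkflows, with a plain list of integers standing in for the (Workflow, minfailurerate, configpath) tuples; `chunk` is a hypothetical helper introduced here only for illustration, not part of the original module:

def chunk(items, batchsize):
    # Same slicing pattern as the last step of prepareWorkflows: walk the list
    # in strides of `batchsize` and take one slice per stride.
    return [items[x:x + batchsize] for x in range(0, len(items), batchsize)]

# 7 items with batchsize=3 -> [[0, 1, 2], [3, 4, 5], [6]]
print(chunk(list(range(7)), 3))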
Example #3
def main():

    logging.config.dictConfig(get_yamlconfig(LOGGING_CONFIG))
    cred = get_yamlconfig(CRED_FILE_PATH)
    localconfig = get_yamlconfig(CONFIG_FILE_PATH)

    if not os.path.isdir(LOGDIR):
        os.makedirs(LOGDIR)

    recipients = localconfig.get('alert_recipients', [])

    try:
        wfpacks = prepareWorkflows(CONFIG_FILE_PATH, test=False)
        totaldocs = []
        for pack in wfpacks:
            try:
                docs = buildDoc(pack, doconcurrent=True)
                totaldocs.extend(docs)

                # update status in local db
                updateWorkflowStatusToDb(CONFIG_FILE_PATH, docs)
                # send to CERN MONIT
                failures = sendDoc(cred, docs)
                # alerts
                alertWithEmail(docs, recipients)

                # backup doc
                # bkpfn = join(LOGDIR, 'toSendDoc_{}'.format(time.strftime('%y%m%d-%H%M%S')))
                # bkpdoc = save_json(docs, filename=bkpfn, gzipped=True)
                # logger.info('Document backed up at: {}'.format(bkpdoc))

                # backup failure msg
                if failures:
                    faildocfn = join(
                        LOGDIR,
                        'amqFailMsg_{}'.format(time.strftime('%y%m%d-%H%M%S')))
                    faildoc = save_json(failures,
                                        filename=faildocfn,
                                        gzipped=True)
                    logger.info('Failed message saved at: {}'.format(faildoc))

                logger.info('Number of updated workflows: {}'.format(
                    len(docs)))
            except Exception:
                logger.exception(
                    f"Exception encountered, sending emails to {str(recipients)}"
                )
                errorEmailShooter(traceback.format_exc(), recipients)

        # predictions
        logger.info("Making predicions for {} workflows..".format(
            len(totaldocs)))
        makingPredictionsWithML(totaldocs)

        # labeling
        qcmd = "SELECT NAME FROM CMS_UNIFIED_ADMIN.WORKFLOW WHERE WM_STATUS LIKE '%archived'"
        archivedwfs = get_workflow_from_db(CONFIG_FILE_PATH, qcmd)
        _wfnames = [w.name for w in archivedwfs]
        logger.info("Passing {} workflows for label making..".format(
            len(_wfnames)))
        updateLabelArchives(_wfnames)

        # archive docs:
        docs_to_insert = [(doc['name'], json.dumps(doc)) for doc in totaldocs]
        update_doc_archive_db(localconfig, docs_to_insert)

    except Exception:
        logger.exception(
            f"Exception encountered, sending emails to {str(recipients)}")
        errorEmailShooter(traceback.format_exc(), recipients)