Example #1
def test():
    logging.config.dictConfig(get_yamlconfig(LOGGING_CONFIG))
    logger = logging.getLogger("testworkflowmonitLogger")

    if not os.path.isdir(LOGDIR):
        os.makedirs(LOGDIR)

    cred = get_yamlconfig(CRED_FILE_PATH)
    recipients = get_yamlconfig(CONFIG_FILE_PATH).get('alert_recipients', [])

    try:

        wfpacks = prepareWorkflows(CONFIG_FILE_PATH, test=True)
        totaldocs = []
        for pack in wfpacks:
            docs = buildDoc(pack, doconcurrent=True)
            totaldocs.extend(docs)

        # predictions
        logger.info("Making predicions for {} workflows..".format(
            len(totaldocs)))
        makingPredictionsWithML(totaldocs)
        # labeling
        qcmd = "SELECT NAME FROM CMS_UNIFIED_ADMIN.WORKFLOW WHERE WM_STATUS LIKE '%archived'"
        archivedwfs = get_workflow_from_db(CONFIG_FILE_PATH, qcmd)
        _wfnames = [w.name for w in archivedwfs]
        logger.info("Passing {} workflows for label making..".format(
            len(_wfnames)))
        updateLabelArchives(_wfnames)

    except Exception:
        logger.exception(
            f"Exception encountered, sending emails to {str(recipients)}")
        errorEmailShooter(traceback.format_exc(), recipients)
Example #2
def getCompletedWorkflowsFromDb(configPath):
    """
    Get the list of completed workflows from the local status db (set up to avoid unnecessary caching).

    Workflows whose status ends with *archived* are removed from further caching.

    :param str configPath: location of config file
    :returns: list of workflow (str)
    :rtype: list
    """

    config = get_yamlconfig(configPath)
    if not config:
        sys.exit('Config file: {} does not exist, exiting..'.format(configPath))
    dbPath = config.get(
        'workflow_status_db',
        os.path.join(os.path.dirname(os.path.abspath(__file__)),
                     'workflow_status.sqlite'))

    DB_CREATE_CMD = """CREATE TABLE IF NOT EXISTS workflowStatuses (
        name TEXT PRIMARY KEY,
        status TEXT,
        failurerate REAL
    );"""
    DB_QUERY_CMD = """SELECT * FROM workflowStatuses WHERE status LIKE '%archived'"""

    res = []
    conn = sqlite3.connect(dbPath)
    with conn:
        c = conn.cursor()
        c.execute(DB_CREATE_CMD)
        for row in c.execute(DB_QUERY_CMD):
            res.append(row[0])

    return res
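
The LIKE '%archived' pattern matches any status with that suffix, e.g. 'normal-archived' or 'rejected-archived'. A self-contained illustration against an in-memory database (schema copied from DB_CREATE_CMD above, rows fabricated):

import sqlite3

conn = sqlite3.connect(':memory:')
with conn:
    c = conn.cursor()
    c.execute("""CREATE TABLE workflowStatuses (
        name TEXT PRIMARY KEY, status TEXT, failurerate REAL);""")
    c.executemany('INSERT INTO workflowStatuses VALUES (?,?,?)',
                  [('wf_A', 'normal-archived', 0.1),
                   ('wf_B', 'running-open', 0.2),
                   ('wf_C', 'rejected-archived', 0.3)])
    rows = c.execute("SELECT name FROM workflowStatuses "
                     "WHERE status LIKE '%archived'").fetchall()
print([r[0] for r in rows])  # ['wf_A', 'wf_C']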
Example #3
def updateWorkflowStatusToDb(configPath, wcErrorInfos):
    """
    Update workflow statuses in the local status db with the information from ``wcErrorInfos``.

    :param str configPath: location of config file
    :param list wcErrorInfos: list of dicts returned by :py:func:`buildDoc`
    :returns: True
    """

    config = get_yamlconfig(configPath)
    if not config:
        sys.exit('Config file: {} does not exist, exiting..'.format(configPath))
    dbPath = config.get(
        'workflow_status_db',
        os.path.join(os.path.dirname(os.path.abspath(__file__)),
                     'workflow_status.sqlite'))

    DB_UPDATE_CMD = """INSERT OR REPLACE INTO workflowStatuses VALUES (?,?,?)"""

    toUpdate = []
    for e in wcErrorInfos:
        entry = (e.get('name', ''), e.get('status', ''),
                 e.get('failureRate', 0.))
        # skip entries missing a name or status
        if not all(entry[:-1]):
            continue
        toUpdate.append(entry)

    conn = sqlite3.connect(dbPath)
    with conn:
        c = conn.cursor()
        c.executemany(DB_UPDATE_CMD, toUpdate)

    return True
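
A hypothetical call showing which entries survive the all(entry[:-1]) filter above (names, numbers, and the config path are fabricated):

wcErrorInfos = [
    {'name': 'wf_A', 'status': 'running-open', 'failureRate': 0.12},
    {'name': 'wf_B', 'status': '', 'failureRate': 0.50},  # dropped: empty status
    {'status': 'acquired', 'failureRate': 0.01},          # dropped: no name
]
updateWorkflowStatusToDb('path/to/config.yml', wcErrorInfos)  # upserts only wf_A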
Example #4
def main():

    with open(LOGGING_CONFIG, 'r') as f:
        config = yaml.safe_load(f.read())
        logging.config.dictConfig(config)

    if not os.path.isdir(LOGDIR):
        os.makedirs(LOGDIR)

    cred = get_yamlconfig(CRED_FILE_PATH)
    recipients = get_yamlconfig(CONFIG_FILE_PATH).get('alert_recipients', [])

    try:

        wfpacks = prepareWorkflows(CONFIG_FILE_PATH, test=False)
        totaldocs = []
        for pack in wfpacks:
            docs = buildDoc(pack, doconcurrent=True)
            totaldocs.extend(docs)

            # update status in local db
            updateWorkflowStatusToDb(CONFIG_FILE_PATH, docs)
            # send to CERN MONIT
            failures = sendDoc(cred, docs)
            # alerts
            alertWithEmail(docs, recipients)

            # backup doc
            bkpfn = join(LOGDIR, 'toSendDoc_{}'.format(time.strftime('%y%m%d-%H%M%S')))
            bkpdoc = save_json(docs, filename=bkpfn, gzipped=True)
            logger.info('Document backed up at: {}'.format(bkpdoc))

            # backup failure msg
            faildocfn = join(
                LOGDIR, 'amqFailMsg_{}'.format(time.strftime('%y%m%d-%H%M%S')))
            if len(failures):
                faildoc = save_json(failures, filename=faildocfn, gzipped=True)
                logger.info('Failed message saved at: {}'.format(faildoc))

            logger.info('Number of updated workflows: {}'.format(len(docs)))

        # predictions
        makingPredictionsWithML(totaldocs)

    except Exception as e:
        logger.exception(f"Exception encountered, sending emails to {str(recipients)}")
        errorEmailShooter(str(e), recipients)
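
For reference, the time.strftime('%y%m%d-%H%M%S') pattern used for the backup filenames produces compact, lexically sortable timestamps:

import time
# e.g. 'toSendDoc_240131-235959' for 2024-01-31 23:59:59 local time
bkpfn = 'toSendDoc_{}'.format(time.strftime('%y%m%d-%H%M%S'))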
Example #5
def updateLabelArchives(wfnames, configpath=CONFIG_FILE_PATH):
    """Given a list of workflownames, make labels for those that has not been
    labelled before, and update db

    :param list wfnames: list of workflow names
    :param str configpath: path of the config yml containing db connection info
    """

    config = get_yamlconfig(configpath)

    labeled_ = get_labeled_workflows(config)
    workflowstoquery = [w for w in wfnames if w not in labeled_]
    logger.info("Making labels for {} workflows...".format(
        len(workflowstoquery)))

    values = list(label_workflows(workflowstoquery).items())
    update_label_archive_db(config, values)
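
Since labeled_ is only used for membership tests, building a set instead of a list makes each lookup O(1); an equivalent variant of the filter above:

labeled_ = set(get_labeled_workflows(config))
workflowstoquery = [w for w in wfnames if w not in labeled_]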
Example #6
def __init__(self):
    self._server = 'https://its.cern.ch/jira'
    cookiefile = get_yamlconfig(CRED_FILE_PATH).get('jiracookie', None)
    if not cookiefile or not isfile(cookiefile):
        raise ValueError(
            "`jiracookie` missing from credential.yml or the file does not exist!\n"
            "JiraClient cannot be constructed.")
    # cookie file lines carry at least 7 whitespace-separated fields,
    # with the cookie name in field 6 and its value in field 7
    cookies = {}
    with open(cookiefile) as f:
        for line in f:
            fields = line.split()
            if len(fields) < 7:
                continue
            if fields[5] in ['JSESSIONID', 'atlassian.xsrf.token']:
                cookies[fields[5]] = fields[6]
    if not cookies:
        raise ValueError("`jiracookie` file corrupted!")
    self.client = jira.JIRA(self._server, options=dict(cookies=cookies))
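
For context, the loop above parses a Netscape cookies.txt-style export, where the last two fields of each cookie line are the name and value. A fabricated line of the expected shape:

# domain       flag   path  secure  expiry      name        value
# its.cern.ch  FALSE  /     TRUE    1700000000  JSESSIONID  0123456789abcdef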
Example #7
def update_prediction_db(preds, configpath=CONFIG_FILE_PATH):
    """update prediction results

    Arguments:
        preds {dict} -- dictionary -> {wfname: [good_prob, acdc_prob, resubmit_prob]}
        configpath {str} -- path of the config containing db connection info
    """

    if not preds:
        return
    config = get_yamlconfig(configpath)

    timestamp = fmttime(time.time())
    values = [(wf, round(predval[0], 6), round(predval[1], 6),
               round(predval[2], 6), timestamp)
              for wf, predval in preds.items()]
    update_prediction_history_db(config, values)
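
A hypothetical input and the row it produces (workflow name and probabilities fabricated; fmttime is assumed to render a Unix timestamp as a string):

preds = {'wf_A': [0.912345678, 0.05, 0.037654322]}
update_prediction_db(preds)
# values == [('wf_A', 0.912346, 0.05, 0.037654, '<formatted timestamp>')]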
Example #8
def do_work(item):
    """Query, build and return the error doc.

    :param tuple item: (``Workflow``, minFailureRate, configPath)
    :returns: error doc
    :rtype: dict
    """

    wf, minFailureRate, configPath = item

    # database path and insertion command
    dbPath = get_yamlconfig(configPath).get(
        'workflow_status_db',
        os.path.join(os.path.dirname(os.path.abspath(__file__)),
                     'workflow_status.sqlite'))
    DB_UPDATE_CMD = """INSERT OR REPLACE INTO workflowStatuses VALUES (?,?,?)"""

    res = {}

    try:
        # small random jitter to stagger concurrent queries
        time.sleep(random.random() * 0.1)
        failurerate = wf.get_failure_rate()
        toUpdate = (wf.name,
                    wf.get_reqdetail().get(wf.name, {}).get('RequestStatus', ''),
                    failurerate)
        if any(toUpdate[:-1]):
            conn = sqlite3.connect(dbPath)
            with conn:
                c = conn.cursor()
                c.execute(DB_UPDATE_CMD, toUpdate)

        if failurerate > minFailureRate:
            res = populate_error_for_workflow(wf)
    except Exception as e:
        logger.exception("workflow<{}> except when do_work!\nMSG: {}".format(
            wf.name, str(e)))
        pass

    return res
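
The tuple argument and the random jitter suggest do_work is meant to be mapped over a worker pool, presumably what buildDoc's doconcurrent=True does; a sketch of such a driver (workflows and the pool size are assumptions):

from concurrent.futures import ThreadPoolExecutor

items = [(wf, 0.2, CONFIG_FILE_PATH) for wf in workflows]
with ThreadPoolExecutor(max_workers=8) as pool:
    docs = [doc for doc in pool.map(do_work, items) if doc]  # drop empty results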
Example #9
def test():
    with open(LOGGING_CONFIG, 'r') as f:
        config = yaml.safe_load(f.read())
        logging.config.dictConfig(config)

    cred = get_yamlconfig(CRED_FILE_PATH)
    wfpacks = prepareWorkflows(CONFIG_FILE_PATH, test=False)

    # test only the first batch
    firstbatch = wfpacks[0]
    docs = buildDoc(firstbatch, doconcurrent=True)
    updateWorkflowStatusToDb(CONFIG_FILE_PATH, docs)
    logger.info('Number of updated workflows: {}'.format(len(docs)))

    if docs:
        print('Number of docs: ', len(docs))
        if len(str(docs)) > 500:
            print('[content]', str(docs)[:100], '...', str(docs)[-100:])
        else:
            print('[content]', docs)

    else:
        print("docs empty!!")
Example #10
def main():

    logging.config.dictConfig(get_yamlconfig(LOGGING_CONFIG))
    cred = get_yamlconfig(CRED_FILE_PATH)
    localconfig = get_yamlconfig(CONFIG_FILE_PATH)

    if not os.path.isdir(LOGDIR):
        os.makedirs(LOGDIR)

    recipients = localconfig.get('alert_recipients', [])

    try:
        wfpacks = prepareWorkflows(CONFIG_FILE_PATH, test=False)
        totaldocs = []
        for pack in wfpacks:
            try:
                docs = buildDoc(pack, doconcurrent=True)
                totaldocs.extend(docs)

                # update status in local db
                updateWorkflowStatusToDb(CONFIG_FILE_PATH, docs)
                # send to CERN MONIT
                failures = sendDoc(cred, docs)
                # alerts
                alertWithEmail(docs, recipients)

                # backup doc
                # bkpfn = join(LOGDIR, 'toSendDoc_{}'.format(time.strftime('%y%m%d-%H%M%S')))
                # bkpdoc = save_json(docs, filename=bkpfn, gzipped=True)
                # logger.info('Document backed up at: {}'.format(bkpdoc))

                # backup failure msg
                if len(failures):
                    faildocfn = join(
                        LOGDIR,
                        'amqFailMsg_{}'.format(time.strftime('%y%m%d-%H%M%S')))
                    faildoc = save_json(failures,
                                        filename=faildocfn,
                                        gzipped=True)
                    logger.info('Failed message saved at: {}'.format(faildoc))

                logger.info('Number of updated workflows: {}'.format(
                    len(docs)))
            except Exception:
                logger.exception(
                    f"Exception encountered, sending emails to {str(recipients)}"
                )
                errorEmailShooter(traceback.format_exc(), recipients)

        # predictions
        logger.info("Making predicions for {} workflows..".format(
            len(totaldocs)))
        makingPredictionsWithML(totaldocs)

        # labeling
        qcmd = "SELECT NAME FROM CMS_UNIFIED_ADMIN.WORKFLOW WHERE WM_STATUS LIKE '%archived'"
        archivedwfs = get_workflow_from_db(CONFIG_FILE_PATH, qcmd)
        _wfnames = [w.name for w in archivedwfs]
        logger.info("Passing {} workflows for label making..".format(
            len(_wfnames)))
        updateLabelArchives(_wfnames)

        # archive docs:
        docs_to_insert = [(doc['name'], json.dumps(doc)) for doc in totaldocs]
        update_doc_archive_db(localconfig, docs_to_insert)

    except Exception:
        logger.exception(
            f"Exception encountered, sending emails to {str(recipients)}")
        errorEmailShooter(traceback.format_exc(), recipients)
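
For clarity, the archiving step at the end stores each doc as a (name, JSON blob) pair; a hypothetical doc and what gets handed to update_doc_archive_db:

import json

doc = {'name': 'wf_A', 'status': 'running-open', 'failureRate': 0.12}
docs_to_insert = [(doc['name'], json.dumps(doc))]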