def test():
    logging.config.dictConfig(get_yamlconfig(LOGGING_CONFIG))
    logger = logging.getLogger("testworkflowmonitLogger")

    if not os.path.isdir(LOGDIR):
        os.makedirs(LOGDIR)

    cred = get_yamlconfig(CRED_FILE_PATH)
    recipients = get_yamlconfig(CONFIG_FILE_PATH).get('alert_recipients', [])

    try:
        wfpacks = prepareWorkflows(CONFIG_FILE_PATH, test=True)
        totaldocs = []
        for pack in wfpacks:
            docs = buildDoc(pack, doconcurrent=True)
            totaldocs.extend(docs)

        # predictions
        logger.info("Making predictions for {} workflows..".format(
            len(totaldocs)))
        makingPredictionsWithML(totaldocs)

        # labeling
        qcmd = "SELECT NAME FROM CMS_UNIFIED_ADMIN.WORKFLOW WHERE WM_STATUS LIKE '%archived'"
        archivedwfs = get_workflow_from_db(CONFIG_FILE_PATH, qcmd)
        _wfnames = [w.name for w in archivedwfs]
        logger.info("Passing {} workflows for label making..".format(
            len(_wfnames)))
        updateLabelArchives(_wfnames)

    except Exception:
        logger.exception(
            f"Exception encountered, sending emails to {str(recipients)}")
        # send the alert email, matching the behavior of main()
        errorEmailShooter(traceback.format_exc(), recipients)

def getCompletedWorkflowsFromDb(configPath):
    """
    Get the completed workflow list from the local status db (kept to avoid
    unnecessary caching). Workflows whose status ends with *archived* are
    removed from further caching.

    :param str configPath: location of config file
    :returns: list of workflow names (str)
    :rtype: list
    """
    config = get_yamlconfig(configPath)
    if not config:
        sys.exit('Config file: {} does not exist, exiting..'.format(configPath))

    dbPath = config.get(
        'workflow_status_db',
        os.path.join(os.path.dirname(os.path.abspath(__file__)),
                     'workflow_status.sqlite'))

    DB_CREATE_CMD = """CREATE TABLE IF NOT EXISTS workflowStatuses (
        name TEXT PRIMARY KEY,
        status TEXT,
        failurerate REAL
    );"""
    DB_QUERY_CMD = """SELECT * FROM workflowStatuses WHERE status LIKE '%archived'"""

    res = []
    conn = sqlite3.connect(dbPath)
    with conn:
        c = conn.cursor()
        c.execute(DB_CREATE_CMD)
        for row in c.execute(DB_QUERY_CMD):
            res.append(row[0])
    return res

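# A minimal usage sketch (not part of the original module): skip workflows
# that the local status db already marks as archived, so they are not
# re-cached. The helper name and its arguments are assumptions made for
# illustration only.
def _example_skip_archived(candidate_names, config_path):
    archived = set(getCompletedWorkflowsFromDb(config_path))
    return [n for n in candidate_names if n not in archived]
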
def updateWorkflowStatusToDb(configPath, wcErrorInfos):
    """
    Update workflow status in the local status db with the information
    from ``wcErrorInfos``.

    :param str configPath: location of config file
    :param list wcErrorInfos: list of dicts returned by :py:func:`buildDoc`
    :returns: True
    """
    config = get_yamlconfig(configPath)
    if not config:
        sys.exit('Config path: {} does not exist, exiting..'.format(configPath))

    dbPath = config.get(
        'workflow_status_db',
        os.path.join(os.path.dirname(os.path.abspath(__file__)),
                     'workflow_status.sqlite'))
    DB_UPDATE_CMD = """INSERT OR REPLACE INTO workflowStatuses VALUES (?,?,?)"""

    toUpdate = []
    for e in wcErrorInfos:
        entry = (e.get('name', ''), e.get('status', ''),
                 e.get('failureRate', 0.))
        # skip entries missing a name or a status
        if not all(entry[:-1]):
            continue
        toUpdate.append(entry)

    conn = sqlite3.connect(dbPath)
    with conn:
        c = conn.cursor()
        c.executemany(DB_UPDATE_CMD, toUpdate)
    return True

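# A hedged sketch of the input updateWorkflowStatusToDb expects: a list of
# dicts carrying at least 'name' and 'status' (rows missing either field are
# skipped), plus an optional 'failureRate'. All values below are made up.
#
#     updateWorkflowStatusToDb('config.yml', [
#         {'name': 'wf_example', 'status': 'running-open', 'failureRate': 0.12},
#         {'name': 'wf_example2', 'status': 'completed', 'failureRate': 0.0},
#     ])
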
def main():
    with open(LOGGING_CONFIG, 'r') as f:
        config = yaml.safe_load(f.read())
    logging.config.dictConfig(config)

    if not os.path.isdir(LOGDIR):
        os.makedirs(LOGDIR)

    cred = get_yamlconfig(CRED_FILE_PATH)
    recipients = get_yamlconfig(CONFIG_FILE_PATH).get('alert_recipients', [])

    try:
        wfpacks = prepareWorkflows(CONFIG_FILE_PATH, test=False)
        totaldocs = []
        for pack in wfpacks:
            docs = buildDoc(pack, doconcurrent=True)
            totaldocs.extend(docs)

            # update status in local db
            updateWorkflowStatusToDb(CONFIG_FILE_PATH, docs)
            # send to CERN MONIT
            failures = sendDoc(cred, docs)
            # alerts
            alertWithEmail(docs, recipients)

            # backup doc
            bkpfn = join(LOGDIR,
                         'toSendDoc_{}'.format(time.strftime('%y%m%d-%H%M%S')))
            bkpdoc = save_json(docs, filename=bkpfn, gzipped=True)
            logger.info('Document backed up at: {}'.format(bkpdoc))

            # backup failure msg
            faildocfn = join(
                LOGDIR, 'amqFailMsg_{}'.format(time.strftime('%y%m%d-%H%M%S')))
            if len(failures):
                faildoc = save_json(failures, filename=faildocfn, gzipped=True)
                logger.info('Failed message saved at: {}'.format(faildoc))

            logger.info('Number of updated workflows: {}'.format(len(docs)))

        # predictions
        makingPredictionsWithML(totaldocs)

    except Exception as e:
        logger.exception(
            f"Exception encountered, sending emails to {str(recipients)}")
        errorEmailShooter(str(e), recipients)

def updateLabelArchives(wfnames, configpath=CONFIG_FILE_PATH):
    """Given a list of workflow names, make labels for those that have not
    been labeled before, and update the db.

    :param list wfnames: list of workflow names
    :param str configpath: path of config yml containing db connection info
    """
    config = get_yamlconfig(configpath)

    labeled_ = get_labeled_workflows(config)
    workflowstoquery = [w for w in wfnames if w not in labeled_]
    logger.info("Making labels for {} workflows...".format(
        len(workflowstoquery)))

    values = list(label_workflows(workflowstoquery).items())
    update_label_archive_db(config, values)

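# Hedged usage sketch: pass a batch of workflow names; only those without an
# existing label are queried and written back. The names are illustrative.
#
#     updateLabelArchives(['wf_example_A', 'wf_example_B'])
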
def __init__(self):
    self._server = 'https://its.cern.ch/jira'

    cookiefile = get_yamlconfig(CRED_FILE_PATH).get('jiracookie', None)
    if not cookiefile or not isfile(cookiefile):
        raise ValueError(
            "`jiracookie` missing from credential.yml or the file does not exist!\nJiraClient cannot be constructed."
        )

    cookies = {}
    with open(cookiefile) as f:
        for line in f:
            fields = line.split()
            if len(fields) < 7:
                continue
            if fields[5] in ['JSESSIONID', 'atlassian.xsrf.token']:
                cookies[fields[5]] = fields[6]
    if not cookies:
        raise ValueError("`jiracookie` file corrupted!")

    self.client = jira.JIRA(self._server, options=dict(cookies=cookies))

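# The `jiracookie` file is read as a Netscape-style cookies.txt export:
# seven whitespace-separated fields per line (domain, flag, path, secure,
# expiry, name, value), of which only the JSESSIONID and
# atlassian.xsrf.token cookies are kept. An illustrative line, with
# fabricated values:
#
#     its.cern.ch    FALSE    /    TRUE    0    JSESSIONID    abcdef0123456789
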
def update_prediction_db(preds, configpath=CONFIG_FILE_PATH):
    """Update prediction results in the prediction history db.

    Arguments:
        preds {dict} -- {wfname: [good_prob, acdc_prob, resubmit_prob]}
        configpath {str} -- path of config containing db connection info
    """
    if not preds:
        return

    config = get_yamlconfig(configpath)
    timestamp = fmttime(time.time())
    values = [(wf, round(predval[0], 6), round(predval[1], 6),
               round(predval[2], 6), timestamp)
              for wf, predval in preds.items()]
    update_prediction_history_db(config, values)

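# A hedged sketch of the `preds` argument (probabilities are made up): each
# workflow name maps to [good_prob, acdc_prob, resubmit_prob]; values are
# rounded to 6 decimal places and stamped with the current time before the
# history db update.
#
#     update_prediction_db({'wf_example': [0.91, 0.06, 0.03]})
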
def do_work(item):
    """Query, build and return the error doc.

    :param tuple item: (``Workflow``, minFailureRate, configPath)
    :returns: error doc
    :rtype: dict
    """
    wf, minFailureRate, configPath = item

    # database path and insertion command
    dbPath = get_yamlconfig(configPath).get(
        'workflow_status_db',
        os.path.join(os.path.dirname(os.path.abspath(__file__)),
                     'workflow_status.sqlite'))
    DB_UPDATE_CMD = """INSERT OR REPLACE INTO workflowStatuses VALUES (?,?,?)"""

    res = {}
    try:
        # small random delay to stagger concurrent workers
        time.sleep(random.random() * 0.1)

        failurerate = wf.get_failure_rate()
        toUpdate = (wf.name,
                    wf.get_reqdetail().get(wf.name, {}).get('RequestStatus', ''),
                    failurerate)
        if any(toUpdate[:-1]):
            conn = sqlite3.connect(dbPath)
            with conn:
                c = conn.cursor()
                c.execute(DB_UPDATE_CMD, toUpdate)

        if failurerate > minFailureRate:
            res = populate_error_for_workflow(wf)
    except Exception as e:
        logger.exception("workflow<{}> raised an exception in do_work!\nMSG: {}".format(
            wf.name, str(e)))
    return res

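# A minimal sketch (pool size, failure-rate threshold, and config path are
# assumptions) of how a worker pool might drive do_work over many workflows,
# keeping only the non-empty error docs:
#
#     import multiprocessing
#
#     items = [(wf, 0.2, 'config.yml') for wf in workflows]
#     with multiprocessing.Pool(4) as pool:
#         docs = [d for d in pool.map(do_work, items) if d]
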
def test():
    with open(LOGGING_CONFIG, 'r') as f:
        config = yaml.safe_load(f.read())
    logging.config.dictConfig(config)

    cred = get_yamlconfig(CRED_FILE_PATH)

    wfpacks = prepareWorkflows(CONFIG_FILE_PATH, test=False)
    # test only the first batch
    firstbatch = wfpacks[0]
    docs = buildDoc(firstbatch, doconcurrent=True)

    updateWorkflowStatusToDb(CONFIG_FILE_PATH, docs)
    logger.info('Number of updated workflows: {}'.format(len(docs)))

    if docs:
        print('Number of docs: ', len(docs))
        if len(str(docs)) > 500:
            print('[content]', str(docs)[:100], '...', str(docs)[-100:])
        else:
            print('[content]', docs)
    else:
        print("docs empty!!")

def main():
    logging.config.dictConfig(get_yamlconfig(LOGGING_CONFIG))

    cred = get_yamlconfig(CRED_FILE_PATH)
    localconfig = get_yamlconfig(CONFIG_FILE_PATH)

    if not os.path.isdir(LOGDIR):
        os.makedirs(LOGDIR)

    recipients = localconfig.get('alert_recipients', [])

    try:
        wfpacks = prepareWorkflows(CONFIG_FILE_PATH, test=False)
        totaldocs = []
        for pack in wfpacks:
            try:
                docs = buildDoc(pack, doconcurrent=True)
                totaldocs.extend(docs)

                # update status in local db
                updateWorkflowStatusToDb(CONFIG_FILE_PATH, docs)
                # send to CERN MONIT
                failures = sendDoc(cred, docs)
                # alerts
                alertWithEmail(docs, recipients)

                # backup doc
                # bkpfn = join(LOGDIR, 'toSendDoc_{}'.format(time.strftime('%y%m%d-%H%M%S')))
                # bkpdoc = save_json(docs, filename=bkpfn, gzipped=True)
                # logger.info('Document backuped at: {}'.format(bkpdoc))

                # backup failure msg
                if len(failures):
                    faildocfn = join(
                        LOGDIR,
                        'amqFailMsg_{}'.format(time.strftime('%y%m%d-%H%M%S')))
                    faildoc = save_json(failures, filename=faildocfn,
                                        gzipped=True)
                    logger.info('Failed message saved at: {}'.format(faildoc))

                logger.info('Number of updated workflows: {}'.format(
                    len(docs)))
            except Exception:
                logger.exception(
                    f"Exception encountered, sending emails to {str(recipients)}"
                )
                errorEmailShooter(traceback.format_exc(), recipients)

        # predictions
        logger.info("Making predictions for {} workflows..".format(
            len(totaldocs)))
        makingPredictionsWithML(totaldocs)

        # labeling
        qcmd = "SELECT NAME FROM CMS_UNIFIED_ADMIN.WORKFLOW WHERE WM_STATUS LIKE '%archived'"
        archivedwfs = get_workflow_from_db(CONFIG_FILE_PATH, qcmd)
        _wfnames = [w.name for w in archivedwfs]
        logger.info("Passing {} workflows for label making..".format(
            len(_wfnames)))
        updateLabelArchives(_wfnames)

        # archive docs
        docs_to_insert = [(doc['name'], json.dumps(doc)) for doc in totaldocs]
        update_doc_archive_db(localconfig, docs_to_insert)

    except Exception:
        logger.exception(
            f"Exception encountered, sending emails to {str(recipients)}")
        errorEmailShooter(traceback.format_exc(), recipients)
