def setUp(self):
    """
    Set up global values
    """
    appport = 19888
    config = TestConfig(appport, os.getenv("COUCHURL"), False)
    self.setConfig(config)
    reqmgrCouchDB = splitCouchServiceURL(config.views.wmstats.reqmgrCouchURL)[1]
    wmstatsCouchDB = splitCouchServiceURL(config.views.wmstats.wmstatsCouchURL)[1]
    self.setCouchDBs([(reqmgrCouchDB, "ReqMgr"), (wmstatsCouchDB, "WMStats")])
    self.setSchemaModules([])
    RESTBaseUnitTestWithDBBackend.setUp(self)

def __init__(self, couchURL, summaryLevel):
    # set the connection for local couchDB call
    self.couchURL = couchURL
    self.couchURLBase, self.dbName = splitCouchServiceURL(couchURL)
    self.jobCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/jobs", False)
    self.fwjrsCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/fwjrs", False)
    self.summaryLevel = summaryLevel

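# NOTE (illustration, not part of the original snippets): every call site in this collection
# relies on splitCouchServiceURL() to separate a combined CouchDB service URL into its server
# base and database name. A minimal sketch of that behaviour, assuming the conventional
# "<couch base URL>/<database name>" layout; the real WMCore helper may differ:
def splitCouchServiceURL(serviceURL):
    """Split '<couchURL>/<dbName>' into (couchURL, dbName); sketch only."""
    base, dbName = serviceURL.rstrip('/').rsplit('/', 1)
    return base, dbName

# Example: splitCouchServiceURL("https://cmsweb.cern.ch/couchdb/wmstats")
# would return ("https://cmsweb.cern.ch/couchdb", "wmstats").
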
def main(options):
    url, dbName = splitCouchServiceURL(options.dburl)
    db = CouchServer(url).connectDatabase(dbName)
    reqmgr_requests = generate_reqmgr_requests(options.requests)
    agent_requests = generate_agent_requests(options.requests, options.iterations)

    if options.add_couchapp:
        installCouchApp(url, dbName, "WMStats", options.couchapp_base)

    if options.add_reqmgr_data:
        for req in reqmgr_requests:
            db.queue(req)
        db.commit()
        print("Added %s reqmgr requests" % len(reqmgr_requests))

    if options.add_agent_data:
        for req in agent_requests:
            db.queue(req)
            jobDocs = generate_jobsummary(req['workflow'])
            for job in jobDocs:
                db.queue(job)
        db.commit()
        print("Added %s agent requests" % len(agent_requests))
        print("Added %s job Docs" % (len(agent_requests) * len(jobDocs)))

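# NOTE (illustrative sketch, not from the original script): main(options) above expects an
# options object carrying dburl, requests, iterations, add_couchapp, couchapp_base,
# add_reqmgr_data and add_agent_data. The option parser is not shown in the snippet, so the
# flag names below are hypothetical; a minimal command-line wrapper could look like this:
from optparse import OptionParser

def parseOpts():
    parser = OptionParser()
    parser.add_option("--dburl", dest="dburl", help="CouchDB service URL, '<couch base>/<db name>'")
    parser.add_option("--requests", dest="requests", type="int", default=10)
    parser.add_option("--iterations", dest="iterations", type="int", default=1)
    parser.add_option("--add-couchapp", dest="add_couchapp", action="store_true", default=False)
    parser.add_option("--couchapp-base", dest="couchapp_base", default=None)
    parser.add_option("--add-reqmgr-data", dest="add_reqmgr_data", action="store_true", default=False)
    parser.add_option("--add-agent-data", dest="add_agent_data", action="store_true", default=False)
    return parser.parse_args()[0]
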
def __init__(self, couchURL, summaryLevel):
    # set the connection for local couchDB call
    print("Using LocalCouchDBData Emulator")
    self.couchURL = couchURL
    self.couchURLBase, self.dbName = splitCouchServiceURL(couchURL)
    self.summaryLevel = summaryLevel
    logging.info("connect couch %s: %s" % (self.couchURLBase, self.dbName))

def __init__(self, couchURL, dbName=None):
    # if dbName not given assume we have to split
    if not dbName:
        couchURL, dbName = splitCouchServiceURL(couchURL)
    self.hostWithAuth = couchURL
    self.server = CouchServer(couchURL)
    self.db = self.server.connectDatabase(dbName, create=False)

def __init__(self, couchURL, dbName=None):
    # if dbName not given assume we have to split
    if not dbName:
        couchURL, dbName = splitCouchServiceURL(couchURL)
    self.hostWithAuth = couchURL
    self.server = CouchServer(couchURL)
    self.db = self.server.connectDatabase(dbName, create=False)
    self.defaultOptions = {'stale': "update_after", 'reduce': True, 'group': True}

def __init__(self, couchURL, summaryLevel):
    # set the connection for local couchDB call
    print("Using LocalCouchDBData Emulator")
    self.couchURL = couchURL
    self.couchURLBase, self.dbName = splitCouchServiceURL(couchURL)
    self.summaryLevel = summaryLevel
    logging.info("connect couch %s: %s" % (self.couchURLBase, self.dbName))

def __init__(self, couchURL, statSummaryDB, summaryLevel):
    # set the connection for local couchDB call
    self.couchURL = couchURL
    self.couchURLBase, self.dbName = splitCouchServiceURL(couchURL)
    self.jobCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/jobs", False)
    fwjrDBname = "%s/fwjrs" % self.dbName
    self.fwjrAPI = FWJRDBAPI(self.couchURLBase, fwjrDBname)
    self.summaryStatsDB = CouchServer(self.couchURLBase).connectDatabase(statSummaryDB, False)
    self.summaryLevel = summaryLevel

def __init__(self, couchURL, statSummaryDB, summaryLevel):
    # set the connection for local couchDB call
    self.couchURL = couchURL
    self.couchURLBase, self.dbName = splitCouchServiceURL(couchURL)
    self.jobCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/jobs", False)
    self.fwjrsCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/fwjrs", False)
    # TODO: remove the hard coded name (wma_summarydb)
    self.summaryStatsDB = CouchServer(self.couchURLBase).connectDatabase(statSummaryDB, False)
    self.summaryLevel = summaryLevel

def __init__(self, couchURL, summaryLevel):
    # set the connection for local couchDB call
    self.couchURL = couchURL
    self.couchURLBase, self.dbName = splitCouchServiceURL(couchURL)
    self.jobCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/jobs", False)
    self.fwjrsCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/fwjrs", False)
    self.summaryLevel = summaryLevel

def __init__(self, couchURL, dbName=None):
    # set the connection for local couchDB call
    if dbName:
        self.couchURL = couchURL
        self.dbName = dbName
    else:
        self.couchURL, self.dbName = splitCouchServiceURL(couchURL)
    self.couchServer = CouchServer(self.couchURL)
    self.couchDB = self.couchServer.connectDatabase(self.dbName, False)

def __init__(self, couchURL, dbName=None):
    couchURL = sanitizeURL(couchURL)['url']
    # set the connection for local couchDB call
    if dbName:
        self.couchURL = couchURL
        self.dbName = dbName
    else:
        self.couchURL, self.dbName = splitCouchServiceURL(couchURL)
    self.couchServer = CouchServer(self.couchURL)
    self.couchDB = CouchServer(self.couchURL).connectDatabase(self.dbName, False)

def __init__(self, couchURL, dbName=None):
    # set the connection for local couchDB call
    if dbName:
        self.couchURL = couchURL
        self.dbName = dbName
    else:
        self.couchURL, self.dbName = splitCouchServiceURL(couchURL)
    self.couchServer = CouchServer(self.couchURL)
    self.couchDB = self.couchServer.connectDatabase(self.dbName, False)
    self.replicatorDB = self.couchServer.connectDatabase('_replicator', False)

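# NOTE (illustrative usage, not part of the original snippets): the constructors above accept
# either a combined "<couch base URL>/<database name>" service URL or an explicit dbName, so
# under that assumption the two calls below would be equivalent. The class name and URL are
# placeholders borrowed from the surrounding code, not a confirmed API.
#   monitor = WMStatsWriter("https://cmsweb.cern.ch/couchdb/wmstats")
#   monitor = WMStatsWriter("https://cmsweb.cern.ch/couchdb", dbName="wmstats")
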
def getSiteInfoFromGlobalQueue(serviceURL):
    url, dbName = splitCouchServiceURL(serviceURL)
    globalQ = WorkQueue(url, dbName)
    try:
        queues = globalQ.getChildQueues()
    except Exception as ex:
        logging.warning("Error: %s" % str(ex))
        errorInfo = {}
        errorInfo['site_name'] = serviceURL
        return [errorInfo]

def _commonInit(self, couchURL, appName="WMStats"):
    """
    Set up common variables for the inherited class.
    The inherited class should call this in its __init__.
    """
    self.couchURL, self.dbName = splitCouchServiceURL(couchURL)
    self.couchServer = CouchServer(self.couchURL)
    self.couchDB = self.couchServer.connectDatabase(self.dbName, False)
    self.couchapp = appName
    self.defaultStale = {"stale": "update_after"}

def getRequestInfoFromLocalQueue(serviceURL):
    """
    get the request info from local queue
    """
    url, dbName = splitCouchServiceURL(serviceURL)
    service = WorkQueue(url, dbName)
    try:
        wmbsUrls = service.getWMBSUrl()
        jobStatusInfo = service.getJobInjectStatusByRequest()
    except Exception as ex:
        logging.error("%s: %s" % (serviceURL, str(ex)))
        return DFormatter.errorFormatter(serviceURL, "LocalQueue Down")

def _commonInit(self, couchURL, appName="WMStats"):
    """
    Set up common variables for the inherited class.
    The inherited class should call this in its __init__.
    """
    self.couchURL, self.dbName = splitCouchServiceURL(couchURL)
    self.couchServer = CouchServer(self.couchURL)
    self.couchDB = self.couchServer.connectDatabase(self.dbName, False)
    self.couchapp = appName
    self.defaultStale = {"stale": "update_after"}

def getSiteInfoFromLocalQueue(serviceURL):
    """
    get agent status from local agent
    """
    url, dbName = splitCouchServiceURL(serviceURL)
    wqService = WorkQueue(url, dbName)
    try:
        wmbsUrls = wqService.getWMBSUrl()
    except Exception as ex:
        logging.warning("Error: %s" % str(ex))
        errorInfo = {}
        errorInfo['site_name'] = serviceURL
        return [errorInfo]

def __init__(self, couchURL, dbName=None, inboxDBName=None):
    # if dbName not given assume we have to split
    if not dbName:
        couchURL, dbName = splitCouchServiceURL(couchURL)
    self.hostWithAuth = couchURL
    self.server = CouchServer(couchURL)
    self.db = self.server.connectDatabase(dbName, create=False)
    if not inboxDBName:
        inboxDBName = "%s_inbox" % dbName
    self.inboxDB = self.server.connectDatabase(inboxDBName, create=False)
    self.defaultOptions = {'stale': "update_after", 'reduce': True, 'group': True}
    self.eleKey = 'WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement'

def getAgentInfoFromGlobalQueue(serviceURL):
    url, dbName = splitCouchServiceURL(serviceURL)
    globalQ = WorkQueue(url, dbName)
    try:
        childQueues = globalQ.getChildQueues()
    except Exception as ex:
        logging.warning("Error: %s" % str(ex))
        errorInfo = {}
        errorInfo['url'] = serviceURL
        errorInfo['status'] = "Global Queue down: %s" % serviceURL
        errorInfo['acdc'] = 'N/A'
        return [errorInfo]

def __init__(self, rest, config):
    super(CouchDBCleanup, self).__init__(config)
    self.reqDB = RequestDBReader(config.reqmgrdb_url)
    self.reqmgrAux = ReqMgrAux(config.reqmgr2_url, logger=self.logger)
    # statuses that we want to keep the transfer documents
    self.transferStatuses = ["assigned", "staging", "staged", "acquired",
                             "failed", "running-open", "running-closed"]
    baseURL, acdcDB = splitCouchServiceURL(config.acdc_url)
    self.acdcService = CouchService(url=baseURL, database=acdcDB)

def getAgentInfoFromLocalQueue(serviceURL):
    """
    get agent status from local agent
    """
    url, dbName = splitCouchServiceURL(serviceURL)
    localQ = WorkQueue(url, dbName)
    try:
        wmbsUrl = localQ.getWMBSUrl()
    except Exception as ex:
        logging.warning("Error: %s" % str(ex))
        errorInfo = {}
        errorInfo['url'] = serviceURL
        errorInfo['status'] = "Local Queue down: %s" % serviceURL
        errorInfo['acdc'] = 'N/A'
        return errorInfo

def getRequestInfoFromGlobalQueue(serviceURL):
    """
    get the request info from global queue
    """
    url, dbName = splitCouchServiceURL(serviceURL)
    service = WorkQueue(url, dbName)
    try:
        jobInfo = service.getTopLevelJobsByRequest()
        qInfo = service.getChildQueuesByRequest()
        siteWhitelists = service.getSiteWhitelistByRequest()
        childQueueURLs = set()
        for item in qInfo:
            childQueueURLs.add(item['local_queue'])
    except Exception as ex:
        logging.error("%s: %s" % (serviceURL, str(ex)))
        return DFormatter.errorFormatter(serviceURL, "GlobalQueue Down")

def _commonInit(self, couchURL, couchapp):
    """
    Set up common variables for the inherited class.
    The inherited class should call this in its __init__.
    """
    if isinstance(couchURL, Database):
        self.couchDB = couchURL
        self.couchURL = self.couchDB['host']
        self.dbName = self.couchDB.name
        self.couchServer = CouchServer(self.couchURL)
    else:
        self.couchURL, self.dbName = splitCouchServiceURL(couchURL)
        self.couchServer = CouchServer(self.couchURL)
        self.couchDB = self.couchServer.connectDatabase(self.dbName, False)
    self.couchapp = couchapp
    self.defaultStale = {"stale": "update_after"}

def __init__(self, url, identifier, logger=None, **kwds):
    self.logger = logger if logger else logging.getLogger()
    self.url = url if url else 'https://cmsweb.cern.ch/couchdb/wmstats_logdb'
    self.identifier = identifier if identifier else 'unknown'
    try:
        self.thread_name = kwds.pop('thread_name')
    except KeyError:
        self.thread_name = threading.currentThread().getName()
    self.user_pat = re.compile(r'^/[a-zA-Z][a-zA-Z0-9/\=\s()\']*\=[a-zA-Z0-9/\=\.\-_/#:\s\']*$')
    self.agent = 0 if self.user_pat.match(self.identifier) else 1
    couch_url, db_name = splitCouchServiceURL(self.url)
    self.backend = LogDBBackend(couch_url, db_name, identifier,
                                self.thread_name, agent=self.agent, **kwds)
    self.logger.info(self)

def __init__(self, url, identifier, logger=None, **kwds):
    self.logger = logger if logger else logging.getLogger()
    if not url or not identifier:
        raise RuntimeError("Attempt to init LogDB with url='%s', identifier='%s'"
                           % (url, identifier))
    self.identifier = identifier
    try:
        self.thread_name = kwds.pop('thread_name')
    except KeyError:
        self.thread_name = threading.currentThread().getName()
    self.url = url
    self.user_pat = re.compile(r'^/[a-zA-Z][a-zA-Z0-9/\=\s()\']*\=[a-zA-Z0-9/\=\.\-_/#:\s\']*$')
    self.agent = 0 if self.user_pat.match(self.identifier) else 1
    couch_url, db_name = splitCouchServiceURL(self.url)
    self.backend = LogDBBackend(couch_url, db_name, identifier,
                                self.thread_name, agent=self.agent, **kwds)
    self.logger.info(self)

def __init__(self, url, identifier, logger=None, **kwds):
    self.logger = logger if logger else logging.getLogger()
    if not url or not identifier:
        raise RuntimeError("Attempt to init LogDB with url='%s', identifier='%s'"
                           % (url, identifier))
    self.identifier = identifier
    try:
        self.thread_name = kwds.pop('thread_name')
    except KeyError:
        self.thread_name = threading.currentThread().getName()
    self.url = url
    self.user_pat = re.compile(
        r'^/[a-zA-Z][a-zA-Z0-9/\=\s()\']*\=[a-zA-Z0-9/\=\.\-_/#:\s\']*$')
    self.agent = 0 if self.user_pat.match(self.identifier) else 1
    couch_url, db_name = splitCouchServiceURL(self.url)
    self.backend = LogDBBackend(couch_url, db_name, identifier,
                                self.thread_name, agent=self.agent, **kwds)
    self.logger.info(self)

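# NOTE (illustrative usage, not from the original code): the LogDB constructors above split
# their service URL the same way before handing it to LogDBBackend. A hypothetical caller,
# with placeholder URL, identifier and thread name, might look like:
#   logdb = LogDB("https://cmsweb.cern.ch/couchdb/wmstats_logdb",
#                 identifier="vocms0192.cern.ch", thread_name="AgentStatusWatcher")
# The identifier is matched against user_pat to decide whether the caller is a user
# (a certificate DN) or an agent.
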
def acdcCleanup(self, config):
    """
    gather active data statistics
    """
    reqDB = RequestDBReader(config.reqmgrdb_url)

    from WMCore.ACDC.CouchService import CouchService
    baseURL, acdcDB = splitCouchServiceURL(config.acdc_url)
    acdcService = CouchService(url=baseURL, database=acdcDB)
    originalRequests = acdcService.listCollectionNames()

    if len(originalRequests) == 0:
        return

    # filter requests
    results = reqDB._getCouchView("byrequest", {}, originalRequests)
    # check the status of the requests [announced, rejected-archived, aborted-archived, normal-archived]
    deleteStates = ["announced", "rejected-archived", "aborted-archived", "normal-archived"]
    filteredRequests = []
    for row in results["rows"]:
        if row["value"][0] in deleteStates:
            filteredRequests.append(row["key"])

    total = 0
    for req in filteredRequests:
        try:
            deleted = acdcService.removeFilesetsByCollectionName(req)
            if deleted is None:
                self.logger.warning("request already deleted %s", req)
            else:
                total += len(deleted)
                self.logger.info("request %s deleted", req)
        except Exception as ex:
            self.logger.error("request delete failed, will try again %s: %s", req, str(ex))
    self.logger.info("total %s requests deleted", total)
    return

def acdcCleanup(self, config):
    """
    gather active data statistics
    """
    reqDB = RequestDBReader(config.reqmgrdb_url)

    from WMCore.ACDC.CouchService import CouchService
    baseURL, acdcDB = splitCouchServiceURL(config.acdc_url)
    acdcService = CouchService(url=baseURL, database=acdcDB)
    originalRequests = acdcService.listCollectionNames()

    if len(originalRequests) == 0:
        return

    # filter requests
    results = reqDB._getCouchView("byrequest", {}, originalRequests)
    # check the status of the requests [announced, rejected-archived, aborted-archived, normal-archived]
    deleteStates = ["announced", "rejected-archived", "aborted-archived", "normal-archived"]
    filteredRequests = []
    for row in results["rows"]:
        if row["value"][0] in deleteStates:
            filteredRequests.append(row["key"])

    total = 0
    for req in filteredRequests:
        try:
            deleted = acdcService.removeFilesetsByCollectionName(req)
            if deleted is None:
                self.logger.warning("request already deleted %s" % req)
            else:
                total += len(deleted)
                self.logger.info("request %s deleted" % req)
        except:
            self.logger.error("request delete failed, will try again %s" % req)
    self.logger.info("total %s requests deleted" % total)
    return

def gatherWMDataMiningStats(wmstatsUrl, reqmgrUrl, wmMiningUrl, mcmUrl, mcmCert, mcmKey, tmpDir,
                            archived=False, log=logging.info):
    server, database = splitCouchServiceURL(wmMiningUrl)
    analyticsServer = CouchServer(server)
    couchdb = analyticsServer.connectDatabase(database)

    WMStats = WMStatsReader(wmstatsUrl, reqdbURL=reqmgrUrl, reqdbCouchApp="ReqMgr")

    reqMgrServer, reqMgrDB = splitCouchServiceURL(reqmgrUrl)
    reqMgr = CouchServer(reqMgrServer).connectDatabase(reqMgrDB, False)

    if archived:
        funcName = "Archived Requests"
    else:
        funcName = "Active Requests"

    log.info("%s: Getting job information from %s and %s. Please wait." % (funcName, wmstatsUrl, reqmgrUrl))

    if archived:
        checkStates = ['normal-archived', 'rejected-archived', 'aborted-archived']
        jobInfoFlag = False
    else:
        checkStates = WMStatsReader.ACTIVE_STATUS
        jobInfoFlag = True
    requests = WMStats.getRequestByStatus(checkStates, jobInfoFlag=jobInfoFlag, legacyFormat=True)

    requestCollection = RequestInfoCollection(requests)
    result = requestCollection.getJSONData()
    requestsDict = requestCollection.getData()
    log.info("%s: Total %s requests retrieved\n" % (funcName, len(result)))

    report = {}
    nMCMCalls = 0
    with McM(cert=mcmCert, key=mcmKey, url=mcmUrl, tmpDir=tmpDir) as mcm:
        for wf in result.keys():

            # Store a copy of the CouchDB document so we can compare later before updating
            if couchdb.documentExists(wf):
                oldCouchDoc = couchdb.document(wf)
                wfExists = True
            else:
                oldCouchDoc = CouchDoc(id=wf)
                wfExists = False

            newCouchDoc = copy.deepcopy(oldCouchDoc)
            ancientCouchDoc = copy.deepcopy(oldCouchDoc)
            report[wf] = oldCouchDoc
            # FIXME: remove report, only have two instances of couchDoc

            if 'filterEfficiency' not in oldCouchDoc or 'runWhiteList' not in oldCouchDoc:
                runWhiteList = []
                filterEfficiency = None
                try:
                    # log.debug("Looking up %s in ReqMgr" % wf)
                    rmDoc = reqMgr.document(wf)
                    runWhiteList = rmDoc.get('RunWhiteList', [])
                    filterEfficiency = rmDoc.get('FilterEfficiency', None)
                except:
                    pass  # ReqMgr no longer has the workflow
                report[wf].update({'filterEfficiency': filterEfficiency, 'runWhiteList': runWhiteList})

            if oldCouchDoc.get('mcmTotalEvents', 'Unknown') == 'Unknown' or \
               oldCouchDoc.get('mcmApprovalTime', 'Unknown') == 'Unknown':
                prepID = oldCouchDoc.get('prepID', None)
                if prepID and nMCMCalls <= maxMCMCalls:
                    log.info("Trying to update McM info for %s, PREPID %s" % (wf, prepID))
                    # Get information from McM. Don't call too many times, can take a long time
                    nMCMCalls += 1
                    try:
                        mcmHistory = mcm.getHistory(prepID=prepID)
                        if 'mcmApprovalTime' not in oldCouchDoc:
                            report[wf].update({'mcmApprovalTime': 'NoMcMData'})
                        found = False
                        for entry in mcmHistory:
                            if entry['action'] == 'set status' and entry['step'] == 'announced':
                                dateString = entry['updater']['submission_date']
                                dt = datetime.strptime(dateString, '%Y-%m-%d-%H-%M')
                                report[wf].update({'mcmApprovalTime': time.mktime(dt.timetuple())})
                                found = True
                        if not found:
                            log.error("History found but no approval time for %s" % wf)
                    except McMNoDataError:
                        log.error("Setting NoMcMData for %s" % wf)
                        report[wf].update({'mcmApprovalTime': 'NoMcMData'})
                    except (RuntimeError, IOError):
                        exc_type, dummy_exc_value, dummy_exc_traceback = sys.exc_info()
                        log.error("%s getting history from McM for PREP ID %s. May be transient and/or SSO problem." %
                                  (exc_type, prepID))
                    except:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting history from McM for PREP ID %s. Unknown error." %
                                  (exc_type, prepID))
                    try:
                        mcmRequest = mcm.getRequest(prepID=prepID)
                        report[wf].update({'mcmTotalEvents': mcmRequest.get('total_events', 'NoMcMData')})
                    except (RuntimeError, IOError):
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting request from McM for PREP ID %s. May be transient and/or SSO problem." %
                                  (exc_type, prepID))
                    except:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting request from McM for PREP ID %s. Unknown error." % (exc_type, prepID))

            # Basic parameters of the workflow
            priority = requests[wf].get('priority', 0)
            requestType = requests[wf].get('request_type', 'Unknown')
            targetLumis = requests[wf].get('input_lumis', 0)
            targetEvents = requests[wf].get('input_events', 0)
            campaign = requests[wf].get('campaign', 'Unknown')
            prep_id = requests[wf].get('prep_id', None)
            outputdatasets = requests[wf].get('outputdatasets', [])
            statuses = requests[wf].get('request_status', [])

            if not statuses:
                log.error("Could not find any status from workflow: %s" % wf)  # Should not happen but it does.

            # Remove a single task_ from the start of PREP ID if it exists
            if prep_id and prep_id.startswith('task_'):
                prep_id.replace('task_', '', 1)

            # Can be an empty list, full list, empty string, or non-empty string!
            inputdataset = requests[wf].get('inputdataset', "")
            if isinstance(inputdataset, list):
                if inputdataset:
                    inputdataset = inputdataset[0]
                else:
                    inputdataset = ''

            outputTier = 'Unknown'
            try:
                outputTiers = []
                for ds in outputdatasets:
                    if isinstance(ds, list):
                        outputTiers.append(ds[0].split('/')[-1])
                    else:
                        outputTiers.append(ds.split('/')[-1])
            except:
                log.error("Could not decode outputdatasets: %s" % outputdatasets)  # Sometimes is a list of lists, not just a list. Bail

            if inputdataset:
                inputTier = inputdataset.split('/')[-1]
                if inputTier in ['GEN']:
                    outputTier = 'LHE'
                elif inputTier in ['RAW', 'RECO']:
                    outputTier = 'AOD'
                elif inputTier in ['GEN-SIM']:
                    outputTier = 'AODSIM'
                elif 'AODSIM' in outputTiers:
                    outputTier = 'AODSIM'
            else:
                if len(outputTiers) == 1 and 'GEN' in outputTiers:
                    if 'STEP0ATCERN' in wf:
                        outputTier = 'STEP0'
                    else:
                        outputTier = 'FullGen'
                elif 'GEN-SIM' in outputTiers and 'AODSIM' in outputTiers and requestType == 'TaskChain':
                    outputTier = 'RelVal'
                elif 'RECO' in outputTiers and requestType == 'TaskChain':
                    outputTier = 'RelVal'
                elif 'GEN-SIM' in outputTiers:
                    outputTier = 'GEN-SIM'
                elif 'AODSIM' in outputTiers:
                    outputTier = 'AODSIM'
                elif 'RECO' in outputTiers:
                    outputTier = 'AOD'
                elif 'AOD' in outputTiers:
                    outputTier = 'AOD'
                else:
                    outputTier = 'GEN-SIM'

            # Calculate completion ratios for events and lumi sections, take minimum for all datasets
            eventPercent = 200
            lumiPercent = 200
            datasetReports = requestsDict[wf].getProgressSummaryByOutputDataset()
            for dataset in datasetReports:
                dsr = datasetReports[dataset].getReport()
                events = dsr.get('events', 0)
                lumis = dsr.get('totalLumis', 0)
                if targetLumis:
                    lumiPercent = min(lumiPercent, lumis / targetLumis * 100)
                if targetEvents:
                    eventPercent = min(eventPercent, events / targetEvents * 100)
            if eventPercent > 100:
                eventPercent = 0
            if lumiPercent > 100:
                lumiPercent = 0

            # Sum up all jobs across agents to see if we've run the first, last
            successJobs = 0
            totalJobs = 0
            for agent in result[wf]:
                jobs = result[wf][agent]
                successJobs += jobs['sucess']
                totalJobs += jobs['created']
            try:
                if totalJobs and not report[wf].get('firstJobTime', None):
                    report[wf].update({'firstJobTime': int(time.time())})
                if totalJobs and successJobs == totalJobs and not report[wf].get('lastJobTime', None):
                    report[wf].update({'lastJobTime': int(time.time())})
            except:
                pass

            # Figure out current status of workflow and transition times
            finalStatus = None
            newTime = None
            approvedTime = None
            assignedTime = None
            acquireTime = None
            completedTime = None
            closeoutTime = None
            announcedTime = None
            archivedTime = None
            requestDate = None

            for status in statuses:
                finalStatus = status['status']
                if status['status'] == 'new':
                    newTime = status['update_time']
                if status['status'] == 'assignment-approved':
                    approvedTime = status['update_time']
                if status['status'] == 'assigned':
                    assignedTime = status['update_time']
                if status['status'] == 'completed':
                    completedTime = status['update_time']
                if status['status'] == 'acquired':
                    acquireTime = status['update_time']
                if status['status'] == 'closed-out':
                    closeoutTime = status['update_time']
                if status['status'] == 'announced':
                    announcedTime = status['update_time']
                if status['status'] == 'normal-archived':
                    archivedTime = status['update_time']

            # Build or modify the report dictionary for the WF
            report.setdefault(wf, {})

            if approvedTime and not report[wf].get('approvedTime', None):
                report[wf].update({'approvedTime': approvedTime})
            if assignedTime and not report[wf].get('assignedTime', None):
                report[wf].update({'assignedTime': assignedTime})
            if acquireTime and not report[wf].get('acquireTime', None):
                report[wf].update({'acquireTime': acquireTime})
            if closeoutTime and not report[wf].get('closeoutTime', None):
                report[wf].update({'closeoutTime': closeoutTime})
            if announcedTime and not report[wf].get('announcedTime', None):
                report[wf].update({'announcedTime': announcedTime})
            if completedTime and not report[wf].get('completedTime', None):
                report[wf].update({'completedTime': completedTime})
            if newTime and not report[wf].get('newTime', None):
                report[wf].update({'newTime': newTime})
            if archivedTime and not report[wf].get('archivedTime', None):
                report[wf].update({'archivedTime': archivedTime})

            try:
                dt = requests[wf]['request_date']
                requestDate = '%4.4d-%2.2d-%2.2d %2.2d:%2.2d:%2.2d' % tuple(dt)
                report[wf].update({'requestDate': requestDate})
            except:
                pass

            report[wf].update({'priority': priority, 'status': finalStatus, 'type': requestType})
            report[wf].update({'totalLumis': targetLumis, 'totalEvents': targetEvents})
            report[wf].update({'campaign': campaign, 'prepID': prep_id, 'outputTier': outputTier})
            report[wf].update({'outputDatasets': outputdatasets, 'inputDataset': inputdataset})

            report[wf].setdefault('lumiPercents', {})
            report[wf].setdefault('eventPercents', {})
            lumiProgress = 0
            eventProgress = 0
            for percentage in [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98, 99, 100]:
                percent = str(percentage)
                percentReported = report[wf]['lumiPercents'].get(percent, None)
                if not percentReported and lumiPercent >= percentage:
                    report[wf]['lumiPercents'][percent] = int(time.time())
                if lumiPercent >= percentage:
                    lumiProgress = percentage

                percentReported = report[wf]['eventPercents'].get(percent, None)
                if not percentReported and eventPercent >= percentage:
                    report[wf]['eventPercents'][percent] = int(time.time())
                if eventPercent >= percentage:
                    eventProgress = percentage

            report[wf].update({'eventProgress': eventProgress, 'lumiProgress': lumiProgress})

            newCouchDoc.update(report[wf])

            # Queue the updated document for addition if it's changed.
            if ancientCouchDoc != newCouchDoc:
                if wfExists:
                    # log.debug("Workflow updated: %s" % wf)
                    pass
                else:
                    # log.debug("Workflow created: %s" % wf)
                    pass
                try:
                    newCouchDoc['updateTime'] = int(time.time())
                    report[wf]['updateTime'] = int(time.time())
                    dummy = json.dumps(newCouchDoc)  # Make sure it encodes before trying to queue
                    couchdb.queue(newCouchDoc)
                except:
                    log.error("Failed to queue document:%s \n" % pprint.pprint(newCouchDoc))

    log.info("%s: Finished getting job. wait for the next Cycle" % funcName)
    # Commit all changes to CouchDB
    couchdb.commit()

def cleanDeletedDoc(couchURL, totalLimit, filter, limit, type, lastSeq):
    couchURLBase, dbName = splitCouchServiceURL(couchURL)
    couchDB = CouchServer(couchURLBase).connectDatabase(dbName, False)
    couchDB["timeout"] = 3600
    _cleanDeletedDoc(couchDB, totalLimit, filter, limit, type, lastSeq)
    print("last sequence %s" % couchDB.last_seq)

def gatherWMDataMiningStats(wmstatsUrl, reqmgrUrl, wmminigUrl, archived=False, log=logging.info):
    server, database = splitCouchServiceURL(wmminigUrl)
    analyticsServer = CouchServer(server)
    couchdb = analyticsServer.connectDatabase(database)

    WMStats = WMStatsReader(wmstatsUrl)

    reqMgrServer, reqMgrDB = splitCouchServiceURL(reqmgrUrl)
    reqMgr = CouchServer(reqMgrServer).connectDatabase(reqMgrDB, False)

    if archived:
        funcName = "Archived Requests"
    else:
        funcName = "Active Requests"

    log("INFO: %s: Getting job information from %s and %s. Please wait." % (funcName, wmstatsUrl, reqmgrUrl))

    if archived:
        checkStates = ['normal-archived', 'rejected-archived', 'aborted-archived']
        jobInfoFlag = False
    else:
        checkStates = WMStatsReader.ACTIVE_STATUS
        jobInfoFlag = True
    requests = WMStats.getRequestByStatus(checkStates, jobInfoFlag=jobInfoFlag)

    requestCollection = RequestInfoCollection(requests)
    result = requestCollection.getJSONData()
    requestsDict = requestCollection.getData()
    log("INFO: %s: Total %s requests retrieved\n" % (funcName, len(result)))

    report = {}
    for wf in result.keys():

        # Store a copy of the CouchDB document so we can compare later before updating
        if couchdb.documentExists(wf):
            oldCouchDoc = couchdb.document(wf)
            wfExists = True
        else:
            oldCouchDoc = CouchDoc(id=wf)
            wfExists = False

        newCouchDoc = copy.deepcopy(oldCouchDoc)
        ancientCouchDoc = copy.deepcopy(oldCouchDoc)
        report[wf] = oldCouchDoc
        # FIXME: remove report, only have two instances of couchDoc

        if not oldCouchDoc.has_key('filterEfficiency') or not oldCouchDoc.has_key('runWhiteList'):
            runWhiteList = []
            filterEfficiency = None
            try:
                # log("DEBUG: Looking up %s in ReqMgr" % wf)
                rmDoc = reqMgr.document(wf)
                runWhiteList = rmDoc.get('RunWhiteList', [])
                filterEfficiency = rmDoc.get('FilterEfficiency', None)
            except:
                pass  # ReqMgr no longer has the workflow
            report[wf].update({'filterEfficiency': filterEfficiency, 'runWhiteList': runWhiteList})

        # Basic parameters of the workflow
        priority = requests[wf]['priority']
        requestType = requests[wf]['request_type']
        targetLumis = requests[wf].get('input_lumis', 0)
        targetEvents = requests[wf].get('input_events', 0)
        campaign = requests[wf]['campaign']
        prep_id = requests[wf].get('prep_id', None)
        outputdatasets = requests[wf].get('outputdatasets', [])

        # Can be an empty list, full list, empty string, or non-empty string!
        inputdataset = requests[wf]['inputdataset']
        if isinstance(inputdataset, (list,)):
            if inputdataset:
                inputdataset = inputdataset[0]
            else:
                inputdataset = ''

        outputTier = 'Unknown'
        try:
            outputTiers = []
            for ds in outputdatasets:
                if type(ds) == list:
                    outputTiers.append(ds[0].split('/')[-1])
                else:
                    outputTiers.append(ds.split('/')[-1])
        except:
            log("ERROR: Could not decode outputdatasets: %s" % outputdatasets)  # Sometimes is a list of lists, not just a list. Bail

        if inputdataset:
            inputTier = inputdataset.split('/')[-1]
            if inputTier in ['GEN']:
                outputTier = 'LHE'
            elif inputTier in ['RAW', 'RECO']:
                outputTier = 'AOD'
            elif inputTier in ['GEN-SIM']:
                outputTier = 'AODSIM'
            elif 'AODSIM' in outputTiers:
                outputTier = 'AODSIM'
        else:
            if len(outputTiers) == 1 and 'GEN' in outputTiers:
                if 'STEP0ATCERN' in wf:
                    outputTier = 'STEP0'
                else:
                    outputTier = 'FullGen'
            elif 'GEN-SIM' in outputTiers and 'AODSIM' in outputTiers and requestType == 'TaskChain':
                outputTier = 'RelVal'
            elif 'RECO' in outputTiers and requestType == 'TaskChain':
                outputTier = 'RelVal'
            elif 'GEN-SIM' in outputTiers:
                outputTier = 'GEN-SIM'
            elif 'AODSIM' in outputTiers:
                outputTier = 'AODSIM'
            elif 'RECO' in outputTiers:
                outputTier = 'AOD'
            elif 'AOD' in outputTiers:
                outputTier = 'AOD'
            else:
                outputTier = 'GEN-SIM'

        # Calculate completion ratios for events and lumi sections, take minimum for all datasets
        eventPercent = 200
        lumiPercent = 200
        datasetReports = requestsDict[wf].getProgressSummaryByOutputDataset()
        for dataset in datasetReports:
            dsr = datasetReports[dataset].getReport()
            events = dsr.get('events', 0)
            lumis = dsr.get('totalLumis', 0)
            if targetLumis:
                lumiPercent = min(lumiPercent, lumis/targetLumis*100)
            if targetEvents:
                eventPercent = min(eventPercent, events/targetEvents*100)
        if eventPercent > 100:
            eventPercent = 0
        if lumiPercent > 100:
            lumiPercent = 0

        # Sum up all jobs across agents to see if we've run the first, last
        successJobs = 0
        totalJobs = 0
        for agent in result[wf]:
            jobs = result[wf][agent]
            successJobs += jobs['sucess']
            totalJobs += jobs['created']
        try:
            if totalJobs and not report[wf].get('firstJobTime', None):
                report[wf].update({'firstJobTime': int(time.time())})
            if totalJobs and successJobs == totalJobs and not report[wf].get('lastJobTime', None):
                report[wf].update({'lastJobTime': int(time.time())})
        except:
            pass

        # Figure out current status of workflow and transition times
        finalStatus = None
        newTime = None
        approvedTime = None
        assignedTime = None
        acquireTime = None
        completedTime = None
        closeoutTime = None
        announcedTime = None
        archivedTime = None
        requestDate = None

        for status in requests[wf]['request_status']:
            finalStatus = status['status']
            if status['status'] == 'new':
                newTime = status['update_time']
            if status['status'] == 'assignment-approved':
                approvedTime = status['update_time']
            if status['status'] == 'assigned':
                assignedTime = status['update_time']
            if status['status'] == 'completed':
                completedTime = status['update_time']
            if status['status'] == 'acquired':
                acquireTime = status['update_time']
            if status['status'] == 'closed-out':
                closeoutTime = status['update_time']
            if status['status'] == 'announced':
                announcedTime = status['update_time']
            if status['status'] == 'normal-archived':
                archivedTime = status['update_time']

        # Build or modify the report dictionary for the WF
        report.setdefault(wf, {})

        if approvedTime and not report[wf].get('approvedTime', None):
            report[wf].update({'approvedTime': approvedTime})
        if assignedTime and not report[wf].get('assignedTime', None):
            report[wf].update({'assignedTime': assignedTime})
        if acquireTime and not report[wf].get('acquireTime', None):
            report[wf].update({'acquireTime': acquireTime})
        if closeoutTime and not report[wf].get('closeoutTime', None):
            report[wf].update({'closeoutTime': closeoutTime})
        if announcedTime and not report[wf].get('announcedTime', None):
            report[wf].update({'announcedTime': announcedTime})
        if completedTime and not report[wf].get('completedTime', None):
            report[wf].update({'completedTime': completedTime})
        if newTime and not report[wf].get('newTime', None):
            report[wf].update({'newTime': newTime})
        if archivedTime and not report[wf].get('archivedTime', None):
            report[wf].update({'archivedTime': archivedTime})

        try:
            dt = requests[wf]['request_date']
            requestDate = '%4.4d-%2.2d-%2.2d %2.2d:%2.2d:%2.2d' % tuple(dt)
            report[wf].update({'requestDate': requestDate})
        except:
            pass

        report[wf].update({'priority': priority, 'status': finalStatus, 'type': requestType})
        report[wf].update({'totalLumis': targetLumis, 'totalEvents': targetEvents})
        report[wf].update({'campaign': campaign, 'prepID': prep_id, 'outputTier': outputTier})
        report[wf].update({'outputDatasets': outputdatasets, 'inputDataset': inputdataset})

        report[wf].setdefault('lumiPercents', {})
        report[wf].setdefault('eventPercents', {})
        lumiProgress = 0
        eventProgress = 0
        for percentage in [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98, 99, 100]:
            percent = str(percentage)
            percentReported = report[wf]['lumiPercents'].get(percent, None)
            if not percentReported and lumiPercent >= percentage:
                report[wf]['lumiPercents'][percent] = int(time.time())
            if lumiPercent >= percentage:
                lumiProgress = percentage

            percentReported = report[wf]['eventPercents'].get(percent, None)
            if not percentReported and eventPercent >= percentage:
                report[wf]['eventPercents'][percent] = int(time.time())
            if eventPercent >= percentage:
                eventProgress = percentage

        report[wf].update({'eventProgress': eventProgress, 'lumiProgress': lumiProgress})

        newCouchDoc.update(report[wf])

        # Queue the updated document for addition if it's changed.
        if ancientCouchDoc != newCouchDoc:
            if wfExists:
                # log("DEBUG: Workflow updated: %s" % wf)
                pass
            else:
                # log("DEBUG: Workflow created: %s" % wf)
                pass
            try:
                newCouchDoc['updateTime'] = int(time.time())
                report[wf]['updateTime'] = int(time.time())
                cjson.encode(newCouchDoc)  # Make sure it encodes before trying to queue
                couchdb.queue(newCouchDoc)
            except:
                log("ERROR: Failed to queue document:%s \n" % pprint.pprint(newCouchDoc))

    log("INFO: %s: Finished getting job. wait for the next Cycle" % funcName)
    # Commit all changes to CouchDB
    couchdb.commit()

def __init__(self, couchURL):
    # set the connection for local couchDB call
    self.couchURL, self.dbName = splitCouchServiceURL(couchURL)

def gatherWMDataMiningStats(wmstatsUrl, reqmgrUrl, wmMiningUrl, mcmUrl, mcmCert, mcmKey, tmpDir,
                            archived=False, log=logging.info):
    server, database = splitCouchServiceURL(wmMiningUrl)
    analyticsServer = CouchServer(server)
    couchdb = analyticsServer.connectDatabase(database)

    WMStats = WMStatsReader(wmstatsUrl)

    reqMgrServer, reqMgrDB = splitCouchServiceURL(reqmgrUrl)
    reqMgr = CouchServer(reqMgrServer).connectDatabase(reqMgrDB, False)

    if archived:
        funcName = "Archived Requests"
    else:
        funcName = "Active Requests"

    log("INFO: %s: Getting job information from %s and %s. Please wait." % (funcName, wmstatsUrl, reqmgrUrl))

    if archived:
        checkStates = ["normal-archived", "rejected-archived", "aborted-archived"]
        jobInfoFlag = False
    else:
        checkStates = WMStatsReader.ACTIVE_STATUS
        jobInfoFlag = True
    requests = WMStats.getRequestByStatus(checkStates, jobInfoFlag=jobInfoFlag)

    requestCollection = RequestInfoCollection(requests)
    result = requestCollection.getJSONData()
    requestsDict = requestCollection.getData()
    log("INFO: %s: Total %s requests retrieved\n" % (funcName, len(result)))

    report = {}
    nMCMCalls = 0
    with McM(cert=mcmCert, key=mcmKey, url=mcmUrl, tmpDir=tmpDir) as mcm:
        for wf in result.keys():

            # Store a copy of the CouchDB document so we can compare later before updating
            if couchdb.documentExists(wf):
                oldCouchDoc = couchdb.document(wf)
                wfExists = True
            else:
                oldCouchDoc = CouchDoc(id=wf)
                wfExists = False

            newCouchDoc = copy.deepcopy(oldCouchDoc)
            ancientCouchDoc = copy.deepcopy(oldCouchDoc)
            report[wf] = oldCouchDoc
            # FIXME: remove report, only have two instances of couchDoc

            if not oldCouchDoc.has_key("filterEfficiency") or not oldCouchDoc.has_key("runWhiteList"):
                runWhiteList = []
                filterEfficiency = None
                try:
                    # log("DEBUG: Looking up %s in ReqMgr" % wf)
                    rmDoc = reqMgr.document(wf)
                    runWhiteList = rmDoc.get("RunWhiteList", [])
                    filterEfficiency = rmDoc.get("FilterEfficiency", None)
                except:
                    pass  # ReqMgr no longer has the workflow
                report[wf].update({"filterEfficiency": filterEfficiency, "runWhiteList": runWhiteList})

            if not oldCouchDoc.has_key("mcmTotalEvents") or not oldCouchDoc.has_key("mcmApprovalTime"):
                prepID = oldCouchDoc.get("prepID", None)
                if prepID and nMCMCalls <= maxMCMCalls:
                    nMCMCalls += 1
                    mcmHistory = mcm.getHistory(prepID=prepID)
                    mcmRequest = mcm.getRequest(prepID=prepID)
                    report[wf].update({"mcmTotalEvents": mcmRequest.get("total_events", "Unknown")})
                    if not oldCouchDoc.has_key("mcmApprovalTime"):
                        report[wf].update({"mcmApprovalTime": "Unknown"})
                    for entry in mcmHistory:
                        if entry["action"] == "set status" and entry["step"] == "announced":
                            dateString = entry["updater"]["submission_date"]
                            dt = datetime.strptime(dateString, "%Y-%m-%d-%H-%M")
                            report[wf].update({"mcmApprovalTime": time.mktime(dt.timetuple())})

            # Basic parameters of the workflow
            priority = requests[wf]["priority"]
            requestType = requests[wf]["request_type"]
            targetLumis = requests[wf].get("input_lumis", 0)
            targetEvents = requests[wf].get("input_events", 0)
            campaign = requests[wf]["campaign"]
            prep_id = requests[wf].get("prep_id", None)
            outputdatasets = requests[wf].get("outputdatasets", [])

            # Can be an empty list, full list, empty string, or non-empty string!
            inputdataset = requests[wf]["inputdataset"]
            if isinstance(inputdataset, (list,)):
                if inputdataset:
                    inputdataset = inputdataset[0]
                else:
                    inputdataset = ""

            outputTier = "Unknown"
            try:
                outputTiers = []
                for ds in outputdatasets:
                    if type(ds) == list:
                        outputTiers.append(ds[0].split("/")[-1])
                    else:
                        outputTiers.append(ds.split("/")[-1])
            except:
                log("ERROR: Could not decode outputdatasets: %s" % outputdatasets)  # Sometimes is a list of lists, not just a list. Bail

            if inputdataset:
                inputTier = inputdataset.split("/")[-1]
                if inputTier in ["GEN"]:
                    outputTier = "LHE"
                elif inputTier in ["RAW", "RECO"]:
                    outputTier = "AOD"
                elif inputTier in ["GEN-SIM"]:
                    outputTier = "AODSIM"
                elif "AODSIM" in outputTiers:
                    outputTier = "AODSIM"
            else:
                if len(outputTiers) == 1 and "GEN" in outputTiers:
                    if "STEP0ATCERN" in wf:
                        outputTier = "STEP0"
                    else:
                        outputTier = "FullGen"
                elif "GEN-SIM" in outputTiers and "AODSIM" in outputTiers and requestType == "TaskChain":
                    outputTier = "RelVal"
                elif "RECO" in outputTiers and requestType == "TaskChain":
                    outputTier = "RelVal"
                elif "GEN-SIM" in outputTiers:
                    outputTier = "GEN-SIM"
                elif "AODSIM" in outputTiers:
                    outputTier = "AODSIM"
                elif "RECO" in outputTiers:
                    outputTier = "AOD"
                elif "AOD" in outputTiers:
                    outputTier = "AOD"
                else:
                    outputTier = "GEN-SIM"

            # Calculate completion ratios for events and lumi sections, take minimum for all datasets
            eventPercent = 200
            lumiPercent = 200
            datasetReports = requestsDict[wf].getProgressSummaryByOutputDataset()
            for dataset in datasetReports:
                dsr = datasetReports[dataset].getReport()
                events = dsr.get("events", 0)
                lumis = dsr.get("totalLumis", 0)
                if targetLumis:
                    lumiPercent = min(lumiPercent, lumis / targetLumis * 100)
                if targetEvents:
                    eventPercent = min(eventPercent, events / targetEvents * 100)
            if eventPercent > 100:
                eventPercent = 0
            if lumiPercent > 100:
                lumiPercent = 0

            # Sum up all jobs across agents to see if we've run the first, last
            successJobs = 0
            totalJobs = 0
            for agent in result[wf]:
                jobs = result[wf][agent]
                successJobs += jobs["sucess"]
                totalJobs += jobs["created"]
            try:
                if totalJobs and not report[wf].get("firstJobTime", None):
                    report[wf].update({"firstJobTime": int(time.time())})
                if totalJobs and successJobs == totalJobs and not report[wf].get("lastJobTime", None):
                    report[wf].update({"lastJobTime": int(time.time())})
            except:
                pass

            # Figure out current status of workflow and transition times
            finalStatus = None
            newTime = None
            approvedTime = None
            assignedTime = None
            acquireTime = None
            completedTime = None
            closeoutTime = None
            announcedTime = None
            archivedTime = None
            requestDate = None

            for status in requests[wf]["request_status"]:
                finalStatus = status["status"]
                if status["status"] == "new":
                    newTime = status["update_time"]
                if status["status"] == "assignment-approved":
                    approvedTime = status["update_time"]
                if status["status"] == "assigned":
                    assignedTime = status["update_time"]
                if status["status"] == "completed":
                    completedTime = status["update_time"]
                if status["status"] == "acquired":
                    acquireTime = status["update_time"]
                if status["status"] == "closed-out":
                    closeoutTime = status["update_time"]
                if status["status"] == "announced":
                    announcedTime = status["update_time"]
                if status["status"] == "normal-archived":
                    archivedTime = status["update_time"]

            # Build or modify the report dictionary for the WF
            report.setdefault(wf, {})

            if approvedTime and not report[wf].get("approvedTime", None):
                report[wf].update({"approvedTime": approvedTime})
            if assignedTime and not report[wf].get("assignedTime", None):
                report[wf].update({"assignedTime": assignedTime})
            if acquireTime and not report[wf].get("acquireTime", None):
                report[wf].update({"acquireTime": acquireTime})
            if closeoutTime and not report[wf].get("closeoutTime", None):
                report[wf].update({"closeoutTime": closeoutTime})
            if announcedTime and not report[wf].get("announcedTime", None):
                report[wf].update({"announcedTime": announcedTime})
            if completedTime and not report[wf].get("completedTime", None):
                report[wf].update({"completedTime": completedTime})
            if newTime and not report[wf].get("newTime", None):
                report[wf].update({"newTime": newTime})
            if archivedTime and not report[wf].get("archivedTime", None):
                report[wf].update({"archivedTime": archivedTime})

            try:
                dt = requests[wf]["request_date"]
                requestDate = "%4.4d-%2.2d-%2.2d %2.2d:%2.2d:%2.2d" % tuple(dt)
                report[wf].update({"requestDate": requestDate})
            except:
                pass

            report[wf].update({"priority": priority, "status": finalStatus, "type": requestType})
            report[wf].update({"totalLumis": targetLumis, "totalEvents": targetEvents})
            report[wf].update({"campaign": campaign, "prepID": prep_id, "outputTier": outputTier})
            report[wf].update({"outputDatasets": outputdatasets, "inputDataset": inputdataset})

            report[wf].setdefault("lumiPercents", {})
            report[wf].setdefault("eventPercents", {})
            lumiProgress = 0
            eventProgress = 0
            for percentage in [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98, 99, 100]:
                percent = str(percentage)
                percentReported = report[wf]["lumiPercents"].get(percent, None)
                if not percentReported and lumiPercent >= percentage:
                    report[wf]["lumiPercents"][percent] = int(time.time())
                if lumiPercent >= percentage:
                    lumiProgress = percentage

                percentReported = report[wf]["eventPercents"].get(percent, None)
                if not percentReported and eventPercent >= percentage:
                    report[wf]["eventPercents"][percent] = int(time.time())
                if eventPercent >= percentage:
                    eventProgress = percentage

            report[wf].update({"eventProgress": eventProgress, "lumiProgress": lumiProgress})

            newCouchDoc.update(report[wf])

            # Queue the updated document for addition if it's changed.
            if ancientCouchDoc != newCouchDoc:
                if wfExists:
                    # log("DEBUG: Workflow updated: %s" % wf)
                    pass
                else:
                    # log("DEBUG: Workflow created: %s" % wf)
                    pass
                try:
                    newCouchDoc["updateTime"] = int(time.time())
                    report[wf]["updateTime"] = int(time.time())
                    cjson.encode(newCouchDoc)  # Make sure it encodes before trying to queue
                    couchdb.queue(newCouchDoc)
                except:
                    log("ERROR: Failed to queue document:%s \n" % pprint.pprint(newCouchDoc))

    log("INFO: %s: Finished getting job. wait for the next Cycle" % funcName)
    # Commit all changes to CouchDB
    couchdb.commit()

def __init__(self, couchURL, dbName=None):
    # if dbName not given assume we have to split
    if not dbName:
        couchURL, dbName = splitCouchServiceURL(couchURL)
    self.server = CouchServer(couchURL)
    self.db = self.server.connectDatabase(dbName, create=False)

def gatherWMDataMiningStats(wmstatsUrl, reqmgrUrl, wmMiningUrl, mcmUrl, mcmCert, mcmKey, tmpDir,
                            archived=False, log=logging.info):
    server, database = splitCouchServiceURL(wmMiningUrl)
    analyticsServer = CouchServer(server)
    couchdb = analyticsServer.connectDatabase(database)

    WMStats = WMStatsReader(wmstatsUrl, reqmgrUrl, reqdbCouchApp="ReqMgr")

    reqMgrServer, reqMgrDB = splitCouchServiceURL(reqmgrUrl)
    reqMgr = CouchServer(reqMgrServer).connectDatabase(reqMgrDB, False)

    if archived:
        funcName = "Archived Requests"
    else:
        funcName = "Active Requests"

    log.info("%s: Getting job information from %s and %s. Please wait." % (funcName, wmstatsUrl, reqmgrUrl))

    if archived:
        checkStates = ['normal-archived', 'rejected-archived', 'aborted-archived']
        jobInfoFlag = False
    else:
        checkStates = WMStatsReader.ACTIVE_STATUS
        jobInfoFlag = True
    requests = WMStats.getRequestByStatus(checkStates, jobInfoFlag=jobInfoFlag, legacyFormat=True)

    requestCollection = RequestInfoCollection(requests)
    result = requestCollection.getJSONData()
    requestsDict = requestCollection.getData()
    log.info("%s: Total %s requests retrieved\n" % (funcName, len(result)))

    report = {}
    nMCMCalls = 0
    with McM(cert=mcmCert, key=mcmKey, url=mcmUrl, tmpDir=tmpDir) as mcm:
        for wf in result.keys():

            # Store a copy of the CouchDB document so we can compare later before updating
            if couchdb.documentExists(wf):
                oldCouchDoc = couchdb.document(wf)
                wfExists = True
            else:
                oldCouchDoc = CouchDoc(id=wf)
                wfExists = False

            newCouchDoc = copy.deepcopy(oldCouchDoc)
            ancientCouchDoc = copy.deepcopy(oldCouchDoc)
            report[wf] = oldCouchDoc
            # FIXME: remove report, only have two instances of couchDoc

            if 'filterEfficiency' not in oldCouchDoc or 'runWhiteList' not in oldCouchDoc:
                runWhiteList = []
                filterEfficiency = None
                try:
                    # log.debug("Looking up %s in ReqMgr" % wf)
                    rmDoc = reqMgr.document(wf)
                    runWhiteList = rmDoc.get('RunWhiteList', [])
                    filterEfficiency = rmDoc.get('FilterEfficiency', None)
                except:
                    pass  # ReqMgr no longer has the workflow
                report[wf].update({'filterEfficiency': filterEfficiency, 'runWhiteList': runWhiteList})

            if ('mcmTotalEvents' not in oldCouchDoc or
                    'mcmApprovalTime' not in oldCouchDoc or
                    oldCouchDoc.get('mcmTotalEvents', 'Unknown') == 'Unknown' or
                    oldCouchDoc.get('mcmApprovalTime', 'Unknown') == 'Unknown'):
                prepID = oldCouchDoc.get('prepID', None)
                if prepID and nMCMCalls <= maxMCMCalls:
                    log.info("Trying to update McM info for %s, PREPID %s" % (wf, prepID))
                    # Get information from McM. Don't call too many times, can take a long time
                    nMCMCalls += 1
                    try:
                        mcmHistory = mcm.getHistory(prepID=prepID)
                        if 'mcmApprovalTime' not in oldCouchDoc:
                            report[wf].update({'mcmApprovalTime': 'NoMcMData'})
                        found = False
                        for entry in mcmHistory:
                            if entry['action'] == 'set status' and entry['step'] == 'announced':
                                dateString = entry['updater']['submission_date']
                                dt = datetime.strptime(dateString, '%Y-%m-%d-%H-%M')
                                report[wf].update({'mcmApprovalTime': time.mktime(dt.timetuple())})
                                found = True
                        if not found:
                            log.error("History found but no approval time for %s" % wf)
                    except McMNoDataError:
                        log.error("Setting NoMcMData for %s" % wf)
                        report[wf].update({'mcmApprovalTime': 'NoMcMData'})
                    except (RuntimeError, IOError):
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting history from McM for PREP ID %s. May be transient and/or SSO problem." %
                                  (exc_type, prepID))
                    except:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting history from McM for PREP ID %s. Unknown error." %
                                  (exc_type, prepID))
                    try:
                        mcmRequest = mcm.getRequest(prepID=prepID)
                        report[wf].update({'mcmTotalEvents': mcmRequest.get('total_events', 'NoMcMData')})
                    except (RuntimeError, IOError):
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting request from McM for PREP ID %s. May be transient and/or SSO problem." %
                                  (exc_type, prepID))
                    except:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting request from McM for PREP ID %s. Unknown error." % (exc_type, prepID))

            # Basic parameters of the workflow
            priority = requests[wf]['priority']
            requestType = requests[wf]['request_type']
            targetLumis = requests[wf].get('input_lumis', 0)
            targetEvents = requests[wf].get('input_events', 0)
            campaign = requests[wf]['campaign']
            prep_id = requests[wf].get('prep_id', None)
            outputdatasets = requests[wf].get('outputdatasets', [])
            statuses = requests[wf].get('request_status', [])

            if not statuses:
                log.error("Could not find any status from workflow: %s" % wf)  # Should not happen but it does.

            # Remove a single task_ from the start of PREP ID if it exists
            if prep_id and prep_id.startswith('task_'):
                prep_id.replace('task_', '', 1)

            # Can be an empty list, full list, empty string, or non-empty string!
            inputdataset = requests[wf].get('inputdataset', "")
            if isinstance(inputdataset, list):
                if inputdataset:
                    inputdataset = inputdataset[0]
                else:
                    inputdataset = ''

            outputTier = 'Unknown'
            try:
                outputTiers = []
                for ds in outputdatasets:
                    if isinstance(ds, list):
                        outputTiers.append(ds[0].split('/')[-1])
                    else:
                        outputTiers.append(ds.split('/')[-1])
            except:
                log.error("Could not decode outputdatasets: %s" % outputdatasets)  # Sometimes is a list of lists, not just a list. Bail

            if inputdataset:
                inputTier = inputdataset.split('/')[-1]
                if inputTier in ['GEN']:
                    outputTier = 'LHE'
                elif inputTier in ['RAW', 'RECO']:
                    outputTier = 'AOD'
                elif inputTier in ['GEN-SIM']:
                    outputTier = 'AODSIM'
                elif 'AODSIM' in outputTiers:
                    outputTier = 'AODSIM'
            else:
                if len(outputTiers) == 1 and 'GEN' in outputTiers:
                    if 'STEP0ATCERN' in wf:
                        outputTier = 'STEP0'
                    else:
                        outputTier = 'FullGen'
                elif 'GEN-SIM' in outputTiers and 'AODSIM' in outputTiers and requestType == 'TaskChain':
                    outputTier = 'RelVal'
                elif 'RECO' in outputTiers and requestType == 'TaskChain':
                    outputTier = 'RelVal'
                elif 'GEN-SIM' in outputTiers:
                    outputTier = 'GEN-SIM'
                elif 'AODSIM' in outputTiers:
                    outputTier = 'AODSIM'
                elif 'RECO' in outputTiers:
                    outputTier = 'AOD'
                elif 'AOD' in outputTiers:
                    outputTier = 'AOD'
                else:
                    outputTier = 'GEN-SIM'

            # Calculate completion ratios for events and lumi sections, take minimum for all datasets
            eventPercent = 200
            lumiPercent = 200
            datasetReports = requestsDict[wf].getProgressSummaryByOutputDataset()
            for dataset in datasetReports:
                dsr = datasetReports[dataset].getReport()
                events = dsr.get('events', 0)
                lumis = dsr.get('totalLumis', 0)
                if targetLumis:
                    lumiPercent = min(lumiPercent, lumis/targetLumis*100)
                if targetEvents:
                    eventPercent = min(eventPercent, events/targetEvents*100)
            if eventPercent > 100:
                eventPercent = 0
            if lumiPercent > 100:
                lumiPercent = 0

            # Sum up all jobs across agents to see if we've run the first, last
            successJobs = 0
            totalJobs = 0
            for agent in result[wf]:
                jobs = result[wf][agent]
                successJobs += jobs['sucess']
                totalJobs += jobs['created']
            try:
                if totalJobs and not report[wf].get('firstJobTime', None):
                    report[wf].update({'firstJobTime': int(time.time())})
                if totalJobs and successJobs == totalJobs and not report[wf].get('lastJobTime', None):
                    report[wf].update({'lastJobTime': int(time.time())})
            except:
                pass

            # Figure out current status of workflow and transition times
            finalStatus = None
            newTime = None
            approvedTime = None
            assignedTime = None
            acquireTime = None
            completedTime = None
            closeoutTime = None
            announcedTime = None
            archivedTime = None
            requestDate = None

            for status in statuses:
                finalStatus = status['status']
                if status['status'] == 'new':
                    newTime = status['update_time']
                if status['status'] == 'assignment-approved':
                    approvedTime = status['update_time']
                if status['status'] == 'assigned':
                    assignedTime = status['update_time']
                if status['status'] == 'completed':
                    completedTime = status['update_time']
                if status['status'] == 'acquired':
                    acquireTime = status['update_time']
                if status['status'] == 'closed-out':
                    closeoutTime = status['update_time']
                if status['status'] == 'announced':
                    announcedTime = status['update_time']
                if status['status'] == 'normal-archived':
                    archivedTime = status['update_time']

            # Build or modify the report dictionary for the WF
            report.setdefault(wf, {})

            if approvedTime and not report[wf].get('approvedTime', None):
                report[wf].update({'approvedTime': approvedTime})
            if assignedTime and not report[wf].get('assignedTime', None):
                report[wf].update({'assignedTime': assignedTime})
            if acquireTime and not report[wf].get('acquireTime', None):
                report[wf].update({'acquireTime': acquireTime})
            if closeoutTime and not report[wf].get('closeoutTime', None):
                report[wf].update({'closeoutTime': closeoutTime})
            if announcedTime and not report[wf].get('announcedTime', None):
                report[wf].update({'announcedTime': announcedTime})
            if completedTime and not report[wf].get('completedTime', None):
                report[wf].update({'completedTime': completedTime})
            if newTime and not report[wf].get('newTime', None):
                report[wf].update({'newTime': newTime})
            if archivedTime and not report[wf].get('archivedTime', None):
                report[wf].update({'archivedTime': archivedTime})

            try:
                dt = requests[wf]['request_date']
                requestDate = '%4.4d-%2.2d-%2.2d %2.2d:%2.2d:%2.2d' % tuple(dt)
                report[wf].update({'requestDate': requestDate})
            except:
                pass

            report[wf].update({'priority': priority, 'status': finalStatus, 'type': requestType})
            report[wf].update({'totalLumis': targetLumis, 'totalEvents': targetEvents})
            report[wf].update({'campaign': campaign, 'prepID': prep_id, 'outputTier': outputTier})
            report[wf].update({'outputDatasets': outputdatasets, 'inputDataset': inputdataset})

            report[wf].setdefault('lumiPercents', {})
            report[wf].setdefault('eventPercents', {})
            lumiProgress = 0
            eventProgress = 0
            for percentage in [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98, 99, 100]:
                percent = str(percentage)
                percentReported = report[wf]['lumiPercents'].get(percent, None)
                if not percentReported and lumiPercent >= percentage:
                    report[wf]['lumiPercents'][percent] = int(time.time())
                if lumiPercent >= percentage:
                    lumiProgress = percentage

                percentReported = report[wf]['eventPercents'].get(percent, None)
                if not percentReported and eventPercent >= percentage:
                    report[wf]['eventPercents'][percent] = int(time.time())
                if eventPercent >= percentage:
                    eventProgress = percentage

            report[wf].update({'eventProgress': eventProgress, 'lumiProgress': lumiProgress})

            newCouchDoc.update(report[wf])

            # Queue the updated document for addition if it's changed.
            if ancientCouchDoc != newCouchDoc:
                if wfExists:
                    # log.debug("Workflow updated: %s" % wf)
                    pass
                else:
                    # log.debug("Workflow created: %s" % wf)
                    pass
                try:
                    newCouchDoc['updateTime'] = int(time.time())
                    report[wf]['updateTime'] = int(time.time())
                    cjson.encode(newCouchDoc)  # Make sure it encodes before trying to queue
                    couchdb.queue(newCouchDoc)
                except:
                    log.error("Failed to queue document:%s \n" % pprint.pprint(newCouchDoc))

    log.info("%s: Finished getting job. wait for the next Cycle" % funcName)
    # Commit all changes to CouchDB
    couchdb.commit()
