def main():
    """
    Trim the trailing status entry from request documents whose
    second-to-last status is 'normal-archived'.

    Every workflow the reader reports for the "Processing Done" status is
    loaded from the summary database. When the entry before the last one in
    its 'request_status' list has status 'normal-archived', the last entry
    is dropped and the document is queued. Queued documents are committed
    once all workflows have been inspected.
    """
    couchUrl = "http://dummy.cern.ch:5984"
    dbName = "wmagent_summary"

    reader = WMStatsReader(couchUrl, dbName)
    wmstats = Database(dbName, couchUrl)

    suspiciousWorkflows = reader.workflowsByStatus(["Processing Done"], stale = False)
    for entry in suspiciousWorkflows:
        requestDoc = wmstats.document(entry)
        statusList = requestDoc['request_status']
        # A history with fewer than two entries has no second-to-last
        # element; indexing [-2] would raise IndexError and abort the run.
        if len(statusList) < 2:
            continue
        if statusList[-2]['status'] == 'normal-archived':
            requestDoc['request_status'] = statusList[:-1]
            wmstats.queue(requestDoc)
    wmstats.commit()
def getAssignedApprovedWork():
    """
    Queue the 'assignment-approved' requests into a workqueue built from
    the workqueue configuration and count the jobs of their elements.

    Requests whose name contains 'TEST' are not queued, but they still get
    an entry in the result.

    Returns a dictionary mapping each request name to the sum of the 'Jobs'
    values of the workqueue elements found for that request.
    """
    reader = WMStatsReader(wmstatsEndpoint)
    pendingRequests = reader.workflowsByStatus(['assignment-approved'], stale = False)

    localQueue = queueFromConfig(queueConfigFromConfigObject(workqueueConfig()))

    # First pass: hand every non-test request spec to the queue
    for name in pendingRequests:
        if 'TEST' not in name:
            specUrl = '%s/reqmgr_workload_cache/%s/spec' % (externalCouchDb, name)
            localQueue.queueWork(specUrl, name, 'notreallyateam')

    # Second pass: total the jobs of the elements found for each request
    jobsPerRequest = {}
    for name in pendingRequests:
        elements = localQueue.backend.getElementsForWorkflow(name)
        jobsPerRequest[name] = sum(element['Jobs'] for element in elements)
    return jobsPerRequest
class CleanCouchPoller(BaseWorkerThread):
    """
    Cleans up local couch db according to the given condition.
    1. Cleans local couch db when request is completed and reported to central db.
       This will clean up local couchdb, local summary db, local queue

    2. Cleans old couchdoc which is created older than the time threshold
    """
    def __init__(self, config):
        """
        Initialize config
        """
        BaseWorkerThread.__init__(self)
        # keep the configuration; the couch connections are opened in setup()
        self.config = config

    def setup(self, parameters):
        """
        Called at startup.

        Opens the local and central WMStats connections and the job/fwjr
        couch databases. `parameters` is not used here.
        """
        # set the connection for local couchDB call
        self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver,
                                                   'useReqMgrForCompletionCheck', True)
        self.wmstatsCouchDB = WMStatsWriter(self.config.TaskArchiver.localWMStatsURL)
        self.centralCouchDBReader = WMStatsReader(self.config.TaskArchiver.centralWMStatsURL)

        if self.useReqMgrForCompletionCheck:
            self.deletableStates = ["announced"]
            self.centralCouchDBWriter = WMStatsWriter(self.config.TaskArchiver.centralWMStatsURL)
        else:
            # Tier0 case: status updates are written to the local WMStats writer
            self.deletableStates = ["completed"]
            self.centralCouchDBWriter = self.wmstatsCouchDB

        jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
        jobDBName = self.config.JobStateMachine.couchDBName
        self.jobCouchdb = CouchServer(jobDBurl)
        self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName)
        self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)

    def algorithm(self, parameters):
        """
        get information from wmbs, workqueue and local couch

        Deletes old request docs, then cleans the local couch data of
        workflows in the deletable and 'aborted-completed' states and marks
        them archived. Any exception is logged and the cycle ends; nothing
        is re-raised. `parameters` is not used here.
        """
        try:
            logging.info("Cleaning up the old request docs")
            report = self.wmstatsCouchDB.deleteOldDocs(self.config.TaskArchiver.DataKeepDays)
            logging.info("%s docs deleted", report)
            logging.info("getting complete and announced requests")

            deletableWorkflows = self.centralCouchDBReader.workflowsByStatus(self.deletableStates)

            logging.info("Ready to delete %s", deletableWorkflows)
            for workflowName in deletableWorkflows:
                # only update the status when the local cleanup reports success
                if self.cleanAllLocalCouchDB(workflowName):
                    self.centralCouchDBWriter.updateRequestStatus(workflowName, "normal-archived")
                    logging.info("status updated to normal-archived %s", workflowName)

            abortedWorkflows = self.centralCouchDBReader.workflowsByStatus(["aborted-completed"])
            logging.info("Ready to delete aborted %s", abortedWorkflows)
            for workflowName in abortedWorkflows:
                if self.cleanAllLocalCouchDB(workflowName):
                    self.centralCouchDBWriter.updateRequestStatus(workflowName, "aborted-archived")
                    logging.info("status updated to aborted-archived %s", workflowName)

            # TODO: the following code is temporary - remove after production archived data is cleaned
            removableWorkflows = self.centralCouchDBReader.workflowsByStatus(["archived"])

            logging.info("Ready to delete %s from wmagent_summary", removableWorkflows)
            for workflowName in removableWorkflows:
                logging.info("Deleting %s from WMAgent Summary Couch", workflowName)
                report = self.deleteWorkflowFromJobCouch(workflowName, "WMStats")
                logging.info("%s docs deleted from wmagent_summary", report)
                # only update the status when delete is successful
                # TODO: need to handle the case when there are multiple agents running the same request.
                if report["status"] == "ok":
                    self.centralCouchDBWriter.updateRequestStatus(workflowName, "normal-archived")
                    logging.info("status updated to normal-archived from archived (this is temp solution for production) %s",
                                 workflowName)

        except Exception as ex:
            logging.error(str(ex))
            logging.error("Error occurred, will try again next cycle")
class WMStatsTest(unittest.TestCase):
    """
    Tests WMStatsWriter and WMStatsReader against the 'wmstats_t' couch
    database set up with the WMStats couch app.
    """
    def setUp(self):
        """
        _setUp_

        Create the test couch database and the writer/reader pair used by
        the tests.
        """
        self.schema = []
        self.couchApps = ["WMStats"]
        self.testInit = TestInitCouchApp('WorkQueueServiceTest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema, useDefault=False)
        dbName = 'wmstats_t'
        self.testInit.setupCouch(dbName, *self.couchApps)
        self.wmstatsWriter = WMStatsWriter(self.testInit.couchUrl, dbName)
        self.wmstatsReader = WMStatsReader(self.testInit.couchUrl, dbName)
        # override the reader's default stale option with an empty dict
        self.wmstatsReader.defaultStale = {}

    def tearDown(self):
        """
        _tearDown_

        Tear down the couch database created in setUp.
        """
        self.testInit.tearDownCouch()

    def testWMStatsWriter(self):
        """
        Insert one request, then check the writer's return values for an
        existing and a non-existing request and the reader's views of it.
        """
        schema = generate_reqmgr_schema()
        requestName = schema[0]['RequestName']
        notFound = 'ERROR: request not found - not_exist_schema'

        self.assertEqual(self.wmstatsWriter.insertRequest(schema[0]), 'OK', 'insert fail')
        self.assertEqual(self.wmstatsWriter.updateRequestStatus(requestName, "failed"),
                         'OK', 'update fail')
        self.assertEqual(self.wmstatsWriter.updateRequestStatus("not_exist_schema", "assigned"),
                         notFound)
        self.assertEqual(self.wmstatsWriter.updateTeam(requestName, 'teamA'),
                         'OK', 'update fail')
        self.assertEqual(self.wmstatsWriter.updateTeam("not_exist_schema", 'teamA'),
                         notFound)

        totalStats = {
            'total_jobs': 100,
            'input_events': 1000,
            'input_lumis': 1234,
            'input_num_files': 5,
        }
        # the first call inserts the stats, the second one updates them
        self.assertEqual(self.wmstatsWriter.insertTotalStats(requestName, totalStats),
                         'INSERTED', 'update fail')
        self.assertEqual(self.wmstatsWriter.insertTotalStats(requestName, totalStats),
                         'UPDATED', 'update fail')
        self.assertEqual(self.wmstatsWriter.insertTotalStats("not_exist_schema", totalStats),
                         notFound)

        spec1 = newWorkload(requestName)
        production = spec1.newTask("Production")
        production.setTaskType("Merge")
        production.setSiteWhitelist(['TEST_SITE'])
        self.assertEqual(self.wmstatsWriter.updateFromWMSpec(spec1), 'OK', 'update fail')

        spec2 = newWorkload("not_exist_schema")
        production = spec2.newTask("Production")
        production.setTaskType("Merge")
        self.assertEqual(self.wmstatsWriter.updateFromWMSpec(spec2), notFound)

        # keys() is wrapped in list() so the comparison against a list also
        # holds where keys() returns a view rather than a list
        requests = self.wmstatsReader.getRequestByStatus(["failed"], jobInfoFlag=False)
        self.assertEqual(list(requests.keys()), [requestName])

        requestCollection = RequestInfoCollection(requests)
        result = requestCollection.getJSONData()
        self.assertEqual(list(result.keys()), [requestName])

        requests = self.wmstatsReader.getActiveData()
        self.assertEqual(list(requests.keys()), [requestName])

        requests = self.wmstatsReader.workflowsByStatus(["failed"])
        self.assertEqual(requests, [requestName])
class CleanCouchPoller(BaseWorkerThread):
    """
    Cleans up local couch db according to the given condition.
    1. Cleans local couch db when request is completed and reported to central db.
       This will clean up local couchdb, local summary db, local queue

    2. Cleans old couchdoc which is created older than the time threshold
    """
    def __init__(self, config):
        """
        Initialize config
        """
        BaseWorkerThread.__init__(self)
        # keep the configuration; the couch connections are opened in setup()
        self.config = config

    def setup(self, parameters):
        """
        Called at startup.

        Opens the local and central WMStats connections, the ReqMgr service
        (when ReqMgr is used for the completion check) and the job, fwjr and
        summary-stats couch databases. `parameters` is not used here.
        """
        # set the connection for local couchDB call
        self.useReqMgrForCompletionCheck = getattr(
            self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)
        self.wmstatsCouchDB = WMStatsWriter(
            self.config.TaskArchiver.localWMStatsURL)
        self.centralCouchDBReader = WMStatsReader(
            self.config.TaskArchiver.centralWMStatsURL)

        if self.useReqMgrForCompletionCheck:
            self.deletableStates = ["announced"]
            self.centralCouchDBWriter = WMStatsWriter(
                self.config.TaskArchiver.centralWMStatsURL)
            self.reqmgrSvc = RequestManager(
                {'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})
        else:
            # Tier0 case: status updates are written to the local WMStats writer
            self.deletableStates = ["completed"]
            self.centralCouchDBWriter = self.wmstatsCouchDB

        jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
        jobDBName = self.config.JobStateMachine.couchDBName
        self.jobCouchdb = CouchServer(jobDBurl)
        self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName)
        self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)

        statSummaryDBName = self.config.JobStateMachine.summaryStatsDBName
        self.statsumdatabase = self.jobCouchdb.connectDatabase(
            statSummaryDBName)

    def algorithm(self, parameters):
        """
        get information from wmbs, workqueue and local couch

        Deletes old request docs, then archives the workflows found in the
        deletable, 'aborted-completed' and 'rejected' states. Any exception
        is logged and the cycle ends; nothing is re-raised. `parameters` is
        not used here.
        """
        try:
            logging.info("Cleaning up the old request docs")
            report = self.wmstatsCouchDB.deleteOldDocs(
                self.config.TaskArchiver.DataKeepDays)
            logging.info("%s docs deleted", report)
            logging.info("getting complete and announced requests")

            deletableWorkflows = self.centralCouchDBReader.workflowsByStatus(
                self.deletableStates)
            logging.info("Ready to archive normal %s workflows",
                         len(deletableWorkflows))
            numUpdated = self.archiveWorkflows(deletableWorkflows,
                                               "normal-archived")
            logging.info("archive normal %s workflows", numUpdated)

            abortedWorkflows = self.centralCouchDBReader.workflowsByStatus(
                ["aborted-completed"])
            logging.info("Ready to archive aborted %s workflows",
                         len(abortedWorkflows))
            numUpdated = self.archiveWorkflows(abortedWorkflows,
                                               "aborted-archived")
            logging.info("archive aborted %s workflows", numUpdated)

            rejectedWorkflows = self.centralCouchDBReader.workflowsByStatus(
                ["rejected"])
            logging.info("Ready to archive rejected %s workflows",
                         len(rejectedWorkflows))
            numUpdated = self.archiveWorkflows(rejectedWorkflows,
                                               "rejected-archived")
            logging.info("archive rejected %s workflows", numUpdated)

        except Exception as ex:
            logging.error(str(ex))
            logging.error("Error occurred, will try again next cycle")
class CleanCouchPoller(BaseWorkerThread):
    """
    Cleans up local couch db according to the given condition.
    1. Cleans local couch db when request is completed and reported to central db.
       This will clean up local couchdb, local summary db, local queue

    2. Cleans old couchdoc which is created older than the time threshold
    """
    def __init__(self, config):
        """
        Initialize config
        """
        BaseWorkerThread.__init__(self)
        # keep the configuration; the couch connections are opened in setup()
        self.config = config

    def setup(self, parameters):
        """
        Called at startup.

        Opens the local and central WMStats connections and the job/fwjr
        couch databases. `parameters` is not used here.
        """
        # set the connection for local couchDB call
        self.useReqMgrForCompletionCheck = getattr(
            self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)
        self.wmstatsCouchDB = WMStatsWriter(
            self.config.TaskArchiver.localWMStatsURL)
        self.centralCouchDBReader = WMStatsReader(
            self.config.TaskArchiver.centralWMStatsURL)

        if self.useReqMgrForCompletionCheck:
            self.deletableStates = ["announced"]
            self.centralCouchDBWriter = WMStatsWriter(
                self.config.TaskArchiver.centralWMStatsURL)
        else:
            # Tier0 case: status updates are written to the local WMStats writer
            self.deletableStates = ["completed"]
            self.centralCouchDBWriter = self.wmstatsCouchDB

        jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
        jobDBName = self.config.JobStateMachine.couchDBName
        self.jobCouchdb = CouchServer(jobDBurl)
        self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName)
        self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)

    def algorithm(self, parameters):
        """
        get information from wmbs, workqueue and local couch

        Deletes old request docs, then cleans the local couch data of
        workflows in the deletable and 'aborted-completed' states and marks
        them archived. Any exception is logged and the cycle ends; nothing
        is re-raised. `parameters` is not used here.
        """
        try:
            logging.info("Cleaning up the old request docs")
            report = self.wmstatsCouchDB.deleteOldDocs(
                self.config.TaskArchiver.DataKeepDays)
            logging.info("%s docs deleted", report)
            logging.info("getting complete and announced requests")

            deletableWorkflows = self.centralCouchDBReader.workflowsByStatus(
                self.deletableStates)

            logging.info("Ready to delete %s", deletableWorkflows)
            for workflowName in deletableWorkflows:
                # only update the status when the local cleanup reports success
                if self.cleanAllLocalCouchDB(workflowName):
                    self.centralCouchDBWriter.updateRequestStatus(
                        workflowName, "normal-archived")
                    logging.info("status updated to normal-archived %s",
                                 workflowName)

            abortedWorkflows = self.centralCouchDBReader.workflowsByStatus(
                ["aborted-completed"])
            logging.info("Ready to delete aborted %s", abortedWorkflows)
            for workflowName in abortedWorkflows:
                if self.cleanAllLocalCouchDB(workflowName):
                    self.centralCouchDBWriter.updateRequestStatus(
                        workflowName, "aborted-archived")
                    logging.info("status updated to aborted-archived %s",
                                 workflowName)

            # TODO: the following code is temporary - remove after production archived data is cleaned
            removableWorkflows = self.centralCouchDBReader.workflowsByStatus(
                ["archived"])

            logging.info("Ready to delete %s from wmagent_summary",
                         removableWorkflows)
            for workflowName in removableWorkflows:
                logging.info("Deleting %s from WMAgent Summary Couch",
                             workflowName)
                report = self.deleteWorkflowFromJobCouch(
                    workflowName, "WMStats")
                logging.info("%s docs deleted from wmagent_summary", report)
                # only update the status when delete is successful
                # TODO: need to handle the case when there are multiple agents running the same request.
                if report["status"] == "ok":
                    self.centralCouchDBWriter.updateRequestStatus(
                        workflowName, "normal-archived")
                    logging.info(
                        "status updated to normal-archived from archived (this is temp solution for production) %s",
                        workflowName)

        except Exception as ex:
            logging.error(str(ex))
            logging.error("Error occurred, will try again next cycle")
class CleanCouchPoller(BaseWorkerThread):
    """
    Cleans up local couch db according to the given condition.
    1. Cleans local couch db when request is completed and reported to central db.
       This will clean up local couchdb, local summary db, local queue

    2. Cleans old couchdoc which is created older than the time threshold
    """
    def __init__(self, config):
        """
        Initialize config
        """
        BaseWorkerThread.__init__(self)
        # keep the configuration; the couch connections are opened in setup()
        self.config = config

    def setup(self, parameters):
        """
        Called at startup.

        Opens the local and central WMStats connections and the job/fwjr
        couch databases. `parameters` is not used here.
        """
        # set the connection for local couchDB call
        self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver,
                                                   'useReqMgrForCompletionCheck', True)
        self.wmstatsCouchDB = WMStatsWriter(self.config.TaskArchiver.localWMStatsURL)
        self.centralCouchDBWriter = WMStatsWriter(self.config.TaskArchiver.centralWMStatsURL)
        self.centralCouchDBReader = WMStatsReader(self.config.TaskArchiver.centralWMStatsURL)

        jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
        jobDBName = self.config.JobStateMachine.couchDBName
        self.jobCouchdb = CouchServer(jobDBurl)
        self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName)
        self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)

    def algorithm(self, parameters):
        """
        get information from wmbs, workqueue and local couch

        Deletes old request docs, then removes the JobDump and FWJRDump
        documents of every deletable workflow and sets its status to
        'archived'. Any exception is logged and the cycle ends; nothing is
        re-raised. `parameters` is not used here.
        """
        try:
            logging.info("Cleaning up the old request docs")
            report = self.wmstatsCouchDB.deleteOldDocs(self.config.TaskArchiver.DataKeepDays)
            logging.info("%s docs deleted", report)
            logging.info("getting complete and announced requests")

            # TODO: define what is deletable status. Also add the code to delete summary document,
            # request summary and job summary
            if self.useReqMgrForCompletionCheck:
                deletableStates = ["announced"]
            else:
                deletableStates = ["completed"]
            deletableWorkflows = self.centralCouchDBReader.workflowsByStatus(deletableStates)

            logging.info("Ready to delete %s", deletableWorkflows)
            for workflowName in deletableWorkflows:
                logging.info("Deleting %s from JobCouch", workflowName)

                report = self.deleteWorkflowFromJobCouch(workflowName, "JobDump")
                logging.info("%s docs deleted from JobDump", report)
                report = self.deleteWorkflowFromJobCouch(workflowName, "FWJRDump")
                logging.info("%s docs deleted from FWJRDump", report)

                self.centralCouchDBWriter.updateRequestStatus(workflowName, "archived")
                logging.info("status updated to archived %s", workflowName)

        except Exception as ex:
            logging.error(str(ex))
            logging.error("Error occurred, will try again next cycle")
class CleanCouchPoller(BaseWorkerThread):
    """
    Cleans up local couch db according to the given condition.
    1. Cleans local couch db when request is completed and reported to central db.
       This will clean up local couchdb, local summary db, local queue

    2. Cleans old couchdoc which is created older than the time threshold
    """
    def __init__(self, config):
        """
        Initialize config
        """
        BaseWorkerThread.__init__(self)
        # keep the configuration; the couch connections are opened in setup()
        self.config = config

    def setup(self, parameters):
        """
        Called at startup.

        Opens the local and central WMStats connections, the ReqMgr service
        (when ReqMgr is used for the completion check) and the job, fwjr and
        summary-stats couch databases. `parameters` is not used here.
        """
        # set the connection for local couchDB call
        self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver,
                                                   'useReqMgrForCompletionCheck', True)
        self.wmstatsCouchDB = WMStatsWriter(self.config.TaskArchiver.localWMStatsURL)
        self.centralCouchDBReader = WMStatsReader(self.config.TaskArchiver.centralWMStatsURL)

        if self.useReqMgrForCompletionCheck:
            self.deletableStates = ["announced"]
            self.centralCouchDBWriter = WMStatsWriter(self.config.TaskArchiver.centralWMStatsURL)
            self.reqmgrSvc = RequestManager({'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})
        else:
            # Tier0 case: status updates are written to the local WMStats writer
            self.deletableStates = ["completed"]
            self.centralCouchDBWriter = self.wmstatsCouchDB

        jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
        jobDBName = self.config.JobStateMachine.couchDBName
        self.jobCouchdb = CouchServer(jobDBurl)
        self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName)
        self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)

        statSummaryDBName = self.config.JobStateMachine.summaryStatsDBName
        self.statsumdatabase = self.jobCouchdb.connectDatabase(statSummaryDBName)

    def algorithm(self, parameters):
        """
        get information from wmbs, workqueue and local couch

        Deletes old request docs, then archives the workflows found in the
        deletable, 'aborted-completed' and 'rejected' states. Any exception
        is logged and the cycle ends; nothing is re-raised. `parameters` is
        not used here.
        """
        try:
            logging.info("Cleaning up the old request docs")
            report = self.wmstatsCouchDB.deleteOldDocs(self.config.TaskArchiver.DataKeepDays)
            logging.info("%s docs deleted", report)
            logging.info("getting complete and announced requests")

            deletableWorkflows = self.centralCouchDBReader.workflowsByStatus(self.deletableStates)
            logging.info("Ready to archive normal %s workflows", len(deletableWorkflows))
            numUpdated = self.archiveWorkflows(deletableWorkflows, "normal-archived")
            logging.info("archive normal %s workflows", numUpdated)

            abortedWorkflows = self.centralCouchDBReader.workflowsByStatus(["aborted-completed"])
            logging.info("Ready to archive aborted %s workflows", len(abortedWorkflows))
            numUpdated = self.archiveWorkflows(abortedWorkflows, "aborted-archived")
            logging.info("archive aborted %s workflows", numUpdated)

            rejectedWorkflows = self.centralCouchDBReader.workflowsByStatus(["rejected"])
            logging.info("Ready to archive rejected %s workflows", len(rejectedWorkflows))
            numUpdated = self.archiveWorkflows(rejectedWorkflows, "rejected-archived")
            logging.info("archive rejected %s workflows", numUpdated)

        except Exception as ex:
            logging.error(str(ex))
            logging.error("Error occurred, will try again next cycle")
class WMStatsTest(unittest.TestCase):
    """
    Tests WMStatsWriter and WMStatsReader against the 'wmstats_t' couch
    database set up with the WMStats couch app.
    """
    def setUp(self):
        """
        _setUp_

        Create the test couch database and the writer/reader pair used by
        the tests.
        """
        self.schema = []
        self.couchApps = ["WMStats"]
        self.testInit = TestInitCouchApp('WorkQueueServiceTest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = self.schema,
                                useDefault = False)
        dbName = 'wmstats_t'
        self.testInit.setupCouch(dbName, *self.couchApps)
        self.wmstatsWriter = WMStatsWriter(self.testInit.couchUrl, dbName)
        self.wmstatsReader = WMStatsReader(self.testInit.couchUrl, dbName)
        # override the reader's default stale option with an empty dict
        self.wmstatsReader.defaultStale = {}

    def tearDown(self):
        """
        _tearDown_

        Tear down the couch database created in setUp.
        """
        self.testInit.tearDownCouch()

    def testWMStatsWriter(self):
        """
        Insert one request, then check the writer's return values for an
        existing and a non-existing request and the reader's views of it.
        """
        schema = generate_reqmgr_schema()
        self.assertEqual(self.wmstatsWriter.insertRequest(schema[0]), 'OK', 'insert fail')
        self.assertEqual(self.wmstatsWriter.updateRequestStatus(schema[0]['RequestName'], "failed"),
                         'OK', 'update fail')
        self.assertEqual(self.wmstatsWriter.updateRequestStatus("not_exist_schema", "assigned"),
                         'ERROR: request not found - not_exist_schema')
        self.assertEqual(self.wmstatsWriter.updateTeam(schema[0]['RequestName'], 'teamA'),
                         'OK', 'update fail')
        self.assertEqual(self.wmstatsWriter.updateTeam("not_exist_schema", 'teamA'),
                         'ERROR: request not found - not_exist_schema')

        totalStats = {'total_jobs': 100, 'input_events': 1000,
                      'input_lumis': 1234, 'input_num_files': 5}
        # the first call inserts the stats, the second one updates them
        self.assertEqual(self.wmstatsWriter.insertTotalStats(schema[0]['RequestName'], totalStats),
                         'INSERTED', 'update fail')
        self.assertEqual(self.wmstatsWriter.insertTotalStats(schema[0]['RequestName'], totalStats),
                         'UPDATED', 'update fail')
        self.assertEqual(self.wmstatsWriter.insertTotalStats("not_exist_schema", totalStats),
                         'ERROR: request not found - not_exist_schema')

        spec1 = newWorkload(schema[0]['RequestName'])
        production = spec1.newTask("Production")
        production.setTaskType("Merge")
        production.setSiteWhitelist(['TEST_SITE'])
        self.assertEqual(self.wmstatsWriter.updateFromWMSpec(spec1), 'OK', 'update fail')

        spec2 = newWorkload("not_exist_schema")
        production = spec2.newTask("Production")
        production.setTaskType("Merge")
        self.assertEqual(self.wmstatsWriter.updateFromWMSpec(spec2),
                         'ERROR: request not found - not_exist_schema')

        # keys() is wrapped in list() so the comparison against a list also
        # holds where keys() returns a view rather than a list
        requests = self.wmstatsReader.getRequestByStatus(["failed"], jobInfoFlag = False)
        self.assertEqual(list(requests.keys()), [schema[0]['RequestName']])

        requestCollection = RequestInfoCollection(requests)
        result = requestCollection.getJSONData()
        self.assertEqual(list(result.keys()), [schema[0]['RequestName']])

        requests = self.wmstatsReader.getActiveData()
        self.assertEqual(list(requests.keys()), [schema[0]['RequestName']])

        requests = self.wmstatsReader.workflowsByStatus(["failed"])
        self.assertEqual(requests, [schema[0]['RequestName']])