class RequestDBTest(unittest.TestCase):
    """
    Unit tests for RequestDBWriter/RequestDBReader backed by a couch
    database carrying the ReqMgr couchapp.
    """

    def setUp(self):
        """
        _setUp_

        Create the test couch database with the ReqMgr couchapp and
        point a writer and a reader client at it.
        """
        self.schema = []
        self.couchApps = ["ReqMgr"]
        self.testInit = TestInitCouchApp("RequestDBServiceTest")
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema, useDefault=False)
        dbName = "requsetdb_t"
        self.testInit.setupCouch(dbName, *self.couchApps)
        self.requestWriter = RequestDBWriter(self.testInit.couchUrl, dbName)
        self.requestReader = RequestDBReader(self.testInit.couchUrl, dbName)
        # Disable stale view queries so writes are immediately visible to reads.
        self.requestWriter.defaultStale = {}
        self.requestReader.defaultStale = {}
        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def testRequestDBWriter(self):
        # test getWork
        schema = generate_reqmgr_schema()
        result = self.requestWriter.insertGenericRequest(schema[0])

        # NOTE: assertEquals is a deprecated alias (removed in Python 3.12);
        # use assertEqual throughout.
        self.assertEqual(len(result), 1, "insert fail")
        self.assertEqual(
            self.requestWriter.updateRequestStatus(schema[0]["RequestName"], "failed"),
            "OK",
            "update fail"
        )
        self.assertEqual(
            self.requestWriter.updateRequestStatus("not_exist_schema", "assigned"),
            "Error: document not found"
        )
        # First property update performs the write; the repeated call below
        # asserts the API reports success.
        result = self.requestWriter.updateRequestProperty(schema[0]["RequestName"], {"Teams": ["teamA"]})
        self.assertEqual(
            self.requestWriter.updateRequestProperty(schema[0]["RequestName"], {"Teams": ["teamA"]}),
            "OK",
            "update fail",
        )
        self.assertEqual(
            self.requestWriter.updateRequestProperty("not_exist_schema", {"Teams": "teamA"}),
            "Error: document not found",
        )
        result = self.requestWriter.getRequestByNames([schema[0]["RequestName"]])
        self.assertEqual(len(result), 1, "should be 1")
        result = self.requestWriter.getRequestByStatus(["failed"], False, 1)
        self.assertEqual(len(result), 1, "should be 1")
class T0RequestDBTest(unittest.TestCase):
    """
    Unit tests for RequestDB status handling with the T0Request couchapp,
    which enforces Tier-0 specific state transitions.
    """

    def setUp(self):
        """
        _setUp_

        Create the test couch database with the T0Request couchapp and
        point a writer and a reader client at it.
        """
        self.schema = []
        self.couchApps = ["T0Request"]
        self.testInit = TestInitCouchApp('RequestDBServiceTest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema, useDefault=False)
        dbName = 't0_requsetdb_t'
        self.testInit.setupCouch(dbName, *self.couchApps)
        reqDBURL = "%s/%s" % (self.testInit.couchUrl, dbName)
        self.requestWriter = RequestDBWriter(reqDBURL, self.couchApps[0])
        self.requestReader = RequestDBReader(reqDBURL, self.couchApps[0])
        # Disable stale view queries so writes are immediately visible to reads.
        self.requestWriter.defaultStale = {}
        self.requestReader.defaultStale = {}
        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def testRequestDBWriter(self):
        # test getWork
        schema = generate_reqmgr_schema()
        result = self.requestWriter.insertGenericRequest(schema[0])
        self.assertEqual(len(result), 1, 'insert fail')

        # "assigned" is not a valid Tier-0 state, so the update is rejected.
        result = self.requestWriter.updateRequestStatus(schema[0]['RequestName'], "assigned")
        self.assertEqual(result, 'not allowed state assigned', 'update fail')
        self.assertEqual(self.requestWriter.updateRequestStatus("not_exist_schema", "new"),
                         'Error: document not found')

        # Walk the request through the allowed forward transitions.
        allowedStates = ["Closed", "Merge", "AlcaSkim", "Harvesting", "Processing Done", "completed"]
        for state in allowedStates:
            self.assertEqual(self.requestWriter.updateRequestStatus(schema[0]['RequestName'], state), 'OK')

        # Backward transition from "completed" must be rejected.
        self.assertEqual(self.requestWriter.updateRequestStatus(schema[0]['RequestName'], "Processing Done"),
                         'not allowed transition completed to Processing Done')
        self.assertEqual(self.requestWriter.updateRequestStatus(schema[0]['RequestName'], "normal-archived"),
                         'OK')
        result = self.requestWriter.getRequestByStatus(["normal-archived"], False, 1)
        self.assertEqual(len(result), 1, "should be 1 but %s" % result)
class RequestDBTest(unittest.TestCase):
    """
    Unit tests for RequestDBWriter/RequestDBReader backed by a couch
    database carrying the ReqMgr couchapp.
    """

    def setUp(self):
        """
        _setUp_

        Create the test couch database with the ReqMgr couchapp and
        point a writer and a reader client at it.
        """
        self.schema = []
        self.couchApps = ["ReqMgr"]
        self.testInit = TestInitCouchApp('RequestDBServiceTest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema, useDefault=False)
        dbName = 'requsetdb_t'
        self.testInit.setupCouch(dbName, *self.couchApps)
        self.requestWriter = RequestDBWriter(self.testInit.couchUrl, dbName)
        self.requestReader = RequestDBReader(self.testInit.couchUrl, dbName)
        # Disable stale view queries so writes are immediately visible to reads.
        self.requestWriter.defaultStale = {}
        self.requestReader.defaultStale = {}
        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def testRequestDBWriter(self):
        # test getWork
        schema = generate_reqmgr_schema()
        result = self.requestWriter.insertGenericRequest(schema[0])

        # NOTE: assertEquals is a deprecated alias (removed in Python 3.12);
        # use assertEqual throughout.
        self.assertEqual(len(result), 1, 'insert fail')
        self.assertEqual(self.requestWriter.updateRequestStatus(schema[0]['RequestName'], "failed"),
                         'OK', 'update fail')
        self.assertEqual(self.requestWriter.updateRequestStatus("not_exist_schema", "assigned"),
                         'Error: document not found')
        # First property update performs the write; the repeated call below
        # asserts the API reports success.
        result = self.requestWriter.updateRequestProperty(schema[0]['RequestName'], {'Teams': ['teamA']})
        self.assertEqual(self.requestWriter.updateRequestProperty(schema[0]['RequestName'], {'Teams': ['teamA']}),
                         'OK', 'update fail')
        self.assertEqual(self.requestWriter.updateRequestProperty("not_exist_schema", {'Teams': 'teamA'}),
                         'Error: document not found')
        result = self.requestWriter.getRequestByNames([schema[0]['RequestName']])
        self.assertEqual(len(result), 1, "should be 1")
        result = self.requestWriter.getRequestByStatus(["failed"], False, 1)
        self.assertEqual(len(result), 1, "should be 1")
class TaskArchiverPoller(BaseWorkerThread):
    """
    Polls for Ended jobs

    List of attributes

    requireCouch: raise an exception on couch failure instead of ignoring
    """

    def __init__(self, config):
        """
        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        myThread = threading.currentThread()
        # DAO factories for the WMBS and DBSBuffer schemas respectively.
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)

        self.config = config
        self.jobCacheDir = self.config.JobCreator.jobCacheDir

        # Truthiness check instead of the old "!= False" comparison.
        if getattr(self.config.TaskArchiver, "useWorkQueue", False):
            # Get workqueue setup from config unless overridden
            if hasattr(self.config.TaskArchiver, 'WorkQueueParams'):
                self.workQueue = localQueue(**self.config.TaskArchiver.WorkQueueParams)
            else:
                from WMCore.WorkQueue.WorkQueueUtils import queueFromConfig
                self.workQueue = queueFromConfig(self.config)
        else:
            self.workQueue = None

        self.timeout = getattr(self.config.TaskArchiver, "timeOut", None)
        self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver,
                                                   'useReqMgrForCompletionCheck', True)

        if not self.useReqMgrForCompletionCheck:
            # sets the local monitor summary couch db
            self.requestLocalCouchDB = RequestDBWriter(
                self.config.AnalyticsDataCollector.localT0RequestDBURL,
                couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
            self.centralCouchDBWriter = self.requestLocalCouchDB
        else:
            self.centralCouchDBWriter = RequestDBWriter(
                self.config.AnalyticsDataCollector.centralRequestDBURL)
            self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)

        # Load the cleanout state ID and save it
        stateIDDAO = self.daoFactory(classname="Jobs.GetStateID")
        self.stateID = stateIDDAO.execute("cleanout")

        return

    def terminate(self, params):
        """
        _terminate_

        This function terminates the job after a final pass
        """
        logging.debug("terminating. doing one more pass before we die")
        self.algorithm(params)
        return

    def algorithm(self, parameters=None):
        """
        _algorithm_

        Executes the two main methods of the poller:
        1. findAndMarkFinishedSubscriptions
        2. completeTasks
        Final result is that finished workflows get their summary built and uploaded to couch,
        and all traces of them are removed from the agent WMBS and couch (this last one on demand).
        """
        try:
            self.findAndMarkFinishedSubscriptions()
            (finishedwfs, finishedwfsWithLogCollectAndCleanUp) = self.getFinishedWorkflows()
            # set the data cache which can be used other thread (no ther thread should set the data cache)
            DataCache.setFinishedWorkflows(finishedwfsWithLogCollectAndCleanUp)
            self.completeTasks(finishedwfs)
        except WMException:
            # WMExceptions propagate unchanged; just roll back any open transaction.
            myThread = threading.currentThread()
            if getattr(myThread, 'transaction', False) \
                    and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise
        except Exception as ex:
            myThread = threading.currentThread()
            msg = "Caught exception in TaskArchiver\n"
            msg += str(ex)
            if getattr(myThread, 'transaction', False) \
                    and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise TaskArchiverPollerException(msg)

        return

    def findAndMarkFinishedSubscriptions(self):
        """
        _findAndMarkFinishedSubscriptions_

        Find new finished subscriptions and mark as finished in WMBS.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        # Get the subscriptions that are now finished and mark them as such
        logging.info("Polling for finished subscriptions")
        finishedSubscriptions = self.daoFactory(classname="Subscriptions.MarkNewFinishedSubscriptions")
        finishedSubscriptions.execute(self.stateID, timeOut=self.timeout)
        logging.info("Finished subscriptions updated")

        myThread.transaction.commit()
        return

    def getFinishedWorkflows(self):
        """
        1. Get finished workflows (a finished workflow is defined in Workflow.GetFinishedWorkflows)
        2. Get finished workflows with logCollect and Cleanup only.
        3. combined those and make return
           finishedwfs - without LogCollect and CleanUp task
           finishedwfsWithLogCollectAndCleanUp - including LogCollect and CleanUp task
        """
        finishedWorkflowsDAO = self.daoFactory(classname="Workflow.GetFinishedWorkflows")
        finishedwfs = finishedWorkflowsDAO.execute()
        finishedLogCollectAndCleanUpwfs = finishedWorkflowsDAO.execute(onlySecondary=True)

        finishedwfsWithLogCollectAndCleanUp = {}
        for wf in finishedLogCollectAndCleanUpwfs:
            if wf in finishedwfs:
                finishedwfsWithLogCollectAndCleanUp[wf] = finishedwfs[wf]
        return (finishedwfs, finishedwfsWithLogCollectAndCleanUp)

    def killCondorJobsByWFStatus(self, statusList):
        """
        Kill the WMBS workflows of every request found in the given status(es)
        in central couch.

        :param statusList: a single status name or a list of status names
        :return: the list of request names found in those statuses
        """
        if isinstance(statusList, basestring):
            statusList = [statusList]
        reqNames = self.centralCouchDBWriter.getRequestByStatus(statusList)
        # Log the statuses actually queried; the old message hard-coded
        # 'aborted' even when called for 'force-complete'.
        logging.info("There are %d requests in %s status in central couch.",
                     len(reqNames), statusList)
        for wf in reqNames:
            self.workQueue.killWMBSWorkflow(wf)
        return reqNames

    def completeTasks(self, finishedwfs):
        """
        _completeTasks_

        This method will call several auxiliary methods to do the following:

        1. Notify the WorkQueue about finished subscriptions
        2. mark workflow as completed in the dbsbuffer_workflow table
        """
        if len(finishedwfs) == 0:
            return

        logging.info("Found %d candidate workflows for completing: %s",
                     len(finishedwfs), finishedwfs.keys())

        completedWorkflowsDAO = self.dbsDaoFactory(classname="UpdateWorkflowsToCompleted")

        centralCouchAlive = True
        try:
            abortedWorkflows = self.killCondorJobsByWFStatus(["aborted"])
            self.killCondorJobsByWFStatus(["force-complete"])
        except Exception as ex:
            centralCouchAlive = False
            logging.error("we will try again when remote couch server comes back\n%s", str(ex))

        if centralCouchAlive:
            for workflow in finishedwfs:
                try:
                    # Notify the WorkQueue, if there is one
                    if self.workQueue is not None:
                        subList = []
                        logging.info("Marking subscriptions as Done ...")
                        for l in finishedwfs[workflow]["workflows"].values():
                            subList.extend(l)
                        self.notifyWorkQueue(subList)

                    # Tier-0 case, the agent has to mark it completed
                    if not self.useReqMgrForCompletionCheck:
                        self.requestLocalCouchDB.updateRequestStatus(workflow, "completed")
                        logging.info("status updated to completed %s", workflow)

                    completedWorkflowsDAO.execute([workflow])
                except TaskArchiverPollerException as ex:
                    # Something didn't go well when notifying the workqueue, abort!!!
                    logging.error("Something bad happened while archiving tasks.")
                    logging.error(str(ex))
                    continue
                except Exception as ex:
                    # Something didn't go well on couch, abort!!!
                    msg = "Problem while archiving tasks for workflow %s\n" % workflow
                    msg += "Exception message: %s" % str(ex)
                    msg += "\nTraceback: %s" % traceback.format_exc()
                    logging.error(msg)
                    continue
        return

    def notifyWorkQueue(self, subList):
        """
        _notifyWorkQueue_

        Tells the workQueue component that a particular subscription,
        or set of subscriptions, is done.  Receives confirmation
        """
        for sub in subList:
            try:
                self.workQueue.doneWork(SubscriptionId=sub)
            except WorkQueueNoMatchingElements:
                # Subscription wasn't known to WorkQueue, feel free to clean up
                logging.debug("Local WorkQueue knows nothing about this subscription: %s", sub)
            except Exception as ex:
                msg = "Error talking to workqueue: %s\n" % str(ex)
                msg += "Tried to complete the following: %s\n" % sub
                raise TaskArchiverPollerException(msg)
        return
class Tier0PluginTest(unittest.TestCase):
    """
    Tests for the Tier0Plugin state machine: builds Repack, Express and
    PromptReco-like workflows in WMBS and verifies the request document
    moves through the Tier-0 states as subscriptions finish.
    """

    def setUp(self):
        """
        _setUp_

        Setup the test environment
        """
        self.testInit = TestInit(__file__)
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(["WMCore.WMBS"])
        self.requestCouchDB = 'wmstats_plugin_t'
        self.testInit.setupCouch(self.requestCouchDB, 'T0Request')
        self.testDir = self.testInit.generateWorkDir()
        reqDBURL = "%s/%s" % (os.environ['COUCHURL'], self.requestCouchDB)
        self.requestDBWriter = RequestDBWriter(reqDBURL, couchapp="T0Request")
        self.requestDBWriter._setNoStale()

        # stateMap maps a target state to the objects whose completion
        # triggers the transition; orderedStates is the traversal order.
        self.stateMap = {}
        self.orderedStates = []
        self.plugin = None

        return

    def tearDown(self):
        """
        _tearDown_

        Clear databases and delete files
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()

        return

    def setupRepackWorkflow(self):
        """
        _setupRepackWorkflow_

        Populate WMBS with a repack-like workflow,
        every subscription must be unfinished at first
        """
        workflowName = 'Repack_Run481516_StreamZ'
        mergeTasks = ['RepackMergewrite_QuadElectron_RAW',
                      'RepackMergewrite_TriPhoton_RAW',
                      'RepackMergewrite_SingleNeutrino_RAW']

        self.stateMap = {'Merge': [],
                         'Processing Done': []}
        self.orderedStates = ['Merge', 'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest({'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        # Create a wmspec in disk
        workload = newWorkload(workflowName)
        repackTask = workload.newTask('Repack')
        for task in mergeTasks:
            repackTask.addTask(task)
        repackTask.addTask('RepackCleanupUnmergedwrite_QuadElectron_RAW')

        specPath = os.path.join(self.testDir, 'Repack.pkl')
        workload.save(specPath)

        # Populate WMBS
        topFileset = Fileset(name='TestStreamerFileset')
        topFileset.create()

        options = {'spec': specPath, 'owner': 'ItsAMeMario',
                   'name': workflowName, 'wfType': 'tier0'}
        topLevelWorkflow = Workflow(task='/%s/Repack' % workflowName, **options)
        topLevelWorkflow.create()
        topLevelSub = Subscription(topFileset, topLevelWorkflow)
        topLevelSub.create()
        self.stateMap['Merge'].append(topFileset)
        for task in mergeTasks:
            mergeWorkflow = Workflow(task='/%s/Repack/%s' % (workflowName, task), **options)
            mergeWorkflow.create()
            unmergedFileset = Fileset(name='TestUnmergedFileset%s' % task)
            unmergedFileset.create()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.create()
            self.stateMap['Processing Done'].append(unmergedFileset)
        # Build the cleanup task path from workflowName for consistency with
        # the merge tasks above (same resulting string as before).
        cleanupWorkflow = Workflow(task='/%s/Repack/RepackCleanupUnmergedwrite_QuadElectron_RAW' % workflowName,
                                   **options)
        cleanupWorkflow.create()
        unmergedFileset = Fileset(name='TestUnmergedFilesetToCleanup')
        unmergedFileset.create()
        cleanupSub = Subscription(unmergedFileset, cleanupWorkflow)
        cleanupSub.create()

        return

    def setupExpressWorkflow(self):
        """
        _setupExpressWorkflow_

        Populate WMBS with a express-like workflow,
        every subscription must be unfinished at first
        """
        workflowName = 'Express_Run481516_StreamZFast'
        secondLevelTasks = ['ExpressMergewrite_StreamZFast_DQM',
                            'ExpressMergewrite_ExpressPhysics_FEVT',
                            'ExpressAlcaSkimwrite_StreamZFast_ALCARECO',
                            'ExpressCleanupUnmergedwrite_StreamZFast_DQM',
                            'ExpressCleanupUnmergedwrite_ExpressPhysics_FEVT',
                            'ExpressCleanupUnmergedwrite_StreamZFast_ALCARECO']
        alcaHarvestTask = 'ExpressAlcaSkimwrite_StreamZFast_ALCARECOAlcaHarvestALCARECOStreamPromptCalibProd'
        dqmHarvestTask = 'ExpressMergewrite_StreamZFast_DQMEndOfRunDQMHarvestMerged'

        self.stateMap = {'Merge': [],
                         'Harvesting': [],
                         'Processing Done': []}
        self.orderedStates = ['Merge', 'Harvesting', 'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest({'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        # Create a wmspec in disk
        workload = newWorkload(workflowName)
        expressTask = workload.newTask('Express')
        for task in secondLevelTasks:
            secondLevelTask = expressTask.addTask(task)
            if task == 'ExpressAlcaSkimwrite_StreamZFast_ALCARECO':
                secondLevelTask.addTask(alcaHarvestTask)
            elif task == 'ExpressMergewrite_StreamZFast_DQM':
                secondLevelTask.addTask(dqmHarvestTask)

        specPath = os.path.join(self.testDir, 'Express.pkl')
        workload.save(specPath)

        # Populate WMBS
        sharedFileset = Fileset(name='TestFileset')
        sharedFileset.create()
        sharedFileset.markOpen(False)

        options = {'spec': specPath, 'owner': 'ItsAMeMario',
                   'name': workflowName, 'wfType': 'tier0'}
        topLevelWorkflow = Workflow(task='/%s/Express' % workflowName, **options)
        topLevelWorkflow.create()
        topLevelSub = Subscription(sharedFileset, topLevelWorkflow)
        topLevelSub.create()
        self.stateMap['Merge'].append(topLevelSub)
        for task in [x for x in secondLevelTasks if not x.count('CleanupUnmerged')]:
            secondLevelWorkflow = Workflow(task='/%s/Express/%s' % (workflowName, task), **options)
            secondLevelWorkflow.create()
            mergeSub = Subscription(sharedFileset, secondLevelWorkflow)
            mergeSub.create()
            self.stateMap['Harvesting'].append(mergeSub)
        for (parent, child) in [('ExpressAlcaSkimwrite_StreamZFast_ALCARECO', alcaHarvestTask),
                                ('ExpressMergewrite_StreamZFast_DQM', dqmHarvestTask)]:
            harvestingWorkflow = Workflow(task='/%s/Express/%s/%s' % (workflowName, parent, child),
                                          **options)
            harvestingWorkflow.create()
            harvestingSub = Subscription(sharedFileset, harvestingWorkflow)
            harvestingSub.create()
            self.stateMap['Processing Done'].append(harvestingSub)

        return

    def setupPromptRecoWorkflow(self):
        """
        _setupPromptRecoWorkflow_

        Populate WMBS with a real PromptReco workflow,
        every subscription must be unfinished at first
        """
        # Populate disk and WMBS
        testArguments = PromptRecoWorkloadFactory.getTestArguments()
        workflowName = 'PromptReco_Run195360_Cosmics'
        factory = PromptRecoWorkloadFactory()
        testArguments["EnableHarvesting"] = True
        testArguments["CouchURL"] = os.environ["COUCHURL"]
        workload = factory.factoryWorkloadConstruction(workflowName, testArguments)

        wmbsHelper = WMBSHelper(workload, 'Reco', 'SomeBlock', cachepath=self.testDir)
        wmbsHelper.createTopLevelFileset()
        wmbsHelper._createSubscriptionsInWMBS(wmbsHelper.topLevelTask, wmbsHelper.topLevelFileset)

        self.stateMap = {'AlcaSkim': [],
                         'Merge': [],
                         'Harvesting': [],
                         'Processing Done': []}
        self.orderedStates = ['AlcaSkim', 'Merge', 'Harvesting', 'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest({'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        topLevelTask = '/%s/Reco' % workflowName
        alcaSkimTask = '%s/AlcaSkim' % topLevelTask
        mergeTasks = ['%s/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics',
                      '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T',
                      '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics',
                      '%s/RecoMergewrite_AOD',
                      '%s/RecoMergewrite_DQM',
                      '%s/RecoMergewrite_RECO']
        harvestingTask = '%s/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged' % topLevelTask

        self.stateMap['AlcaSkim'].append(wmbsHelper.topLevelSubscription)

        alcaSkimWorkflow = Workflow(name=workflowName, task=alcaSkimTask)
        alcaSkimWorkflow.load()
        alcarecoFileset = Fileset(name='/PromptReco_Run195360_Cosmics/Reco/unmerged-write_ALCARECOALCARECO')
        alcarecoFileset.load()
        alcaSkimSub = Subscription(alcarecoFileset, alcaSkimWorkflow)
        alcaSkimSub.load()
        self.stateMap['Merge'].append(alcaSkimSub)

        for task in mergeTasks:
            mergeTask = task % topLevelTask
            mergeWorkflow = Workflow(name=workflowName, task=mergeTask)
            mergeWorkflow.load()
            if 'AlcaSkim' in mergeTask:
                # AlcaSkim merge filesets are named after the ALCARECO stream.
                stream = mergeTask.split('/')[-1][13:]
                unmergedFileset = Fileset(name='%s/unmerged-%sALCARECO' % (alcaSkimTask, stream))
                unmergedFileset.load()
            else:
                dataTier = mergeTask.split('/')[-1].split('_')[-1]
                unmergedFileset = Fileset(name='%s/unmerged-write_%s%s' % (topLevelTask, dataTier, dataTier))
                unmergedFileset.load()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.load()
            self.stateMap['Harvesting'].append(mergeSub)

        harvestingWorkflow = Workflow(name=workflowName, task=harvestingTask)
        harvestingWorkflow.load()
        harvestingFileset = Fileset(name='/PromptReco_Run195360_Cosmics/Reco/RecoMergewrite_DQM/merged-MergedDQM')
        harvestingFileset.load()
        harvestingSub = Subscription(harvestingFileset, harvestingWorkflow)
        harvestingSub.load()
        self.stateMap['Processing Done'].append(harvestingSub)

        return

    def verifyStateTransitions(self, transitionMethod='markFinished', transitionTrigger=True):
        """
        _verifyStateTransitions_

        Utility method which goes through the list of states in self.orderedStates and
        finishes the tasks that demand a state transition in each step. This according
        to the defined transition method and trigger.
        It verifies that the request document in WMStats is moving according to the transitions
        """
        for idx in range(0, len(self.orderedStates) * 2):
            # Use floor division: "idx / 2" is float division under Python 3
            # and would break sequence indexing. Behavior is unchanged on
            # Python 2.
            nextState = self.orderedStates[idx // 2]
            if (idx // 2) == 0:
                currentState = 'Closed'
            else:
                currentState = self.orderedStates[idx // 2 - 1]
            if idx % 2 == 0:
                # Finish all but one trigger object: the request must NOT move yet.
                for transitionObject in self.stateMap[nextState][:-1]:
                    method = getattr(transitionObject, transitionMethod)
                    method(transitionTrigger)
                self.plugin([], self.requestDBWriter, self.requestDBWriter)
                currentStateWorkflows = self.requestDBWriter.getRequestByStatus([currentState])
                nextStateWorkflows = self.requestDBWriter.getRequestByStatus([nextState])
                self.assertEqual(len(currentStateWorkflows), 1,
                                 'Workflow moved incorrectly from %s' % currentState)
                self.assertEqual(len(nextStateWorkflows), 0,
                                 'Workflow moved incorrectly to %s' % nextState)
            else:
                # Finish the last trigger object: the request must move now.
                transitionObject = self.stateMap[nextState][-1]
                method = getattr(transitionObject, transitionMethod)
                method(transitionTrigger)
                self.plugin([], self.requestDBWriter, self.requestDBWriter)
                currentStateWorkflows = self.requestDBWriter.getRequestByStatus([currentState])
                nextStateWorkflows = self.requestDBWriter.getRequestByStatus([nextState])
                self.assertEqual(len(currentStateWorkflows), 0,
                                 'Workflow did not move correctly from %s' % currentState)
                self.assertEqual(len(nextStateWorkflows), 1,
                                 'Workflow did not move correctly to %s' % nextState)
        return

    def testA_RepackStates(self):
        """
        _testA_RepackStates_

        Setup an environment with a Repack workflow
        and traverse through the different states.
        Check that the transitions are sane.
        """
        # Set the environment
        self.setupRepackWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions('markOpen', False)

        return

    def testB_ExpressStates(self):
        """
        _testB_ExpressStates_

        Setup an environment with a Express workflow
        and traverse through the different states.
        Check that the transitions are sane.
        """
        # Set the environment
        self.setupExpressWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions()

        return

    def testC_PromptRecoStates(self):
        """
        _testC_PromptRecoStates_

        Setup an environment with a PromptReco workflow
        and traverse through the different states.
        Check that the transitions are sane.
        """
        # Set the environment
        self.setupPromptRecoWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions()

        return
class TaskArchiverPoller(BaseWorkerThread):
    """
    Polls for Ended jobs

    List of attributes

    requireCouch: raise an exception on couch failure instead of ignoring
    """

    def __init__(self, config):
        """
        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        myThread = threading.currentThread()
        # DAO factories for the WMBS and DBSBuffer schemas respectively.
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)
        self.config = config
        self.jobCacheDir = self.config.JobCreator.jobCacheDir

        if getattr(self.config.TaskArchiver, "useWorkQueue", False) != False:
            # Get workqueue setup from config unless overridden
            if hasattr(self.config.TaskArchiver, 'WorkQueueParams'):
                self.workQueue = localQueue(**self.config.TaskArchiver.WorkQueueParams)
            else:
                from WMCore.WorkQueue.WorkQueueUtils import queueFromConfig
                self.workQueue = queueFromConfig(self.config)
        else:
            self.workQueue = None

        self.timeout = getattr(self.config.TaskArchiver, "timeOut", None)
        self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver,
                                                   'useReqMgrForCompletionCheck', True)

        if not self.useReqMgrForCompletionCheck:
            #sets the local monitor summary couch db
            self.requestLocalCouchDB = RequestDBWriter(self.config.AnalyticsDataCollector.localT0RequestDBURL,
                                                       couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
            self.centralCouchDBWriter = self.requestLocalCouchDB
        else:
            self.centralCouchDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL)
            self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)
            #TODO: remove this when reqmgr2 replace reqmgr completely (reqmgr2Only)
            self.reqmgrSvc = RequestManager({'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})

        #Load the cleanout state ID and save it
        stateIDDAO = self.daoFactory(classname="Jobs.GetStateID")
        self.stateID = stateIDDAO.execute("cleanout")

        return

    def terminate(self, params):
        """
        _terminate_

        This function terminates the job after a final pass
        """
        logging.debug("terminating. doing one more pass before we die")
        self.algorithm(params)
        return

    def algorithm(self, parameters=None):
        """
        _algorithm_

        Executes the two main methods of the poller:
        1. findAndMarkFinishedSubscriptions
        2. completeTasks
        Final result is that finished workflows get their summary built and uploaded to couch,
        and all traces of them are removed from the agent WMBS and couch (this last one on demand).
        """
        try:
            self.findAndMarkFinishedSubscriptions()
            (finishedwfs, finishedwfsWithLogCollectAndCleanUp) = self.getFinishedWorkflows()
            # set the data cache which can be used other thread (no ther thread should set the data cache)
            DataCache.setFinishedWorkflows(finishedwfsWithLogCollectAndCleanUp)
            self.completeTasks(finishedwfs)
        except WMException:
            # WMExceptions propagate unchanged; only roll back any open transaction.
            myThread = threading.currentThread()
            if getattr(myThread, 'transaction', False) \
                    and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise
        except Exception as ex:
            # Any other error is wrapped in the poller's own exception type.
            myThread = threading.currentThread()
            msg = "Caught exception in TaskArchiver\n"
            msg += str(ex)
            if getattr(myThread, 'transaction', False) \
                    and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise TaskArchiverPollerException(msg)

        return

    def findAndMarkFinishedSubscriptions(self):
        """
        _findAndMarkFinishedSubscriptions_

        Find new finished subscriptions and mark as finished in WMBS.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        #Get the subscriptions that are now finished and mark them as such
        logging.info("Polling for finished subscriptions")
        finishedSubscriptions = self.daoFactory(classname="Subscriptions.MarkNewFinishedSubscriptions")
        finishedSubscriptions.execute(self.stateID, timeOut=self.timeout)
        logging.info("Finished subscriptions updated")

        myThread.transaction.commit()
        return

    def getFinishedWorkflows(self):
        """
        1. Get finished workflows (a finished workflow is defined in Workflow.GetFinishedWorkflows)
        2. Get finished workflows with logCollect and Cleanup only.
        3. combined those and make return
           finishedwfs - without LogCollect and CleanUp task
           finishedwfsWithLogCollectAndCleanUp - including LogCollect and CleanUp task
        """
        finishedWorkflowsDAO = self.daoFactory(classname="Workflow.GetFinishedWorkflows")
        finishedwfs = finishedWorkflowsDAO.execute()
        finishedLogCollectAndCleanUpwfs = finishedWorkflowsDAO.execute(onlySecondary=True)

        finishedwfsWithLogCollectAndCleanUp = {}
        for wf in finishedLogCollectAndCleanUpwfs:
            if wf in finishedwfs:
                finishedwfsWithLogCollectAndCleanUp[wf] = finishedwfs[wf]
        return (finishedwfs, finishedwfsWithLogCollectAndCleanUp)

    def completeTasks(self, finishedwfs):
        """
        _completeTasks_

        This method will call several auxiliary methods to do the following:
        1. Notify the WorkQueue about finished subscriptions
        2. update dbsbuffer_workflow table with finished subscription
        """

        #Only delete those where the upload and notification succeeded
        logging.info("Found %d candidate workflows for completing: %s" % (len(finishedwfs), finishedwfs.keys()))
        # update the completed flag in dbsbuffer_workflow table so blocks can be closed
        # create updateDBSBufferWorkflowComplete DAO
        if len(finishedwfs) == 0:
            return
        completedWorkflowsDAO = self.dbsDaoFactory(classname="UpdateWorkflowsToCompleted")

        centralCouchAlive = True
        try:
            #TODO: need to enable when reqmgr2 -wmstats is ready
            #abortedWorkflows = self.reqmgrCouchDBWriter.getRequestByStatus(["aborted"], format = "dict");
            abortedWorkflows = self.centralCouchDBWriter.getRequestByStatus(["aborted"])
            logging.info("There are %d requests in 'aborted' status in central couch." % len(abortedWorkflows))
            forceCompleteWorkflows = self.centralCouchDBWriter.getRequestByStatus(["force-complete"])
            logging.info("List of 'force-complete' workflows in central couch: %s" % forceCompleteWorkflows)
        except Exception as ex:
            # Central couch is unreachable: skip this cycle rather than fail.
            centralCouchAlive = False
            logging.error("we will try again when remote couch server comes back\n%s" % str(ex))

        if centralCouchAlive:
            for workflow in finishedwfs:
                try:
                    #Notify the WorkQueue, if there is one
                    if self.workQueue != None:
                        subList = []
                        logging.info("Marking subscriptions as Done ...")
                        for l in finishedwfs[workflow]["workflows"].values():
                            subList.extend(l)
                        self.notifyWorkQueue(subList)

                    #Now we know the workflow as a whole is gone, we can delete the information from couch
                    if not self.useReqMgrForCompletionCheck:
                        self.requestLocalCouchDB.updateRequestStatus(workflow, "completed")
                        logging.info("status updated to completed %s" % workflow)

                    # Choose the follow-up ReqMgr state, if any applies.
                    if workflow in abortedWorkflows:
                        #TODO: remove when reqmgr2-wmstats deployed
                        newState = "aborted-completed"
                    elif workflow in forceCompleteWorkflows:
                        newState = "completed"
                    else:
                        newState = None

                    if newState != None:
                        # update reqmgr workload document only request mgr is installed
                        if not self.useReqMgrForCompletionCheck:
                            # commented out untill all the agent is updated so every request have new state
                            # TODO: agent should be able to write reqmgr db diretly add the right group in
                            # reqmgr
                            self.requestLocalCouchDB.updateRequestStatus(workflow, newState)
                        else:
                            try:
                                #TODO: try reqmgr1 call if it fails (reqmgr2Only - remove this line when reqmgr is replaced)
                                logging.info("Updating status to '%s' in both oracle and couchdb ..." % newState)
                                self.reqmgrSvc.updateRequestStatus(workflow, newState)
                                #And replace with this - remove all the excption
                                #self.reqmgr2Svc.updateRequestStatus(workflow, newState)
                            except httplib.HTTPException as ex:
                                # If we get an HTTPException of 404 means reqmgr2 request
                                if ex.status == 404:
                                    # try reqmgr2 call
                                    msg = "%s : reqmgr2 request: %s" % (workflow, str(ex))
                                    logging.warning(msg)
                                    self.reqmgr2Svc.updateRequestStatus(workflow, newState)
                                else:
                                    msg = "%s : fail to update status %s with HTTP error: %s" % (workflow, newState, str(ex))
                                    logging.error(msg)
                                    raise ex

                        logging.info("status updated to '%s' : %s" % (newState, workflow))

                    # Mark the workflow completed in dbsbuffer_workflow only after
                    # all notifications/status updates above succeeded.
                    completedWorkflowsDAO.execute([workflow])

                except TaskArchiverPollerException as ex:
                    #Something didn't go well when notifying the workqueue, abort!!!
                    logging.error("Something bad happened while archiving tasks.")
                    logging.error(str(ex))
                    continue
                except Exception as ex:
                    #Something didn't go well on couch, abort!!!
                    msg = "Problem while archiving tasks for workflow %s\n" % workflow
                    msg += "Exception message: %s" % str(ex)
                    msg += "\nTraceback: %s" % traceback.format_exc()
                    logging.error(msg)
                    continue
        return

    def notifyWorkQueue(self, subList):
        """
        _notifyWorkQueue_

        Tells the workQueue component that a particular subscription,
        or set of subscriptions, is done.  Receives confirmation
        """
        for sub in subList:
            try:
                self.workQueue.doneWork(SubscriptionId=sub)
            except WorkQueueNoMatchingElements:
                #Subscription wasn't known to WorkQueue, feel free to clean up
                logging.debug("Local WorkQueue knows nothing about this subscription: %s" % sub)
                pass
            except Exception as ex:
                msg = "Error talking to workqueue: %s\n" % str(ex)
                msg += "Tried to complete the following: %s\n" % sub
                raise TaskArchiverPollerException(msg)
        return
class TaskArchiverPoller(BaseWorkerThread):
    """
    Polls for Ended jobs.

    Marks finished subscriptions in WMBS, notifies the local WorkQueue that
    their work is done, and flags the workflows as completed in the
    dbsbuffer_workflow table.

    List of attributes
    requireCouch: raise an exception on couch failure instead of ignoring
    """

    def __init__(self, config):
        """
        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        myThread = threading.currentThread()

        # DAO factories for the WMBS schema and the DBS3 buffer schema,
        # both bound to this thread's DB interface.
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)

        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)

        self.config = config
        self.jobCacheDir = self.config.JobCreator.jobCacheDir

        if getattr(self.config.TaskArchiver, "useWorkQueue", False):
            # Get workqueue setup from config unless overridden
            if hasattr(self.config.TaskArchiver, 'WorkQueueParams'):
                self.workQueue = localQueue(**self.config.TaskArchiver.WorkQueueParams)
            else:
                from WMCore.WorkQueue.WorkQueueUtils import queueFromConfig
                self.workQueue = queueFromConfig(self.config)
        else:
            # No WorkQueue configured; completeTasks() checks for None before use.
            self.workQueue = None

        # Timeout (seconds, presumably) forwarded to the MarkNewFinishedSubscriptions
        # DAO; None means the DAO default applies.
        self.timeout = getattr(self.config.TaskArchiver, "timeOut", None)
        self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)

        if not self.useReqMgrForCompletionCheck:
            # Tier-0 mode: sets the local monitor summary couch db and uses it
            # as the "central" writer as well.
            self.requestLocalCouchDB = RequestDBWriter(self.config.AnalyticsDataCollector.localT0RequestDBURL,
                                                       couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
            self.centralCouchDBWriter = self.requestLocalCouchDB
        else:
            # Production mode: talk to the central request couch and ReqMgr2.
            self.centralCouchDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL)
            self.reqmgr2Svc = ReqMgr(self.config.General.ReqMgr2ServiceURL)

        # Load the cleanout state ID and save it
        stateIDDAO = self.daoFactory(classname="Jobs.GetStateID")
        self.stateID = stateIDDAO.execute("cleanout")

        return

    def terminate(self, params):
        """
        _terminate_

        This function terminates the job after a final pass
        """
        logging.debug("terminating. doing one more pass before we die")
        self.algorithm(params)
        return

    @timeFunction
    def algorithm(self, parameters=None):
        """
        _algorithm_

        Executes the two main methods of the poller:
        1. findAndMarkFinishedSubscriptions
        2. completeTasks
        Final result is that finished workflows get their summary built and
        uploaded to couch, and all traces of them are removed from the agent
        WMBS and couch (this last one on demand).
        """
        try:
            self.findAndMarkFinishedSubscriptions()
            (finishedwfs, finishedwfsWithLogCollectAndCleanUp) = self.getFinishedWorkflows()
            # Set the data cache which can be used by other threads
            # (no other thread should set the data cache).
            DataCache.setFinishedWorkflows(finishedwfsWithLogCollectAndCleanUp)
            self.completeTasks(finishedwfs)
        except WMException:
            # Known WMCore error: roll back any open transaction and re-raise as-is.
            myThread = threading.currentThread()
            if getattr(myThread, 'transaction', False) \
                    and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise
        except Exception as ex:
            # Unknown error: roll back and wrap in the component exception type.
            myThread = threading.currentThread()
            msg = "Caught exception in TaskArchiver\n"
            msg += str(ex)
            if getattr(myThread, 'transaction', False) \
                    and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise TaskArchiverPollerException(msg)

        return

    def findAndMarkFinishedSubscriptions(self):
        """
        _findAndMarkFinishedSubscriptions_

        Find new finished subscriptions and mark as finished in WMBS.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        # Get the subscriptions that are now finished and mark them as such
        logging.info("Polling for finished subscriptions")
        finishedSubscriptions = self.daoFactory(classname="Subscriptions.MarkNewFinishedSubscriptions")
        finishedSubscriptions.execute(self.stateID, timeOut=self.timeout)
        logging.info("Finished subscriptions updated")

        myThread.transaction.commit()

        return

    def getFinishedWorkflows(self):
        """
        1. Get finished workflows (a finished workflow is defined in
           Workflow.GetFinishedWorkflows)
        2. Get finished workflows with logCollect and Cleanup only.
        3. Combine those and return:
           finishedwfs - without LogCollect and CleanUp task
           finishedwfsWithLogCollectAndCleanUp - including LogCollect and CleanUp task
        """
        finishedWorkflowsDAO = self.daoFactory(classname="Workflow.GetFinishedWorkflows")
        finishedwfs = finishedWorkflowsDAO.execute()
        finishedLogCollectAndCleanUpwfs = finishedWorkflowsDAO.execute(onlySecondary=True)

        # Keep only the secondary-task workflows that are also fully finished.
        finishedwfsWithLogCollectAndCleanUp = {}
        for wf in finishedLogCollectAndCleanUpwfs:
            if wf in finishedwfs:
                finishedwfsWithLogCollectAndCleanUp[wf] = finishedwfs[wf]
        return (finishedwfs, finishedwfsWithLogCollectAndCleanUp)

    def killCondorJobsByWFStatus(self, statusList):
        """
        Kill the WMBS workflows (and their condor jobs, via the WorkQueue) for
        every request currently in one of the given statuses in central couch.

        :param statusList: a single status string or a list of status strings
        :return: the list of request names that were acted upon

        NOTE(review): `basestring` is a Python 2-only builtin — this method
        assumes a Python 2 runtime; confirm before porting.
        """
        if isinstance(statusList, basestring):
            statusList = [statusList]
        reqNames = self.centralCouchDBWriter.getRequestByStatus(statusList)
        logging.info("There are %d requests in %s status in central couch.", len(reqNames), statusList)
        # Assumes self.workQueue is not None here — TODO confirm this method
        # is only called when useWorkQueue is enabled.
        self.workQueue.killWMBSWorkflows(reqNames)
        return reqNames

    def completeTasks(self, finishedwfs):
        """
        _completeTasks_

        This method will call several auxiliary methods to do the following:

        1. Notify the WorkQueue about finished subscriptions
        2. Mark workflow as completed in the dbsbuffer_workflow table
        """
        if not finishedwfs:
            return

        logging.info("Found %d candidate workflows for completing:", len(finishedwfs))
        completedWorkflowsDAO = self.dbsDaoFactory(classname="UpdateWorkflowsToCompleted")

        # If central couch is unreachable we skip the whole pass and retry
        # on the next polling cycle rather than half-completing workflows.
        centralCouchAlive = True
        try:
            self.killCondorJobsByWFStatus(["force-complete", "aborted"])
        except Exception as ex:
            centralCouchAlive = False
            logging.error("we will try again when remote couch server comes back\n%s", str(ex))

        if centralCouchAlive:
            logging.info("Marking subscriptions as Done ...")
            for workflow in finishedwfs:
                try:
                    # Notify the WorkQueue, if there is one
                    if self.workQueue is not None:
                        subList = []
                        for l in finishedwfs[workflow]["workflows"].values():
                            subList.extend(l)
                        self.notifyWorkQueue(subList)

                    # Tier-0 case, the agent has to mark it completed
                    if not self.useReqMgrForCompletionCheck:
                        self.requestLocalCouchDB.updateRequestStatus(workflow, "completed")
                        logging.info("status updated to completed %s", workflow)

                    completedWorkflowsDAO.execute([workflow])
                except TaskArchiverPollerException as ex:
                    # Something didn't go well when notifying the workqueue;
                    # skip this workflow and carry on with the rest.
                    logging.error("Something bad happened while archiving tasks.")
                    logging.error(str(ex))
                    continue
                except Exception as ex:
                    # Something didn't go well on couch; log with traceback and
                    # continue with the next workflow.
                    msg = "Problem while archiving tasks for workflow %s\n" % workflow
                    msg += "Exception message: %s" % str(ex)
                    msg += "\nTraceback: %s" % traceback.format_exc()
                    logging.error(msg)
                    continue
        return

    def notifyWorkQueue(self, subList):
        """
        _notifyWorkQueue_

        Tells the workQueue component that a particular subscription,
        or set of subscriptions, is done. Receives confirmation
        """
        for sub in subList:
            try:
                self.workQueue.doneWork(SubscriptionId=sub)
            except WorkQueueNoMatchingElements:
                # Subscription wasn't known to WorkQueue, feel free to clean up
                logging.debug("Local WorkQueue knows nothing about this subscription: %s", sub)
            except Exception as ex:
                # Any other WorkQueue failure aborts the whole notification batch.
                msg = "Error talking to workqueue: %s\n" % str(ex)
                msg += "Tried to complete the following: %s\n" % sub
                raise TaskArchiverPollerException(msg)
        return
class TaskArchiverPoller(BaseWorkerThread):
    """
    Polls for Ended jobs.

    Older variant of the poller that still performs the reqmgr1 -> reqmgr2
    dual status update for aborted / force-complete workflows.

    List of attributes
    requireCouch: raise an exception on couch failure instead of ignoring
    """
    def __init__(self, config):
        """
        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        myThread = threading.currentThread()

        # DAO factories for the WMBS schema and the DBS3 buffer schema.
        self.daoFactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = myThread.logger,
                                     dbinterface = myThread.dbi)

        self.dbsDaoFactory = DAOFactory(package = "WMComponent.DBS3Buffer",
                                        logger = myThread.logger,
                                        dbinterface = myThread.dbi)

        self.config = config
        self.jobCacheDir = self.config.JobCreator.jobCacheDir

        if getattr(self.config.TaskArchiver, "useWorkQueue", False) != False:
            # Get workqueue setup from config unless overridden
            if hasattr(self.config.TaskArchiver, 'WorkQueueParams'):
                self.workQueue = localQueue(**self.config.TaskArchiver.WorkQueueParams)
            else:
                from WMCore.WorkQueue.WorkQueueUtils import queueFromConfig
                self.workQueue = queueFromConfig(self.config)
        else:
            # No WorkQueue configured; completeTasks() checks for None before use.
            self.workQueue = None

        # Timeout forwarded to the MarkNewFinishedSubscriptions DAO.
        self.timeout = getattr(self.config.TaskArchiver, "timeOut", None)
        self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)

        if not self.useReqMgrForCompletionCheck:
            # Tier-0 mode: sets the local monitor summary couch db, also used
            # as the "central" writer.
            self.requestLocalCouchDB = RequestDBWriter(self.config.AnalyticsDataCollector.localT0RequestDBURL,
                                                       couchapp = self.config.AnalyticsDataCollector.RequestCouchApp)
            self.centralCouchDBWriter = self.requestLocalCouchDB;
        else:
            # Production mode: central couch writer plus both ReqMgr services.
            self.centralCouchDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL)
            self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)
            # TODO: remove this when reqmgr2 replaces reqmgr completely (reqmgr2Only)
            self.reqmgrSvc = RequestManager({'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})

        # Load the cleanout state ID and save it
        stateIDDAO = self.daoFactory(classname = "Jobs.GetStateID")
        self.stateID = stateIDDAO.execute("cleanout")

        return

    def terminate(self, params):
        """
        _terminate_

        This function terminates the job after a final pass
        """
        logging.debug("terminating. doing one more pass before we die")
        self.algorithm(params)
        return

    def algorithm(self, parameters = None):
        """
        _algorithm_

        Executes the two main methods of the poller:
        1. findAndMarkFinishedSubscriptions
        2. completeTasks
        Final result is that finished workflows get their summary built and
        uploaded to couch, and all traces of them are removed from the agent
        WMBS and couch (this last one on demand).
        """
        try:
            self.findAndMarkFinishedSubscriptions()
            (finishedwfs, finishedwfsWithLogCollectAndCleanUp) = self.getFinishedWorkflows()
            # Set the data cache which can be used by other threads
            # (no other thread should set the data cache).
            DataCache.setFinishedWorkflows(finishedwfsWithLogCollectAndCleanUp)
            self.completeTasks(finishedwfs)
        except WMException:
            # Known WMCore error: roll back any open transaction and re-raise.
            myThread = threading.currentThread()
            if getattr(myThread, 'transaction', False) \
                    and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise
        except Exception as ex:
            # Unknown error: roll back and wrap in the component exception type.
            myThread = threading.currentThread()
            msg = "Caught exception in TaskArchiver\n"
            msg += str(ex)
            if getattr(myThread, 'transaction', False) \
                    and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise TaskArchiverPollerException(msg)

        return

    def findAndMarkFinishedSubscriptions(self):
        """
        _findAndMarkFinishedSubscriptions_

        Find new finished subscriptions and mark as finished in WMBS.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        # Get the subscriptions that are now finished and mark them as such
        logging.info("Polling for finished subscriptions")
        finishedSubscriptions = self.daoFactory(classname = "Subscriptions.MarkNewFinishedSubscriptions")
        finishedSubscriptions.execute(self.stateID, timeOut = self.timeout)
        logging.info("Finished subscriptions updated")

        myThread.transaction.commit()

        return

    def getFinishedWorkflows(self):
        """
        1. Get finished workflows (a finished workflow is defined in
           Workflow.GetFinishedWorkflows)
        2. Get finished workflows with logCollect and Cleanup only.
        3. Combine those and return:
           finishedwfs - without LogCollect and CleanUp task
           finishedwfsWithLogCollectAndCleanUp - including LogCollect and CleanUp task
        """
        finishedWorkflowsDAO = self.daoFactory(classname = "Workflow.GetFinishedWorkflows")
        finishedwfs = finishedWorkflowsDAO.execute()
        finishedLogCollectAndCleanUpwfs = finishedWorkflowsDAO.execute(onlySecondary=True)

        # Keep only the secondary-task workflows that are also fully finished.
        finishedwfsWithLogCollectAndCleanUp = {}
        for wf in finishedLogCollectAndCleanUpwfs:
            if wf in finishedwfs:
                finishedwfsWithLogCollectAndCleanUp[wf] = finishedwfs[wf]

        return (finishedwfs, finishedwfsWithLogCollectAndCleanUp)

    def completeTasks(self, finishedwfs):
        """
        _completeTasks_

        This method will call several auxiliary methods to do the following:
        1. Notify the WorkQueue about finished subscriptions
        2. Update dbsbuffer_workflow table with finished subscription
        """
        # Only delete those where the upload and notification succeeded
        logging.info("Found %d candidate workflows for completing: %s" % (len(finishedwfs), finishedwfs.keys()))
        # update the completed flag in dbsbuffer_workflow table so blocks can be closed
        # create updateDBSBufferWorkflowComplete DAO
        if len(finishedwfs) == 0:
            return
        completedWorkflowsDAO = self.dbsDaoFactory(classname = "UpdateWorkflowsToCompleted")

        # If central couch is unreachable we skip the whole pass and retry
        # on the next polling cycle rather than half-completing workflows.
        centralCouchAlive = True
        try:
            # TODO: need to enable when reqmgr2-wmstats is ready
            #abortedWorkflows = self.reqmgrCouchDBWriter.getRequestByStatus(["aborted"], format = "dict");
            abortedWorkflows = self.centralCouchDBWriter.getRequestByStatus(["aborted"])
            logging.info("There are %d requests in 'aborted' status in central couch." % len(abortedWorkflows))
            forceCompleteWorkflows = self.centralCouchDBWriter.getRequestByStatus(["force-complete"])
            logging.info("List of 'force-complete' workflows in central couch: %s" % forceCompleteWorkflows)
        except Exception as ex:
            centralCouchAlive = False
            logging.error("we will try again when remote couch server comes back\n%s" % str(ex))

        if centralCouchAlive:
            for workflow in finishedwfs:
                try:
                    # Notify the WorkQueue, if there is one
                    if self.workQueue != None:
                        subList = []
                        logging.info("Marking subscriptions as Done ...")
                        for l in finishedwfs[workflow]["workflows"].values():
                            subList.extend(l)
                        self.notifyWorkQueue(subList)

                    # Now we know the workflow as a whole is gone, we can
                    # delete the information from couch.
                    if not self.useReqMgrForCompletionCheck:
                        self.requestLocalCouchDB.updateRequestStatus(workflow, "completed")
                        logging.info("status updated to completed %s" % workflow)

                    # Decide the new ReqMgr state for aborted / force-complete
                    # workflows; None means no ReqMgr update is needed.
                    if workflow in abortedWorkflows:
                        # TODO: remove when reqmgr2-wmstats deployed
                        newState = "aborted-completed"
                    elif workflow in forceCompleteWorkflows:
                        newState = "completed"
                    else:
                        newState = None

                    if newState != None:
                        # update reqmgr workload document only if request mgr is installed
                        if not self.useReqMgrForCompletionCheck:
                            # commented out until all the agents are updated so every request has the new state
                            # TODO: agent should be able to write reqmgr db directly, add the right group in reqmgr
                            self.requestLocalCouchDB.updateRequestStatus(workflow, newState)
                        else:
                            try:
                                # TODO: try reqmgr1 call, fall back to reqmgr2 if it fails
                                # (reqmgr2Only - remove this line when reqmgr is replaced)
                                logging.info("Updating status to '%s' in both oracle and couchdb ..." % newState)
                                self.reqmgrSvc.updateRequestStatus(workflow, newState)
                                # And replace with this - remove all the exception handling
                                #self.reqmgr2Svc.updateRequestStatus(workflow, newState)
                            except httplib.HTTPException as ex:
                                # If we get an HTTPException of 404 it means a reqmgr2 request
                                if ex.status == 404:
                                    # try reqmgr2 call
                                    msg = "%s : reqmgr2 request: %s" % (workflow, str(ex))
                                    logging.warning(msg)
                                    self.reqmgr2Svc.updateRequestStatus(workflow, newState)
                                else:
                                    msg = "%s : fail to update status %s with HTTP error: %s" % (workflow, newState, str(ex))
                                    logging.error(msg)
                                    raise ex

                            logging.info("status updated to '%s' : %s" % (newState, workflow))

                    completedWorkflowsDAO.execute([workflow])

                except TaskArchiverPollerException as ex:
                    # Something didn't go well when notifying the workqueue;
                    # skip this workflow and carry on with the rest.
                    logging.error("Something bad happened while archiving tasks.")
                    logging.error(str(ex))
                    continue
                except Exception as ex:
                    # Something didn't go well on couch; log with traceback and
                    # continue with the next workflow.
                    msg = "Problem while archiving tasks for workflow %s\n" % workflow
                    msg += "Exception message: %s" % str(ex)
                    msg += "\nTraceback: %s" % traceback.format_exc()
                    logging.error(msg)
                    continue
        return

    def notifyWorkQueue(self, subList):
        """
        _notifyWorkQueue_

        Tells the workQueue component that a particular subscription,
        or set of subscriptions, is done. Receives confirmation
        """
        for sub in subList:
            try:
                self.workQueue.doneWork(SubscriptionId = sub)
            except WorkQueueNoMatchingElements:
                # Subscription wasn't known to WorkQueue, feel free to clean up
                logging.info("Local WorkQueue knows nothing about this subscription: %s" % sub)
                pass
            except Exception as ex:
                # Any other WorkQueue failure aborts the whole notification batch.
                msg = "Error talking to workqueue: %s\n" % str(ex)
                msg += "Tried to complete the following: %s\n" % sub
                raise TaskArchiverPollerException(msg)

        return
class Tier0PluginTest(unittest.TestCase):
    """
    Tests for the Tier0Plugin state-tracking plugin: builds Repack, Express
    and PromptReco-like workflows in WMBS, then verifies that the request
    document in WMStats walks through the expected Tier-0 states.
    """

    def setUp(self):
        """
        _setUp_

        Setup the test environment
        """
        self.testInit = TestInit(__file__)
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(["WMCore.WMBS"])
        self.requestCouchDB = 'wmstats_plugin_t'
        self.testInit.setupCouch(self.requestCouchDB, 'T0Request')
        self.testDir = self.testInit.generateWorkDir()
        reqDBURL = "%s/%s" % (os.environ['COUCHURL'], self.requestCouchDB)
        self.requestDBWriter = RequestDBWriter(reqDBURL, couchapp="T0Request")
        # Disable stale views so reads reflect writes immediately in the tests.
        self.requestDBWriter._setNoStale()

        # stateMap: T0 state name -> list of objects whose transition drives
        # entry into that state; orderedStates: the expected state sequence.
        self.stateMap = {}
        self.orderedStates = []
        self.plugin = None

        return

    def tearDown(self):
        """
        _tearDown_

        Clear databases and delete files
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()

        return

    def setupRepackWorkflow(self):
        """
        _setupRepackWorkflow_

        Populate WMBS with a repack-like workflow,
        every subscription must be unfinished at first
        """
        workflowName = 'Repack_Run481516_StreamZ'
        mergeTasks = ['RepackMergewrite_QuadElectron_RAW',
                      'RepackMergewrite_TriPhoton_RAW',
                      'RepackMergewrite_SingleNeutrino_RAW']

        self.stateMap = {'Merge': [],
                         'Processing Done': []}
        self.orderedStates = ['Merge', 'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest({'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        # Create a wmspec on disk
        workload = newWorkload(workflowName)
        repackTask = workload.newTask('Repack')
        for task in mergeTasks:
            repackTask.addTask(task)
        repackTask.addTask('RepackCleanupUnmergedwrite_QuadElectron_RAW')

        specPath = os.path.join(self.testDir, 'Repack.pkl')
        workload.save(specPath)

        # Populate WMBS
        topFileset = Fileset(name='TestStreamerFileset')
        topFileset.create()

        options = {'spec': specPath,
                   'owner': 'ItsAMeMario',
                   'name': workflowName,
                   'wfType': 'tier0'}
        topLevelWorkflow = Workflow(task='/%s/Repack' % workflowName,
                                    **options)
        topLevelWorkflow.create()
        topLevelSub = Subscription(topFileset, topLevelWorkflow)
        topLevelSub.create()
        # For Repack the transitions are driven on Filesets (via markOpen),
        # unlike Express/PromptReco which track Subscriptions.
        self.stateMap['Merge'].append(topFileset)
        for task in mergeTasks:
            mergeWorkflow = Workflow(task='/%s/Repack/%s' % (workflowName, task), **options)
            mergeWorkflow.create()
            unmergedFileset = Fileset(name='TestUnmergedFileset%s' % task)
            unmergedFileset.create()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.create()
            self.stateMap['Processing Done'].append(unmergedFileset)
        # Cleanup task: present in WMBS but not part of the state map, since
        # cleanup must not block the state transitions.
        cleanupWorkflow = Workflow(task='/Repack_Run481516_StreamZ/Repack/RepackCleanupUnmergedwrite_QuadElectron_RAW',
                                   **options)
        cleanupWorkflow.create()
        unmergedFileset = Fileset(name='TestUnmergedFilesetToCleanup')
        unmergedFileset.create()
        cleanupSub = Subscription(unmergedFileset, cleanupWorkflow)
        cleanupSub.create()

        return

    def setupExpressWorkflow(self):
        """
        _setupExpressWorkflow_

        Populate WMBS with a express-like workflow,
        every subscription must be unfinished at first
        """
        workflowName = 'Express_Run481516_StreamZFast'
        secondLevelTasks = ['ExpressMergewrite_StreamZFast_DQM',
                            'ExpressMergewrite_ExpressPhysics_FEVT',
                            'ExpressAlcaSkimwrite_StreamZFast_ALCARECO',
                            'ExpressCleanupUnmergedwrite_StreamZFast_DQM',
                            'ExpressCleanupUnmergedwrite_ExpressPhysics_FEVT',
                            'ExpressCleanupUnmergedwrite_StreamZFast_ALCARECO']
        alcaHarvestTask = 'ExpressAlcaSkimwrite_StreamZFast_ALCARECOAlcaHarvestALCARECOStreamPromptCalibProd'
        dqmHarvestTask = 'ExpressMergewrite_StreamZFast_DQMEndOfRunDQMHarvestMerged'

        self.stateMap = {'Merge': [],
                         'Harvesting': [],
                         'Processing Done': []}
        self.orderedStates = ['Merge', 'Harvesting', 'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest({'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        # Create a wmspec on disk: harvesting tasks hang off their respective
        # second-level (merge / alcaskim) tasks.
        workload = newWorkload(workflowName)
        expressTask = workload.newTask('Express')
        for task in secondLevelTasks:
            secondLevelTask = expressTask.addTask(task)
            if task == 'ExpressAlcaSkimwrite_StreamZFast_ALCARECO':
                secondLevelTask.addTask(alcaHarvestTask)
            elif task == 'ExpressMergewrite_StreamZFast_DQM':
                secondLevelTask.addTask(dqmHarvestTask)

        specPath = os.path.join(self.testDir, 'Express.pkl')
        workload.save(specPath)

        # Populate WMBS — a single shared (closed) fileset feeds every
        # subscription in this fixture.
        sharedFileset = Fileset(name='TestFileset')
        sharedFileset.create()
        sharedFileset.markOpen(False)

        options = {'spec': specPath,
                   'owner': 'ItsAMeMario',
                   'name': workflowName,
                   'wfType': 'tier0'}
        topLevelWorkflow = Workflow(task='/%s/Express' % workflowName,
                                    **options)
        topLevelWorkflow.create()
        topLevelSub = Subscription(sharedFileset, topLevelWorkflow)
        topLevelSub.create()
        self.stateMap['Merge'].append(topLevelSub)
        # CleanupUnmerged tasks are excluded: they must not gate transitions.
        for task in [x for x in secondLevelTasks if not x.count('CleanupUnmerged')]:
            secondLevelWorkflow = Workflow(task='/%s/Express/%s' % (workflowName, task), **options)
            secondLevelWorkflow.create()
            mergeSub = Subscription(sharedFileset, secondLevelWorkflow)
            mergeSub.create()
            self.stateMap['Harvesting'].append(mergeSub)

        for (parent, child) in [('ExpressAlcaSkimwrite_StreamZFast_ALCARECO', alcaHarvestTask),
                                ('ExpressMergewrite_StreamZFast_DQM', dqmHarvestTask)]:
            harvestingWorkflow = Workflow(task='/%s/Express/%s/%s' % (workflowName, parent, child),
                                          **options)
            harvestingWorkflow.create()
            harvestingSub = Subscription(sharedFileset, harvestingWorkflow)
            harvestingSub.create()
            self.stateMap['Processing Done'].append(harvestingSub)

        return

    def setupPromptRecoWorkflow(self):
        """
        _setupPromptRecoWorkflow_

        Populate WMBS with a real PromptReco workflow,
        every subscription must be unfinished at first
        """
        # Populate disk and WMBS using the real PromptReco workload factory
        testArguments = PromptRecoWorkloadFactory.getTestArguments()
        workflowName = 'PromptReco_Run195360_Cosmics'
        factory = PromptRecoWorkloadFactory()
        testArguments["EnableHarvesting"] = True
        testArguments["CouchURL"] = os.environ["COUCHURL"]
        workload = factory.factoryWorkloadConstruction(workflowName, testArguments)

        wmbsHelper = WMBSHelper(workload, 'Reco', 'SomeBlock', cachepath=self.testDir)
        wmbsHelper.createTopLevelFileset()
        wmbsHelper._createSubscriptionsInWMBS(wmbsHelper.topLevelTask, wmbsHelper.topLevelFileset)

        self.stateMap = {'AlcaSkim': [],
                         'Merge': [],
                         'Harvesting': [],
                         'Processing Done': []}
        self.orderedStates = ['AlcaSkim', 'Merge', 'Harvesting', 'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest({'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        topLevelTask = '/%s/Reco' % workflowName
        alcaSkimTask = '%s/AlcaSkim' % topLevelTask
        mergeTasks = ['%s/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics',
                      '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T',
                      '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics',
                      '%s/RecoMergewrite_AOD',
                      '%s/RecoMergewrite_DQM',
                      '%s/RecoMergewrite_RECO']
        harvestingTask = '%s/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged' % topLevelTask

        self.stateMap['AlcaSkim'].append(wmbsHelper.topLevelSubscription)

        # The factory already created these objects in WMBS; load() them
        # (rather than create()) to attach to the existing rows.
        alcaSkimWorkflow = Workflow(name=workflowName, task=alcaSkimTask)
        alcaSkimWorkflow.load()
        alcarecoFileset = Fileset(name='/PromptReco_Run195360_Cosmics/Reco/unmerged-write_ALCARECOALCARECO')
        alcarecoFileset.load()
        alcaSkimSub = Subscription(alcarecoFileset, alcaSkimWorkflow)
        alcaSkimSub.load()
        self.stateMap['Merge'].append(alcaSkimSub)

        for task in mergeTasks:
            mergeTask = task % topLevelTask
            mergeWorkflow = Workflow(name=workflowName, task=mergeTask)
            mergeWorkflow.load()
            if 'AlcaSkim' in mergeTask:
                # Strip the 'AlcaSkimMerge' prefix (13 chars) to recover the stream name.
                stream = mergeTask.split('/')[-1][13:]
                unmergedFileset = Fileset(name='%s/unmerged-%sALCARECO' % (alcaSkimTask, stream))
                unmergedFileset.load()
            else:
                dataTier = mergeTask.split('/')[-1].split('_')[-1]
                unmergedFileset = Fileset(name='%s/unmerged-write_%s%s' % (topLevelTask, dataTier, dataTier))
                unmergedFileset.load()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.load()
            self.stateMap['Harvesting'].append(mergeSub)

        harvestingWorkflow = Workflow(name=workflowName, task=harvestingTask)
        harvestingWorkflow.load()
        harvestingFileset = Fileset(name='/PromptReco_Run195360_Cosmics/Reco/RecoMergewrite_DQM/merged-MergedDQM')
        harvestingFileset.load()
        harvestingSub = Subscription(harvestingFileset, harvestingWorkflow)
        harvestingSub.load()
        self.stateMap['Processing Done'].append(harvestingSub)

        return

    def verifyStateTransitions(self, transitionMethod='markFinished', transitionTrigger=True):
        """
        _verifyStateTransitions_

        Utility method which goes through the list of states in
        self.orderedStates and finishes the tasks that demand a state
        transition in each step. This according to the defined transition
        method and trigger.
        It verifies that the request document in WMStats is moving according
        to the transitions.

        Each state is checked in two half-steps: first all but the last
        trigger object is transitioned (the request must NOT move yet), then
        the last one is transitioned (the request MUST move).
        """
        for idx in range(0, len(self.orderedStates) * 2):
            nextState = self.orderedStates[idx // 2]
            if (idx // 2) == 0:
                currentState = 'Closed'
            else:
                currentState = self.orderedStates[idx // 2 - 1]
            if idx % 2 == 0:
                # First half-step: transition everything except the last
                # object — the workflow must stay in currentState.
                for transitionObject in self.stateMap[nextState][:-1]:
                    method = getattr(transitionObject, transitionMethod)
                    method(transitionTrigger)
                self.plugin([], self.requestDBWriter, self.requestDBWriter)
                currentStateWorkflows = self.requestDBWriter.getRequestByStatus([currentState])
                nextStateWorkflows = self.requestDBWriter.getRequestByStatus([nextState])
                self.assertEqual(len(currentStateWorkflows), 1,
                                 'Workflow moved incorrectly from %s' % currentState)
                self.assertEqual(len(nextStateWorkflows), 0,
                                 'Workflow moved incorrectly to %s' % nextState)
            else:
                # Second half-step: transition the last object — the workflow
                # must now move to nextState.
                transitionObject = self.stateMap[nextState][-1]
                method = getattr(transitionObject, transitionMethod)
                method(transitionTrigger)
                self.plugin([], self.requestDBWriter, self.requestDBWriter)
                currentStateWorkflows = self.requestDBWriter.getRequestByStatus([currentState])
                nextStateWorkflows = self.requestDBWriter.getRequestByStatus([nextState])
                self.assertEqual(len(currentStateWorkflows), 0,
                                 'Workflow did not move correctly from %s' % currentState)
                self.assertEqual(len(nextStateWorkflows), 1,
                                 'Workflow did not move correctly to %s' % nextState)
        return

    def testA_RepackStates(self):
        """
        _testA_RepackStates_

        Setup an environment with a Repack workflow
        and traverse through the different states.
        Check that the transitions are sane.
        """
        # Set the environment
        self.setupRepackWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions (Repack tracks Filesets, so close them
        # via markOpen(False) instead of markFinished).
        self.verifyStateTransitions('markOpen', False)

        return

    def testB_ExpressStates(self):
        """
        _testB_ExpressStates_

        Setup an environment with a Express workflow
        and traverse through the different states.
        Check that the transitions are sane.
        """
        # Set the environment
        self.setupExpressWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions()

        return

    def testC_PromptRecoStates(self):
        """
        _testC_PromptRecoStates_

        Setup an environment with a PromptReco workflow
        and traverse through the different states.
        Check that the transitions are sane.
        """
        # Set the environment
        self.setupPromptRecoWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions()

        return