def __init__(self, **kwargs):
    """
    Set up the logger, the ReqMgr / ReqMgr2 service handles and the LogDB handle.

    Keyword arguments read here (all optional):
      logger            -- logger to use; the ``logging`` module itself is
                           used when the key is missing or falsy
      reqmgr2_endpoint  -- handed to ReqMgr (None when missing)
      central_logdb_url -- handed to LogDB ("" when missing)
      log_reporter      -- identifier handed to LogDB ("" when missing)

    The complete kwargs dict, including the defaulted logger, is passed on
    to RequestManager.
    """
    if not kwargs.get('logger'):
        import logging
        kwargs['logger'] = logging
    self.logger = kwargs['logger']

    # TODO: (reqmgr2Only - remove this line when reqmgr is replaced)
    self.reqMgr = RequestManager(kwargs)
    # this will break all in one test
    self.reqMgr2 = ReqMgr(kwargs.get("reqmgr2_endpoint", None))

    centralurl = kwargs.get("central_logdb_url", "")
    identifier = kwargs.get("log_reporter", "")

    # Set the thread name before creating the log db, and only when it has
    # not been set already (i.e. it still carries the default "MainThread").
    # current_thread() / .name replace the deprecated camelCase
    # currentThread() / getName() / setName() spellings.
    activeThread = threading.current_thread()
    if activeThread.name == "MainThread":
        activeThread.name = self.__class__.__name__

    self.logdb = LogDB(centralurl, identifier, logger=self.logger)
    self.previous_state = {}
def setup(self, parameters):
    """
    Called at startup

    Reads the component configuration and creates the service and CouchDB
    handles stored on the instance: the WMStats writer, the central request
    DB reader and writer, the ReqMgr / ReqMgr2 client (only when ReqMgr is
    used for the completion check) and the jobs / fwjrs / summary-stats
    databases. ``parameters`` is not used by this method.
    """
    self.teamName = self.config.Agent.teamName

    # set the connection for local couchDB call
    archiverConfig = self.config.TaskArchiver
    self.useReqMgrForCompletionCheck = getattr(
        archiverConfig, 'useReqMgrForCompletionCheck', True)
    self.archiveDelayHours = getattr(archiverConfig, 'archiveDelayHours', 0)
    self.wmstatsCouchDB = WMStatsWriter(archiverConfig.localWMStatsURL,
                                        "WMStatsAgent")

    # TODO: we might need to use local db for Tier0
    analyticsConfig = self.config.AnalyticsDataCollector
    centralDBURL = analyticsConfig.centralRequestDBURL
    requestCouchApp = analyticsConfig.RequestCouchApp
    self.centralRequestDBReader = RequestDBReader(centralDBURL,
                                                  couchapp=requestCouchApp)

    if not self.useReqMgrForCompletionCheck:
        # Tier0 case: use the local request DB for updates
        self.deletableState = "completed"
        self.centralRequestDBWriter = RequestDBWriter(
            analyticsConfig.localT0RequestDBURL, couchapp=requestCouchApp)
    else:
        self.deletableState = "announced"
        self.centralRequestDBWriter = RequestDBWriter(
            centralDBURL, couchapp=requestCouchApp)
        if archiverConfig.reqmgr2Only:
            self.reqmgr2Svc = ReqMgr(archiverConfig.ReqMgr2ServiceURL)
        else:
            # TODO: remove this for reqmgr2
            self.reqmgrSvc = RequestManager(
                {'endpoint': archiverConfig.ReqMgrServiceURL})

    # Job-level databases all live on the JobStateMachine couch server.
    stateMachineConfig = self.config.JobStateMachine
    jobDBurl = sanitizeURL(stateMachineConfig.couchurl)['url']
    jobDBName = stateMachineConfig.couchDBName
    self.jobCouchdb = CouchServer(jobDBurl)
    connect = self.jobCouchdb.connectDatabase
    self.jobsdatabase = connect("%s/jobs" % jobDBName)
    self.fwjrdatabase = connect("%s/fwjrs" % jobDBName)
    self.statsumdatabase = connect(stateMachineConfig.summaryStatsDBName)
# Start-up sequence: configure CherryPy, build the ReqMgr / WorkQueue /
# WMStats service objects, schedule the periodic ReqMgr data collection and
# start the CherryPy server.
# NOTE(review): WMStatSevice and reqmgrDataFormat are not defined in this
# fragment; they are assumed to come from elsewhere in the file - confirm.

# Narrow the configuration to its WMStats section; every setting below is
# read from it.
cfg = cfg.WMStats

import cherrypy
from WMCore.Services.RequestManager.RequestManager import RequestManager
from WMCore.Services.WorkQueue.WorkQueue import WorkQueue
from WMCore.CherryPyThread.PeriodicWorker import PeriodicWorker
from WMCore.WMStats.DataCollectTask import DataCollectTask
import logging

# Put both CherryPy loggers (error and access) at DEBUG level.
cherrypy.log.error_log.setLevel(logging.DEBUG)
cherrypy.log.access_log.setLevel(logging.DEBUG)
# Server port comes from the configuration.
cherrypy.config["server.socket_port"] = cfg.port

#def sayHello(test):
#    print "Hello"
#PeriodicWorker(sayHello, 5)

# get reqmgr url from config
reqmgrSvc = RequestManager({'endpoint': cfg.reqmgrURL})
# wqSvc is only referenced by the commented-out wqTask below.
wqSvc = WorkQueue(cfg.globalQueueURL)
wmstatSvc = WMStatSevice(cfg.couchURL)

# The task is built from three callables: reqmgrSvc.getRequest,
# reqmgrDataFormat and wmstatSvc.uploadData.
# NOTE(review): presumably fetch -> format -> upload; confirm against
# DataCollectTask.
reqmgrTask = DataCollectTask(reqmgrSvc.getRequest, reqmgrDataFormat, wmstatSvc.uploadData)
#reqmgrTask = DataCollectTask(reqmgrSvc.getRequestNames, lambda x: x, wmstatSvc.uploadData)
#wqTask = DataCollectTask(wqSvc.getTopLevelJobsByRequest, wqDataFormat, wmstatSvc.uploadData)

# NOTE(review): presumably runs reqmgrTask every cfg.pollInterval - confirm
# the unit and semantics against PeriodicWorker.
reqmgrWorker = PeriodicWorker(reqmgrTask, cfg.pollInterval)
#wqWorker = PeriodicWorker(wqTask, 200)

cherrypy.quickstart()
class RequestLifeCycleBase_t():
    """
    Shared steps that take one request through its life cycle in ReqMgr:
    inject configs, inject the request, approve, assign, wait for the
    workqueue and the agent to pick it up, wait for it to run and finish,
    and finally close it out.

    The request, its name and the workqueue handle are stored on the class
    (via ``self.__class__``), not on the instance, so that the numbered test
    methods can hand state to one another.

    This class does not define ``requestParams`` nor the ``assert*`` methods
    it calls; the concrete test class is expected to provide them
    (presumably by also deriving from unittest.TestCase - confirm).
    """

    request = None
    request_name = None
    workqueue = None
    endpoint = os.environ.get('REQMGRBASEURL', 'https://localhost:8443')
    reqmgr = RequestManager({'endpoint': endpoint + '/reqmgr/reqMgr'})
    team = 'TestTeam'
    # Checked in setUp: once true, every remaining step is skipped.
    # NOTE(review): presumably set by the recordException decorator - confirm.
    _failure_detected = False

    # Seconds a polling step waits before giving up, and seconds slept
    # between two polls. Subclasses may override them.
    _pollTimeout = 60 * 20
    _pollInterval = 15

    def _requestElements(self):
        """Return the workqueue job-status entries that belong to this request."""
        cls = self.__class__
        return [x for x in cls.workqueue.getJobStatusByRequest()
                if x['request_name'] == cls.request_name]

    def _waitOrTimeout(self, start, message):
        """
        Sleep for one poll interval, or raise RuntimeError(message) when more
        than _pollTimeout seconds have passed since ``start``.
        """
        if start + self._pollTimeout < time.time():
            raise RuntimeError(message)
        time.sleep(self._pollInterval)

    @recordException
    def setUp(self):
        """
        Skip the test when an earlier failure was recorded or when reqmgr
        cannot be contacted after three attempts.
        """
        cls = self.__class__
        if cls._failure_detected:
            raise nose.SkipTest
        # simple ping check - check reqmgr up
        # (only done while no request has been injected yet)
        tries = 0
        while True:
            try:
                if not cls.request:
                    cls.reqmgr.getTeam()
                break
            except Exception:
                # Not a bare except: Ctrl-C / SystemExit must still be able
                # to interrupt the retry loop.
                tries += 1
                if tries >= 3:
                    raise nose.SkipTest("Unable to contact reqmgr")
                time.sleep(self._pollInterval)

    def _configCacheId(self, label):
        """Return config cache id for given config label

        When the label is not in the config cache yet, the config file
        ``<label>.py`` from the test data directory is uploaded first.
        """
        cls = self.__class__
        key, cert = cls.reqmgr['requests'].getKeyCert()
        configCache = ConfigCache(cls.endpoint + '/couchdb',
                                  'reqmgr_config_cache',
                                  ckey=key, cert=cert)
        try:
            configCacheId = configCache.getIDFromLabel(label)
        except Exception:
            # Any lookup failure is treated as "not cached yet".
            configCacheId = None
        if configCacheId:
            return configCacheId

        # The following will fail if FWCore.ParameterSet not in PYTHONPATH
        from PSetTweaks.WMTweak import makeTweak
        configCache.createUserGroup('test', 'test')
        configDir = os.path.join(getTestBase(), '..', '..', 'test', 'data',
                                 'configs')
        configCache.addConfig(os.path.join(configDir, label + '.py'))
        configCache.setLabel(label)
        configCache.setDescription(label)
        # imp.find_module hands back an open file object that the caller has
        # to close, also when loading the module fails.
        moduleFile, modulePath, moduleDescription = imp.find_module(
            label, [configDir])
        try:
            loadedConfig = imp.load_module(label, moduleFile, modulePath,
                                           moduleDescription)
        finally:
            if moduleFile:
                moduleFile.close()
        configCache.setPSetTweaks(
            makeTweak(loadedConfig.process).jsondictionary())
        configCache.save()
        return configCache.getIDFromLabel(label)

    def _convertLabelsToId(self, config):
        """
        Replace config labels by config cache ids, in place.

        Handles the known *ConfigCacheID fields of ``config`` and recurses
        into the Task1..Task4 sub-dictionaries. Returns ``config``.
        """
        fields = ['ProcConfigCacheID', 'Skim1ConfigCacheID',
                  'StepOneConfigCacheID', 'ConfigCacheID']
        for field in fields:
            if config.get(field):
                config[field] = self._configCacheId(config[field])
        for field in ['Task1', 'Task2', 'Task3', 'Task4']:
            if config.get(field):
                config[field] = self._convertLabelsToId(config[field])
        return config

    @attr("lifecycle")
    @recordException
    def test05InjectConfigs(self):
        """Inject configs to cache"""
        cls = self.__class__
        cls.requestParams = self._convertLabelsToId(cls.requestParams)

    @attr("lifecycle")
    @recordException
    def test10InjectRequest(self):
        """Can inject a request"""
        cls = self.__class__
        cls.requestParams.setdefault('RequestString', cls.__name__)
        tries = 0
        while True:
            try:
                cls.request = cls.reqmgr.makeRequest(**cls.requestParams)[
                    'WMCore.RequestManager.DataStructs.Request.Request']
                cls.request_name = cls.request['RequestName']
                break
            except Exception:
                # Retry immediately; the fourth failure is re-raised as is.
                tries += 1
                if tries > 3:
                    raise
        self.assertTrue(cls.request)
        self.assertTrue(cls.request_name)
        print("Injected request %s" % cls.request_name)
        cls.request = cls.reqmgr.getRequest(cls.request_name)
        self.assertEqual(cls.request['RequestStatus'], 'new')

    @attr("lifecycle")
    @recordException
    def test20ApproveRequest(self):
        """Approve request"""
        cls = self.__class__
        cls.reqmgr.reportRequestStatus(cls.request_name,
                                       'assignment-approved')
        cls.request = cls.reqmgr.getRequest(cls.request_name)
        self.assertEqual(cls.request['RequestStatus'], 'assignment-approved')

    @attr("lifecycle")
    @recordException
    def test30AssignRequest(self):
        """Assign request"""
        cls = self.__class__
        cls.reqmgr.assign(cls.request_name, cls.team, "Testing", "v1",
                          MergedLFNBase='/store/temp',
                          UnmergedLFNBase='/store/temp')
        cls.request = cls.reqmgr.getRequest(cls.request_name)
        self.assertEqual(cls.request['RequestStatus'], 'assigned')

    @attr("lifecycle")
    @recordException
    def test40WorkQueueAcquires(self):
        """WorkQueue picks up request"""
        cls = self.__class__
        if not cls.request_name:
            raise nose.SkipTest
        start = time.time()
        while True:
            workqueue = cls.reqmgr.getWorkQueue(request=cls.request_name)
            if workqueue:
                cls.workqueue = WorkQueue(workqueue[0])
                cls.request = cls.reqmgr.getRequest(cls.request_name)
                self.assertTrue(
                    cls.request['RequestStatus'] in ('acquired', 'running'))
                if any(x['status'] in ('Available', 'Negotiating',
                                       'Acquired', 'Running')
                       for x in self._requestElements()):
                    break
            self._waitOrTimeout(start,
                                'timeout waiting for workqueue to acquire')

    @attr("lifecycle")
    @recordException
    def test50AgentAcquires(self):
        """Elements acquired by agent"""
        cls = self.__class__
        # skip if request already running
        cls.request = cls.reqmgr.getRequest(cls.request_name)
        if cls.request['RequestStatus'] == 'running':
            raise nose.SkipTest
        start = time.time()
        while True:
            acquired = [x for x in self._requestElements()
                        if x['status'] in ('Acquired', 'Running')]
            if acquired:
                break
            self._waitOrTimeout(start, 'timeout waiting for agent to acquire')
        self.assertTrue(acquired)

    @attr("lifecycle")
    @recordException
    def test60RequestRunning(self):
        """Request running"""
        cls = self.__class__
        start = time.time()
        while True:
            request = self._requestElements()
            childQueue = [x for x in cls.workqueue.getChildQueuesByRequest()
                          if x['request_name'] == cls.request_name]
            if any(x['status'] == 'Running' for x in request):
                self.assertTrue(childQueue,
                                "Running but can't get child queue")
                break
            self._waitOrTimeout(start, 'timeout waiting for request to run')

    @attr("lifecycle")
    @recordException
    def test70WorkQueueFinished(self):
        """Request completed in workqueue"""
        start = time.time()
        while True:
            request = self._requestElements()
            # request deleted from wq shortly after finishing, so may not
            # appear here: all() is also true for an empty list
            if all(x['status'] in ('Done', 'Failed', 'Canceled')
                   for x in request):
                break
            self._waitOrTimeout(start,
                                'timeout waiting for request to finish')

    @attr("lifecycle")
    @recordException
    def test80RequestFinished(self):
        """Request completed"""
        cls = self.__class__
        start = time.time()
        while True:
            cls.request = cls.reqmgr.getRequest(cls.request_name)
            if cls.request['RequestStatus'] in ('completed', 'failed',
                                                'aborted'):
                break
            self._waitOrTimeout(start,
                                'timeout waiting for request to finish')

    @attr("lifecycle")
    @recordException
    def test90RequestCloseOut(self):
        """Closeout request"""
        cls = self.__class__
        cls.reqmgr.reportRequestStatus(cls.request_name, "closed-out")
        cls.request = cls.reqmgr.getRequest(cls.request_name)
        self.assertEqual('closed-out', cls.request['RequestStatus'])
def __init__(self, reqMgrUrl):
    """
    Remember the ReqMgr URL and create the two clients bound to it:
    a JSONRequests sender and a RequestManager service whose 'endpoint'
    is that same URL.
    """
    self.reqMgrUrl = reqMgrUrl
    self.restSender = JSONRequests(reqMgrUrl)
    serviceParams = {'endpoint': self.reqMgrUrl}
    self.reqMgrService = RequestManager(serviceParams)