def testHeartbeat(self):
    """
    _testHeartbeat_

    Register two components one after the other, update the heartbeat of
    differently named workers on each, and assert which worker names
    getHeartbeatInfo() returns after every update. Finally record an
    error for a worker and assert the error message is returned.
    """

    def checkWorkers(api, expectedNames):
        """Fetch the heartbeat rows and assert their worker names, in order."""
        rows = api.getHeartbeatInfo()
        self.assertEqual(len(rows), len(expectedNames))
        for position, workerName in enumerate(expectedNames):
            self.assertEqual(rows[position]['worker_name'], workerName)
        return rows

    # First component: nothing is reported until a worker checks in.
    firstComponent = HeartbeatAPI("testComponent")
    firstComponent.registerComponent()
    self.assertEqual(firstComponent.getHeartbeatInfo(), [])

    firstComponent.updateWorkerHeartbeat("testWorker")
    checkWorkers(firstComponent, ["testWorker"])

    for workerName in ("testWorker2", "testWorker"):
        time.sleep(1)
        firstComponent.updateWorkerHeartbeat(workerName)
        checkWorkers(firstComponent, [workerName])

    # Second component: the row of the first component is expected to be
    # returned as well, ahead of the second component's own row.
    secondComponent = HeartbeatAPI("test2Component")
    secondComponent.registerComponent()

    for workerName in ("test2Worker", "test2Worker2", "test2Worker"):
        time.sleep(1)
        secondComponent.updateWorkerHeartbeat(workerName)
        checkWorkers(secondComponent, ["testWorker", workerName])

    secondComponent.updateWorkerError("test2Worker", "Error1")
    rows = secondComponent.getHeartbeatInfo()
    self.assertEqual(rows[1]['error_message'], "Error1")
def setUp(self):
    """
    _setUp_

    Prepare the test environment: logging, database connection and
    schema, three couch databases, the WMBS and BossAir DAO factories,
    a work directory, a registered 'JobSubmitter' heartbeat component
    and the emulated WMAgent configuration file.
    """
    init = TestInit(__file__)
    self.testInit = init
    init.setLogging()
    init.setDatabaseConnection()
    init.setSchema(customModules=["WMCore.WMBS",
                                  "WMCore.BossAir",
                                  "WMCore.ResourceControl",
                                  "WMCore.Agent.Database"])

    couchDatabases = (("jobsubmitter_t/jobs", "JobDump"),
                      ("jobsubmitter_t/fwjrs", "FWJRDump"),
                      ("wmagent_summary_t", "WMStats"))
    for dbName, couchApp in couchDatabases:
        init.setupCouch(dbName, couchApp)

    # Both DAO factories use the logger and DB interface that live on
    # the current thread object.
    thread = threading.currentThread()
    self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                 logger=thread.logger,
                                 dbinterface=thread.dbi)
    self.baDaoFactory = DAOFactory(package="WMCore.BossAir",
                                   logger=thread.logger,
                                   dbinterface=thread.dbi)

    self.testDir = init.generateWorkDir()

    # Register the component with the heartbeat machinery
    self.componentName = 'JobSubmitter'
    self.heartbeatAPI = HeartbeatAPI(self.componentName)
    self.heartbeatAPI.registerComponent()

    self.configFile = EmulatorSetup.setupWMAgentConfig()
    return
def setUp(self):
    """
    _setUp_

    Run the parent setUp, then prepare logging, the database schema and
    the job/FWJR couch databases. Every entry of self.sites is inserted
    as a WMBS location and as a ResourceControl site with a 'Processing'
    threshold. A work directory is generated and the 'JobCreator'
    heartbeat component is registered.
    """
    super(JobCreatorTest, self).setUp()

    init = TestInit(__file__)
    self.testInit = init
    init.setLogging()
    init.setDatabaseConnection()
    init.setSchema(customModules=['WMCore.WMBS',
                                  'WMCore.ResourceControl',
                                  'WMCore.Agent.Database'],
                   useDefault=False)

    self.couchdbname = "jobcreator_t"
    for suffix, couchApp in (("jobs", "JobDump"), ("fwjrs", "FWJRDump")):
        init.setupCouch("%s/%s" % (self.couchdbname, suffix), couchApp)

    self.configFile = EmulatorSetup.setupWMAgentConfig()

    thread = threading.currentThread()
    self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                 logger=thread.logger,
                                 dbinterface=thread.dbi)

    # WMBS locations first ...
    newLocation = self.daoFactory(classname="Locations.New")
    for siteName in self.sites:
        newLocation.execute(siteName=siteName, pnn=siteName)

    # ... then the same sites in ResourceControl
    self.resourceControl = ResourceControl()
    for siteName in self.sites:
        self.resourceControl.insertSite(siteName=siteName, pnn=siteName,
                                        ceName=siteName)
        self.resourceControl.insertThreshold(siteName=siteName,
                                             taskType='Processing',
                                             maxSlots=10000,
                                             pendingSlots=10000)

    self._setup = True
    self._teardown = False

    self.testDir = init.generateWorkDir()
    self.cwd = os.getcwd()

    # Register the component with the heartbeat machinery
    self.componentName = 'JobCreator'
    self.heartbeatAPI = HeartbeatAPI(self.componentName)
    self.heartbeatAPI.registerComponent()

    # assertItemsEqual does not exist on Python 3; alias its replacement
    if PY3:
        self.assertItemsEqual = self.assertCountEqual

    return
def testUpdateWorkers(self):
    """
    _testUpdateWorkers_

    Register two components with their workers, call updateWorkerCycle()
    and updateWorkerError() on some of them and assert the values that
    getHeartbeatInfo() returns afterwards.
    """
    firstComp = HeartbeatAPI("testComponent1", pollInterval=60, heartbeatTimeout=600)
    firstComp.registerComponent()
    for workerName in ("testWorker1", "testWorker2"):
        firstComp.registerWorker(workerName)

    secondComp = HeartbeatAPI("testComponent2", pollInterval=30, heartbeatTimeout=300)
    secondComp.registerComponent()
    secondComp.registerWorker("testWorker21")

    firstComp.updateWorkerCycle("testWorker1", 1.001, None)
    secondComp.updateWorkerCycle("testWorker21", 1234.1, 100)

    firstInfo = firstComp.getHeartbeatInfo()
    secondInfo = secondComp.getHeartbeatInfo()

    # Only testWorker1 had a cycle recorded; any other worker stays at 0.
    for entry in firstInfo:
        if entry['worker_name'] != 'testWorker1':
            self.assertEqual(entry["cycle_time"], 0)
            continue
        self.assertTrue(entry["cycle_time"] > 1.0)

    self.assertItemsEqual([entry["outcome"] for entry in firstInfo], [None, None])
    self.assertItemsEqual([entry["error_message"] for entry in firstInfo], [None, None])

    onlyWorker = secondInfo[0]
    self.assertEqual(round(onlyWorker["cycle_time"], 1), 1234.1)
    # the integer outcome passed above is expected back as a string
    self.assertEqual(onlyWorker["outcome"], '100')
    self.assertEqual(onlyWorker["error_message"], None)

    # time to update workers with an error
    firstComp.updateWorkerError("testWorker2", "BAD JOB!!!")
    for entry in firstComp.getHeartbeatInfo():
        if entry['worker_name'] != 'testWorker2':
            continue
        self.assertTrue(entry["last_error"] > int(time.time() - 10))
        self.assertEqual(entry["state"], "Error")
        self.assertEqual(entry["error_message"], "BAD JOB!!!")
def setUp(self):
    """
    _setUp_

    setUp function for unittest.

    Prepares logging, the database schema and a couch database, adds
    three DBSBuffer locations, registers the 'JobSubmitter' heartbeat
    component and stores a small PSet file in a ConfigCache document.
    On return self.configURL holds "<COUCHURL>;;<couchDB>;;<document id>".

    Requires the COUCHURL environment variable; a KeyError is raised
    when it is not set.
    """
    # Set constants
    self.couchDB = "config_test"
    self.configURL = "RANDOM;;URL;;NAME"  # placeholder, replaced at the end
    self.configString = "This is a random string"

    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(
        customModules=["WMComponent.DBS3Buffer", 'WMCore.Agent.Database'],
        useDefault=False)
    self.testInit.setupCouch(self.couchDB, "GroupUser", "ConfigCache")

    myThread = threading.currentThread()
    self.bufferFactory = DAOFactory(
        package="WMComponent.DBSBuffer.Database",
        logger=myThread.logger,
        dbinterface=myThread.dbi)
    self.buffer3Factory = DAOFactory(package="WMComponent.DBS3Buffer",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)

    locationAction = self.bufferFactory(
        classname="DBSBufferFiles.AddLocation")
    locationAction.execute(siteName="se1.cern.ch")
    locationAction.execute(siteName="se1.fnal.gov")
    locationAction.execute(siteName="malpaquet")

    # Set heartbeat
    self.componentName = 'JobSubmitter'
    self.heartbeatAPI = HeartbeatAPI(self.componentName)
    self.heartbeatAPI.registerComponent()

    # Set up a config cache
    configCache = ConfigCache(os.environ["COUCHURL"],
                              couchDBName=self.couchDB)
    configCache.createUserGroup(groupname="testGroup", username='******')
    self.testDir = self.testInit.generateWorkDir()

    psetPath = os.path.join(self.testDir, "PSet.txt")
    # The context manager closes the file even if the write fails.
    with open(psetPath, 'w') as psetFile:
        psetFile.write(self.configString)

    configCache.addConfig(newConfig=psetPath, psetHash=None)
    configCache.save()
    self.configURL = "%s;;%s;;%s" % (os.environ["COUCHURL"], self.couchDB,
                                     configCache.getCouchID())

    return
def setUp(self):
    """
    _setUp_

    Prepare logging and the database schema, insert every entry of
    self.sites as a WMBS location (with its pending slots) and as a
    ResourceControl site with a 'Processing' threshold, generate a work
    directory and register a heartbeat component for every entry of
    self.components.
    """
    init = TestInit(__file__)
    self.testInit = init
    init.setLogging()
    init.setDatabaseConnection()
    init.setSchema(customModules=["WMCore.WMBS",
                                  'WMCore.MsgService',
                                  'WMCore.ResourceControl',
                                  'WMCore.ThreadPool',
                                  'WMCore.Agent.Database'],
                   useDefault=False)

    thread = threading.currentThread()
    self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                 logger=thread.logger,
                                 dbinterface=thread.dbi)

    newLocation = self.daoFactory(classname="Locations.New")
    setPendingSlots = self.daoFactory(classname="Locations.SetPendingSlots")

    for siteName in self.sites:
        newLocation.execute(siteName=siteName,
                            seName='se.%s' % (siteName),
                            ceName=siteName)
        setPendingSlots.execute(siteName=siteName, pendingSlots=1000)

    # Create sites in resourceControl
    siteRegistry = ResourceControl()
    for siteName in self.sites:
        siteRegistry.insertSite(siteName=siteName,
                                seName='se.%s' % (siteName),
                                ceName=siteName)
        siteRegistry.insertThreshold(siteName=siteName,
                                     taskType='Processing',
                                     maxSlots=10000,
                                     pendingSlots=10000)

    self.testDir = init.generateWorkDir()

    # Set heartbeat: one registration per component name
    for componentName in self.components:
        HeartbeatAPI(componentName).registerComponent()

    return
def setUp(self):
    """
    _setUp_

    Set up logging and the database connection, install the
    WMCore.Agent.Database schema only (no default modules) and create
    the HeartbeatAPI instance for "testComponent" used by the tests.
    """
    init = TestInit(__file__)
    self.testInit = init
    # pass logLevel = logging.SQLDEBUG to setLogging() for SQL tracing
    init.setLogging()
    init.setDatabaseConnection()
    init.setSchema(customModules=["WMCore.Agent.Database"],
                   useDefault=False)

    self.heartbeat = HeartbeatAPI("testComponent")
def prepareToStart(self):
    """
    _prepareToStart_

    returns: Nothing

    Runs the whole initialization sequence in one call, which makes it
    convenient to use from tests: thread initialization, optional
    heartbeat registration, the pre/main/post initialization hooks
    (wrapped in begin/commit when the thread has sql_transaction set)
    and finally resuming the worker threads. The state goes from
    'initialize' to 'active'.
    """
    self.state = 'initialize'
    self.initInThread()

    # note: every component gets a (unique) name, stored in
    # self.config.Agent.componentName
    agentConfig = self.config.Agent
    logging.info(">>>Registering Component - %s", agentConfig.componentName)

    # Heartbeat registration is on unless the config switches it off.
    heartbeatEnabled = getattr(agentConfig, "useHeartbeat", True)
    if heartbeatEnabled:
        self.heartbeatAPI = HeartbeatAPI(agentConfig.componentName)
        self.heartbeatAPI.registerComponent()

    logging.info('>>>Starting initialization')
    logging.info('>>>Setting default transaction')

    thread = threading.currentThread()

    self.preInitialization()

    if thread.sql_transaction:
        thread.transaction.begin()

    self.initialization()
    self.postInitialization()

    if thread.sql_transaction:
        thread.transaction.commit()

    logging.info('>>>Committing default transaction')
    logging.info(">>>Starting worker threads")
    thread.workerThreadManager.resumeWorkers()
    logging.info(">>>Initialization finished!\n")

    # wait for messages
    self.state = 'active'
def prepareWorker(self, worker, idleTime):
    """
    Prepares a worker thread before running

    Copies the idle time and the owning component onto the worker, gives
    it a slave id of the form "<wtmnumber>-<slavecounter>", wires up the
    terminate/pause/resume synchronisation attributes and, when the
    component config has an Agent section with heartbeats enabled
    (the default), attaches a HeartbeatAPI for the component.

    worker   -- the worker object to configure (modified in place)
    idleTime -- value stored on the worker as its idleTime
    """
    # Work timing
    worker.idleTime = idleTime
    worker.component = self.component

    # The counter is bumped under the lock; try/finally guarantees the
    # lock is released even if building the slave id raises.
    self.lock.acquire()
    try:
        self.slavecounter += 1
        worker.slaveid = "%s-%s" % (self.wtmnumber, self.slavecounter)
    finally:
        self.lock.release()

    # Thread synchronisation
    worker.notifyTerminate = self.terminateSlaves
    worker.terminateCallback = self.slaveTerminateCallback
    worker.notifyPause = self.pauseSlaves
    worker.notifyResume = self.resumeSlaves

    if hasattr(self.component.config, "Agent"):
        if getattr(self.component.config.Agent, "useHeartbeat", True):
            worker.heartbeatAPI = HeartbeatAPI(self.component.config.Agent.componentName)
def setUp(self):
    """
    setup for test.

    Prepares logging, the database (tearDown() is called before the
    schema is installed), the job/FWJR couch databases and the WMBS DAO
    factory. Inserts every entry of self.sites plus three hard-coded
    extra sites into ResourceControl, creates a user, generates a work
    directory and registers the 'test' and 'JobTracker' heartbeat
    components.
    """
    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.tearDown()
    self.testInit.setSchema(customModules=["WMCore.WMBS",
                                           "WMCore.BossAir",
                                           "WMCore.ResourceControl",
                                           "WMCore.Agent.Database"],
                            useDefault=False)
    for dbName, couchApp in (("bossair_t/jobs", "JobDump"),
                             ("bossair_t/fwjrs", "FWJRDump")):
        self.testInit.setupCouch(dbName, couchApp)

    thread = threading.currentThread()
    self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                 logger=thread.logger,
                                 dbinterface=thread.dbi)
    self.getJobs = self.daoFactory(classname="Jobs.GetAllJobs")

    # Create sites in resourceControl
    resourceControl = ResourceControl()
    for site in self.sites:
        resourceControl.insertSite(siteName=site, pnn='se.%s' % (site),
                                   cmsName=site, ceName=site,
                                   plugin="CondorPlugin",
                                   pendingSlots=1000, runningSlots=2000)
        resourceControl.insertThreshold(siteName=site,
                                        taskType='Processing',
                                        maxSlots=1000, pendingSlots=1000)

    # Extra sites as (site/CE name, pnn, plugin, slots); the last entry
    # is the one used for glite submissions.
    # NOTE(review): cmsName below is `site`, i.e. whatever the loop above
    # left behind (the last entry of self.sites) - confirm this is
    # intended rather than the extra site's own name.
    extraSites = (
        ('Xanadu', 'se.Xanadu', "TestPlugin", 10000),
        ('jade-cms.hip.fi', 'madhatter.csc.fi', "ARCPlugin", 100),
        ('grid-ce-01.ba.infn.it', 'storm-se-01.ba.infn.it', 'gLitePlugin', 50),
    )
    for extraName, extraPnn, pluginName, slots in extraSites:
        resourceControl.insertSite(siteName=extraName, pnn=extraPnn,
                                   cmsName=site, ceName=extraName,
                                   plugin=pluginName)
        resourceControl.insertThreshold(siteName=extraName,
                                        taskType='Processing',
                                        maxSlots=slots, pendingSlots=slots)

    # Create user
    createUser = self.daoFactory(classname="Users.New")
    createUser.execute(dn="tapas", group_name="phgroup", role_name="cmsrole")

    # We actually need the user name
    self.user = getpass.getuser()

    # Change this to the working dir to keep track of error and log files from condor
    self.testDir = self.testInit.generateWorkDir()

    # Set heartbeat
    self.heartbeatAPI = HeartbeatAPI('test')
    self.heartbeatAPI.registerComponent()
    self.heartbeatAPI2 = HeartbeatAPI('JobTracker')
    self.heartbeatAPI2.registerComponent()

    return
def __init__(self, slaveClassName, totalSlaves, componentDir,
             config, namespace='WMComponent', inPort='5555',
             outPort='5558'):
    """
    __init__

    Constructor for the process pool.  The slave class name must be based
    inside the WMComponent namespace.  For examples, the JobAccountant
    would pass in 'JobAccountant.AccountantWorker' to run the
    AccountantWorker class.  All log files will be stored in the
    component directory that is passed in.  Each slave will have its own
    log file.

    The config is pickled to '<slaveClassName>_config.pkl' inside
    componentDir (an existing file is reported and overwritten), the
    PUSH socket is bound on inPort and the PULL socket on outPort, and
    the slaves are then created.

    Raises ProcessPoolException when the sockets cannot be bound even
    after one retry.
    """
    self.enqueueIndex = 0
    self.dequeueIndex = 0
    self.runningWork = 0

    # Use the Services.Requests JSONizer, which handles __to_json__ calls
    self.jsonHandler = JSONRequests()

    # heartbeat should be registered at this point
    if getattr(config.Agent, "useHeartbeat", True):
        self.heartbeatAPI = HeartbeatAPI(getattr(config.Agent, "componentName", "ProcPoolSlave"))

    self.slaveClassName = slaveClassName
    self.componentDir = componentDir
    self.config = config

    # Grab the python version from the current version
    # Assume naming convention pythonA.B, i.e., python2.4 for v2.4.X
    # (falls back to "python2.6" when either number is 0)
    majorVersion = sys.version_info[0]
    minorVersion = sys.version_info[1]
    if majorVersion and minorVersion:
        self.versionString = "python%i.%i" % (majorVersion, minorVersion)
    else:
        self.versionString = "python2.6"

    self.workers = []
    self.nSlaves = totalSlaves
    self.namespace = namespace
    self.inPort = inPort
    self.outPort = outPort

    # Pickle the config
    self.configPath = os.path.join(componentDir, '%s_config.pkl' % slaveClassName)
    if os.path.exists(self.configPath):
        # Then we note it and overwrite it
        msg = "Something's in the way of the ProcessPool config: %s" % self.configPath
        logging.error(msg)
    # Binary mode is what pickle expects; the context manager closes the
    # file even when dumping fails.
    with open(self.configPath, 'wb') as configFile:
        cPickle.dump(config, configFile)

    # Set up ZMQ
    try:
        self.sender, self.sink = self._bindSockets(inPort, outPort)
    except zmq.ZMQError:
        # Try this again in a moment to see
        # if it's just being held by something pre-existing
        logging.error("Blocked socket on startup: Attempting sleep to give it time to clear.")
        import time
        time.sleep(1)
        try:
            self.sender, self.sink = self._bindSockets(inPort, outPort)
        except Exception as ex:
            msg = "Error attempting to open TCP sockets\n"
            msg += str(ex)
            logging.error(msg)
            import traceback
            print(traceback.format_exc())
            raise ProcessPoolException(msg)

    # Now actually create the slaves
    self.createSlaves()

    return

def _bindSockets(self, inPort, outPort):
    """
    Create a fresh ZMQ context and return (sender, sink): a PUSH socket
    bound to inPort and a PULL socket bound to outPort.

    If anything fails the context and every socket created from it are
    destroyed before the exception is re-raised, so a retry does not
    collide with a port this process is still holding.
    """
    context = zmq.Context()
    try:
        sender = context.socket(zmq.PUSH)
        sender.bind("tcp://*:%s" % inPort)
        sink = context.socket(zmq.PULL)
        sink.bind("tcp://*:%s" % outPort)
    except Exception:
        context.destroy(linger=0)
        raise
    return sender, sink
def testAddComponent(self):
    """
    _testAddComponent_

    Create two components with their worker threads and assert what the
    per-component (getHeartbeatInfo) and global (getAllHeartbeatInfo)
    heartbeat queries return at every step.
    """

    def column(rows, key):
        """Return the values stored under key, one per row."""
        return [row[key] for row in rows]

    firstComp = HeartbeatAPI("testComponent1", pollInterval=60, heartbeatTimeout=600)
    firstComp.registerComponent()
    self.assertEqual(firstComp.getHeartbeatInfo(), [])  # no worker thread yet

    for expectedCount, workerName in enumerate(("testWorker1", "testWorker2"), 1):
        firstComp.registerWorker(workerName)
        self.assertEqual(len(firstComp.getHeartbeatInfo()), expectedCount)

    secondComp = HeartbeatAPI("testComponent2", pollInterval=30, heartbeatTimeout=300)
    secondComp.registerComponent()
    self.assertEqual(secondComp.getHeartbeatInfo(), [])  # no worker thread yet
    self.assertEqual(len(secondComp.getAllHeartbeatInfo()), 2)

    secondComp.registerWorker("testWorker21")
    self.assertEqual(len(secondComp.getHeartbeatInfo()), 1)
    self.assertEqual(len(secondComp.getAllHeartbeatInfo()), 3)

    for api, workerName in ((firstComp, "testWorker1"),
                            (firstComp, "testWorker2"),
                            (secondComp, "testWorker21")):
        api.updateWorkerHeartbeat(workerName, "Running")

    # Updating existing workers must not add rows.
    for api in (firstComp, secondComp):
        self.assertEqual(len(api.getAllHeartbeatInfo()), 3)

    firstRows = firstComp.getHeartbeatInfo()
    secondRows = secondComp.getHeartbeatInfo()
    self.assertEqual(len(firstRows), 2)
    self.assertEqual(len(secondRows), 1)

    expectations = (
        (firstRows, (("name", ["testComponent1", "testComponent1"]),
                     ("worker_name", ["testWorker1", "testWorker2"]),
                     ("state", ["Running", "Running"]),
                     ("poll_interval", [60, 60]),
                     ("update_threshold", [600, 600]))),
        (secondRows, (("name", ["testComponent2"]),
                      ("worker_name", ["testWorker21"]),
                      ("state", ["Running"]),
                      ("poll_interval", [30]),
                      ("update_threshold", [300]))),
    )
    for rows, expectedColumns in expectations:
        for key, expectedValues in expectedColumns:
            self.assertItemsEqual(column(rows, key), expectedValues)