def __init__(self, config=None, logger=None):
    """
    Set up the manager: build the logger, parse the configuration and
    start one thread per enabled service (transferor and/or monitor).

    :param config: reqmgr2ms service configuration object
    :param logger: optional logger object, handed over to getMSLogger
    """
    self.config = config
    verbose = getattr(config, 'verbose', False)
    self.logger = getMSLogger(verbose, logger)
    # populates self.services and self.msConfig
    self._parseConfig(config)
    self.logger.info("Configuration including default values:\n%s", self.msConfig)

    # summary of the last execution cycle of each service
    self.statusTrans = {}
    self.statusMon = {}

    # transferor service: works on requests in 'assigned' status
    if 'transferor' in self.services:
        threadName = 'MSTransferor'
        self.msTransferor = MSTransferor(self.msConfig, logger=self.logger)
        threadArgs = (self.transferor, 'assigned',
                      self.msConfig['interval'], self.logger)
        self.transfThread = start_new_thread(threadName, daemon, threadArgs)
        self.logger.debug("### Running %s thread %s",
                          threadName, self.transfThread.running())

    # monitoring service: works on requests in 'staging' status
    if 'monitor' in self.services:
        threadName = 'MSMonitor'
        self.msMonitor = MSMonitor(self.msConfig, logger=self.logger)
        threadArgs = (self.monitor, 'staging',
                      self.msConfig['interval'], self.logger)
        self.monitThread = start_new_thread(threadName, daemon, threadArgs)
        self.logger.debug("+++ Running %s thread %s",
                          threadName, self.monitThread.running())
def setUp(self):
    """
    Build the service configuration and the MSMonitor instance under test,
    replacing its ReqMgrAux handle with a mock object.
    """
    settings = {}
    settings['verbose'] = False
    settings['group'] = 'DataOps'
    settings['interval'] = 1 * 60
    settings['updateInterval'] = 0
    settings['enableStatusTransition'] = True
    settings['reqmgr2Url'] = 'https://cmsweb-testbed.cern.ch/reqmgr2'
    settings['reqmgrCacheUrl'] = 'https://cmsweb-testbed.cern.ch/couchdb/reqmgr_workload_cache'
    settings['phedexUrl'] = 'https://cmsweb-testbed.cern.ch/phedex/datasvc/json/prod'
    settings['dbsUrl'] = 'https://cmsweb-testbed.cern.ch/dbs/int/global/DBSReader'
    self.msConfig = settings

    monitorService = MSMonitor(self.msConfig)
    self.ms = monitorService
    self.ms.reqmgrAux = MockReqMgrAux()
    # the parent setUp is deliberately called last, as in the original code
    super(MSMonitorTest, self).setUp()
class MSManager(object):
    """
    Entry point for the MicroServices.
    This class manages both transferor and monitoring services.
    """

    def __init__(self, config=None, logger=None):
        """
        Initialize MSManager class with given configuration,
        logger, ReqMgr2/ReqMgrAux/PhEDEx/Rucio objects,
        and start transferor and monitoring threads.

        :param config: reqmgr2ms service configuration
        :param logger: optional logger object, handed over to getMSLogger
        """
        self.config = config
        self.logger = getMSLogger(getattr(config, 'verbose', False), logger)
        # populates self.services and self.msConfig
        self._parseConfig(config)
        self.logger.info("Configuration including default values:\n%s", self.msConfig)
        # summary of the last execution cycle of each service
        self.statusTrans = {}
        self.statusMon = {}

        # initialize transferor module
        if 'transferor' in self.services:
            self.msTransferor = MSTransferor(self.msConfig, logger=self.logger)
            thname = 'MSTransferor'
            self.transfThread = start_new_thread(
                thname, daemon,
                (self.transferor, 'assigned', self.msConfig['interval'], self.logger))
            self.logger.debug("### Running %s thread %s", thname, self.transfThread.running())

        # initialize monitoring module
        if 'monitor' in self.services:
            self.msMonitor = MSMonitor(self.msConfig, logger=self.logger)
            thname = 'MSMonitor'
            self.monitThread = start_new_thread(
                thname, daemon,
                (self.monitor, 'staging', self.msConfig['interval'], self.logger))
            self.logger.debug("+++ Running %s thread %s", thname, self.monitThread.running())

    def _parseConfig(self, config):
        """
        __parseConfig_
        Parse the MicroService configuration and set any default values.
        Sets `self.services` (list of enabled services) and `self.msConfig`
        (plain dictionary copy of the configuration).

        :param config: config as defined in the deployment
        """
        self.logger.info("Using the following MicroServices config: %s", config.dictionary_())
        self.services = getattr(config, 'services', [])

        self.msConfig = {}
        self.msConfig.update(config.dictionary_())
        self.msConfig.setdefault("useRucio", False)
        # the cache URL is always derived from reqmgr2Url, overwriting any configured value
        self.msConfig['reqmgrCacheUrl'] = self.msConfig['reqmgr2Url'].replace(
            'reqmgr2', 'couchdb/reqmgr_workload_cache')

    def _timedExecute(self, label, service, reqStatus):
        """
        Run one execution cycle of a service and add timing information
        to the summary it returns.

        :param label: service name used in the log message
        :param service: object providing an `execute(reqStatus)` method
        :param reqStatus: request status passed to the service
        :return: the summary returned by `service.execute`, updated in place
            by `updateTimeUTC`
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the %s thread...", label)
        res = service.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        return res

    def transferor(self, reqStatus):
        """
        MSManager transferor function.
        It performs Unified logic for data subscription and
        transfers requests from assigned to staging/staged state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Transferor

        :param reqStatus: status of the requests to work on
        """
        res = self._timedExecute('transferor', self.msTransferor, reqStatus)
        self.logger.info("Total transferor execution time: %.2f secs", res['execution_time'])
        self.statusTrans = res

    def monitor(self, reqStatus):
        """
        MSManager monitoring function.
        It performs transfer requests from staging to staged state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Transferor

        :param reqStatus: status of the requests to work on
        """
        res = self._timedExecute('monitor', self.msMonitor, reqStatus)
        self.logger.info("Total monitor execution time: %d secs", res['execution_time'])
        self.statusMon = res

    def stop(self):
        """
        Stop MSManager: stop the thread of every enabled service.

        :return: the `running()` value of the last thread that was stopped,
            or None if no thread was stopped
        """
        status = None
        # stop MSMonitor thread
        if 'monitor' in self.services and hasattr(self, 'monitThread'):
            self.monitThread.stop()
            status = self.monitThread.running()
        # stop MSTransferor thread
        if 'transferor' in self.services and hasattr(self, 'transfThread'):
            self.transfThread.stop()  # stop checkStatus thread
            status = self.transfThread.running()
        return status

    def info(self, reqName=None):
        """
        Return transfer information for a given request

        :param reqName: request name
        :return: data transfer information for this request, as a dictionary
            with keys "request" and "transferDoc" ("transferDoc" stays None
            when no name is given, no suitable service is enabled or no
            document is found)
        """
        data = {"request": reqName, "transferDoc": None}
        if not reqName:
            return data

        # stays None when neither service is enabled; without this default
        # the check below would raise UnboundLocalError
        transferDoc = None
        # obtain the transfer information for a given request records from couchdb for given request
        if 'monitor' in self.services:
            transferDoc = self.msMonitor.reqmgrAux.getTransferInfo(reqName)
        elif 'transferor' in self.services:
            transferDoc = self.msTransferor.reqmgrAux.getTransferInfo(reqName)
        if transferDoc:
            # it's always a single document in Couch
            data['transferDoc'] = transferDoc[0]
        return data

    def delete(self, request):
        "Delete request in backend (currently a no-op)"
        pass

    def status(self, detail):
        """
        Return the current status of a MicroService and a summary
        of its last execution activity. Only the first enabled service
        (transferor, then monitor) contributes to the summary.

        :param detail: boolean used to retrieve some extra information
          regarding the service
        :return: a dictionary
        """
        data = {"status": "OK"}
        if detail and 'transferor' in self.services:
            data.update(self.statusTrans)
        elif detail and 'monitor' in self.services:
            data.update(self.statusMon)
        return data

    def updateTimeUTC(self, reportDict, startT, endT):
        """
        Given a report summary dictionary and start/end time, update
        the report with human readable timing information. The dictionary
        is modified in place with keys 'start_time', 'end_time' and
        'execution_time' (seconds, as a float).

        :param reportDict: summary dictionary
        :param startT: datetime object with the start time of a given service
        :param endT: datetime object with the end time of a given service
        """
        reportDict['start_time'] = startT.strftime("%a, %d %b %Y %H:%M:%S UTC")
        reportDict['end_time'] = endT.strftime("%a, %d %b %Y %H:%M:%S UTC")
        reportDict['execution_time'] = (endT - startT).total_seconds()
class MSMonitorTest(EmulatedUnitTestCase):
    "Unit test for Monitor module"

    def setUp(self):
        "init test class"
        self.msConfig = {'verbose': False,
                         'group': 'DataOps',
                         'interval': 1 * 60,
                         'updateInterval': 0,
                         'enableStatusTransition': True,
                         'reqmgr2Url': 'https://cmsweb-testbed.cern.ch/reqmgr2',
                         'reqmgrCacheUrl': 'https://cmsweb-testbed.cern.ch/couchdb/reqmgr_workload_cache',
                         'phedexUrl': 'https://cmsweb-testbed.cern.ch/phedex/datasvc/json/prod',
                         'dbsUrl': 'https://cmsweb-testbed.cern.ch/dbs/int/global/DBSReader',
                         'rucioAccount': "wma_test",
                         'rucioUrl': "http://cmsrucio-int.cern.ch",
                         'rucioAuthUrl': "https://cmsrucio-auth-int.cern.ch"}

        self.ms = MSMonitor(self.msConfig)
        # replace the ReqMgrAux handle of the service with a mock object
        self.ms.reqmgrAux = MockReqMgrAux()
        super(MSMonitorTest, self).setUp()

    def testUpdateCaches(self):
        """
        Test the updateCaches method
        """
        campaigns, transfersDocs = self.ms.updateCaches()
        self.assertNotEqual(transfersDocs, [])
        self.assertEqual(len(transfersDocs[0]['transfers']), 1)
        self.assertGreater(time.time(), transfersDocs[0]['lastUpdate'])
        # NOTE(review): campaigns is used as a dict below, so comparing it
        # against [] can never fail - confirm whether a non-empty check is intended
        self.assertNotEqual(campaigns, [])
        for cname, cdict in campaigns.items():
            self.assertIsInstance(cdict, dict)
            self.assertEqual(cname, cdict['CampaignName'])
            self.assertNotEqual(cdict.get('CampaignName', {}), {})

    def testGetTransferInfo(self):
        """
        Test the getTransferInfo method
        """
        _, transfersDocs = self.ms.updateCaches()
        transfersDocs[0]['transfers'] = []
        originalTransfers = deepcopy(transfersDocs)
        self.ms.getTransferInfo(transfersDocs)
        self.assertNotEqual(transfersDocs, [])
        self.assertEqual(len(transfersDocs), len(originalTransfers))
        expectedKeys = sorted(['workflowName', 'lastUpdate', 'transfers'])
        for rec in transfersDocs:
            self.assertIsInstance(rec, dict)
            self.assertEqual(expectedKeys, sorted(rec.keys()))
            self.assertGreaterEqual(time.time(), rec['lastUpdate'])

    def testCompletion(self):
        """
        Test the getCompletedWorkflows method
        """
        campaigns, transfersDocs = self.ms.updateCaches()
        # build three records out of the first one: one without transfers,
        # one with an extra completion value of 100, and one left untouched
        transfersDocs.append(deepcopy(transfersDocs[0]))
        transfersDocs.append(deepcopy(transfersDocs[0]))
        transfersDocs[0]['transfers'] = []
        transfersDocs[0]['workflowName'] = 'workflow_0'
        transfersDocs[1]['transfers'][0]['completion'].append(100)
        transfersDocs[1]['workflowName'] = 'workflow_1'
        transfersDocs[2]['workflowName'] = 'workflow_2'
        self.assertEqual(len(transfersDocs), 3)
        completedWfs = self.ms.getCompletedWorkflows(transfersDocs, campaigns)
        self.assertEqual(len(completedWfs), 2)

    def testUpdateTransferInfo(self):
        """
        Test the updateTransferDocs method
        """
        _, transferRecords = self.ms.updateCaches()
        failed = self.ms.updateTransferDocs(transferRecords, workflowsToSkip=[])
        self.assertEqual(len(failed), len(transferRecords))
def __init__(self, config=None, logger=None):
    """
    Set up the manager: build the logger, parse the configuration and
    start the thread(s) of every enabled service (transferor, monitor,
    output and/or ruleCleaner).

    :param config: reqmgr2ms service configuration object
    :param logger: optional logger object, handed over to getMSLogger
    """
    self.config = config
    verbose = getattr(config, 'verbose', False)
    self.logger = getMSLogger(verbose, logger)
    # populates self.services and self.msConfig
    self._parseConfig(config)
    self.logger.info("Configuration including default values:\n%s", self.msConfig)

    # summary of the last execution cycle of each service
    self.statusTrans = {}
    self.statusMon = {}
    self.statusOutput = {}
    self.statusRuleCleaner = {}

    # transferor service: works on requests in 'assigned' status
    if 'transferor' in self.services:
        threadName = 'MSTransferor'
        self.msTransferor = MSTransferor(self.msConfig, logger=self.logger)
        threadArgs = (self.transferor, 'assigned',
                      self.msConfig['interval'], self.logger)
        self.transfThread = start_new_thread(threadName, daemon, threadArgs)
        self.logger.info("### Running %s thread %s",
                         threadName, self.transfThread.running())

    # monitoring service: works on requests in 'staging' status
    if 'monitor' in self.services:
        threadName = 'MSMonitor'
        self.msMonitor = MSMonitor(self.msConfig, logger=self.logger)
        threadArgs = (self.monitor, 'staging',
                      self.msConfig['interval'], self.logger)
        self.monitThread = start_new_thread(threadName, daemon, threadArgs)
        self.logger.info("+++ Running %s thread %s",
                         threadName, self.monitThread.running())

    # output service: a consumer and a producer thread sharing one cache
    if 'output' in self.services:
        outputStatuses = ['closed-out', 'announced']
        # thread safe cache to keep the last X requests processed in MSOutput
        cacheSize = self.msConfig.get("cacheRequestSize", 10000)
        sharedCache = deque(maxlen=cacheSize)

        threadName = 'MSOutputConsumer'
        self.msOutputConsumer = MSOutput(self.msConfig, mode=threadName,
                                         reqCache=sharedCache, logger=self.logger)
        # the consumer cycles at half the producer interval, i.e. twice as often
        halfInterval = self.msConfig['interval'] // 2
        threadArgs = (self.outputConsumer, outputStatuses, halfInterval, self.logger)
        self.outputConsumerThread = start_new_thread(threadName, daemon, threadArgs)
        self.logger.info("=== Running %s thread %s",
                         threadName, self.outputConsumerThread.running())

        threadName = 'MSOutputProducer'
        self.msOutputProducer = MSOutput(self.msConfig, mode=threadName,
                                         reqCache=sharedCache, logger=self.logger)
        threadArgs = (self.outputProducer, outputStatuses,
                      self.msConfig['interval'], self.logger)
        self.outputProducerThread = start_new_thread(threadName, daemon, threadArgs)
        self.logger.info("=== Running %s thread %s",
                         threadName, self.outputProducerThread.running())

    # rule cleaner service
    if 'ruleCleaner' in self.services:
        cleanerStatuses = ['announced', 'aborted-completed', 'rejected']
        threadName = 'MSRuleCleaner'
        self.msRuleCleaner = MSRuleCleaner(self.msConfig, logger=self.logger)
        threadArgs = (self.ruleCleaner, cleanerStatuses,
                      self.msConfig['interval'], self.logger)
        self.ruleCleanerThread = start_new_thread(threadName, daemon, threadArgs)
        self.logger.info("--- Running %s thread %s",
                         threadName, self.ruleCleanerThread.running())
class MSManager(object):
    """
    Entry point for the MicroServices.
    This class manages the transferor, monitoring, output and
    rule cleaner services.
    """

    def __init__(self, config=None, logger=None):
        """
        Initialize MSManager class with given configuration,
        logger, ReqMgr2/ReqMgrAux/PhEDEx/Rucio objects,
        and start the threads of every enabled service.

        :param config: reqmgr2ms service configuration
        :param logger: optional logger object, handed over to getMSLogger
        """
        self.config = config
        self.logger = getMSLogger(getattr(config, 'verbose', False), logger)
        # populates self.services and self.msConfig
        self._parseConfig(config)
        self.logger.info("Configuration including default values:\n%s", self.msConfig)
        # summary of the last execution cycle of each service
        self.statusTrans = {}
        self.statusMon = {}
        self.statusOutput = {}
        self.statusRuleCleaner = {}

        # initialize transferor module
        if 'transferor' in self.services:
            self.msTransferor = MSTransferor(self.msConfig, logger=self.logger)
            thname = 'MSTransferor'
            self.transfThread = start_new_thread(
                thname, daemon,
                (self.transferor, 'assigned', self.msConfig['interval'], self.logger))
            self.logger.info("### Running %s thread %s", thname, self.transfThread.running())

        # initialize monitoring module
        if 'monitor' in self.services:
            self.msMonitor = MSMonitor(self.msConfig, logger=self.logger)
            thname = 'MSMonitor'
            self.monitThread = start_new_thread(
                thname, daemon,
                (self.monitor, 'staging', self.msConfig['interval'], self.logger))
            self.logger.info("+++ Running %s thread %s", thname, self.monitThread.running())

        # initialize output module
        if 'output' in self.services:
            reqStatus = ['closed-out', 'announced']
            # thread safe cache to keep the last X requests processed in MSOutput
            requestNamesCached = deque(maxlen=self.msConfig.get("cacheRequestSize", 10000))

            thname = 'MSOutputConsumer'
            self.msOutputConsumer = MSOutput(self.msConfig, mode=thname,
                                             reqCache=requestNamesCached, logger=self.logger)
            # set the consumer to run twice faster than the producer
            consumerInterval = self.msConfig['interval'] // 2
            self.outputConsumerThread = start_new_thread(
                thname, daemon,
                (self.outputConsumer, reqStatus, consumerInterval, self.logger))
            self.logger.info("=== Running %s thread %s", thname, self.outputConsumerThread.running())

            thname = 'MSOutputProducer'
            self.msOutputProducer = MSOutput(self.msConfig, mode=thname,
                                             reqCache=requestNamesCached, logger=self.logger)
            self.outputProducerThread = start_new_thread(
                thname, daemon,
                (self.outputProducer, reqStatus, self.msConfig['interval'], self.logger))
            self.logger.info("=== Running %s thread %s", thname, self.outputProducerThread.running())

        # initialize rule cleaner module
        if 'ruleCleaner' in self.services:
            reqStatus = ['announced', 'aborted-completed', 'rejected']
            self.msRuleCleaner = MSRuleCleaner(self.msConfig, logger=self.logger)
            thname = 'MSRuleCleaner'
            self.ruleCleanerThread = start_new_thread(
                thname, daemon,
                (self.ruleCleaner, reqStatus, self.msConfig['interval'], self.logger))
            self.logger.info("--- Running %s thread %s", thname, self.ruleCleanerThread.running())

    def _parseConfig(self, config):
        """
        __parseConfig_
        Parse the MicroService configuration and set any default values.
        Sets `self.services` (list of enabled services) and `self.msConfig`
        (plain dictionary copy of the configuration).

        :param config: config as defined in the deployment
        """
        self.logger.info("Using the following MicroServices config: %s", config.dictionary_())
        self.services = getattr(config, 'services', [])

        self.msConfig = {}
        self.msConfig.update(config.dictionary_())
        self.msConfig.setdefault("useRucio", False)
        # the cache URL is always derived from reqmgr2Url, overwriting any configured value
        self.msConfig['reqmgrCacheUrl'] = self.msConfig['reqmgr2Url'].replace(
            'reqmgr2', 'couchdb/reqmgr_workload_cache')

    def _timedExecute(self, label, service, reqStatus):
        """
        Run one execution cycle of a service and add timing information
        to the summary it returns.

        :param label: service name used in the log message
        :param service: object providing an `execute(reqStatus)` method
        :param reqStatus: request status(es) passed to the service
        :return: the summary returned by `service.execute`, updated in place
            by `updateTimeUTC`
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the %s thread...", label)
        res = service.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        return res

    def transferor(self, reqStatus):
        """
        MSManager transferor function.
        It performs Unified logic for data subscription and
        transfers requests from assigned to staging/staged state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Transferor

        :param reqStatus: status of the requests to work on
        """
        res = self._timedExecute('transferor', self.msTransferor, reqStatus)
        self.logger.info("Total transferor execution time: %.2f secs", res['execution_time'])
        self.statusTrans = res

    def monitor(self, reqStatus):
        """
        MSManager monitoring function.
        It performs transfer requests from staging to staged state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Transferor

        :param reqStatus: status of the requests to work on
        """
        res = self._timedExecute('monitor', self.msMonitor, reqStatus)
        self.logger.info("Total monitor execution time: %d secs", res['execution_time'])
        self.statusMon = res

    def outputConsumer(self, reqStatus):
        """
        MSManager Output Data Placement function.
        It subscribes the output datasets to the Data Management System.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Output

        :param reqStatus: Status of requests to work on
        """
        res = self._timedExecute('outputConsumer', self.msOutputConsumer, reqStatus)
        self.logger.info("Total outputConsumer execution time: %d secs", res['execution_time'])
        # NOTE: consumer and producer both store their summary in statusOutput
        self.statusOutput = res

    def outputProducer(self, reqStatus):
        """
        MSManager MongoDB Uploader function.
        It uploads the documents describing a workflow output Data subscription
        into MongoDb. For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Output

        :param reqStatus: Status of requests to work on
        """
        res = self._timedExecute('outputProducer', self.msOutputProducer, reqStatus)
        self.logger.info("Total outputProducer execution time: %d secs", res['execution_time'])
        # NOTE: consumer and producer both store their summary in statusOutput
        self.statusOutput = res

    def ruleCleaner(self, reqStatus):
        """
        MSManager ruleCleaner function.
        It cleans the block level Rucio rules created by WMAgent and
        performs request status transition from ['announced', 'aborted-completed', 'rejected'] to
        '{normal, aborted, rejected}-archived' state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-RuleCleaner

        :param reqStatus: status of the requests to work on
        """
        res = self._timedExecute('ruleCleaner', self.msRuleCleaner, reqStatus)
        self.logger.info("Total ruleCleaner execution time: %d secs", res['execution_time'])
        self.statusRuleCleaner = res

    def stop(self):
        """
        Stop MSManager: stop the thread(s) of every enabled service.

        :return: the `running()` value of the last thread that was stopped,
            or None if no thread was stopped
        """
        status = None
        # stop MSMonitor thread
        if 'monitor' in self.services and hasattr(self, 'monitThread'):
            self.monitThread.stop()
            status = self.monitThread.running()
        # stop MSTransferor thread
        if 'transferor' in self.services and hasattr(self, 'transfThread'):
            self.transfThread.stop()  # stop checkStatus thread
            status = self.transfThread.running()
        # stop MSOutput threads
        if 'output' in self.services and hasattr(self, 'outputConsumerThread'):
            self.outputConsumerThread.stop()
            status = self.outputConsumerThread.running()
        if 'output' in self.services and hasattr(self, 'outputProducerThread'):
            self.outputProducerThread.stop()
            status = self.outputProducerThread.running()
        # stop MSRuleCleaner thread
        if 'ruleCleaner' in self.services and hasattr(self, 'ruleCleanerThread'):
            self.ruleCleanerThread.stop()
            status = self.ruleCleanerThread.running()
        return status

    def info(self, reqName=None):
        """
        Return transfer information for a given request

        :param reqName: request name
        :return: data transfer information for this request, as a dictionary
            with keys "request" and "transferDoc" ("transferDoc" stays None
            when no name is given, no suitable service is enabled or no
            document is found)
        """
        data = {"request": reqName, "transferDoc": None}
        if not reqName:
            return data

        # stays None when none of the services below is enabled (e.g. only
        # 'ruleCleaner'); without this default the check further down would
        # raise UnboundLocalError
        transferDoc = None
        # obtain the transfer information for a given request records from couchdb for given request
        if 'monitor' in self.services:
            transferDoc = self.msMonitor.reqmgrAux.getTransferInfo(reqName)
        elif 'transferor' in self.services:
            transferDoc = self.msTransferor.reqmgrAux.getTransferInfo(reqName)
        elif 'output' in self.services:
            transferDoc = self.msOutputProducer.getTransferInfo(reqName)
        if transferDoc:
            # it's always a single document in Couch
            data['transferDoc'] = transferDoc[0]
        return data

    def delete(self, request):
        "Delete request in backend (currently a no-op)"
        pass

    def status(self, detail):
        """
        Return the current status of a MicroService and a summary
        of its last execution activity. Only the first enabled service
        (transferor, monitor, output, ruleCleaner - in this order)
        contributes to the summary.

        :param detail: boolean used to retrieve some extra information
          regarding the service
        :return: a dictionary
        """
        data = {"status": "OK"}
        if detail and 'transferor' in self.services:
            data.update(self.statusTrans)
        elif detail and 'monitor' in self.services:
            data.update(self.statusMon)
        elif detail and 'output' in self.services:
            data.update(self.statusOutput)
        elif detail and 'ruleCleaner' in self.services:
            data.update(self.statusRuleCleaner)
        return data

    def updateTimeUTC(self, reportDict, startT, endT):
        """
        Given a report summary dictionary and start/end time, update
        the report with human readable timing information. The dictionary
        is modified in place with keys 'start_time', 'end_time' and
        'execution_time' (seconds, as a float).

        :param reportDict: summary dictionary
        :param startT: datetime object with the start time of a given service
        :param endT: datetime object with the end time of a given service
        """
        reportDict['start_time'] = startT.strftime("%a, %d %b %Y %H:%M:%S UTC")
        reportDict['end_time'] = endT.strftime("%a, %d %b %Y %H:%M:%S UTC")
        reportDict['execution_time'] = (endT - startT).total_seconds()