class PhEDExInjectorSubscriber(BaseWorkerThread):
    """
    _PhEDExInjectorSubscriber_

    Poll the DBSBuffer database and subscribe datasets as they are created.
    """

    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)

        # Data-service handles: PhEDEx (JSON API) and SiteDB for node/site mapping
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        self.siteDB = SiteDBJSON()
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        # PhEDEx group the subscriptions are made under (default: DataOps)
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")
        # When True, algorithm() additionally runs the Tier-0 block subscription pass
        self.tier0Mode = getattr(config.PhEDExInjector, "tier0Mode", False)

        # We will map node names to CMS names, that what the spec will have.
        # If a CMS name is associated to many PhEDEx node then choose the MSS option
        self.cmsToPhedexMap = {}
        # PhEDEx node names bucketed by node kind; filled in setup()
        self.phedexNodes = {'MSS':[], 'Disk':[]}

        # initialize the alert framework (if available - config.Alert present)
        # self.sendAlert will be then be available
        self.initAlerts(compName = "PhEDExInjector")

    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector.  Also load the SE names to
        PhEDEx node name mappings from the data service.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMComponent.PhEDExInjector.Database",
                                logger = self.logger,
                                dbinterface = myThread.dbi)
        # DAOs used by the polling cycles
        self.getUnsubscribed = daofactory(classname = "GetUnsubscribedDatasets")
        self.getUnsubscribedBlocks = daofactory(classname = "GetUnsubscribedBlocks")
        self.markSubscribed = daofactory(classname = "MarkDatasetSubscribed")

        # Build the CMS-site -> {node kind -> PhEDEx node} map from the live node map
        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:
            cmsName = self.siteDB.phEDExNodetocmsName(node["name"])
            if cmsName not in self.cmsToPhedexMap:
                self.cmsToPhedexMap[cmsName] = {}
            logging.info("Loaded PhEDEx node %s for site %s" % (node["name"], cmsName))
            # First node of a given kind wins for each CMS site
            if node["kind"] not in self.cmsToPhedexMap[cmsName]:
                self.cmsToPhedexMap[cmsName][node["kind"]] = node["name"]
            if node["kind"] in [ "MSS", "Disk" ]:
                self.phedexNodes[node["kind"]].append(node["name"])
        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Run the subscription algorithm as configured
        """
        if self.tier0Mode:
            self.subscribeTier0Blocks()
        # subscribeDatasets is not defined in this excerpt; it is expected to be
        # provided elsewhere in this class.
        self.subscribeDatasets()
        return

    def subscribeTier0Blocks(self):
        """
        _subscribeTier0Blocks_

        Subscribe blocks to the Tier-0 where a replica subscription already exists.
        All Tier-0 subscriptions are move, custodial and autoapproved with high priority.
        """
        myThread = threading.currentThread()
        # NOTE(review): a transaction is begun here but never committed in this
        # method (both on the early return and at the end) -- confirm whether the
        # caller commits, otherwise the read transaction is left open.
        myThread.transaction.begin()

        # Check for candidate blocks for subscription
        blocksToSubscribe = self.getUnsubscribedBlocks.execute(node = 'T0_CH_CERN',
                                                               conn = myThread.transaction.conn,
                                                               transaction = True)

        if not blocksToSubscribe:
            return

        # For the blocks we don't really care about the subscription options
        # We are subscribing all blocks with the same recipe.
        # Group block names by their dataset path.
        subscriptionMap = {}
        for subInfo in blocksToSubscribe:
            dataset = subInfo['path']
            if dataset not in subscriptionMap:
                subscriptionMap[dataset] = []
            subscriptionMap[dataset].append(subInfo['blockname'])

        # Fixed recipe: move + custodial + auto-approved, high priority, to T0
        site = 'T0_CH_CERN'
        custodial = 'y'
        request_only = 'n'
        move = 'y'
        priority = 'High'

        # Get the phedex node
        phedexNode = self.cmsToPhedexMap[site]["MSS"]

        # NOTE(review): message arguments look swapped -- len(subscriptionMap) is
        # the number of datasets and the sum is the number of blocks; also this is
        # an informational message logged at ERROR level. Verify intent.
        logging.error("Subscribing %d blocks, from %d datasets to the Tier-0" % (len(subscriptionMap), sum([len(x) for x in subscriptionMap.values()])))

        newSubscription = PhEDExSubscription(subscriptionMap.keys(), phedexNode, self.group,
                                             custodial = custodial, request_only = request_only,
                                             move = move, priority = priority,
                                             level = 'block', blocks = subscriptionMap)

        # TODO: Check for blocks already subscribed
        try:
            xmlData = XMLDrop.makePhEDExXMLForBlocks(self.dbsUrl,
                                                     newSubscription.getDatasetsAndBlocks())
            logging.debug(str(xmlData))
            self.phedex.subscribe(newSubscription, xmlData)
        except Exception, ex:
            # Best effort: failures are logged and retried on the next polling cycle
            logging.error("Something went wrong when communicating with PhEDEx, will try again later.")
            logging.error("Exception: %s" % str(ex))
class SiteDBTest(unittest.TestCase):
    """
    Unit tests for SiteScreening module

    Uses the modern unittest assertion methods (assertEqual/assertTrue) instead
    of the long-deprecated failUnless/assertEquals aliases, which also produce
    clearer failure messages.
    """

    def setUp(self):
        """
        Setup for unit tests
        """
        self.mySiteDB = SiteDBJSON()

    @attr("integration")
    def testCmsNametoPhEDExNode(self):
        """
        Tests CmsNametoSE
        """
        target = ['T1_US_FNAL_MSS', 'T1_US_FNAL_Buffer']
        results = self.mySiteDB.cmsNametoPhEDExNode("T1_US_FNAL")
        self.assertEqual(sorted(results), sorted(target))

    @attr("integration")
    def testPhEDExNodetocmsName(self):
        """
        Tests PhEDExNodetocmsName
        """
        result = self.mySiteDB.phEDExNodetocmsName('T1_US_FNAL_MSS')
        self.assertEqual(result, 'T1_US_FNAL')
        result = self.mySiteDB.phEDExNodetocmsName('T1_US_FNAL_Buffer')
        self.assertEqual(result, 'T1_US_FNAL')
        result = self.mySiteDB.phEDExNodetocmsName('T2_UK_London_IC')
        self.assertEqual(result, 'T2_UK_London_IC')
        # don't check this anymore, see comment in phEDExNodetocmsName function
        #self.assertRaises(ValueError, self.mySiteDB.phEDExNodetocmsName,
        #                  'T9_DOESNT_EXIST_Buffer')

    @attr("integration")
    def testCmsNametoSE(self):
        """
        Tests CmsNametoSE
        """
        target = ['srm-cms.gridpp.rl.ac.uk']
        results = self.mySiteDB.cmsNametoSE("T1_UK_RAL")
        self.assertEqual(sorted(results), sorted(target))

    @attr("integration")
    def testSEtoCmsName(self):
        """
        Tests CmsNametoSE
        """
        target = 'T1_US_FNAL'
        results = self.mySiteDB.seToCMSName("cmssrm.fnal.gov")
        self.assertEqual(results, target)

    @attr("integration")
    def testCmsNametoCE(self):
        """
        Tests CmsNametoCE
        """
        target = ['lcgce06.gridpp.rl.ac.uk', 'lcgce07.gridpp.rl.ac.uk',
                  'lcgce09.gridpp.rl.ac.uk']
        results = self.mySiteDB.cmsNametoCE("T1_UK_RAL")
        self.assertEqual(sorted(results), target)

    @attr("integration")
    def testJSONParser(self):
        """
        Tests the JSON parser directly
        """
        cmsName = "cmsgrid02.hep.wisc.edu"
        results = self.mySiteDB.getJSON("CEtoCMSName", file="CEtoCMSName",
                                        name=cmsName)
        self.assertEqual(results['0']['name'], "T2_US_Wisconsin")

    @attr("integration")
    def testDNUserName(self):
        """
        Tests DN to Username lookup
        """
        testDn = "/C=UK/O=eScience/OU=Bristol/L=IS/CN=simon metson"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertEqual(testUserName, userName)

    def testDNWithApostrophe(self):
        """
        Tests a DN with an apostrophy in - will fail till SiteDB2 appears
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertEqual(testUserName, userName)

    @attr("integration")
    def testParsingJsonWithApostrophe(self):
        """
        Tests parsing a DN json with an apostrophe in
        """
        json = """{"dn": "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'", "user": "******"}"""
        d = self.mySiteDB.parser.dictParser(json)
        self.assertEqual("/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'", d['dn'])

    @attr("integration")
    def testParsingInvalidJsonWithApostrophe(self):
        """
        Tests parsing a DN invalid json (from sitedb v1) with an apostrophe in
        """
        json = """{'dn': '/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio' Fano', 'user': '******'}"""
        d = self.mySiteDB.parser.dictParser(json)
        self.assertEqual("/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio' Fano", d['dn'])
        json = """{'dn': '/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'', 'user': '******'}"""
        d = self.mySiteDB.parser.dictParser(json)
        self.assertEqual("/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'", d['dn'])
def getFiles(datasetName, runBlacklist, runWhitelist, blockBlacklist, blockWhitelist, dbsUrl): """ _getFiles_ Get the full information of a dataset including files, blocks, runs and lumis. Filter it using run and block white/black lists. It can receive and optional DBSUrl. """ dbsReader = DBSReader(endpoint=dbsUrl) phedexReader = PhEDEx() siteDB = SiteDBJSON() files = {} outputDatasetParts = datasetName.split("/") print "dataset", datasetName, "parts", outputDatasetParts try: #retrieve list of blocks from dataset blockNames = dbsReader.listFileBlocks(datasetName) except: raise RuntimeError("Dataset %s doesn't exist in given DBS instance" % datasetName) #traverse each block for blockName in blockNames: #deal with white and black list. if blockBlacklist and blockName in blockBlacklist: continue if blockWhitelist and blockName not in blockWhitelist: continue #existing blocks in phedex replicaInfo = phedexReader.getReplicaInfoForBlocks(block=blockName, subscribed='y') blockFiles = dbsReader.listFilesInBlock(blockName, lumis=True) blockLocations = set() #load block locations if len(replicaInfo["phedex"]["block"]) > 0: for replica in replicaInfo["phedex"]["block"][0]["replica"]: node = replica["node"] cmsSites = siteDB.phEDExNodetocmsName(node) if type(cmsSites) != list: cmsSites = [cmsSites] for cmsName in cmsSites: blockLocations.update(siteDB.cmsNametoSE(cmsName)) #for each file on the block for blockFile in blockFiles: parentLFNs = [] #get parent information about file blockFileParents = dbsReader.listFilesInBlockWithParents(blockName) #populate parent information if blockFileParents and "ParentList" in blockFileParents[0]: for fileParent in blockFileParents[0]["ParentList"]: parentLFNs.append(fileParent["LogicalFileName"]) runInfo = {} #Lumis not included in file for lumiSection in blockFile["LumiList"]: if runBlacklist and lumiSection["RunNumber"] in runBlacklist: continue if runWhitelist and lumiSection[ "RunNumber"] not in runWhitelist: continue if 
lumiSection["RunNumber"] not in runInfo.keys(): runInfo[lumiSection["RunNumber"]] = [] runInfo[lumiSection["RunNumber"]].append( lumiSection["LumiSectionNumber"]) if len(runInfo.keys()) > 0: files[blockFile["LogicalFileName"]] = { "runs": runInfo, "events": blockFile["NumberOfEvents"], "size": blockFile["FileSize"], "locations": list(blockLocations), "parents": parentLFNs } return files
def getFiles(datasetName, runBlacklist, runWhitelist, blockBlacklist,
             blockWhitelist, dbsUrl):
    """
    _getFiles_

    Get the full information of a dataset including files, blocks, runs and lumis.
    Filter it using run and block white/black lists.

    It can receive and optional DBSUrl.

    :param datasetName: dataset path, e.g. /Primary/Processed/TIER
    :param runBlacklist: runs to exclude (falsy means no blacklist)
    :param runWhitelist: runs to keep exclusively (falsy means no whitelist)
    :param blockBlacklist: blocks to exclude (falsy means no blacklist)
    :param blockWhitelist: blocks to keep exclusively (falsy means no whitelist)
    :param dbsUrl: DBS endpoint to query
    :returns: dict keyed by LFN with runs/events/size/locations/parents info
    :raises RuntimeError: when the dataset does not exist in the DBS instance
    """
    dbsReader = DBSReader(endpoint = dbsUrl)
    phedexReader = PhEDEx()
    siteDB = SiteDBJSON()
    files = {}
    outputDatasetParts = datasetName.split("/")

    # Verify the dataset exists before asking for its blocks
    datasets = dbsReader.matchProcessedDatasets(outputDatasetParts[1],
                                                outputDatasetParts[3],
                                                outputDatasetParts[2])
    if len(datasets) == 0:
        raise RuntimeError("Dataset %s doesn't exist in given DBS instance" % datasetName)

    blockNames = dbsReader.listFileBlocks(datasetName)
    for blockName in blockNames:
        # Apply the block white/black lists
        if blockBlacklist and blockName in blockBlacklist:
            continue
        if blockWhitelist and blockName not in blockWhitelist:
            continue

        # Existing (subscribed) replicas in PhEDEx for this block
        replicaInfo = phedexReader.getReplicaInfoForBlocks(block = blockName,
                                                           subscribed = 'y')
        block = dbsReader.listFilesInBlockWithParents(blockName)

        # Collect the SEs of every CMS site holding a replica of the block
        blockLocations = set()
        if len(replicaInfo["phedex"]["block"]) > 0:
            for replica in replicaInfo["phedex"]["block"][0]["replica"]:
                node = replica["node"]
                cmsSites = siteDB.phEDExNodetocmsName(node)
                if not isinstance(cmsSites, list):
                    cmsSites = [cmsSites]
                for cmsName in cmsSites:
                    blockLocations.update(siteDB.cmsNametoSE(cmsName))

        for blockFile in block:
            # Parent LFNs for this particular file
            parentLFNs = [fileParent["LogicalFileName"]
                          for fileParent in blockFile["ParentList"]]

            # Group lumi sections by run number, honoring run white/black lists
            runInfo = {}
            for lumiSection in blockFile["LumiList"]:
                if runBlacklist and lumiSection["RunNumber"] in runBlacklist:
                    continue
                if runWhitelist and lumiSection["RunNumber"] not in runWhitelist:
                    continue
                runInfo.setdefault(lumiSection["RunNumber"], []).append(
                    lumiSection["LumiSectionNumber"])
            if runInfo:
                files[blockFile["LogicalFileName"]] = {"runs": runInfo,
                                                       "events": blockFile["NumberOfEvents"],
                                                       "size": blockFile["FileSize"],
                                                       "locations": list(blockLocations),
                                                       "parents": parentLFNs}
    return files
class PhEDExInjectorSubscriber(BaseWorkerThread):
    """
    _PhEDExInjectorSubscriber_

    Poll the DBSBuffer database and subscribe datasets as they are created.
    """

    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        self.siteDB = SiteDBJSON()
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")
        # In safe mode custodial subscriptions are made in two passes
        # (non-custodial first), so datasets go through an intermediate state
        self.safeMode = getattr(config.PhEDExInjector, "safeOperationMode", False)

        # Subscribed state in the DBSBuffer table for datasets
        self.terminalSubscriptionState = 1
        if self.safeMode:
            self.terminalSubscriptionState = 2

        # We will map node names to CMS names, that what the spec will have.
        # If a CMS name is associated to many PhEDEx node then choose the MSS option
        self.cmsToPhedexMap = {}

        # initialize the alert framework (if available - config.Alert present)
        # self.sendAlert will be then be available
        self.initAlerts(compName = "PhEDExInjector")

    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector.  Also load the SE names to
        PhEDEx node name mappings from the data service.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMComponent.PhEDExInjector.Database",
                                logger = self.logger,
                                dbinterface = myThread.dbi)
        self.getUnsubscribed = daofactory(classname = "GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname = "MarkDatasetSubscribed")
        self.getPartiallySubscribed = daofactory(classname = "GetPartiallySubscribedDatasets")

        # Build the CMS-site -> {node kind -> PhEDEx node} map
        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:
            cmsName = self.siteDB.phEDExNodetocmsName(node["name"])
            if cmsName not in self.cmsToPhedexMap:
                self.cmsToPhedexMap[cmsName] = {}
            logging.info("Loaded PhEDEx node %s for site %s" % (node["name"], cmsName))
            if node["kind"] not in self.cmsToPhedexMap[cmsName]:
                self.cmsToPhedexMap[cmsName][node["kind"]] = node["name"]
        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for datasets and subscribe them.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute(conn = myThread.transaction.conn,
                                                            transaction = True)

        if self.safeMode:
            # In safe mode, also pick up datasets whose non-custodial pass is done
            partiallySubscribedDatasets = self.getPartiallySubscribed.execute(conn = myThread.transaction.conn,
                                                                              transaction = True)
            unsubscribedDatasets.extend(partiallySubscribedDatasets)
            partiallySubscribedSet = set()
            for entry in partiallySubscribedDatasets:
                partiallySubscribedSet.add(entry["path"])

        # Map the datasets to their specs
        specDatasetMap = {}
        for unsubscribedDataset in unsubscribedDatasets:
            datasetPath = unsubscribedDataset["path"]
            workflow = unsubscribedDataset["workflow"]
            spec = unsubscribedDataset["spec"]
            if datasetPath not in specDatasetMap:
                specDatasetMap[datasetPath] = []
            specDatasetMap[datasetPath].append({"workflow" : workflow, "spec" : spec})

        specCache = {}
        siteMap = {}
        # Distribute the subscriptions by site, type and priority
        # This is to make as few subscriptions as possible
        # Site map values are dictionaries where the keys are tuples
        # (Prio, Custodial, AutoApprove, Move)
        # Where Custodial is boolean, Prio is in ["Low", "Normal", "High"],
        # AutoApprove is boolean and Move is boolean
        for dataset in specDatasetMap:
            # Aggregate all the different subscription configurations
            subInfo = {}
            for entry in specDatasetMap[dataset]:
                if not entry["spec"]:
                    # Can't use this spec, there isn't one
                    continue
                # Load spec if not in the cache
                if entry["spec"] not in specCache:
                    helper = WMWorkloadHelper()
                    try:
                        helper.load(entry["spec"])
                        specCache[entry["spec"]] = helper
                    except Exception:
                        # Couldn't load it, alert and carry on
                        msg = "Couldn't load spec: %s" % entry["spec"]
                        logging.error(msg)
                        self.sendAlert(7, msg = msg)
                        continue
                # We have the spec, get the subscription info for this dataset
                helper = specCache[entry["spec"]]
                workflowSubInfo = helper.getSubscriptionInformation()
                datasetSubInfo = workflowSubInfo.get(dataset, None)
                if datasetSubInfo and subInfo:
                    # Merge with what previous workflows requested for this dataset
                    subInfo["CustodialSites"] = extendWithoutDups(subInfo["CustodialSites"],
                                                                  datasetSubInfo["CustodialSites"])
                    subInfo["NonCustodialSites"] = extendWithoutDups(subInfo["NonCustodialSites"],
                                                                     datasetSubInfo["NonCustodialSites"])
                    subInfo["AutoApproveSites"] = extendWithoutDups(subInfo["AutoApproveSites"],
                                                                    datasetSubInfo["AutoApproveSites"])
                    subInfo["Priority"] = solvePrioConflicts(subInfo["Priority"],
                                                             datasetSubInfo["Priority"])
                elif datasetSubInfo:
                    subInfo = datasetSubInfo

            # We now have aggregated subscription information for this dataset in subInfo
            # Distribute it by site
            if not subInfo:
                # Nothing to do, log and continue
                msg = "No subscriptions configured for dataset %s" % dataset
                logging.warning(msg)
                self.markSubscribed.execute(dataset, subscribed = self.terminalSubscriptionState,
                                            conn = myThread.transaction.conn,
                                            transaction = True)
                continue

            # Make sure that a site is not configured both as non custodial and custodial
            # Non-custodial is believed to be the right choice
            subInfo["CustodialSites"] = list(set(subInfo["CustodialSites"]) - set(subInfo["NonCustodialSites"]))
            for site in subInfo["CustodialSites"]:
                if site not in siteMap:
                    siteMap[site] = {}
                if self.safeMode and dataset not in partiallySubscribedSet:
                    # First pass in safe mode: custodial but no move yet
                    tupleKey = (subInfo["Priority"], True, False, False)
                else:
                    tupleKey = (subInfo["Priority"], True, False, True)
                if tupleKey not in siteMap[site]:
                    siteMap[site][tupleKey] = []
                siteMap[site][tupleKey].append(dataset)

            # If we are in safe mode and this is a partially subscribed dataset,
            # then the non-custodial were done in a previous cycle
            if self.safeMode and dataset in partiallySubscribedSet:
                self.markSubscribed.execute(dataset, subscribed = self.terminalSubscriptionState,
                                            conn = myThread.transaction.conn,
                                            transaction = True)
                continue

            for site in subInfo["NonCustodialSites"]:
                if site not in siteMap:
                    siteMap[site] = {}
                autoApprove = False
                if site in subInfo["AutoApproveSites"]:
                    autoApprove = True
                # BUGFIX: the key must be a 4-tuple (Prio, Custodial, AutoApprove, Move)
                # like the custodial keys above; the original built a 3-tuple here,
                # which made the unconditional subscriptionFlavor[3] access below
                # raise IndexError for every non-custodial flavor.
                # Non-custodial subscriptions are never move subscriptions.
                tupleKey = (subInfo["Priority"], False, autoApprove, False)
                if tupleKey not in siteMap[site]:
                    siteMap[site][tupleKey] = []
                siteMap[site][tupleKey].append(dataset)

            # Record the first-pass state for this dataset
            self.markSubscribed.execute(dataset, subscribed = 1,
                                        conn = myThread.transaction.conn,
                                        transaction = True)

        # Actually request the subscriptions
        for site in siteMap:
            # Check that the site is valid
            if site not in self.cmsToPhedexMap:
                msg = "Site %s doesn't appear to be valid to PhEDEx" % site
                logging.error(msg)
                self.sendAlert(7, msg = msg)
                continue
            for subscriptionFlavor in siteMap[site]:
                datasets = siteMap[site][subscriptionFlavor]
                # Prefer the MSS node for the site, fall back to Disk
                if "MSS" in self.cmsToPhedexMap[site]:
                    phedexNode = self.cmsToPhedexMap[site]["MSS"]
                else:
                    phedexNode = self.cmsToPhedexMap[site]["Disk"]
                logging.info("Subscribing %s to %s" % (datasets, site))
                options = {"custodial" : "n", "requestOnly" : "y",
                           "priority" : subscriptionFlavor[0].lower(),
                           "move" : "n"}
                if subscriptionFlavor[1]:
                    options["custodial"] = "y"
                if subscriptionFlavor[3]:
                    options["move"] = "y"
                if subscriptionFlavor[2]:
                    options["requestOnly"] = "n"
                newSubscription = PhEDExSubscription(datasets, phedexNode, self.group,
                                                     **options)

                xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl,
                                                           newSubscription.getDatasetPaths())
                logging.debug(str(xmlData))
                self.phedex.subscribe(newSubscription, xmlData)

        myThread.transaction.commit()
        return
class SiteDBTest(unittest.TestCase):
    """
    Unit tests for SiteScreening module

    Uses the modern unittest assertion methods (assertEqual/assertTrue) instead
    of the long-deprecated failUnless alias, which also produces clearer
    failure messages.
    """

    def setUp(self):
        """
        Setup for unit tests
        """
        # Run against the SiteDB emulator so tests don't hit the real service
        EmulatorHelper.setEmulators(siteDB = True)
        self.mySiteDB = SiteDBJSON()

    def tearDown(self):
        EmulatorHelper.resetEmulators()

    def testCmsNametoPhEDExNode(self):
        """
        Tests CmsNametoSE
        """
        target = ['T1_US_FNAL_MSS', 'T1_US_FNAL_Buffer']
        results = self.mySiteDB.cmsNametoPhEDExNode("T1_US_FNAL")
        self.assertEqual(sorted(results), sorted(target))

    def testPhEDExNodetocmsName(self):
        """
        Tests PhEDExNodetocmsName
        """
        result = self.mySiteDB.phEDExNodetocmsName('T1_US_FNAL_MSS')
        self.assertEqual(result, 'T1_US_FNAL')
        result = self.mySiteDB.phEDExNodetocmsName('T1_US_FNAL_Buffer')
        self.assertEqual(result, 'T1_US_FNAL')
        result = self.mySiteDB.phEDExNodetocmsName('T2_UK_London_IC')
        self.assertEqual(result, 'T2_UK_London_IC')
        # don't check this anymore, see comment in phEDExNodetocmsName function
        #self.assertRaises(ValueError, self.mySiteDB.phEDExNodetocmsName,
        #                  'T9_DOESNT_EXIST_Buffer')

    def testCmsNametoSE(self):
        """
        Tests CmsNametoSE
        """
        target = ['srm-cms.gridpp.rl.ac.uk']
        results = self.mySiteDB.cmsNametoSE("T1_UK_RAL")
        self.assertEqual(sorted(results), sorted(target))

    def testSEtoCmsName(self):
        """
        Tests CmsNametoSE
        """
        target = ['T1_US_FNAL']
        results = self.mySiteDB.seToCMSName("cmssrm.fnal.gov")
        self.assertEqual(results, target)
        # An SE shared by several CMS sites maps to all of them
        target = sorted(['T2_CH_CERN', 'T2_CH_CERN_HLT'])
        results = sorted(self.mySiteDB.seToCMSName("srm-eoscms.cern.ch"))
        self.assertEqual(results, target)

    def testCmsNametoCE(self):
        """
        Tests CmsNametoCE
        """
        target = ['lcgce11.gridpp.rl.ac.uk', 'lcgce10.gridpp.rl.ac.uk',
                  'lcgce02.gridpp.rl.ac.uk']
        results = self.mySiteDB.cmsNametoCE("T1_UK_RAL")
        self.assertEqual(sorted(results), sorted(target))

    def testDNUserName(self):
        """
        Tests DN to Username lookup
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=gutsche/CN=582680/CN=Oliver Gutsche"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertEqual(testUserName, userName)

    @attr("integration")
    def testDNWithApostrophe(self):
        """
        Tests a DN with an apostrophy in - will fail till SiteDB2 appears
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertEqual(testUserName, userName)

    def testSEFinder(self):
        """
        _testSEFinder_

        See if we can retrieve seNames from all sites
        """
        seNames = self.mySiteDB.getAllSENames()
        self.assertTrue(len(seNames) > 1)
        self.assertTrue('cmssrm.fnal.gov' in seNames)
        return
class PhEDExInjectorSubscriber(BaseWorkerThread):
    """
    _PhEDExInjectorSubscriber_

    Poll the DBSBuffer database and subscribe datasets as they are created.
    """

    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)

        # Data-service handles: PhEDEx (JSON API) and SiteDB for node/site mapping
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        self.siteDB = SiteDBJSON()
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        # PhEDEx group the subscriptions are made under (default: DataOps)
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")

        # We will map node names to CMS names, that what the spec will have.
        # If a CMS name is associated to many PhEDEx node then choose the MSS option
        self.cmsToPhedexMap = {}
        # PhEDEx node names bucketed by node kind; filled in setup()
        self.phedexNodes = {"MSS": [], "Disk": []}

        # initialize the alert framework (if available - config.Alert present)
        # self.sendAlert will be then be available
        self.initAlerts(compName="PhEDExInjector")

    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector.  Also load the SE names to
        PhEDEx node name mappings from the data service.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(
            package="WMComponent.PhEDExInjector.Database",
            logger=self.logger,
            dbinterface=myThread.dbi
        )
        self.getUnsubscribed = daofactory(classname="GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname="MarkDatasetSubscribed")

        # Build the CMS-site -> {node kind -> PhEDEx node} map from the live node map
        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:
            cmsName = self.siteDB.phEDExNodetocmsName(node["name"])
            if cmsName not in self.cmsToPhedexMap:
                self.cmsToPhedexMap[cmsName] = {}
            logging.info("Loaded PhEDEx node %s for site %s" % (node["name"], cmsName))
            # First node of a given kind wins for each CMS site
            if node["kind"] not in self.cmsToPhedexMap[cmsName]:
                self.cmsToPhedexMap[cmsName][node["kind"]] = node["name"]
            if node["kind"] in ["MSS", "Disk"]:
                self.phedexNodes[node["kind"]].append(node["name"])
        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Run the subscription algorithm as configured
        """
        self.subscribeDatasets()
        return

    def subscribeDatasets(self):
        """
        _subscribeDatasets_

        Poll the database for datasets and subscribe them.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute(conn=myThread.transaction.conn,
                                                            transaction=True)

        # Keep a list of subscriptions to tick as subscribed in the database
        subscriptionsMade = []

        # Create a list of subscriptions as defined by the PhEDEx data structures
        subs = SubscriptionList()

        # Create the subscription objects and add them to the list
        # The list takes care of the sorting internally
        for subInfo in unsubscribedDatasets:
            site = subInfo["site"]

            # Resolve a CMS site name to a concrete PhEDEx node if needed
            if site not in self.phedexNodes["MSS"] and site not in self.phedexNodes["Disk"]:
                if site not in self.cmsToPhedexMap:
                    msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                    msg += "skipping subscription: %s" % subInfo["id"]
                    logging.error(msg)
                    self.sendAlert(7, msg=msg)
                    continue

                # Get the phedex node from CMS site (prefer MSS over Disk)
                site = self.cmsToPhedexMap[site].get("MSS") or self.cmsToPhedexMap[site]["Disk"]

            # Avoid custodial subscriptions to disk nodes
            if site not in self.phedexNodes["MSS"]:
                subInfo["custodial"] = "n"
            # Avoid auto approval in T1 sites
            elif site.startswith("T1"):
                subInfo["request_only"] = "y"

            phedexSub = PhEDExSubscription(
                subInfo["path"],
                site,
                self.group,
                priority=subInfo["priority"],
                move=subInfo["move"],
                custodial=subInfo["custodial"],
                request_only=subInfo["request_only"],
                subscriptionId=subInfo["id"],
            )

            # Check if the subscription is a duplicate; if so just mark it done
            if phedexSub.matchesExistingSubscription(self.phedex) or phedexSub.matchesExistingTransferRequest(
                self.phedex
            ):
                subscriptionsMade.append(subInfo["id"])
                continue

            # Add it to the list
            subs.addSubscription(phedexSub)

        # Compact the subscriptions (merge compatible ones into fewer requests)
        subs.compact()

        for subscription in subs.getSubscriptionList():
            try:
                xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl, subscription.getDatasetPaths())
                logging.debug(str(xmlData))
                msg = "Subscribing: %s to %s, with options: " % (
                    subscription.getDatasetPaths(),
                    subscription.getNodes(),
                )
                msg += "Move: %s, Custodial: %s, Request Only: %s" % (
                    subscription.move,
                    subscription.custodial,
                    subscription.request_only,
                )
                logging.info(msg)
                self.phedex.subscribe(subscription, xmlData)
            except Exception as ex:
                # Best effort: failures are logged and retried on the next polling cycle
                logging.error("Something went wrong when communicating with PhEDEx, will try again later.")
                logging.error("Exception: %s" % str(ex))
            else:
                # Only mark these rows subscribed if the request actually succeeded
                subscriptionsMade.extend(subscription.getSubscriptionIds())

        # Register the result in DBSBuffer
        if subscriptionsMade:
            self.markSubscribed.execute(subscriptionsMade, conn=myThread.transaction.conn, transaction=True)

        myThread.transaction.commit()
        return
class SiteDBTest(unittest.TestCase):
    """
    Unit tests for SiteScreening module

    Uses the modern unittest assertion methods (assertEqual/assertTrue) instead
    of the long-deprecated failUnless/assertEquals aliases, which also produce
    clearer failure messages.
    """

    def setUp(self):
        """
        Setup for unit tests
        """
        self.mySiteDB = SiteDBJSON()

    def testCmsNametoPhEDExNode(self):
        """
        Tests CmsNametoSE
        """
        target = ['T1_US_FNAL_MSS', 'T1_US_FNAL_Buffer']
        results = self.mySiteDB.cmsNametoPhEDExNode("T1_US_FNAL")
        self.assertEqual(sorted(results), sorted(target))

    def testPhEDExNodetocmsName(self):
        """
        Tests PhEDExNodetocmsName
        """
        result = self.mySiteDB.phEDExNodetocmsName('T1_US_FNAL_MSS')
        self.assertEqual(result, 'T1_US_FNAL')
        result = self.mySiteDB.phEDExNodetocmsName('T1_US_FNAL_Buffer')
        self.assertEqual(result, 'T1_US_FNAL')
        result = self.mySiteDB.phEDExNodetocmsName('T2_UK_London_IC')
        self.assertEqual(result, 'T2_UK_London_IC')
        # don't check this anymore, see comment in phEDExNodetocmsName function
        #self.assertRaises(ValueError, self.mySiteDB.phEDExNodetocmsName,
        #                  'T9_DOESNT_EXIST_Buffer')

    def testCmsNametoSE(self):
        """
        Tests CmsNametoSE
        """
        target = ['srm-cms.gridpp.rl.ac.uk', 'srm-cms-disk.gridpp.rl.ac.uk']
        results = self.mySiteDB.cmsNametoSE("T1_UK_RAL")
        self.assertEqual(sorted(results), sorted(target))

    def testSEtoCmsName(self):
        """
        Tests CmsNametoSE
        """
        target = 'T1_US_FNAL'
        results = self.mySiteDB.seToCMSName("cmssrm.fnal.gov")
        self.assertEqual(results, target)

    def testCmsNametoCE(self):
        """
        Tests CmsNametoCE
        """
        # NOTE(review): 'lcgce07' appears twice in the expected list -- verify
        # whether the data service really reports a duplicate CE.
        target = [
            'lcgce09.gridpp.rl.ac.uk', 'lcgce06.gridpp.rl.ac.uk',
            'lcgce07.gridpp.rl.ac.uk', 'lcgce07.gridpp.rl.ac.uk'
        ]
        results = self.mySiteDB.cmsNametoCE("T1_UK_RAL")
        self.assertEqual(sorted(results), sorted(target))

    def testJSONParser(self):
        """
        Tests the JSON parser directly
        """
        cmsName = "cmsgrid02.hep.wisc.edu"
        results = self.mySiteDB.getJSON("CEtoCMSName", file="CEtoCMSName",
                                        name=cmsName)
        self.assertEqual(results['0']['name'], "T2_US_Wisconsin")

    def testDNUserName(self):
        """
        Tests DN to Username lookup
        """
        testDn = "/C=UK/O=eScience/OU=Bristol/L=IS/CN=simon metson"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertEqual(testUserName, userName)

    @attr("integration")
    def testDNWithApostrophe(self):
        """
        Tests a DN with an apostrophy in - will fail till SiteDB2 appears
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertEqual(testUserName, userName)

    def testSEFinder(self):
        """
        _testSEFinder_

        See if we can retrieve seNames from all sites
        """
        ceNames = self.mySiteDB.getAllSENames()
        self.assertTrue(len(ceNames) > 1)
        self.assertTrue('cmssrm.fnal.gov' in ceNames)
        return

    @attr("integration")
    def testParsingJsonWithApostrophe(self):
        """
        Tests parsing a DN json with an apostrophe in
        """
        json = """{"dn": "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'", "user": "******"}"""
        d = self.mySiteDB.parser.dictParser(json)
        self.assertEqual(
            "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'", d['dn'])

    @attr("integration")
    def testParsingInvalidJsonWithApostrophe(self):
        """
        Tests parsing a DN invalid json (from sitedb v1) with an apostrophe in
        """
        json = """{'dn': '/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio' Fano', 'user': '******'}"""
        d = self.mySiteDB.parser.dictParser(json)
        self.assertEqual(
            "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio' Fano", d['dn'])
        json = """{'dn': '/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'', 'user': '******'}"""
        d = self.mySiteDB.parser.dictParser(json)
        self.assertEqual(
            "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'", d['dn'])
def getFiles(datasetName, runBlacklist, runWhitelist, blockBlacklist, blockWhitelist, dbsUrl):
    """
    _getFiles_

    Get the full information of a dataset including files, blocks, runs and lumis.
    Filter it using run and block white/black lists.

    It can receive an optional DBSUrl.

    Returns a dict keyed by LFN; each value holds the runs/lumis that survived
    the filters plus event count, size, SE locations and parent LFNs.
    Files whose lumi sections are all filtered out are dropped.
    """
    dbsReader = DBSReader(endpoint = dbsUrl)
    phedexReader = PhEDEx()
    siteDB = SiteDBJSON()
    files = {}

    try:
        # Retrieve the list of blocks from the dataset
        blockNames = dbsReader.listFileBlocks(datasetName)
    except Exception:
        # Narrowed from a bare except: so SystemExit/KeyboardInterrupt
        # are no longer swallowed
        raise RuntimeError("Dataset %s doesn't exist in given DBS instance" % datasetName)

    # Traverse each block
    for blockName in blockNames:
        # Deal with the block white and black lists
        if blockBlacklist and blockName in blockBlacklist:
            continue
        if blockWhitelist and blockName not in blockWhitelist:
            continue

        # Existing blocks in PhEDEx
        replicaInfo = phedexReader.getReplicaInfoForBlocks(block = blockName,
                                                           subscribed = 'y')
        blockFiles = dbsReader.listFilesInBlock(blockName, lumis = True)
        blockLocations = set()
        # Load the block locations
        if len(replicaInfo["phedex"]["block"]) > 0:
            for replica in replicaInfo["phedex"]["block"][0]["replica"]:
                node = replica["node"]
                cmsSites = siteDB.phEDExNodetocmsName(node)
                # phEDExNodetocmsName may return a single name or a list
                if not isinstance(cmsSites, list):
                    cmsSites = [cmsSites]
                for cmsName in cmsSites:
                    blockLocations.update(siteDB.cmsNametoSE(cmsName))

        # Get the parent information once per block; the original called DBS
        # inside the per-file loop with the same argument, making one remote
        # call per file for an identical answer.
        parentLFNs = []
        blockFileParents = dbsReader.listFilesInBlockWithParents(blockName)
        if blockFileParents and "ParentList" in blockFileParents[0]:
            for fileParent in blockFileParents[0]["ParentList"]:
                parentLFNs.append(fileParent["LogicalFileName"])

        # For each file on the block, filter its lumi sections by the
        # run white and black lists
        for blockFile in blockFiles:
            runInfo = {}
            # Lumis not included in file
            for lumiSection in blockFile["LumiList"]:
                if runBlacklist and lumiSection["RunNumber"] in runBlacklist:
                    continue
                if runWhitelist and lumiSection["RunNumber"] not in runWhitelist:
                    continue
                if lumiSection["RunNumber"] not in runInfo:
                    runInfo[lumiSection["RunNumber"]] = []
                runInfo[lumiSection["RunNumber"]].append(lumiSection["LumiSectionNumber"])
            if runInfo:
                files[blockFile["LogicalFileName"]] = {"runs": runInfo,
                                                       "events": blockFile["NumberOfEvents"],
                                                       "size": blockFile["FileSize"],
                                                       "locations": list(blockLocations),
                                                       "parents": list(parentLFNs)}
    return files
def getFiles(datasetName, runBlacklist, runWhitelist, blockBlacklist, blockWhitelist, dbsUrl):
    """
    _getFiles_

    Get the full information of a dataset including files, blocks, runs and lumis.
    Filter it using run and block white/black lists.

    It can receive an optional DBSUrl.

    Returns a dict keyed by LFN; each value holds the runs/lumis that survived
    the filters plus event count, size, SE locations and parent LFNs.
    Files whose lumi sections are all filtered out are dropped.
    """
    dbsReader = DBSReader(endpoint = dbsUrl)
    phedexReader = PhEDEx()
    siteDB = SiteDBJSON()
    files = {}

    # Verify the dataset exists before doing any per-block work
    outputDatasetParts = datasetName.split("/")
    datasets = dbsReader.matchProcessedDatasets(outputDatasetParts[1],
                                                outputDatasetParts[3],
                                                outputDatasetParts[2])
    if len(datasets) == 0:
        raise RuntimeError("Dataset %s doesn't exist in given DBS instance" % datasetName)

    blockNames = dbsReader.listFileBlocks(datasetName)
    for blockName in blockNames:
        # Deal with the block white and black lists
        if blockBlacklist and blockName in blockBlacklist:
            continue
        if blockWhitelist and blockName not in blockWhitelist:
            continue

        # Existing blocks in PhEDEx
        replicaInfo = phedexReader.getReplicaInfoForBlocks(block = blockName,
                                                           subscribed = 'y')
        block = dbsReader.listFilesInBlockWithParents(blockName)
        blockLocations = set()
        # Load the block locations
        if len(replicaInfo["phedex"]["block"]) > 0:
            for replica in replicaInfo["phedex"]["block"][0]["replica"]:
                node = replica["node"]
                cmsSites = siteDB.phEDExNodetocmsName(node)
                # phEDExNodetocmsName may return a single name or a list
                if not isinstance(cmsSites, list):
                    cmsSites = [cmsSites]
                for cmsName in cmsSites:
                    blockLocations.update(siteDB.cmsNametoSE(cmsName))

        for blockFile in block:
            # Guard against files without parent information; the sibling
            # version of this function checks for a missing "ParentList" too
            parentLFNs = []
            for fileParent in blockFile.get("ParentList", []):
                parentLFNs.append(fileParent["LogicalFileName"])

            # Filter the lumi sections by the run white and black lists
            runInfo = {}
            for lumiSection in blockFile["LumiList"]:
                if runBlacklist and lumiSection["RunNumber"] in runBlacklist:
                    continue
                if runWhitelist and lumiSection["RunNumber"] not in runWhitelist:
                    continue
                runInfo.setdefault(lumiSection["RunNumber"], []).append(lumiSection["LumiSectionNumber"])
            if runInfo:
                files[blockFile["LogicalFileName"]] = {"runs": runInfo,
                                                       "events": blockFile["NumberOfEvents"],
                                                       "size": blockFile["FileSize"],
                                                       "locations": list(blockLocations),
                                                       "parents": parentLFNs}
    return files
class SiteDBTest(unittest.TestCase):
    """
    Unit tests for SiteScreening module

    This variant runs against the SiteDB emulator set up in setUp, so no
    network access is needed.
    """

    def setUp(self):
        """
        Setup for unit tests
        """
        EmulatorHelper.setEmulators(siteDB = True)
        self.mySiteDB = SiteDBJSON()

    def tearDown(self):
        # Always undo the emulator so other test modules see the real service
        EmulatorHelper.resetEmulators()

    def testCmsNametoPhEDExNode(self):
        """
        Tests CmsNametoSE
        """
        target = ['T1_US_FNAL_MSS', 'T1_US_FNAL_Buffer']
        results = self.mySiteDB.cmsNametoPhEDExNode("T1_US_FNAL")
        # No ordering guarantee, compare sorted lists
        self.assertEqual(sorted(results), sorted(target))

    def testPhEDExNodetocmsName(self):
        """
        Tests PhEDExNodetocmsName
        """
        result = self.mySiteDB.phEDExNodetocmsName('T1_US_FNAL_MSS')
        self.assertEqual(result, 'T1_US_FNAL')
        result = self.mySiteDB.phEDExNodetocmsName('T1_US_FNAL_Buffer')
        self.assertEqual(result, 'T1_US_FNAL')
        result = self.mySiteDB.phEDExNodetocmsName('T2_UK_London_IC')
        self.assertEqual(result, 'T2_UK_London_IC')
        # don't check this anymore, see comment in phEDExNodetocmsName function
        #self.assertRaises(ValueError, self.mySiteDB.phEDExNodetocmsName,
        #                  'T9_DOESNT_EXIST_Buffer')

    def testCmsNametoSE(self):
        """
        Tests CmsNametoSE
        """
        target = ['srm-cms.gridpp.rl.ac.uk']
        results = self.mySiteDB.cmsNametoSE("T1_UK_RAL")
        self.assertEqual(sorted(results), sorted(target))

    def testCmsNamePatterntoSE(self):
        """
        Tests CmsNamePatterntoSE
        """
        target = ['T2_XX_SiteA', 'T2_XX_SiteB', 'T2_XX_SiteC']
        results = self.mySiteDB.cmsNametoSE("%T2_XX")
        self.assertEqual(sorted(results), sorted(target))

    def testSEtoCmsName(self):
        """
        Tests CmsNametoSE
        """
        target = ['T1_US_FNAL']
        results = self.mySiteDB.seToCMSName("cmssrm.fnal.gov")
        self.assertEqual(results, target)
        # An SE shared by several sites maps to more than one CMS name
        target = sorted(['T2_CH_CERN', 'T2_CH_CERN_HLT'])
        results = sorted(self.mySiteDB.seToCMSName("srm-eoscms.cern.ch"))
        self.assertEqual(results, target)

    def testCmsNametoCE(self):
        """
        Tests CmsNametoCE
        """
        target = ['lcgce11.gridpp.rl.ac.uk', 'lcgce10.gridpp.rl.ac.uk',
                  'lcgce02.gridpp.rl.ac.uk']
        results = self.mySiteDB.cmsNametoCE("T1_UK_RAL")
        self.assertEqual(sorted(results), sorted(target))

    def testCmsNamePatterntoCE(self):
        """
        Tests CmsNamePatterntoCE
        """
        target = ['T2_XX_SiteA', 'T2_XX_SiteB', 'T2_XX_SiteC']
        results = self.mySiteDB.cmsNametoCE("%T2_XX")
        self.assertEqual(sorted(results), sorted(target))

    def testDNUserName(self):
        """
        Tests DN to Username lookup
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=gutsche/CN=582680/CN=Oliver Gutsche"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertEqual(testUserName, userName)

    @attr("integration")
    def testDNWithApostrophe(self):
        """
        Tests a DN with an apostrophy in - will fail till SiteDB2 appears
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertEqual(testUserName, userName)

    def testSEFinder(self):
        """
        _testSEFinder_

        See if we can retrieve seNames from all sites
        """
        seNames = self.mySiteDB.getAllSENames()
        self.assertTrue(len(seNames) > 1)
        self.assertIn('cmssrm.fnal.gov', seNames)
        return
class PhEDExInjectorSubscriber(BaseWorkerThread):
    """
    _PhEDExInjectorSubscriber_

    Poll the DBSBuffer database and subscribe datasets as they are created.

    In safe operation mode ("safeOperationMode" in the config) subscriptions
    are made in two cycles: non-custodial/replica first, then the custodial
    move once the dataset shows up as partially subscribed.
    """
    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        self.siteDB = SiteDBJSON()
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")
        self.safeMode = getattr(config.PhEDExInjector, "safeOperationMode", False)
        # When replicaOnly is set, "move" subscriptions are downgraded to replicas
        self.replicaOnly = getattr(config.PhEDExInjector, "replicaOnly", False)

        # Subscribed state in the DBSBuffer table for datasets
        # (safe mode uses 2 because state 1 means "partially subscribed" there)
        self.terminalSubscriptionState = 1
        if self.safeMode:
            self.terminalSubscriptionState = 2

        # We will map node names to CMS names, that what the spec will have.
        # If a CMS name is associated to many PhEDEx node then choose the MSS option
        self.cmsToPhedexMap = {}

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will be then be available
        self.initAlerts(compName = "PhEDExInjector")

    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector.  Also load the SE names to
        PhEDEx node name mappings from the data service.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMComponent.PhEDExInjector.Database",
                                logger = self.logger,
                                dbinterface = myThread.dbi)
        self.getUnsubscribed = daofactory(classname = "GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname = "MarkDatasetSubscribed")
        self.getPartiallySubscribed = daofactory(classname = "GetPartiallySubscribedDatasets")

        # Build the CMS-name -> {node kind -> PhEDEx node} map from the data service
        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:
            cmsName = self.siteDB.phEDExNodetocmsName(node["name"])
            if cmsName not in self.cmsToPhedexMap:
                self.cmsToPhedexMap[cmsName] = {}
            logging.info("Loaded PhEDEx node %s for site %s" % (node["name"], cmsName))
            if node["kind"] not in self.cmsToPhedexMap[cmsName]:
                self.cmsToPhedexMap[cmsName][node["kind"]] = node["name"]
        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for datasets and subscribe them.

        Aggregates the subscription configuration from every workflow spec
        producing a dataset, groups the resulting subscriptions by site and
        option tuple (to make as few PhEDEx requests as possible), then issues
        the requests and records progress in DBSBuffer.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute(conn = myThread.transaction.conn,
                                                            transaction = True)
        if self.safeMode:
            # In safe mode also pick up datasets whose replica pass was already
            # made on a previous cycle; they now get their custodial move.
            # NOTE: partiallySubscribedSet only exists in safe mode; all later
            # uses are guarded by a `self.safeMode and ...` short circuit.
            partiallySubscribedDatasets = self.getPartiallySubscribed.execute(conn = myThread.transaction.conn,
                                                                              transaction = True)
            unsubscribedDatasets.extend(partiallySubscribedDatasets)
            partiallySubscribedSet = set()
            for entry in partiallySubscribedDatasets:
                partiallySubscribedSet.add(entry["path"])

        # Map the datasets to their specs
        specDatasetMap = {}
        for unsubscribedDataset in unsubscribedDatasets:
            datasetPath = unsubscribedDataset["path"]
            workflow = unsubscribedDataset["workflow"]
            spec = unsubscribedDataset["spec"]

            if datasetPath not in specDatasetMap:
                specDatasetMap[datasetPath] = []
            specDatasetMap[datasetPath].append({"workflow" : workflow, "spec" : spec})

        specCache = {}
        siteMap = {}
        # Distribute the subscriptions by site, type and priority
        # This is to make as few subscriptions as possible
        # Site map values are dictionaries where the keys are tuples (Prio, Custodial, AutoApprove, Move)
        # Where Custodial is boolean, Prio is in ["Low", "Normal", "High"], AutoApprove is boolean and Move is boolean
        for dataset in specDatasetMap:
            # Aggregate all the different subscription configurations
            subInfo = {}
            for entry in specDatasetMap[dataset]:
                if not entry["spec"]:
                    # Can't use this spec, there isn't one
                    continue
                # Load spec if not in the cache
                if entry["spec"] not in specCache:
                    helper = WMWorkloadHelper()
                    try:
                        helper.load(entry["spec"])
                        specCache[entry["spec"]] = helper
                    except Exception:
                        #Couldn't load it , alert and carry on
                        msg = "Couldn't load spec: %s" % entry["spec"]
                        logging.error(msg)
                        self.sendAlert(7, msg = msg)
                        continue
                #If we are running in safe mode, we need to know if the workflow is ready
                # We have the spec, get the info
                helper = specCache[entry["spec"]]
                workflowSubInfo = helper.getSubscriptionInformation()
                datasetSubInfo = workflowSubInfo.get(dataset, None)
                if datasetSubInfo and subInfo:
                    # Merge with what previous specs requested for this dataset
                    subInfo["CustodialSites"] = extendWithoutDups(subInfo["CustodialSites"],
                                                                  datasetSubInfo["CustodialSites"])
                    subInfo["NonCustodialSites"] = extendWithoutDups(subInfo["NonCustodialSites"],
                                                                     datasetSubInfo["NonCustodialSites"])
                    subInfo["AutoApproveSites"] = extendWithoutDups(subInfo["AutoApproveSites"],
                                                                    datasetSubInfo["AutoApproveSites"])
                    subInfo["Priority"] = solvePrioConflicts(subInfo["Priority"],
                                                             datasetSubInfo["Priority"])
                elif datasetSubInfo:
                    subInfo = datasetSubInfo

            # We now have aggregated subscription information for this dataset in subInfo
            # Distribute it by site
            if not subInfo:
                #Nothing to do, log and continue
                msg = "No subscriptions configured for dataset %s" % dataset
                logging.warning(msg)
                self.markSubscribed.execute(dataset, subscribed = self.terminalSubscriptionState,
                                            conn = myThread.transaction.conn,
                                            transaction = True)
                continue
            # Make sure that a site is not configured both as non custodial and custodial
            # Non-custodial is believed to be the right choice
            subInfo["CustodialSites"] = list(set(subInfo["CustodialSites"]) - set(subInfo["NonCustodialSites"]))
            for site in subInfo["CustodialSites"]:
                if site not in siteMap:
                    siteMap[site] = {}
                autoApprove = False
                if site in subInfo["AutoApproveSites"]:
                    autoApprove = True
                # First safe-mode pass is a replica (Move = False); once the
                # dataset is partially subscribed the custodial move is made
                if self.safeMode and dataset not in partiallySubscribedSet:
                    tupleKey = (subInfo["Priority"], True, autoApprove, False)
                else:
                    tupleKey = (subInfo["Priority"], True, autoApprove, True)
                if tupleKey not in siteMap[site]:
                    siteMap[site][tupleKey] = []
                # Subscriptions are sorted by options, defined by tupleKey
                # The tuple key has 3 or 4 entries in this order
                # Priority, Custodial, Auto approve, Move (True) or Replica (False)
                siteMap[site][tupleKey].append(dataset)

            # If we are in safe mode and this is a partially subscribed dataset,
            # then the non-custodial were done in a previous cycle
            if self.safeMode and dataset in partiallySubscribedSet:
                self.markSubscribed.execute(dataset, subscribed = self.terminalSubscriptionState,
                                            conn = myThread.transaction.conn,
                                            transaction = True)
                continue

            for site in subInfo["NonCustodialSites"]:
                if site not in siteMap:
                    siteMap[site] = {}
                autoApprove = False
                if site in subInfo["AutoApproveSites"]:
                    autoApprove = True
                # Non-custodial is never move, so this tuple has only 3 entries
                # TODO: Change tuples to frozensets for clarity
                tupleKey = (subInfo["Priority"], False, autoApprove)
                if tupleKey not in siteMap[site]:
                    siteMap[site][tupleKey] = []
                siteMap[site][tupleKey].append(dataset)

            # State 1; in safe mode the terminal state (2) is presumably set on
            # a later cycle once the dataset is partially subscribed — verify
            self.markSubscribed.execute(dataset, subscribed = 1,
                                        conn = myThread.transaction.conn,
                                        transaction = True)

        # Actually request the subscriptions
        for site in siteMap:
            # Check that the site is valid
            if site not in self.cmsToPhedexMap:
                msg = "Site %s doesn't appear to be valid to PhEDEx" % site
                logging.error(msg)
                self.sendAlert(7, msg = msg)
                continue
            for subscriptionFlavor in siteMap[site]:
                datasets = siteMap[site][subscriptionFlavor]
                # Check that the site is valid
                isMSS = False
                if "MSS" in self.cmsToPhedexMap[site]:
                    isMSS = True
                    phedexNode = self.cmsToPhedexMap[site]["MSS"]
                else:
                    phedexNode = self.cmsToPhedexMap[site]["Disk"]
                logging.info("Subscribing %s to %s" % (datasets, site))
                options = {"custodial" : "n", "requestOnly" : "y",
                           "priority" : subscriptionFlavor[0].lower(),
                           "move" : "n"}
                if subscriptionFlavor[1] and isMSS:
                    # Custodial subscriptions are only allowed in MSS nodes
                    # If custodial is requested on Non-MSS it fallsback to a non-custodial subscription
                    options["custodial"] = "y"
                    # Move flag (index 3) is only present in custodial 4-tuples
                    if subscriptionFlavor[3] and not self.replicaOnly:
                        options["move"] = "y"
                if subscriptionFlavor[2]:
                    options["requestOnly"] = "n"
                logging.info("Request options: Custodial - %s, Move - %s, Request Only - %s" % (options["custodial"].upper(),
                                                                                               options["move"].upper(),
                                                                                               options["requestOnly"].upper()))
                newSubscription = PhEDExSubscription(datasets, phedexNode, self.group,
                                                     **options)

                xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl,
                                                           newSubscription.getDatasetPaths())
                logging.debug(str(xmlData))
                self.phedex.subscribe(newSubscription, xmlData)

        myThread.transaction.commit()
        return
class PhEDExInjectorSubscriber(BaseWorkerThread):
    """
    _PhEDExInjectorSubscriber_

    Poll the DBSBuffer database and subscribe datasets as they are created.

    Subscription options (priority, move, custodial, request_only) come from
    the DBSBuffer records themselves; requests are compacted through a
    SubscriptionList before being sent to PhEDEx.
    """
    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        self.siteDB = SiteDBJSON()
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")

        # We will map node names to CMS names, that what the spec will have.
        # If a CMS name is associated to many PhEDEx node then choose the MSS option
        self.cmsToPhedexMap = {}
        # PhEDEx node names split by kind, filled in setup()
        self.phedexNodes = {'MSS':[], 'Disk':[]}

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will be then be available
        self.initAlerts(compName = "PhEDExInjector")

    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector.  Also load the SE names to
        PhEDEx node name mappings from the data service.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMComponent.PhEDExInjector.Database",
                                logger = self.logger,
                                dbinterface = myThread.dbi)
        self.getUnsubscribed = daofactory(classname = "GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname = "MarkDatasetSubscribed")

        # Build the CMS-name -> {node kind -> PhEDEx node} map and collect
        # the MSS/Disk node name lists from the data service
        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:
            cmsName = self.siteDB.phEDExNodetocmsName(node["name"])
            if cmsName not in self.cmsToPhedexMap:
                self.cmsToPhedexMap[cmsName] = {}
            logging.info("Loaded PhEDEx node %s for site %s" % (node["name"], cmsName))
            if node["kind"] not in self.cmsToPhedexMap[cmsName]:
                self.cmsToPhedexMap[cmsName][node["kind"]] = node["name"]
            if node["kind"] in [ "MSS", "Disk" ]:
                self.phedexNodes[node["kind"]].append(node["name"])
        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Run the subscription algorithm as configured
        """
        self.subscribeDatasets()
        return

    def subscribeDatasets(self):
        """
        _subscribeDatasets_

        Poll the database for datasets and subscribe them.

        Skips requests that duplicate an existing PhEDEx subscription or
        transfer request, and marks everything successfully handled as
        subscribed in DBSBuffer.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute(conn = myThread.transaction.conn,
                                                            transaction = True)

        # Keep a list of subscriptions to tick as subscribed in the database
        subscriptionsMade = []

        # Create a list of subscriptions as defined by the PhEDEx data structures
        subs = SubscriptionList()

        # Create the subscription objects and add them to the list
        # The list takes care of the sorting internally
        for subInfo in unsubscribedDatasets:
            site = subInfo['site']

            # Destination may be a CMS site name rather than a PhEDEx node;
            # translate it, preferring the MSS node if one exists
            if site not in self.phedexNodes['MSS'] and site not in self.phedexNodes['Disk']:
                if site not in self.cmsToPhedexMap:
                    msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                    msg += "skipping subscription: %s" % subInfo['id']
                    logging.error(msg)
                    self.sendAlert(7, msg = msg)
                    continue
                # Get the phedex node from CMS site
                site = self.cmsToPhedexMap[site].get("MSS") or self.cmsToPhedexMap[site]["Disk"]

            # Avoid custodial subscriptions to disk nodes
            if site not in self.phedexNodes['MSS']:
                subInfo['custodial'] = 'n'
            # Avoid auto approval in T1 sites
            elif site.startswith("T1"):
                subInfo['request_only'] = 'y'

            phedexSub = PhEDExSubscription(subInfo['path'], site, self.group,
                                           priority = subInfo['priority'],
                                           move = subInfo['move'],
                                           custodial = subInfo['custodial'],
                                           request_only = subInfo['request_only'],
                                           subscriptionId = subInfo['id'])

            # Check if the subscription is a duplicate
            if phedexSub.matchesExistingSubscription(self.phedex) or \
               phedexSub.matchesExistingTransferRequest(self.phedex):
                # Already in PhEDEx: just record it as done in DBSBuffer
                subscriptionsMade.append(subInfo['id'])
                continue

            # Add it to the list
            subs.addSubscription(phedexSub)

        # Compact the subscriptions
        subs.compact()

        for subscription in subs.getSubscriptionList():
            try:
                xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl,
                                                           subscription.getDatasetPaths())
                logging.debug(str(xmlData))
                msg = "Subscribing: %s to %s, with options: " % (subscription.getDatasetPaths(), subscription.getNodes())
                msg += "Move: %s, Custodial: %s, Request Only: %s" % (subscription.move, subscription.custodial, subscription.request_only)
                logging.info(msg)
                self.phedex.subscribe(subscription, xmlData)
            except Exception as ex:
                # Best effort: a failed request stays unsubscribed in DBSBuffer
                # and will be retried on the next polling cycle
                logging.error("Something went wrong when communicating with PhEDEx, will try again later.")
                logging.error("Exception: %s" % str(ex))
            else:
                subscriptionsMade.extend(subscription.getSubscriptionIds())

        # Register the result in DBSBuffer
        if subscriptionsMade:
            self.markSubscribed.execute(subscriptionsMade,
                                        conn = myThread.transaction.conn,
                                        transaction = True)

        myThread.transaction.commit()
        return