def subscribeDatasets(self):
    """
    _subscribeDatasets_

    Look up the datasets that still lack a subscription and request the
    matching PhEDEx subscriptions.

    Rows are read with ``self.getUnsubscribed.execute()``. A row whose
    site is in neither the 'MSS' nor the 'Disk' entry of
    ``self.phedexNodes`` is reported (error log plus alert) and skipped.
    A row that already matches an existing subscription or transfer
    request is recorded as done without a new request. The remaining
    rows are collected in a SubscriptionList, compacted and sent through
    ``self.phedex.subscribe``; a failing request is only logged. The ids
    recorded as done are finally passed to
    ``self.markSubscribed.execute``.
    """
    logging.info("Starting subscribeDatasets method")

    # Datasets without any subscription at all
    rows = self.getUnsubscribed.execute()

    # Ids to tick as subscribed in the database once we are done
    completedIds = []

    # Container following the PhEDEx data structures, it sorts internally
    pending = SubscriptionList()

    for row in rows:
        node = row['site']
        isTapeNode = node in self.phedexNodes['MSS']

        if not isTapeNode and node not in self.phedexNodes['Disk']:
            msg = ("Site %s doesn't appear to be valid to PhEDEx, " % node) + \
                  ("skipping subscription: %s" % row['id'])
            logging.error(msg)
            self.sendAlert(7, msg=msg)
            continue

        if isTapeNode:
            # No auto approval for T1 sites
            if node.startswith("T1"):
                row['request_only'] = 'y'
        else:
            # Disk nodes never get custodial subscriptions
            row['custodial'] = 'n'

        candidate = PhEDExSubscription(row['path'],
                                       node,
                                       row['phedex_group'],
                                       priority=row['priority'],
                                       move=row['move'],
                                       custodial=row['custodial'],
                                       request_only=row['request_only'],
                                       subscriptionId=row['id'])

        # A duplicate counts as already subscribed, no new request needed
        alreadyKnown = (candidate.matchesExistingSubscription(self.phedex) or
                        candidate.matchesExistingTransferRequest(self.phedex))
        if alreadyKnown:
            completedIds.append(row['id'])
            continue

        pending.addSubscription(candidate)

    # Merge whatever can be merged before talking to PhEDEx
    pending.compact()

    for request in pending.getSubscriptionList():
        xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl,
                                                   request.getDatasetPaths())
        logging.debug("subscribeDatasets XMLData: %s", xmlData)
        logging.info("Subscribing: %s to %s, with options: Move: %s, Custodial: %s, Request Only: %s",
                     request.getDatasetPaths(),
                     request.getNodes(),
                     request.move,
                     request.custodial,
                     request.request_only)

        try:
            self.phedex.subscribe(request, xmlData)
        except HTTPException as ex:
            logging.error("PhEDEx dataset subscribe failed with HTTPException: %s %s",
                          ex.status, ex.result)
            continue
        except Exception as ex:
            logging.error("PhEDEx dataset subscribe failed with Exception: %s", str(ex))
            logging.debug("Traceback: %s", str(traceback.format_exc()))
            continue

        # Only reached when the request went through
        completedIds.extend(request.getSubscriptionIds())

    # Register the result in DBSBuffer
    if not completedIds:
        return

    self.markSubscribed.execute(completedIds)
    return
def subscribeDatasets(self):
    """
    _subscribeDatasets_

    Poll the database for datasets and subscribe them.

    Unsubscribed datasets are read through ``self.getUnsubscribed`` using
    the connection of the current thread's transaction. For each row:

    * a site that is not a known PhEDEx node ('MSS' or 'Disk' entry of
      ``self.phedexNodes``) is translated through ``self.cmsToPhedexMap``,
      preferring its "MSS" node over its "Disk" node; a site missing
      from that map is reported (error log plus alert) and skipped;
    * subscriptions to non-MSS nodes are forced to be non-custodial;
    * non-custodial subscriptions are forced to be non-move;
    * rows matching an existing subscription or transfer request are
      recorded as done without issuing a new request.

    The remaining subscriptions are compacted and sent to PhEDEx. A
    failure for one subscription is logged and does not stop the others.
    """
    myThread = threading.current_thread()
    myThread.transaction.begin()

    # Check for completely unsubscribed datasets
    unsubscribedDatasets = self.getUnsubscribed.execute(conn=myThread.transaction.conn,
                                                        transaction=True)

    # Keep a list of subscriptions to tick as subscribed in the database
    subscriptionsMade = []

    # Create a list of subscriptions as defined by the PhEDEx data structures
    subs = SubscriptionList()

    # Create the subscription objects and add them to the list
    # The list takes care of the sorting internally
    for subInfo in unsubscribedDatasets:
        site = subInfo['site']

        if site not in self.phedexNodes['MSS'] and site not in self.phedexNodes['Disk']:

            if site not in self.cmsToPhedexMap:
                msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                msg += "skipping subscription: %s" % subInfo['id']
                logging.error(msg)
                self.sendAlert(7, msg=msg)
                continue

            # Get the phedex node from CMS site
            site = self.cmsToPhedexMap[site].get("MSS") or self.cmsToPhedexMap[site]["Disk"]

        # Avoid custodial subscriptions to disk nodes
        if site not in self.phedexNodes['MSS']:
            subInfo['custodial'] = 'n'
        # Avoid move subscriptions and replica
        if subInfo['custodial'] == 'n':
            subInfo['move'] = 'n'

        phedexSub = PhEDExSubscription(subInfo['path'], site,
                                       self.group,
                                       priority=subInfo['priority'],
                                       move=subInfo['move'],
                                       custodial=subInfo['custodial'],
                                       request_only=subInfo['request_only'],
                                       subscriptionId=subInfo['id'])

        # Check if the subscription is a duplicate
        if phedexSub.matchesExistingSubscription(self.phedex) or \
                phedexSub.matchesExistingTransferRequest(self.phedex):
            subscriptionsMade.append(subInfo['id'])
            continue

        # Add it to the list
        subs.addSubscription(phedexSub)

    # Compact the subscriptions
    subs.compact()

    for subscription in subs.getSubscriptionList():
        try:
            xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl,
                                                       subscription.getDatasetPaths())
            logging.debug("%s", xmlData)
            logging.info("Subscribing: %s to %s, with options: Move: %s, Custodial: %s, Request Only: %s",
                         subscription.getDatasetPaths(), subscription.getNodes(),
                         subscription.move, subscription.custodial,
                         subscription.request_only)
            self.phedex.subscribe(subscription, xmlData)
        except Exception as ex:
            # Best effort: a failed subscription stays unrecorded, the other
            # subscriptions of this cycle are still attempted
            logging.error("Something went wrong when communicating with PhEDEx, will try again later.")
            logging.error("Exception: %s", ex)
        else:
            subscriptionsMade.extend(subscription.getSubscriptionIds())

    # NOTE(review): nothing in this method commits or rolls back the
    # transaction begun above, and subscriptionsMade is not persisted
    # here -- confirm that this is handled elsewhere.
def testSubscriptionList(self):
    """
    _SubscriptionList_

    Verify that SubscriptionList groups and aggregates a mixed batch of
    subscriptions correctly in the standard scenarios.
    """
    genDatasetA = "/DeadlyNeurotoxinOnTestSubjectSim/Run1970-Test-v2/GEN"
    genDatasetB = "/NotEnoughEnergyToLieIn1.1V/Run1970-Potato-v2/GEN"
    recoDatasetA = '/TestWeightedCubes/Run1970-Test-v2/%s'
    recoDatasetB = '/RepulsiveGel/Run1970-Test-v2/%s'

    # Option sets used on top of dataset, node and group
    custodialMove = (("custodial", "y"), ("move", "y"))
    notRequestOnly = (("request_only", "n"),)
    noExtras = ()

    # (dataset, node, extra options), in the order they get added
    requests = [
        # Two GEN datasets, each custodial at one node and
        # non-custodial at three others
        (genDatasetA, "T1_US_FNAL", custodialMove),
        (genDatasetA, "T2_IT_Bari", notRequestOnly),
        (genDatasetA, "T2_CH_CERN", notRequestOnly),
        (genDatasetA, "T2_US_Wisconsin", noExtras),
        (genDatasetB, "T1_IT_CNAF", custodialMove),
        (genDatasetB, "T2_IT_Bari", notRequestOnly),
        (genDatasetB, "T2_CH_CERN", notRequestOnly),
        (genDatasetB, "T2_US_Wisconsin", noExtras),
        # AOD and DQM datasets subscribed custodially to 2 sites
        (recoDatasetA % 'AOD', "T1_US_FNAL", custodialMove),
        (recoDatasetA % 'DQM', "T1_US_FNAL", custodialMove),
        (recoDatasetB % 'AOD', "T1_DE_KIT", custodialMove),
        (recoDatasetB % 'DQM', "T1_DE_KIT", custodialMove),
    ]

    subList = SubscriptionList()
    for dataset, node, extras in requests:
        kwargs = {"datasetPathList": dataset,
                  "nodeList": node,
                  "group": "dataops"}
        kwargs.update(extras)
        subList.addSubscription(PhEDExSubscription(**kwargs))

    # Before compacting there is one subscription per node (6 nodes)
    expectedDatasetSets = [
        {genDatasetA, genDatasetB},
        {genDatasetA, recoDatasetA % 'AOD', recoDatasetA % 'DQM'},
        {genDatasetB},
        {recoDatasetB % 'AOD', recoDatasetB % 'DQM'},
    ]
    self.assertEqual(len(subList.getSubscriptionList()), 6)
    for entry in subList.getSubscriptionList():
        self.assertTrue(set(entry.getDatasetPaths()) in expectedDatasetSets)

    # Compacting merges two of the subscriptions into one
    subList.compact()
    expectedNodeSets = [
        {"T1_US_FNAL"},
        {"T2_IT_Bari", "T2_CH_CERN"},
        {"T1_IT_CNAF"},
        {"T1_DE_KIT"},
        {"T2_US_Wisconsin"},
    ]
    self.assertEqual(len(subList.getSubscriptionList()), 5)
    for entry in subList.getSubscriptionList():
        self.assertTrue(set(entry.getNodes()) in expectedNodeSets)

    return
def subscribeDatasets(self):
    """
    _subscribeDatasets_

    Fetch the datasets that have no subscription yet and subscribe them
    in PhEDEx.

    Each row returned by ``self.getUnsubscribed.execute()`` is turned
    into a PhEDExSubscription, unless its site is in neither the 'MSS'
    nor the 'Disk' entry of ``self.phedexNodes``, in which case an error
    is logged, an alert is sent and the row is skipped. Non-MSS sites are
    forced to non-custodial; MSS sites whose name starts with "T1" are
    forced to request-only. Duplicates of existing subscriptions or
    transfer requests are counted as done right away. The rest is
    compacted and submitted one by one, with failures only logged. The
    ids counted as done are handed to ``self.markSubscribed.execute``.
    """
    logging.info("Starting subscribeDatasets method")

    # Datasets that are completely unsubscribed
    toSubscribe = self.getUnsubscribed.execute()

    # Ids that will be ticked as subscribed in the database
    doneIds = []

    # PhEDEx-style subscription container, sorting is handled internally
    subscriptionList = SubscriptionList()

    for info in toSubscribe:
        site = info['site']

        if not (site in self.phedexNodes['MSS'] or site in self.phedexNodes['Disk']):
            msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
            msg = msg + "skipping subscription: %s" % info['id']
            logging.error(msg)
            self.sendAlert(7, msg=msg)
            continue

        if site in self.phedexNodes['MSS']:
            # Avoid auto approval in T1 sites
            if site.startswith("T1"):
                info['request_only'] = 'y'
        else:
            # Avoid custodial subscriptions to disk nodes
            info['custodial'] = 'n'

        newSub = PhEDExSubscription(
            info['path'],
            site,
            info['phedex_group'],
            priority=info['priority'],
            move=info['move'],
            custodial=info['custodial'],
            request_only=info['request_only'],
            subscriptionId=info['id'],
        )

        # Duplicates are treated as already made
        if newSub.matchesExistingSubscription(self.phedex):
            doneIds.append(info['id'])
        elif newSub.matchesExistingTransferRequest(self.phedex):
            doneIds.append(info['id'])
        else:
            subscriptionList.addSubscription(newSub)

    # Compact the subscriptions
    subscriptionList.compact()

    for item in subscriptionList.getSubscriptionList():
        xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl, item.getDatasetPaths())
        logging.debug("subscribeDatasets XMLData: %s", xmlData)
        logging.info(
            "Subscribing: %s to %s, with options: Move: %s, Custodial: %s, Request Only: %s",
            item.getDatasetPaths(),
            item.getNodes(),
            item.move,
            item.custodial,
            item.request_only,
        )
        try:
            self.phedex.subscribe(item, xmlData)
        except HTTPException as httpError:
            logging.error(
                "PhEDEx dataset subscribe failed with HTTPException: %s %s",
                httpError.status,
                httpError.result,
            )
        except Exception as error:
            logging.error("PhEDEx dataset subscribe failed with Exception: %s", str(error))
            logging.debug("Traceback: %s", str(traceback.format_exc()))
        else:
            # The request was accepted, remember every id it covers
            doneIds.extend(item.getSubscriptionIds())

    # Register the result in DBSBuffer
    if doneIds:
        self.markSubscribed.execute(doneIds)

    return
def testSubscriptionList(self):
    """
    _SubscriptionList_

    Check that a bunch of different subscriptions is organized and
    aggregated correctly in the standard scenarios.
    """
    subList = SubscriptionList()

    def register(dataset, node, **extras):
        """Build one subscription for the 'dataops' group and add it to subList."""
        arguments = {"datasetPathList": dataset,
                     "nodeList": node,
                     "group": "dataops"}
        arguments.update(extras)
        subList.addSubscription(PhEDExSubscription(**arguments))

    # Two GEN datasets subscribed to many sites non-custodially and custodially to one
    genDatasetA = "/DeadlyNeurotoxinOnTestSubjectSim/Run1970-Test-v2/GEN"
    genDatasetB = "/NotEnoughEnergyToLieIn1.1V/Run1970-Potato-v2/GEN"
    for genDataset, custodialNode in ((genDatasetA, "T1_US_FNAL"),
                                      (genDatasetB, "T1_IT_CNAF")):
        register(genDataset, custodialNode, custodial="y", move="y")
        register(genDataset, "T2_IT_Bari", request_only="n")
        register(genDataset, "T2_CH_CERN", request_only="n")
        register(genDataset, "T2_US_Wisconsin")

    # AOD and DQM datasets subscribed custodially to 2 sites
    recoDatasetA = '/TestWeightedCubes/Run1970-Test-v2/%s'
    recoDatasetB = '/RepulsiveGel/Run1970-Test-v2/%s'
    for recoDataset, custodialNode in ((recoDatasetA, "T1_US_FNAL"),
                                       (recoDatasetB, "T1_DE_KIT")):
        for tier in ('AOD', 'DQM'):
            register(recoDataset % tier, custodialNode, custodial="y", move="y")

    # One subscription per node
    self.assertEqual(len(subList.getSubscriptionList()), 6)
    goldenDatasetLists = [
        set((genDatasetA, genDatasetB)),
        set((genDatasetA, recoDatasetA % 'AOD', recoDatasetA % 'DQM')),
        set((genDatasetB,)),
        set((recoDatasetB % 'AOD', recoDatasetB % 'DQM')),
    ]
    for current in subList.getSubscriptionList():
        datasets = set(current.getDatasetPaths())
        self.assertTrue(datasets in goldenDatasetLists)

    # Compact should have reduced 2 of them to 1
    subList.compact()
    goldenNodeLists = [
        set(("T1_US_FNAL",)),
        set(("T2_IT_Bari", "T2_CH_CERN")),
        set(("T1_IT_CNAF",)),
        set(("T1_DE_KIT",)),
        set(("T2_US_Wisconsin",)),
    ]
    self.assertEqual(len(subList.getSubscriptionList()), 5)
    for current in subList.getSubscriptionList():
        nodes = set(current.getNodes())
        self.assertTrue(nodes in goldenNodeLists)

    return