Example #1
class PhEDExInjectorSubscriber(BaseWorkerThread):
    """
    _PhEDExInjectorSubscriber_

    Poll the DBSBuffer database and subscribe datasets as they are
    created.
    """
    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        self.siteDB = SiteDBJSON()
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")
        self.tier0Mode = getattr(config.PhEDExInjector, "tier0Mode", False)

        # We will map node names to CMS names, since that is what the spec will have.
        # If a CMS name is associated with many PhEDEx nodes, then choose the MSS option
        self.cmsToPhedexMap = {}
        self.phedexNodes = {'MSS':[], 'Disk':[]}

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will then be available
        self.initAlerts(compName = "PhEDExInjector")


    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector.  Also load the SE names to
        PhEDEx node name mappings from the data service.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMComponent.PhEDExInjector.Database",
                                logger = self.logger,
                                dbinterface = myThread.dbi)

        self.getUnsubscribed = daofactory(classname = "GetUnsubscribedDatasets")
        self.getUnsubscribedBlocks = daofactory(classname = "GetUnsubscribedBlocks")
        self.markSubscribed = daofactory(classname = "MarkDatasetSubscribed")

        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:

            cmsName = self.siteDB.phEDExNodetocmsName(node["name"])

            if cmsName not in self.cmsToPhedexMap:
                self.cmsToPhedexMap[cmsName] = {}

            logging.info("Loaded PhEDEx node %s for site %s" % (node["name"], cmsName))
            if node["kind"] not in self.cmsToPhedexMap[cmsName]:
                self.cmsToPhedexMap[cmsName][node["kind"]] = node["name"]

            if node["kind"] in [ "MSS", "Disk" ]:
                self.phedexNodes[node["kind"]].append(node["name"])
        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Run the subscription algorithm as configured
        """
        if self.tier0Mode:
            self.subscribeTier0Blocks()
        self.subscribeDatasets()
        return

    def subscribeTier0Blocks(self):
        """
        _subscribeTier0Blocks_

        Subscribe blocks to the Tier-0 where a replica subscription
        already exists. All Tier-0 subscriptions are move, custodial
        and autoapproved with high priority.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        # Check for candidate blocks for subscription
        blocksToSubscribe = self.getUnsubscribedBlocks.execute(node = 'T0_CH_CERN',
                                                               conn = myThread.transaction.conn,
                                                               transaction = True)

        if not blocksToSubscribe:
            return

        # For the blocks we don't really care about the subscription options
        # We are subscribing all blocks with the same recipe.
        subscriptionMap = {}
        for subInfo in blocksToSubscribe:
            dataset = subInfo['path']
            if dataset not in subscriptionMap:
                subscriptionMap[dataset] = []
            subscriptionMap[dataset].append(subInfo['blockname'])

        site = 'T0_CH_CERN'
        custodial = 'y'
        request_only = 'n'
        move = 'y'
        priority = 'High'

        # Get the phedex node
        phedexNode = self.cmsToPhedexMap[site]["MSS"]

        logging.error("Subscribing %d blocks, from %d datasets to the Tier-0" % (len(subscriptionMap), sum([len(x) for x in subscriptionMap.values()])))

        newSubscription = PhEDExSubscription(subscriptionMap.keys(),
                                             phedexNode, self.group,
                                             custodial = custodial,
                                             request_only = request_only,
                                             move = move,
                                             priority = priority,
                                             level = 'block',
                                             blocks = subscriptionMap)

        # TODO: Check for blocks already subscribed

        try:
            xmlData = XMLDrop.makePhEDExXMLForBlocks(self.dbsUrl,
                                                     newSubscription.getDatasetsAndBlocks())
            logging.debug(str(xmlData))
            self.phedex.subscribe(newSubscription, xmlData)
        except Exception as ex:
            logging.error("Something went wrong when communicating with PhEDEx, will try again later.")
            logging.error("Exception: %s" % str(ex))
Example #2
class SiteDBTest(unittest.TestCase):
    """
    Unit tests for SiteScreening module
    """

    def setUp(self):
        """
        Setup for unit tests
        """
        self.mySiteDB = SiteDBJSON()

    @attr("integration")
    def testCmsNametoPhEDExNode(self):
        """
        Tests cmsNametoPhEDExNode
        """
        target = ['T1_US_FNAL_MSS','T1_US_FNAL_Buffer']
        results = self.mySiteDB.cmsNametoPhEDExNode("T1_US_FNAL")
        self.failUnless(sorted(results) == sorted(target))

    @attr("integration")
    def testPhEDExNodetocmsName(self):
        """
        Tests PhEDExNodetocmsName
        """
        result = self.mySiteDB.phEDExNodetocmsName('T1_US_FNAL_MSS')
        self.failUnless(result == 'T1_US_FNAL')
        result = self.mySiteDB.phEDExNodetocmsName('T1_US_FNAL_Buffer')
        self.failUnless(result == 'T1_US_FNAL')
        result = self.mySiteDB.phEDExNodetocmsName('T2_UK_London_IC')
        self.failUnless(result == 'T2_UK_London_IC')
        # don't check this anymore, see comment in phEDExNodetocmsName function
        #self.assertRaises(ValueError, self.mySiteDB.phEDExNodetocmsName,
        #                  'T9_DOESNT_EXIST_Buffer')

    @attr("integration")
    def testCmsNametoSE(self):
        """
        Tests CmsNametoSE
        """
        target = ['srm-cms.gridpp.rl.ac.uk']
        results = self.mySiteDB.cmsNametoSE("T1_UK_RAL")
        self.failUnless(sorted(results) == sorted(target))

    @attr("integration")
    def testSEtoCmsName(self):
        """
        Tests seToCMSName
        """
        target = 'T1_US_FNAL'
        results = self.mySiteDB.seToCMSName("cmssrm.fnal.gov")
        self.failUnless(results == target)

    @attr("integration")
    def testCmsNametoCE(self):
        """
        Tests CmsNametoCE
        """
        target = ['lcgce06.gridpp.rl.ac.uk', 'lcgce07.gridpp.rl.ac.uk', 'lcgce09.gridpp.rl.ac.uk']
        results = self.mySiteDB.cmsNametoCE("T1_UK_RAL")
        self.failUnless(sorted(results) == target)

    @attr("integration")
    def testJSONParser(self):
        """
        Tests the JSON parser directly
        """
        cmsName = "cmsgrid02.hep.wisc.edu"
        results = self.mySiteDB.getJSON("CEtoCMSName",
                                  file="CEtoCMSName",
                                  name=cmsName)
        self.failUnless(results['0']['name'] == "T2_US_Wisconsin")

    @attr("integration")
    def testDNUserName(self):
        """
        Tests DN to Username lookup
        """
        testDn = "/C=UK/O=eScience/OU=Bristol/L=IS/CN=simon metson"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.failUnless(testUserName == userName)

    def testDNWithApostrophe(self):
        """
        Tests a DN with an apostrophe in it - will fail till SiteDB2 appears
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.failUnless(testUserName == userName)
        
    @attr("integration")
    def testParsingJsonWithApostrophe(self):
        """
        Tests parsing a DN json with an apostrophe in it
        """
        json = """{"dn": "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'", "user": "******"}"""
        d = self.mySiteDB.parser.dictParser(json)
        self.assertEquals("/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'", d['dn'])

    @attr("integration")
    def testParsingInvalidJsonWithApostrophe(self):
        """
        Tests parsing an invalid DN json (from sitedb v1) with an apostrophe in it
        """
        json = """{'dn': '/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio' Fano', 'user': '******'}"""
        d = self.mySiteDB.parser.dictParser(json)
        self.assertEquals("/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio' Fano", d['dn'])
        json = """{'dn': '/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'', 'user': '******'}"""
        d = self.mySiteDB.parser.dictParser(json)
        self.assertEquals("/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'", d['dn'])
Example #3
def getFiles(datasetName, runBlacklist, runWhitelist, blockBlacklist,
             blockWhitelist, dbsUrl):
    """
    _getFiles_

    Get the full information of a dataset including files, blocks, runs and lumis.
    Filter it using run and block white/black lists.

    It can receive an optional DBSUrl.
    """
    dbsReader = DBSReader(endpoint=dbsUrl)
    phedexReader = PhEDEx()
    siteDB = SiteDBJSON()

    files = {}
    outputDatasetParts = datasetName.split("/")
    print "dataset", datasetName, "parts", outputDatasetParts
    try:
        #retrieve list of blocks from dataset
        blockNames = dbsReader.listFileBlocks(datasetName)
    except Exception:
        raise RuntimeError("Dataset %s doesn't exist in given DBS instance" %
                           datasetName)

    #traverse each block
    for blockName in blockNames:
        #deal with white and black list.
        if blockBlacklist and blockName in blockBlacklist:
            continue
        if blockWhitelist and blockName not in blockWhitelist:
            continue

        #existing blocks in phedex
        replicaInfo = phedexReader.getReplicaInfoForBlocks(block=blockName,
                                                           subscribed='y')
        blockFiles = dbsReader.listFilesInBlock(blockName, lumis=True)

        blockLocations = set()
        #load block locations
        if len(replicaInfo["phedex"]["block"]) > 0:
            for replica in replicaInfo["phedex"]["block"][0]["replica"]:
                node = replica["node"]
                cmsSites = siteDB.phEDExNodetocmsName(node)
                if type(cmsSites) != list:
                    cmsSites = [cmsSites]
                for cmsName in cmsSites:
                    blockLocations.update(siteDB.cmsNametoSE(cmsName))
        #get parent information once per block; the call is block-level, so it
        #does not need to be repeated for every file
        blockFileParents = dbsReader.listFilesInBlockWithParents(blockName)
        #for each file on the block
        for blockFile in blockFiles:
            parentLFNs = []
            #populate parent information
            if blockFileParents and "ParentList" in blockFileParents[0]:
                for fileParent in blockFileParents[0]["ParentList"]:
                    parentLFNs.append(fileParent["LogicalFileName"])
            runInfo = {}
            #Lumis not included in file
            for lumiSection in blockFile["LumiList"]:
                if runBlacklist and lumiSection["RunNumber"] in runBlacklist:
                    continue
                if runWhitelist and lumiSection[
                        "RunNumber"] not in runWhitelist:
                    continue

                if lumiSection["RunNumber"] not in runInfo.keys():
                    runInfo[lumiSection["RunNumber"]] = []

                runInfo[lumiSection["RunNumber"]].append(
                    lumiSection["LumiSectionNumber"])
            if len(runInfo.keys()) > 0:
                files[blockFile["LogicalFileName"]] = {
                    "runs": runInfo,
                    "events": blockFile["NumberOfEvents"],
                    "size": blockFile["FileSize"],
                    "locations": list(blockLocations),
                    "parents": parentLFNs
                }
    return files
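
A small sketch of the per-lumi run filtering applied above, factored into a standalone helper with a usage check. filterRunLumis is a hypothetical name; the record keys (RunNumber, LumiSectionNumber) follow the DBS structures the code above reads.

def filterRunLumis(lumiList, runBlacklist, runWhitelist):
    """
    Return a {run: [lumis]} dict, skipping blacklisted runs and, when a
    whitelist is given, any run not on it (mirrors the loop above).
    """
    runInfo = {}
    for lumiSection in lumiList:
        run = lumiSection["RunNumber"]
        if runBlacklist and run in runBlacklist:
            continue
        if runWhitelist and run not in runWhitelist:
            continue
        runInfo.setdefault(run, []).append(lumiSection["LumiSectionNumber"])
    return runInfo

lumis = [{"RunNumber": 1, "LumiSectionNumber": 10},
         {"RunNumber": 2, "LumiSectionNumber": 11},
         {"RunNumber": 3, "LumiSectionNumber": 12}]
assert filterRunLumis(lumis, runBlacklist=[2], runWhitelist=[1, 2]) == {1: [10]}
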
Example #4
def getFiles(datasetName, runBlacklist, runWhitelist, blockBlacklist,
             blockWhitelist, dbsUrl):
    """
    _getFiles_

    Get the full information of a dataset including files, blocks, runs and lumis.
    Filter it using run and block white/black lists.

    It can receive an optional DBSUrl.
    """
    dbsReader = DBSReader(endpoint = dbsUrl)
    phedexReader = PhEDEx()
    siteDB = SiteDBJSON()

    files = {}
    outputDatasetParts = datasetName.split("/")
    datasets = dbsReader.matchProcessedDatasets(outputDatasetParts[1],
                                                outputDatasetParts[3],
                                                outputDatasetParts[2])

    if len(datasets) == 0:
        raise RuntimeError("Dataset %s doesn't exist in given DBS instance" % datasetName)

    blockNames = dbsReader.listFileBlocks(datasetName)
    for blockName in blockNames:
        if blockBlacklist and blockName in blockBlacklist:
            continue
        if blockWhitelist and blockName not in blockWhitelist:
            continue

        replicaInfo = phedexReader.getReplicaInfoForBlocks(block = blockName,
                                                           subscribed = 'y')
        block = dbsReader.listFilesInBlockWithParents(blockName)
        blockLocations = set()
        if len(replicaInfo["phedex"]["block"]) > 0:
            for replica in replicaInfo["phedex"]["block"][0]["replica"]:
                node = replica["node"]
                cmsSites = siteDB.phEDExNodetocmsName(node)
                if type(cmsSites) != list:
                    cmsSites = [cmsSites]
                for cmsName in cmsSites:
                    blockLocations.update(siteDB.cmsNametoSE(cmsName))
        for blockFile in block:
            parentLFNs = []
            for fileParent in blockFile["ParentList"]:
                parentLFNs.append(fileParent["LogicalFileName"])
            runInfo = {}
            for lumiSection in blockFile["LumiList"]:
                if runBlacklist and lumiSection["RunNumber"] in runBlacklist:
                    continue
                if runWhitelist and lumiSection["RunNumber"] not in runWhitelist:
                    continue

                if lumiSection["RunNumber"] not in runInfo.keys():
                    runInfo[lumiSection["RunNumber"]] = []

                runInfo[lumiSection["RunNumber"]].append(lumiSection["LumiSectionNumber"])
            if len(runInfo.keys()) > 0:
                files[blockFile["LogicalFileName"]] = {"runs": runInfo,
                                                       "events": blockFile["NumberOfEvents"],
                                                       "size": blockFile["FileSize"],
                                                       "locations": list(blockLocations),
                                                       "parents": parentLFNs}
    return files
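
In the version above the dataset path is split before calling matchProcessedDatasets with the primary dataset (parts[1]), the data tier (parts[3]) and the processed dataset (parts[2]). A short sketch of that index mapping; splitDatasetPath is a hypothetical helper, relying only on the standard /Primary/Processed/Tier path layout.

def splitDatasetPath(datasetName):
    """
    A dataset path looks like /Primary/Processed/Tier, so splitting on "/"
    gives ["", primary, processed, tier].
    """
    parts = datasetName.split("/")
    if len(parts) != 4 or parts[0]:
        raise ValueError("Malformed dataset path: %s" % datasetName)
    return parts[1], parts[2], parts[3]

primary, processed, tier = splitDatasetPath("/Cosmics/Run2012A-PromptReco-v1/RECO")
assert (primary, processed, tier) == ("Cosmics", "Run2012A-PromptReco-v1", "RECO")
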
Example #5
class PhEDExInjectorSubscriber(BaseWorkerThread):
    """
    _PhEDExInjectorSubscriber_

    Poll the DBSBuffer database and subscribe datasets as they are
    created.
    """
    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        self.siteDB = SiteDBJSON()
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")
        self.safeMode = getattr(config.PhEDExInjector, "safeOperationMode", False)

        # Subscribed state in the DBSBuffer table for datasets
        self.terminalSubscriptionState = 1
        if self.safeMode:
            self.terminalSubscriptionState = 2

        # We will map node names to CMS names, since that is what the spec will have.
        # If a CMS name is associated with many PhEDEx nodes, then choose the MSS option
        self.cmsToPhedexMap = {}

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will then be available
        self.initAlerts(compName = "PhEDExInjector")


    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector.  Also load the SE names to
        PhEDEx node name mappings from the data service.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMComponent.PhEDExInjector.Database",
                                logger = self.logger,
                                dbinterface = myThread.dbi)

        self.getUnsubscribed = daofactory(classname = "GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname = "MarkDatasetSubscribed")
        self.getPartiallySubscribed = daofactory(classname = "GetPartiallySubscribedDatasets")

        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:

            cmsName = self.siteDB.phEDExNodetocmsName(node["name"])

            if cmsName not in self.cmsToPhedexMap:
                self.cmsToPhedexMap[cmsName] = {}

            logging.info("Loaded PhEDEx node %s for site %s" % (node["name"], cmsName))
            if node["kind"] not in self.cmsToPhedexMap[cmsName]:
                self.cmsToPhedexMap[cmsName][node["kind"]] = node["name"]

        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for datasets and subscribe them.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute(conn = myThread.transaction.conn,
                                                            transaction = True)

        if self.safeMode:
            partiallySubscribedDatasets = self.getPartiallySubscribed.execute(conn = myThread.transaction.conn,
                                                                              transaction = True)
            unsubscribedDatasets.extend(partiallySubscribedDatasets)
            partiallySubscribedSet = set()
            for entry in partiallySubscribedDatasets:
                partiallySubscribedSet.add(entry["path"])

        # Map the datasets to their specs
        specDatasetMap = {}
        for unsubscribedDataset in unsubscribedDatasets:
            datasetPath = unsubscribedDataset["path"]
            workflow = unsubscribedDataset["workflow"]
            spec = unsubscribedDataset["spec"]

            if datasetPath not in specDatasetMap:
                specDatasetMap[datasetPath] = []
            specDatasetMap[datasetPath].append({"workflow" : workflow, "spec" : spec})

        specCache = {}
        siteMap = {}
        # Distribute the subscriptions by site, type and priority
        # This is to make as few subscriptions as possible
        # Site map values are dictionaries where the keys are tuples (Prio, Custodial, AutoApprove, Move)
        # Where Custodial is boolean, Prio is in ["Low", "Normal", "High"], AutoApprove is boolean and Move is boolean
        for dataset in specDatasetMap:
            # Aggregate all the different subscription configurations
            subInfo = {}
            for entry in specDatasetMap[dataset]:
                if not entry["spec"]:
                    # Can't use this spec, there isn't one
                    continue
                # Load spec if not in the cache
                if entry["spec"] not in specCache:
                    helper = WMWorkloadHelper()
                    try:
                        helper.load(entry["spec"])
                        specCache[entry["spec"]] = helper
                    except Exception:
                        #Couldn't load it , alert and carry on
                        msg = "Couldn't load spec: %s" % entry["spec"]
                        logging.error(msg)
                        self.sendAlert(7, msg = msg)
                        continue
                #If we are running in safe mode, we need to know if the workflow is ready
                # We have the spec, get the info
                helper = specCache[entry["spec"]]
                workflowSubInfo = helper.getSubscriptionInformation()
                datasetSubInfo = workflowSubInfo.get(dataset, None)
                if datasetSubInfo and subInfo:
                    subInfo["CustodialSites"] = extendWithoutDups(subInfo["CustodialSites"], datasetSubInfo["CustodialSites"])
                    subInfo["NonCustodialSites"] = extendWithoutDups(subInfo["NonCustodialSites"], datasetSubInfo["NonCustodialSites"])
                    subInfo["AutoApproveSites"] = extendWithoutDups(subInfo["AutoApproveSites"], datasetSubInfo["AutoApproveSites"])
                    subInfo["Priority"] = solvePrioConflicts(subInfo["Priority"], datasetSubInfo["Priority"])
                elif datasetSubInfo:
                    subInfo = datasetSubInfo

            # We now have aggregated subscription information for this dataset in subInfo
            # Distribute it by site
            if not subInfo:
                #Nothing to do, log and continue
                msg = "No subscriptions configured for dataset %s" % dataset
                logging.warning(msg)
                self.markSubscribed.execute(dataset, subscribed = self.terminalSubscriptionState,
                                            conn = myThread.transaction.conn,
                                            transaction = True)
                continue
            # Make sure that a site is not configured both as non custodial and custodial
            # Non-custodial is believed to be the right choice
            subInfo["CustodialSites"] = list(set(subInfo["CustodialSites"]) - set(subInfo["NonCustodialSites"]))
            for site in subInfo["CustodialSites"]:
                if site not in siteMap:
                    siteMap[site] = {}
                if self.safeMode and dataset not in partiallySubscribedSet:
                    tupleKey = (subInfo["Priority"], True, False, False)
                else:
                    tupleKey = (subInfo["Priority"], True, False, True)
                if tupleKey not in siteMap[site]:
                    siteMap[site][tupleKey] = []
                siteMap[site][tupleKey].append(dataset)

            # If we are in safe mode and this is a partially subscribed dataset,
            # then the non-custodial were done in a previous cycle
            if self.safeMode and dataset in partiallySubscribedSet:
                self.markSubscribed.execute(dataset, subscribed = self.terminalSubscriptionState,
                                            conn = myThread.transaction.conn,
                                            transaction = True)
                continue

            for site in subInfo["NonCustodialSites"]:
                if site not in siteMap:
                    siteMap[site] = {}
                autoApprove = False
                if site in subInfo["AutoApproveSites"]:
                    autoApprove = True
                tupleKey = (subInfo["Priority"], False, autoApprove)
                if tupleKey not in siteMap[site]:
                    siteMap[site][tupleKey] = []
                siteMap[site][tupleKey].append(dataset)

            self.markSubscribed.execute(dataset, subscribed = 1,
                                        conn = myThread.transaction.conn,
                                        transaction = True)

        # Actually request the subscriptions
        for site in siteMap:
            # Check that the site is valid
            if site not in self.cmsToPhedexMap:
                msg = "Site %s doesn't appear to be valid to PhEDEx" % site
                logging.error(msg)
                self.sendAlert(7, msg = msg)
                continue
            for subscriptionFlavor in siteMap[site]:
                datasets = siteMap[site][subscriptionFlavor]
                # Check that the site is valid
                if "MSS" in self.cmsToPhedexMap[site]:
                    phedexNode = self.cmsToPhedexMap[site]["MSS"]
                else:
                    phedexNode = self.cmsToPhedexMap[site]["Disk"]
                logging.info("Subscribing %s to %s" % (datasets, site))
                options = {"custodial" : "n", "requestOnly" : "y",
                           "priority" : subscriptionFlavor[0].lower(),
                           "move" : "n"}
                if subscriptionFlavor[1]:
                    options["custodial"] = "y"
                    if subscriptionFlavor[3]:
                        options["move"] = "y"
                if subscriptionFlavor[2]:
                    options["requestOnly"] = "n"

                newSubscription = PhEDExSubscription(datasets, phedexNode, self.group,
                                                     **options)

                xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl,
                                                           newSubscription.getDatasetPaths())
                logging.debug(str(xmlData))
                self.phedex.subscribe(newSubscription, xmlData)

        myThread.transaction.commit()
        return
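
extendWithoutDups and solvePrioConflicts are called above but not shown in this snippet. Below is a hedged sketch of what such helpers plausibly do (an order-preserving list union, and "the higher of two priorities wins"); these are assumptions for illustration, not the actual implementations.

PRIORITY_ORDER = {"Low": 0, "Normal": 1, "High": 2}

def extendWithoutDups(listA, listB):
    """Merge two site lists, keeping order and dropping duplicates (assumed behaviour)."""
    merged = list(listA)
    for item in listB:
        if item not in merged:
            merged.append(item)
    return merged

def solvePrioConflicts(prioA, prioB):
    """When two specs disagree on priority, keep the higher one (assumed behaviour)."""
    return prioA if PRIORITY_ORDER[prioA] >= PRIORITY_ORDER[prioB] else prioB

assert extendWithoutDups(["T1_US_FNAL"], ["T1_US_FNAL", "T2_CH_CERN"]) == ["T1_US_FNAL", "T2_CH_CERN"]
assert solvePrioConflicts("Normal", "High") == "High"
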
Example #6
class SiteDBTest(unittest.TestCase):
    """
    Unit tests for SiteScreening module
    """

    def setUp(self):
        """
        Setup for unit tests
        """
        EmulatorHelper.setEmulators(siteDB = True)
        self.mySiteDB = SiteDBJSON()

    def tearDown(self):
        EmulatorHelper.resetEmulators()

    def testCmsNametoPhEDExNode(self):
        """
        Tests cmsNametoPhEDExNode
        """
        target = ['T1_US_FNAL_MSS','T1_US_FNAL_Buffer']
        results = self.mySiteDB.cmsNametoPhEDExNode("T1_US_FNAL")
        self.failUnless(sorted(results) == sorted(target))

    def testPhEDExNodetocmsName(self):
        """
        Tests PhEDExNodetocmsName
        """
        result = self.mySiteDB.phEDExNodetocmsName('T1_US_FNAL_MSS')
        self.failUnless(result == 'T1_US_FNAL')
        result = self.mySiteDB.phEDExNodetocmsName('T1_US_FNAL_Buffer')
        self.failUnless(result == 'T1_US_FNAL')
        result = self.mySiteDB.phEDExNodetocmsName('T2_UK_London_IC')
        self.failUnless(result == 'T2_UK_London_IC')
        # don't check this anymore, see comment in phEDExNodetocmsName function
        #self.assertRaises(ValueError, self.mySiteDB.phEDExNodetocmsName,
        #                  'T9_DOESNT_EXIST_Buffer')

    def testCmsNametoSE(self):
        """
        Tests CmsNametoSE
        """
        target = ['srm-cms.gridpp.rl.ac.uk']
        results = self.mySiteDB.cmsNametoSE("T1_UK_RAL")
        self.failUnless(sorted(results) == sorted(target))

    def testSEtoCmsName(self):
        """
        Tests seToCMSName
        """
        target = ['T1_US_FNAL']
        results = self.mySiteDB.seToCMSName("cmssrm.fnal.gov")
        self.failUnless(results == target)
        target = sorted(['T2_CH_CERN', 'T2_CH_CERN_HLT'])
        results = sorted(self.mySiteDB.seToCMSName("srm-eoscms.cern.ch"))
        self.failUnless(results == target)

    def testCmsNametoCE(self):
        """
        Tests CmsNametoCE
        """
        target = ['lcgce11.gridpp.rl.ac.uk', 'lcgce10.gridpp.rl.ac.uk',
                  'lcgce02.gridpp.rl.ac.uk']
        results = self.mySiteDB.cmsNametoCE("T1_UK_RAL")
        self.failUnless(sorted(results) == sorted(target))

    def testDNUserName(self):
        """
        Tests DN to Username lookup
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=gutsche/CN=582680/CN=Oliver Gutsche"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.failUnless(testUserName == userName)

    @attr("integration")
    def testDNWithApostrophe(self):
        """
        Tests a DN with an apostrophe in it - will fail till SiteDB2 appears
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.failUnless(testUserName == userName)

    def testSEFinder(self):
        """
        _testSEFinder_

        See if we can retrieve seNames from all sites
        """

        seNames = self.mySiteDB.getAllSENames()
        self.assertTrue(len(seNames) > 1)
        self.assertTrue('cmssrm.fnal.gov' in seNames)
        return
Example #7
class PhEDExInjectorSubscriber(BaseWorkerThread):
    """
    _PhEDExInjectorSubscriber_

    Poll the DBSBuffer database and subscribe datasets as they are
    created.
    """

    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        self.siteDB = SiteDBJSON()
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")

        # We will map node names to CMS names, since that is what the spec will have.
        # If a CMS name is associated with many PhEDEx nodes, then choose the MSS option
        self.cmsToPhedexMap = {}
        self.phedexNodes = {"MSS": [], "Disk": []}

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will then be available
        self.initAlerts(compName="PhEDExInjector")

    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector.  Also load the SE names to
        PhEDEx node name mappings from the data service.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(
            package="WMComponent.PhEDExInjector.Database", logger=self.logger, dbinterface=myThread.dbi
        )

        self.getUnsubscribed = daofactory(classname="GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname="MarkDatasetSubscribed")

        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:

            cmsName = self.siteDB.phEDExNodetocmsName(node["name"])

            if cmsName not in self.cmsToPhedexMap:
                self.cmsToPhedexMap[cmsName] = {}

            logging.info("Loaded PhEDEx node %s for site %s" % (node["name"], cmsName))
            if node["kind"] not in self.cmsToPhedexMap[cmsName]:
                self.cmsToPhedexMap[cmsName][node["kind"]] = node["name"]

            if node["kind"] in ["MSS", "Disk"]:
                self.phedexNodes[node["kind"]].append(node["name"])
        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Run the subscription algorithm as configured
        """
        self.subscribeDatasets()
        return

    def subscribeDatasets(self):
        """
        _subscribeDatasets_

        Poll the database for datasets and subscribe them.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute(conn=myThread.transaction.conn, transaction=True)

        # Keep a list of subscriptions to tick as subscribed in the database
        subscriptionsMade = []

        # Create a list of subscriptions as defined by the PhEDEx data structures
        subs = SubscriptionList()

        # Create the subscription objects and add them to the list
        # The list takes care of the sorting internally
        for subInfo in unsubscribedDatasets:
            site = subInfo["site"]

            if site not in self.phedexNodes["MSS"] and site not in self.phedexNodes["Disk"]:

                if site not in self.cmsToPhedexMap:
                    msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                    msg += "skipping subscription: %s" % subInfo["id"]
                    logging.error(msg)
                    self.sendAlert(7, msg=msg)
                    continue

                # Get the phedex node from CMS site
                site = self.cmsToPhedexMap[site].get("MSS") or self.cmsToPhedexMap[site]["Disk"]

            # Avoid custodial subscriptions to disk nodes
            if site not in self.phedexNodes["MSS"]:
                subInfo["custodial"] = "n"
            # Avoid auto approval in T1 sites
            elif site.startswith("T1"):
                subInfo["request_only"] = "y"

            phedexSub = PhEDExSubscription(
                subInfo["path"],
                site,
                self.group,
                priority=subInfo["priority"],
                move=subInfo["move"],
                custodial=subInfo["custodial"],
                request_only=subInfo["request_only"],
                subscriptionId=subInfo["id"],
            )

            # Check if the subscription is a duplicate
            if phedexSub.matchesExistingSubscription(self.phedex) or phedexSub.matchesExistingTransferRequest(
                self.phedex
            ):
                subscriptionsMade.append(subInfo["id"])
                continue

            # Add it to the list
            subs.addSubscription(phedexSub)

        # Compact the subscriptions
        subs.compact()

        for subscription in subs.getSubscriptionList():
            try:
                xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl, subscription.getDatasetPaths())
                logging.debug(str(xmlData))
                msg = "Subscribing: %s to %s, with options: " % (
                    subscription.getDatasetPaths(),
                    subscription.getNodes(),
                )
                msg += "Move: %s, Custodial: %s, Request Only: %s" % (
                    subscription.move,
                    subscription.custodial,
                    subscription.request_only,
                )
                logging.info(msg)
                self.phedex.subscribe(subscription, xmlData)
            except Exception as ex:
                logging.error("Something went wrong when communicating with PhEDEx, will try again later.")
                logging.error("Exception: %s" % str(ex))
            else:
                subscriptionsMade.extend(subscription.getSubscriptionIds())

        # Register the result in DBSBuffer
        if subscriptionsMade:
            self.markSubscribed.execute(subscriptionsMade, conn=myThread.transaction.conn, transaction=True)

        myThread.transaction.commit()
        return
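
SubscriptionList.compact() above is relied on to merge subscriptions that share a node and identical options, so that fewer PhEDEx requests are issued. A rough, self-contained sketch of that idea using plain dicts; the key layout and option names are assumptions, not the real PhEDExSubscription/SubscriptionList API.

def compactSubscriptions(subscriptions):
    """
    Group dataset paths that share the same node and options, so one request
    can cover all of them. Each input here is a plain dict, for illustration.
    """
    grouped = {}
    for sub in subscriptions:
        key = (sub["node"], sub["priority"], sub["move"], sub["custodial"], sub["request_only"])
        grouped.setdefault(key, []).append(sub["path"])
    return grouped

subs = [{"node": "T1_US_FNAL_MSS", "priority": "normal", "move": "n",
         "custodial": "y", "request_only": "y", "path": "/A/B/RAW"},
        {"node": "T1_US_FNAL_MSS", "priority": "normal", "move": "n",
         "custodial": "y", "request_only": "y", "path": "/C/D/RAW"}]
assert list(compactSubscriptions(subs).values()) == [["/A/B/RAW", "/C/D/RAW"]]
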
Example #8
class SiteDBTest(unittest.TestCase):
    """
    Unit tests for SiteScreening module
    """
    def setUp(self):
        """
        Setup for unit tests
        """
        self.mySiteDB = SiteDBJSON()

    def testCmsNametoPhEDExNode(self):
        """
        Tests cmsNametoPhEDExNode
        """
        target = ['T1_US_FNAL_MSS', 'T1_US_FNAL_Buffer']
        results = self.mySiteDB.cmsNametoPhEDExNode("T1_US_FNAL")
        self.failUnless(sorted(results) == sorted(target))

    def testPhEDExNodetocmsName(self):
        """
        Tests PhEDExNodetocmsName
        """
        result = self.mySiteDB.phEDExNodetocmsName('T1_US_FNAL_MSS')
        self.failUnless(result == 'T1_US_FNAL')
        result = self.mySiteDB.phEDExNodetocmsName('T1_US_FNAL_Buffer')
        self.failUnless(result == 'T1_US_FNAL')
        result = self.mySiteDB.phEDExNodetocmsName('T2_UK_London_IC')
        self.failUnless(result == 'T2_UK_London_IC')
        # don't check this anymore, see comment in phEDExNodetocmsName function
        #self.assertRaises(ValueError, self.mySiteDB.phEDExNodetocmsName,
        #                  'T9_DOESNT_EXIST_Buffer')

    def testCmsNametoSE(self):
        """
        Tests CmsNametoSE
        """
        target = ['srm-cms.gridpp.rl.ac.uk', 'srm-cms-disk.gridpp.rl.ac.uk']
        results = self.mySiteDB.cmsNametoSE("T1_UK_RAL")
        self.failUnless(sorted(results) == sorted(target))

    def testSEtoCmsName(self):
        """
        Tests seToCMSName
        """
        target = 'T1_US_FNAL'
        results = self.mySiteDB.seToCMSName("cmssrm.fnal.gov")
        self.failUnless(results == target)

    def testCmsNametoCE(self):
        """
        Tests CmsNametoCE
        """
        target = [
            'lcgce09.gridpp.rl.ac.uk', 'lcgce06.gridpp.rl.ac.uk',
            'lcgce07.gridpp.rl.ac.uk', 'lcgce07.gridpp.rl.ac.uk'
        ]
        results = self.mySiteDB.cmsNametoCE("T1_UK_RAL")
        self.failUnless(sorted(results) == sorted(target))

    def testJSONParser(self):
        """
        Tests the JSON parser directly
        """
        cmsName = "cmsgrid02.hep.wisc.edu"
        results = self.mySiteDB.getJSON("CEtoCMSName",
                                        file="CEtoCMSName",
                                        name=cmsName)
        self.failUnless(results['0']['name'] == "T2_US_Wisconsin")

    def testDNUserName(self):
        """
        Tests DN to Username lookup
        """
        testDn = "/C=UK/O=eScience/OU=Bristol/L=IS/CN=simon metson"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.failUnless(testUserName == userName)

    @attr("integration")
    def testDNWithApostrophe(self):
        """
        Tests a DN with an apostrophe in it - will fail till SiteDB2 appears
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.failUnless(testUserName == userName)

    def testSEFinder(self):
        """
        _testSEFinder_

        See if we can retrieve seNames from all sites
        """

        seNames = self.mySiteDB.getAllSENames()
        self.assertTrue(len(seNames) > 1)
        self.assertTrue('cmssrm.fnal.gov' in seNames)
        return

    @attr("integration")
    def testParsingJsonWithApostrophe(self):
        """
        Tests parsing a DN json with an apostrophe in it
        """
        json = """{"dn": "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'", "user": "******"}"""
        d = self.mySiteDB.parser.dictParser(json)
        self.assertEquals(
            "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'",
            d['dn'])

    @attr("integration")
    def testParsingInvalidJsonWithApostrophe(self):
        """
        Tests parsing an invalid DN json (from sitedb v1) with an apostrophe in it
        """
        json = """{'dn': '/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio' Fano', 'user': '******'}"""
        d = self.mySiteDB.parser.dictParser(json)
        self.assertEquals(
            "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio' Fano",
            d['dn'])
        json = """{'dn': '/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'', 'user': '******'}"""
        d = self.mySiteDB.parser.dictParser(json)
        self.assertEquals(
            "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'",
            d['dn'])
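
Several tests above are tagged with @attr("integration") because they need the live services. A minimal sketch, assuming the attr decorator comes from nose's attrib plugin (as these test modules typically do); the module filename is hypothetical.

import nose

if __name__ == "__main__":
    # Select tests that do NOT carry the "integration" attribute; drop the "!"
    # to run only the integration-tagged tests instead. The filename is illustrative.
    nose.run(argv=["nosetests", "-a", "!integration", "SiteDB_t.py"])
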
Example #9
def getFiles(datasetName, runBlacklist, runWhitelist, blockBlacklist,
             blockWhitelist, dbsUrl):
    """
    _getFiles_

    Get the full information of a dataset including files, blocks, runs and lumis.
    Filter it using run and block white/black lists.

    It can receive an optional DBSUrl.
    """
    dbsReader = DBSReader(endpoint = dbsUrl)
    phedexReader = PhEDEx()
    siteDB = SiteDBJSON()

    files = {}
    outputDatasetParts = datasetName.split("/")
    print "dataset",datasetName,"parts",outputDatasetParts   
    try:
        #retrieve list of blocks from dataset
        blockNames = dbsReader.listFileBlocks(datasetName)
    except Exception:
        raise RuntimeError("Dataset %s doesn't exist in given DBS instance" % datasetName)
    
    #traverse each block
    for blockName in blockNames:
        #deal with white and black list.
        if blockBlacklist and blockName in blockBlacklist:
            continue
        if blockWhitelist and blockName not in blockWhitelist:
            continue
        
        #existing blocks in phedex
        replicaInfo = phedexReader.getReplicaInfoForBlocks(block = blockName,
                                                           subscribed = 'y')
        blockFiles = dbsReader.listFilesInBlock(blockName, lumis=True)

        blockLocations = set()
        #load block locations
        if len(replicaInfo["phedex"]["block"]) > 0:
            for replica in replicaInfo["phedex"]["block"][0]["replica"]:
                node = replica["node"]
                cmsSites = siteDB.phEDExNodetocmsName(node)
                if type(cmsSites) != list:
                    cmsSites = [cmsSites]
                for cmsName in cmsSites:
                    blockLocations.update(siteDB.cmsNametoSE(cmsName))
        #get parent information once per block; the call is block-level, so it
        #does not need to be repeated for every file
        blockFileParents = dbsReader.listFilesInBlockWithParents(blockName)
        #for each file on the block
        for blockFile in blockFiles:
            parentLFNs = []
            #populate parent information
            if blockFileParents and "ParentList" in blockFileParents[0]:
                for fileParent in blockFileParents[0]["ParentList"]:
                    parentLFNs.append(fileParent["LogicalFileName"])
            runInfo = {}
            #Lumis not included in file
            for lumiSection in blockFile["LumiList"]:
                if runBlacklist and lumiSection["RunNumber"] in runBlacklist:
                    continue
                if runWhitelist and lumiSection["RunNumber"] not in runWhitelist:
                    continue

                if lumiSection["RunNumber"] not in runInfo.keys():
                    runInfo[lumiSection["RunNumber"]] = []

                runInfo[lumiSection["RunNumber"]].append(lumiSection["LumiSectionNumber"])
            if len(runInfo.keys()) > 0:
                files[blockFile["LogicalFileName"]] = {"runs": runInfo,
                                                       "events": blockFile["NumberOfEvents"],
                                                       "size": blockFile["FileSize"],
                                                       "locations": list(blockLocations),
                                                       "parents": parentLFNs}
    return files
Example #10
def getFiles(datasetName, runBlacklist, runWhitelist, blockBlacklist,
             blockWhitelist, dbsUrl):
    """
    _getFiles_

    Get the full information of a dataset including files, blocks, runs and lumis.
    Filter it using run and block white/black lists.

    It can receive an optional DBSUrl.
    """
    dbsReader = DBSReader(endpoint = dbsUrl)
    phedexReader = PhEDEx()
    siteDB = SiteDBJSON()

    files = {}
    outputDatasetParts = datasetName.split("/")
    datasets = dbsReader.matchProcessedDatasets(outputDatasetParts[1],
                                                outputDatasetParts[3],
                                                outputDatasetParts[2])

    if len(datasets) == 0:
        raise RuntimeError("Dataset %s doesn't exist in given DBS instance" % datasetName)

    blockNames = dbsReader.listFileBlocks(datasetName)
    for blockName in blockNames:
        if blockBlacklist and blockName in blockBlacklist:
            continue
        if blockWhitelist and blockName not in blockWhitelist:
            continue

        replicaInfo = phedexReader.getReplicaInfoForBlocks(block = blockName,
                                                           subscribed = 'y')
        block = dbsReader.listFilesInBlockWithParents(blockName)
        blockLocations = set()
        if len(replicaInfo["phedex"]["block"]) > 0:
            for replica in replicaInfo["phedex"]["block"][0]["replica"]:
                node = replica["node"]
                cmsSites = siteDB.phEDExNodetocmsName(node)
                if type(cmsSites) != list:
                    cmsSites = [cmsSites]
                for cmsName in cmsSites:
                    blockLocations.update(siteDB.cmsNametoSE(cmsName))
        for blockFile in block:
            parentLFNs = []
            for fileParent in blockFile["ParentList"]:
                parentLFNs.append(fileParent["LogicalFileName"])
            runInfo = {}
            for lumiSection in blockFile["LumiList"]:
                if runBlacklist and lumiSection["RunNumber"] in runBlacklist:
                    continue
                if runWhitelist and lumiSection["RunNumber"] not in runWhitelist:
                    continue

                if lumiSection["RunNumber"] not in runInfo.keys():
                    runInfo[lumiSection["RunNumber"]] = []

                runInfo[lumiSection["RunNumber"]].append(lumiSection["LumiSectionNumber"])
            if len(runInfo.keys()) > 0:
                files[blockFile["LogicalFileName"]] = {"runs": runInfo,
                                                       "events": blockFile["NumberOfEvents"],
                                                       "size": blockFile["FileSize"],
                                                       "locations": list(blockLocations),
                                                       "parents": parentLFNs}
    return files
Example #11
class SiteDBTest(unittest.TestCase):
    """
    Unit tests for SiteScreening module
    """

    def setUp(self):
        """
        Setup for unit tests
        """
        EmulatorHelper.setEmulators(siteDB = True)
        self.mySiteDB = SiteDBJSON()

    def tearDown(self):
        EmulatorHelper.resetEmulators()

    def testCmsNametoPhEDExNode(self):
        """
        Tests cmsNametoPhEDExNode
        """
        target = ['T1_US_FNAL_MSS','T1_US_FNAL_Buffer']
        results = self.mySiteDB.cmsNametoPhEDExNode("T1_US_FNAL")
        self.failUnless(sorted(results) == sorted(target))

    def testPhEDExNodetocmsName(self):
        """
        Tests PhEDExNodetocmsName
        """
        result = self.mySiteDB.phEDExNodetocmsName('T1_US_FNAL_MSS')
        self.failUnless(result == 'T1_US_FNAL')
        result = self.mySiteDB.phEDExNodetocmsName('T1_US_FNAL_Buffer')
        self.failUnless(result == 'T1_US_FNAL')
        result = self.mySiteDB.phEDExNodetocmsName('T2_UK_London_IC')
        self.failUnless(result == 'T2_UK_London_IC')
        # don't check this anymore, see comment in phEDExNodetocmsName function
        #self.assertRaises(ValueError, self.mySiteDB.phEDExNodetocmsName,
        #                  'T9_DOESNT_EXIST_Buffer')

    def testCmsNametoSE(self):
        """
        Tests CmsNametoSE
        """
        target = ['srm-cms.gridpp.rl.ac.uk']
        results = self.mySiteDB.cmsNametoSE("T1_UK_RAL")
        self.failUnless(sorted(results) == sorted(target))

    def testCmsNamePatterntoSE(self):
        """
        Tests CmsNamePatterntoSE
        """
        target = ['T2_XX_SiteA', 'T2_XX_SiteB', 'T2_XX_SiteC']
        results = self.mySiteDB.cmsNametoSE("%T2_XX")
        self.failUnless(sorted(results) == sorted(target))

    def testSEtoCmsName(self):
        """
        Tests seToCMSName
        """
        target = ['T1_US_FNAL']
        results = self.mySiteDB.seToCMSName("cmssrm.fnal.gov")
        self.failUnless(results == target)
        target = sorted(['T2_CH_CERN', 'T2_CH_CERN_HLT'])
        results = sorted(self.mySiteDB.seToCMSName("srm-eoscms.cern.ch"))
        self.failUnless(results == target)

    def testCmsNametoCE(self):
        """
        Tests CmsNametoCE
        """
        target = ['lcgce11.gridpp.rl.ac.uk', 'lcgce10.gridpp.rl.ac.uk',
                  'lcgce02.gridpp.rl.ac.uk']
        results = self.mySiteDB.cmsNametoCE("T1_UK_RAL")
        self.failUnless(sorted(results) == sorted(target))

    def testCmsNamePatterntoCE(self):
        """
        Tests CmsNamePatterntoCE
        """
        target = ['T2_XX_SiteA', 'T2_XX_SiteB', 'T2_XX_SiteC']
        results = self.mySiteDB.cmsNametoCE("%T2_XX")
        self.failUnless(sorted(results) == sorted(target))

    def testDNUserName(self):
        """
        Tests DN to Username lookup
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=gutsche/CN=582680/CN=Oliver Gutsche"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.failUnless(testUserName == userName)

    @attr("integration")
    def testDNWithApostrophe(self):
        """
        Tests a DN with an apostrophe in it - will fail till SiteDB2 appears
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.failUnless(testUserName == userName)

    def testSEFinder(self):
        """
        _testSEFinder_

        See if we can retrieve seNames from all sites
        """

        seNames = self.mySiteDB.getAllSENames()
        self.assertTrue(len(seNames) > 1)
        self.assertTrue('cmssrm.fnal.gov' in seNames)
        return
Example #12
class PhEDExInjectorSubscriber(BaseWorkerThread):
    """
    _PhEDExInjectorSubscriber_

    Poll the DBSBuffer database and subscribe datasets as they are
    created.
    """
    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl},
                             "json")
        self.siteDB = SiteDBJSON()
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")
        self.safeMode = getattr(config.PhEDExInjector, "safeOperationMode",
                                False)
        self.replicaOnly = getattr(config.PhEDExInjector, "replicaOnly", False)

        # Subscribed state in the DBSBuffer table for datasets
        self.terminalSubscriptionState = 1
        if self.safeMode:
            self.terminalSubscriptionState = 2

        # We will map node names to CMS names, since that is what the spec will have.
        # If a CMS name is associated with many PhEDEx nodes, then choose the MSS option
        self.cmsToPhedexMap = {}

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will then be available

        self.initAlerts(compName="PhEDExInjector")

    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector.  Also load the SE names to
        PhEDEx node name mappings from the data service.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMComponent.PhEDExInjector.Database",
                                logger=self.logger,
                                dbinterface=myThread.dbi)

        self.getUnsubscribed = daofactory(classname="GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname="MarkDatasetSubscribed")
        self.getPartiallySubscribed = daofactory(
            classname="GetPartiallySubscribedDatasets")

        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:

            cmsName = self.siteDB.phEDExNodetocmsName(node["name"])

            if cmsName not in self.cmsToPhedexMap:
                self.cmsToPhedexMap[cmsName] = {}

            logging.info("Loaded PhEDEx node %s for site %s" %
                         (node["name"], cmsName))
            if node["kind"] not in self.cmsToPhedexMap[cmsName]:
                self.cmsToPhedexMap[cmsName][node["kind"]] = node["name"]

        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for datasets and subscribe them.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute(
            conn=myThread.transaction.conn, transaction=True)

        if self.safeMode:
            partiallySubscribedDatasets = self.getPartiallySubscribed.execute(
                conn=myThread.transaction.conn, transaction=True)
            unsubscribedDatasets.extend(partiallySubscribedDatasets)
            partiallySubscribedSet = set()
            for entry in partiallySubscribedDatasets:
                partiallySubscribedSet.add(entry["path"])

        # Map the datasets to their specs
        specDatasetMap = {}
        for unsubscribedDataset in unsubscribedDatasets:
            datasetPath = unsubscribedDataset["path"]
            workflow = unsubscribedDataset["workflow"]
            spec = unsubscribedDataset["spec"]

            if datasetPath not in specDatasetMap:
                specDatasetMap[datasetPath] = []
            specDatasetMap[datasetPath].append({
                "workflow": workflow,
                "spec": spec
            })

        specCache = {}
        siteMap = {}
        # Distribute the subscriptions by site, type and priority
        # This is to make as few subscriptions as possible
        # Site map values are dictionaries where the keys are tuples (Prio, Custodial, AutoApprove, Move)
        # Where Custodial is boolean, Prio is in ["Low", "Normal", "High"], AutoApprove is boolean and Move is boolean
        for dataset in specDatasetMap:
            # Aggregate all the different subscription configurations
            subInfo = {}
            for entry in specDatasetMap[dataset]:
                if not entry["spec"]:
                    # Can't use this spec, there isn't one
                    continue
                # Load spec if not in the cache
                if entry["spec"] not in specCache:
                    helper = WMWorkloadHelper()
                    try:
                        helper.load(entry["spec"])
                        specCache[entry["spec"]] = helper
                    except Exception:
                        #Couldn't load it , alert and carry on
                        msg = "Couldn't load spec: %s" % entry["spec"]
                        logging.error(msg)
                        self.sendAlert(7, msg=msg)
                        continue
                #If we are running in safe mode, we need to know if the workflow is ready
                # We have the spec, get the info
                helper = specCache[entry["spec"]]
                workflowSubInfo = helper.getSubscriptionInformation()
                datasetSubInfo = workflowSubInfo.get(dataset, None)
                if datasetSubInfo and subInfo:
                    subInfo["CustodialSites"] = extendWithoutDups(
                        subInfo["CustodialSites"],
                        datasetSubInfo["CustodialSites"])
                    subInfo["NonCustodialSites"] = extendWithoutDups(
                        subInfo["NonCustodialSites"],
                        datasetSubInfo["NonCustodialSites"])
                    subInfo["AutoApproveSites"] = extendWithoutDups(
                        subInfo["AutoApproveSites"],
                        datasetSubInfo["AutoApproveSites"])
                    subInfo["Priority"] = solvePrioConflicts(
                        subInfo["Priority"], datasetSubInfo["Priority"])
                elif datasetSubInfo:
                    subInfo = datasetSubInfo

            # We now have aggregated subscription information for this dataset in subInfo
            # Distribute it by site
            if not subInfo:
                #Nothing to do, log and continue
                msg = "No subscriptions configured for dataset %s" % dataset
                logging.warning(msg)
                self.markSubscribed.execute(
                    dataset,
                    subscribed=self.terminalSubscriptionState,
                    conn=myThread.transaction.conn,
                    transaction=True)
                continue
            # Make sure that a site is not configured both as non-custodial and custodial
            # Non-custodial is taken to be the right choice in that case
            subInfo["CustodialSites"] = list(
                set(subInfo["CustodialSites"]) -
                set(subInfo["NonCustodialSites"]))
            for site in subInfo["CustodialSites"]:
                if site not in siteMap:
                    siteMap[site] = {}
                autoApprove = False
                if site in subInfo["AutoApproveSites"]:
                    autoApprove = True
                if self.safeMode and dataset not in partiallySubscribedSet:
                    tupleKey = (subInfo["Priority"], True, autoApprove, False)
                else:
                    tupleKey = (subInfo["Priority"], True, autoApprove, True)
                if tupleKey not in siteMap[site]:
                    siteMap[site][tupleKey] = []
                # Subscriptions are sorted by options, defined by tupleKey
                # The tuple key has 3 or 4 entries in this order
                # Priority, Custodial, Auto approve, Move (True) or Replica (False)
                siteMap[site][tupleKey].append(dataset)

            # If we are in safe mode and this is a partially subscribed dataset,
            # then the non-custodial subscriptions were already made in a previous cycle
            if self.safeMode and dataset in partiallySubscribedSet:
                self.markSubscribed.execute(
                    dataset,
                    subscribed=self.terminalSubscriptionState,
                    conn=myThread.transaction.conn,
                    transaction=True)
                continue

            for site in subInfo["NonCustodialSites"]:
                if site not in siteMap:
                    siteMap[site] = {}
                autoApprove = False
                if site in subInfo["AutoApproveSites"]:
                    autoApprove = True
                # Non-custodial is never move, so this tuple has only 3 entries
                # TODO: Change tuples to frozensets for clarity
                tupleKey = (subInfo["Priority"], False, autoApprove)
                if tupleKey not in siteMap[site]:
                    siteMap[site][tupleKey] = []
                siteMap[site][tupleKey].append(dataset)

            self.markSubscribed.execute(dataset,
                                        subscribed=1,
                                        conn=myThread.transaction.conn,
                                        transaction=True)

        # Actually request the subscriptions
        for site in siteMap:
            # Check that the site is valid
            if site not in self.cmsToPhedexMap:
                msg = "Site %s doesn't appear to be valid to PhEDEx" % site
                logging.error(msg)
                self.sendAlert(7, msg=msg)
                continue
            for subscriptionFlavor in siteMap[site]:
                datasets = siteMap[site][subscriptionFlavor]
                # Pick the PhEDEx node: prefer the MSS node if the site has one, otherwise use Disk
                isMSS = False
                if "MSS" in self.cmsToPhedexMap[site]:
                    isMSS = True
                    phedexNode = self.cmsToPhedexMap[site]["MSS"]
                else:
                    phedexNode = self.cmsToPhedexMap[site]["Disk"]
                logging.info("Subscribing %s to %s" % (datasets, site))
                options = {
                    "custodial": "n",
                    "requestOnly": "y",
                    "priority": subscriptionFlavor[0].lower(),
                    "move": "n"
                }
                if subscriptionFlavor[1] and isMSS:
                    # Custodial subscriptions are only allowed on MSS nodes
                    # If custodial is requested on a non-MSS node it falls back to a non-custodial subscription
                    options["custodial"] = "y"
                    if subscriptionFlavor[3] and not self.replicaOnly:
                        options["move"] = "y"
                if subscriptionFlavor[2]:
                    options["requestOnly"] = "n"
                logging.info(
                    "Request options: Custodial - %s, Move - %s, Request Only - %s"
                    % (options["custodial"].upper(), options["move"].upper(),
                       options["requestOnly"].upper()))
                newSubscription = PhEDExSubscription(datasets, phedexNode,
                                                     self.group, **options)

                xmlData = XMLDrop.makePhEDExXMLForDatasets(
                    self.dbsUrl, newSubscription.getDatasetPaths())
                logging.debug(str(xmlData))
                self.phedex.subscribe(newSubscription, xmlData)

        myThread.transaction.commit()
        return
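The loop above calls two helpers, extendWithoutDups and solvePrioConflicts, that are imported from elsewhere in WMCore and are not shown in this example. Judging only from how they are used when merging datasetSubInfo into subInfo, the first combines two site lists while dropping duplicates and the second settles two conflicting priorities. A minimal sketch under those assumptions, not the actual WMCore implementation:

def extendWithoutDups(currentList, otherList):
    """
    Merge two site lists, dropping duplicates while preserving order.
    """
    merged = list(currentList)
    for item in otherList:
        if item not in merged:
            merged.append(item)
    return merged

def solvePrioConflicts(prioA, prioB):
    """
    Resolve two conflicting subscription priorities, assuming the
    higher of "Low", "Normal" and "High" should win.
    """
    ranking = {"Low": 0, "Normal": 1, "High": 2}
    return prioA if ranking.get(prioA, 0) >= ranking.get(prioB, 0) else prioB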
Example #13
0
class PhEDExInjectorSubscriber(BaseWorkerThread):
    """
    _PhEDExInjectorSubscriber_

    Poll the DBSBuffer database and subscribe datasets as they are
    created.
    """
    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        self.siteDB = SiteDBJSON()
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")

        # We will map node names to CMS names, that's what the spec will have.
        # If a CMS name is associated with many PhEDEx nodes then choose the MSS option
        self.cmsToPhedexMap = {}        
        self.phedexNodes = {'MSS':[], 'Disk':[]}

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will then be available
        self.initAlerts(compName = "PhEDExInjector")


    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector.  Also load the SE names to
        PhEDEx node name mappings from the data service.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMComponent.PhEDExInjector.Database",
                                logger = self.logger,
                                dbinterface = myThread.dbi)

        self.getUnsubscribed = daofactory(classname = "GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname = "MarkDatasetSubscribed")

        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:

            cmsName = self.siteDB.phEDExNodetocmsName(node["name"])

            if cmsName not in self.cmsToPhedexMap:
                self.cmsToPhedexMap[cmsName] = {}

            logging.info("Loaded PhEDEx node %s for site %s" % (node["name"], cmsName))
            if node["kind"] not in self.cmsToPhedexMap[cmsName]:
                self.cmsToPhedexMap[cmsName][node["kind"]] = node["name"]

            if node["kind"] in [ "MSS", "Disk" ]:
                self.phedexNodes[node["kind"]].append(node["name"])
        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Run the subscription algorithm as configured
        """
        self.subscribeDatasets()
        return

    def subscribeDatasets(self):
        """
        _subscribeDatasets_

        Poll the database for datasets and subscribe them.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute(conn = myThread.transaction.conn,
                                                            transaction = True)

        # Keep a list of subscriptions to tick as subscribed in the database
        subscriptionsMade = []

        # Create a list of subscriptions as defined by the PhEDEx data structures
        subs = SubscriptionList()

        # Create the subscription objects and add them to the list
        # The list takes care of the sorting internally
        for subInfo in unsubscribedDatasets:
            site = subInfo['site']

            if site not in self.phedexNodes['MSS'] and site not in self.phedexNodes['Disk']:

                if site not in self.cmsToPhedexMap:
                    msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                    msg += "skipping subscription: %s" % subInfo['id']
                    logging.error(msg)
                    self.sendAlert(7, msg = msg)
                    continue

                # Get the phedex node from CMS site
                site = self.cmsToPhedexMap[site].get("MSS") or self.cmsToPhedexMap[site]["Disk"] 

            # Avoid custodial subscriptions to disk nodes
            if site not in self.phedexNodes['MSS']: 
                subInfo['custodial'] = 'n'
            # Avoid auto approval in T1 sites
            elif site.startswith("T1"):
                subInfo['request_only'] = 'y'
            
            phedexSub = PhEDExSubscription(subInfo['path'], site,
                                           self.group, priority = subInfo['priority'],
                                           move = subInfo['move'], custodial = subInfo['custodial'],
                                           request_only = subInfo['request_only'], subscriptionId = subInfo['id'])

            # Check if the subscription is a duplicate
            if phedexSub.matchesExistingSubscription(self.phedex) or \
                phedexSub.matchesExistingTransferRequest(self.phedex):
                subscriptionsMade.append(subInfo['id'])
                continue

            # Add it to the list
            subs.addSubscription(phedexSub)

        # Compact the subscriptions
        subs.compact()

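        # Issue one PhEDEx request per compacted subscription; anything that fails here
        # is not marked subscribed and will be picked up again on the next polling cycle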
        for subscription in subs.getSubscriptionList():
            try:
                xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl,
                                                           subscription.getDatasetPaths())
                logging.debug(str(xmlData))
                msg = "Subscribing: %s to %s, with options: " % (subscription.getDatasetPaths(), subscription.getNodes())
                msg += "Move: %s, Custodial: %s, Request Only: %s" % (subscription.move, subscription.custodial, subscription.request_only)
                logging.info(msg)
                self.phedex.subscribe(subscription, xmlData)
            except Exception as ex:
                logging.error("Something went wrong when communicating with PhEDEx, will try again later.")
                logging.error("Exception: %s" % str(ex))
            else:
                subscriptionsMade.extend(subscription.getSubscriptionIds())

        # Register the result in DBSBuffer
        if subscriptionsMade:
            self.markSubscribed.execute(subscriptionsMade,
                                        conn = myThread.transaction.conn,
                                        transaction = True)

        myThread.transaction.commit()
        return
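The opening of the loop in subscribeDatasets above compresses several routing rules into a few lines: a PhEDEx node name is used as-is, a CMS site name is mapped through cmsToPhedexMap with the MSS node preferred over Disk, custodial copies are never requested on non-MSS nodes, and Tier-1 subscriptions stay request-only. The standalone sketch below restates those rules with a hypothetical function name; it is an illustration of the logic above, not part of the component:

def resolveSubscriptionNode(site, subInfo, cmsToPhedexMap, phedexNodes):
    """
    Map a CMS site name (or PhEDEx node name) to the PhEDEx node that
    should receive the subscription, adjusting the options in subInfo
    the same way subscribeDatasets does. Returns None for unknown sites.
    """
    if site not in phedexNodes['MSS'] and site not in phedexNodes['Disk']:
        # Not a PhEDEx node name, try to map it as a CMS site name
        if site not in cmsToPhedexMap:
            return None
        site = cmsToPhedexMap[site].get("MSS") or cmsToPhedexMap[site]["Disk"]

    if site not in phedexNodes['MSS']:
        # Custodial copies only make sense on MSS nodes
        subInfo['custodial'] = 'n'
    elif site.startswith("T1"):
        # Keep Tier-1 subscriptions request-only (no auto approval)
        subInfo['request_only'] = 'y'
    return site

Under these assumptions, a CMS name that resolves to a Tier-1 MSS node comes back with request_only forced to 'y', while a Disk-only destination keeps its node but has custodial forced to 'n'.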