Exemplos de DataBlockGenerator em Python, exemplos de WMQuality.Emulators.DataBlockGenerator.DataBlockGenerator.DataBlockGenerator em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: PhEDEx.py Projeto: tsarangi/WMCore

 def __init__(self, *args, **kwargs):
     """
     Initialise the emulator state.

     Positional and keyword arguments are accepted but not used here.
     """
     # The 'endpoint' key has to be present, otherwise the existence
     # check fails.
     self['endpoint'] = "phedex_emulator"
     self.dataBlocks = DataBlockGenerator()
     # Bookkeeping containers start out empty; the deletion id counter at 0.
     self.subRequests, self.deletionRequests = {}, {}
     self.deletionRequestId = 0

Exemplo n.º 2

0

Exibir arquivo

Arquivo: DBSReader.py Projeto: prozober/WMCore

    def listFiles(self, datasetPath, retriveList):
        """
        Return the files of every block generated for ``datasetPath``.

        Each file dictionary is tagged with a 'Block' entry that refers to
        the block dictionary it came from. ``retriveList`` is accepted but
        not used by this implementation.
        """
        generator = DataBlockGenerator()
        collected = []
        for blockInfo in generator.getBlocks(datasetPath):
            for fileInfo in generator.getFiles(blockInfo['Name']):
                fileInfo['Block'] = blockInfo
                collected.append(fileInfo)
        return collected

Exemplo n.º 3

0

Exibir arquivo

 def __init__(self, dict=None, responseType="json", logger=None,
              dbsUrl='https://cmsweb.cern.ch/dbs/prod/global/DBSReader'):
     """
     Announce the mock on stdout and set up its in-memory state.

     ``dbsUrl`` is stored on the instance; ``responseType`` and ``logger``
     are not used here, and ``dict`` is only normalised to an empty
     dictionary when falsy.
     """
     print("Using MockPhEDExApi")
     if not dict:
         dict = {}
     self.dbsUrl = dbsUrl
     self.dataBlocks = DataBlockGenerator()
     self.subRequests = {}

Exemplo n.º 4

0

Exibir arquivo

Arquivo: DBSReader.py Projeto: AndrewLevin/WMCore

    def listFiles(self, datasetPath, retriveList):
        """
        Gather the generated files for all blocks of ``datasetPath``.

        Every returned file dictionary carries its owning block dictionary
        under the 'Block' key. ``retriveList`` is not used here.
        """
        blockSource = DataBlockGenerator()
        allFiles = []
        for owner in blockSource.getBlocks(datasetPath):
            blockFiles = blockSource.getFiles(owner['Name'])
            for entry in blockFiles:
                entry['Block'] = owner
                allFiles.append(entry)
        return allFiles

Exemplo n.º 5

0

Exibir arquivo

Arquivo: PhEDEx.py Projeto: mialiu149/WMCore

 def __init__(self, *args, **kwargs):
     """
     Prepare the emulator: fake endpoint entry, block generator and empty
     subscription/deletion bookkeeping. Arguments are not used here.
     """
     # Without an 'endpoint' entry the existence check fails.
     self['endpoint'] = "phedex_emulator"
     self.dataBlocks = DataBlockGenerator()
     self.subRequests = {}
     self.deletionRequestId = 0
     self.deletionRequests = {}

Exemplo n.º 6

0

Exibir arquivo

Arquivo: MockPhEDExApi.py Projeto: amaltaro/WMCore

 def __init__(self,
              dict=None,
              responseType="json",
              logger=None,
              dbsUrl='https://cmsweb.cern.ch/dbs/prod/global/DBSReader'):
     """
     Print a notice that the mock is in use and create its state.

     Only ``dbsUrl`` is kept on the instance; ``dict`` is replaced by an
     empty dictionary when falsy and is otherwise unused here.
     """
     print("Using MockPhEDExApi")
     self.dbsUrl = dbsUrl
     if not dict:
         dict = {}
     self.subRequests = {}
     self.dataBlocks = DataBlockGenerator()

Exemplo n.º 7

0

Exibir arquivo

Arquivo: DBSReader.py Projeto: PerilousApricot/CRAB2

class DBSReader:
    """
    Mock up dbs access

    Every query is answered from a DataBlockGenerator instance instead of a
    real DBS server.
    """
    def __init__(self, *args, **kwargs):
        """Create the block generator; the arguments are not used."""
        # Parenthesised call: prints the same text on Python 2 and is valid
        # syntax on Python 3, where the bare print statement is not.
        print("Using DBS Emulator ...")
        self.dataBlocks = DataBlockGenerator()

    def getFileBlocksInfo(self, dataset, onlyClosedBlocks = True):
        """Fake block info: whatever the generator returns for ``dataset``.

        ``onlyClosedBlocks`` is accepted but not used.
        """
        return self.dataBlocks.getBlocks(dataset)

    def listFileBlockLocation(self, block):
        """Fake locations of ``block`` as reported by the generator."""
        return self.dataBlocks.getLocation(block)

    def listFilesInBlock(self, block):
        """Fake files of ``block`` as reported by the generator."""
        return self.dataBlocks.getFiles(block)

    def getFileBlock(self, block):
        """Return block + locations.

        The result maps ``block`` to a dictionary with the keys
        "StorageElements", "Files" and "IsOpen" (always False).
        """
        return {
            block: {
                "StorageElements": self.listFileBlockLocation(block),
                "Files": self.listFilesInBlock(block),
                "IsOpen": False,
            }
        }

    def getDatasetInfo(self, dataset):
        """Dataset summary.

        Returns a dictionary with 'number_of_events' and 'number_of_files'
        summed over the blocks of ``dataset`` plus the dataset 'path'.
        """
        # Ask the generator once so both sums are taken over the same blocks.
        blocks = list(self.dataBlocks.getBlocks(dataset))
        return {
            'number_of_events': sum(x['NumberOfEvents'] for x in blocks),
            'number_of_files': sum(x['NumberOfFiles'] for x in blocks),
            'path': dataset,
        }

Exemplo n.º 8

0

Exibir arquivo

Arquivo: DBSReader.py Projeto: zhiwenuil/WMCore

class DBSReader:
    """
    Mock up dbs access

    Block, file, run and summary queries are answered from a
    DataBlockGenerator instance.
    """
    def __init__(self, url, **contact):
        """Create the block generator and a mock DBS API for ``url``.

        ``contact`` keyword arguments are accepted but not used.
        """
        self.dataBlocks = DataBlockGenerator()
        args = {"url": url, "level": 'ERROR', "version": 'DBS_2_0_9'}
        self.dbs = _MockDBSApi(args)

    def getFileBlocksInfo(self, dataset, onlyClosedBlocks = True,
                          blockName = '*', locations = True):
        """Fake block info.

        Returns the generated blocks of ``dataset`` whose 'Name' equals
        ``blockName`` ('*' selects all). When ``locations`` is true each
        returned block dictionary gets a 'StorageElementList' entry.
        ``onlyClosedBlocks`` is accepted but not used.
        """
        blocks = [x for x in self.dataBlocks.getBlocks(dataset)
                  if x['Name'] == blockName or blockName == '*']
        if locations:
            for block in blocks:
                block['StorageElementList'] = [
                    {'Role': '', 'Name': x}
                    for x in self.listFileBlockLocation(block['Name'])
                ]
        return blocks

    def listFileBlockLocation(self, block):
        """Fake locations"""
        return self.dataBlocks.getLocation(block)

    def listFilesInBlock(self, block):
        """Fake files"""
        return self.dataBlocks.getFiles(block)

    def listFilesInBlockWithParents(self, block):
        """Fake files, asking the generator with its second argument set to True."""
        return self.dataBlocks.getFiles(block, True)

    def getFileBlock(self, block):
        """Return block + locations"""
        return {
            block: {
                "StorageElements": self.listFileBlockLocation(block),
                "Files": self.listFilesInBlock(block),
                "IsOpen": False,
            }
        }

    def getFileBlockWithParents(self, fileBlockName):
        """
        _getFileBlockWithParents_

        return a dictionary:
        { blockName: {
             "StorageElements" : [<se list>],
             "Files" : dictionaries representing each file
             "IsOpen" : False
             }
        }

        The files come from listFilesInBlockWithParents.
        """
        return {
            fileBlockName: {
                "StorageElements": self.listFileBlockLocation(fileBlockName),
                "Files": self.listFilesInBlockWithParents(fileBlockName),
                "IsOpen": False,
            }
        }

    def listRuns(self, dataset = None, block = None):
        """Return the 'RunNumber' of every lumi entry of the selected files.

        ``block`` takes precedence over ``dataset``; None is returned when
        neither is given. Run numbers are not de-duplicated.
        """
        def getRunsFromBlock(b):
            results = []
            for x in self.dataBlocks.getFiles(b):
                results.extend(y['RunNumber'] for y in x['LumiList'])
            return results

        if block:
            return getRunsFromBlock(block)
        if dataset:
            runs = []
            for blockInfo in self.dataBlocks.getBlocks(dataset):
                runs.extend(getRunsFromBlock(blockInfo['Name']))
            return runs
        return None

    def getDBSSummaryInfo(self, dataset=None, block=None):
        """Dataset summary.

        Returns a dictionary with 'NumberOfEvents', 'NumberOfFiles',
        'NumberOfLumis' and 'path'. Block-level values are filled first and
        are overwritten by dataset-level values when ``dataset`` is given
        and has blocks. An empty dictionary is returned when nothing
        matches.
        """
        def lumisInFiles(files):
            # Distinct lumi section numbers over the given file dictionaries.
            lumis = set()
            for fileInfo in files:
                for lumi in fileInfo['LumiList']:
                    lumis.add(lumi['LumiSectionNumber'])
            return lumis

        result = {}
        if block:
            # One generator call serves all three block-level figures.
            files = self.dataBlocks.getFiles(block)
            result['NumberOfEvents'] = sum(x['NumberOfEvents'] for x in files)
            result['NumberOfFiles'] = len(files)
            result['NumberOfLumis'] = len(lumisInFiles(files))
            # NOTE(review): 'path' is the dataset argument even for a
            # block-only query (i.e. None) - kept as in the original.
            result['path'] = dataset

        if dataset:
            blocks = self.dataBlocks.getBlocks(dataset)
            if blocks:
                result['NumberOfEvents'] = sum(x['NumberOfEvents'] for x in blocks)
                result['NumberOfFiles'] = sum(x['NumberOfFiles'] for x in blocks)
                lumis = set()
                for b in blocks:
                    lumis.update(lumisInFiles(self.dataBlocks.getFiles(b['Name'])))
                result['NumberOfLumis'] = len(lumis)
                result['path'] = dataset

        return result

    def listBlockParents(self, block):
        """Return what the generator's getParentBlock(block, 1) reports."""
        return self.dataBlocks.getParentBlock(block, 1)

Exemplo n.º 9

0

Exibir arquivo

Arquivo: MockPhEDExApi.py Projeto: amaltaro/WMCore

class MockPhEDExApi(object):
    """
    Version of Services/PhEDEx intended to be used with mock or unittest.mock
    """

    def __init__(self, dict=None, responseType="json", logger=None,
                 dbsUrl='https://cmsweb.cern.ch/dbs/prod/global/DBSReader'):
        """
        Args:
            dict: not used by this mock
            responseType: not used by this mock
            logger: not used by this mock
            dbsUrl: stored on the instance as ``dbsUrl``
        """
        print("Using MockPhEDExApi")
        self.dbsUrl = dbsUrl
        self.dataBlocks = DataBlockGenerator()
        self.subRequests = {}

    def sitesByBlock(self, block):
        """
        Centralize the algorithm to decide where a block is based on the hash name

        Args:
            block: the name of the block

        Returns:
            sites: a fake list of sites where the data is

        """
        # NOTE: hash() of a str is salted per interpreter run on Python 3
        # unless PYTHONHASHSEED is fixed, so the mapping is only guaranteed
        # to be stable within one process.
        bucket = hash(block) % 3
        if bucket == 0:
            sites = ['T2_XX_SiteA']
        elif bucket == 1:
            sites = ['T2_XX_SiteA', 'T2_XX_SiteB']
        else:
            sites = ['T2_XX_SiteA', 'T2_XX_SiteB', 'T2_XX_SiteC']

        return sites

    def getReplicaPhEDExNodesForBlocks(self, block=None, dataset=None, complete='y'):
        """
        Map block names to the fake sites they are at.

        Args:
            block: iterable of block names (used when no dataset is given)
            dataset: the name of the dataset, or a list whose first element is used
            complete: not used by this mock

        Returns:
            a fake list of blocks and the fakes sites they are at
        """
        if isinstance(dataset, list):
            dataset = dataset[0]  # Dataset is a list in these tests
        if dataset:
            # TODO: Generalize this and maybe move dataset detection into sitesByBlock
            if dataset == PILEUP_DATASET:
                return {
                    '%s#0fcb2b12-d27e-11e0-91b1-003048caaace' % dataset: ['T2_XX_SiteA', 'T2_XX_SiteB', 'T2_XX_SiteC']}
            try:
                reader = DBS3Reader(PROD_DBS)
                reader.checkDatasetPath(dataset)
                blocks = reader.dbs.listBlocks(dataset=dataset)
                singleBlock = blocks[0]['block_name']
                return {singleBlock: self.sitesByBlock(singleBlock)}
            except DBSReaderError:
                return {'%s#0fcb2b12-d27e-11e0-91b1-003048caaace' % dataset: []}

        replicas = {}
        # ``block`` defaults to None; treat that as "no blocks" instead of
        # failing with a TypeError when iterating.
        for oneBlock in block or []:
            if oneBlock.split('#')[0] == PILEUP_DATASET:
                # Pileup is at a single site
                sites = ['T2_XX_SiteC']
            else:
                sites = self.sitesByBlock(block=oneBlock)
            _BLOCK_LOCATIONS[oneBlock] = sites
            replicas[oneBlock] = sites
        return replicas

    def getReplicaInfoForBlocks(self, **args):
        """
        Where are blocks located

        Expects ``args['block']`` to be an iterable of block names; every
        block is reported with one file and the sites from sitesByBlock.
        """

        data = {"phedex": {"request_timestamp": 1254762796.13538, "block": []}}
        blocks = data['phedex']['block']

        for block in args['block']:
            sites = self.sitesByBlock(block=block)
            blocks.append({'files': 1, 'name': block, 'replica': [{'node': x} for x in sites]})
        return data

    def getSubscriptionMapping(self, *dataItems, **kwargs):
        """
        Fake version of the existing PhEDEx method

        Returns a dictionary mapping every distinct data item to the sites
        from sitesByBlock. Keyword arguments are not used.
        """

        locationMap = {}
        for dataItem in set(dataItems):  # force unique items
            locationMap[dataItem] = self.sitesByBlock(block=dataItem)

        return locationMap

    def getNodeMap(self):
        """
        Return a fixed node map of the form {"phedex": {"node": [...]}}.
        """

        nodes = [{"name": "T1_US_FNAL_MSS", "kind": "MSS", "se": "cmssrm.fnal.gov", "technology": "dCache", "id": 1},
                 {"name": "T1_US_FNAL_Buffer", "kind": "Buffer", "se": "cmssrm.fnal.gov", "technology": "dCache",
                  "id": 2},
                 {"name": "T0_CH_CERN_MSS", "kind": "MSS", "se": "srm-cms.cern.ch", "technology": "Castor", "id": 3},
                 {"name": "T0_CH_CERN_Buffer", "kind": "Buffer", "se": "srm-cms.cern.ch", "technology": "Castor",
                  "id": 4},
                 {"name": "T1_UK_RAL_MSS", "kind": "MSS", "se": "srm-cms.gridpp.rl.ac.uk", "technology": "Castor",
                  "id": 5},
                 {"name": "T1_UK_RAL_Buffer", "kind": "Buffer", "se": "srm-cms.gridpp.rl.ac.uk", "technology": "Castor",
                  "id": 6},
                 {"name": "T1_UK_RAL_Disk", "kind": "Disk", "se": "srm-cms-disk.gridpp.rl.ac.uk", "technology": "Disk",
                  "id": 7},
                 {"name": "T2_CH_CERN", "kind": "Disk", "se": "srm-eoscms.cern.ch", "technology": "Disk", "id": 8},
                 {"name": "T3_CO_Uniandes", "kind": "Disk", "se": "moboro.uniandes.edu.co", "technology": "DPM",
                  "id": 9}
                ]

        return {"phedex": {"node": nodes}}

    def subscriptions(self, **args):
        """
        Where is data subscribed - for now just replicate blockreplicas

        Looks at ``args['dataset']``, then ``args['block']``, then
        ``args['group']`` (first truthy one wins) and returns a dictionary
        of the form {'phedex': {'request_timestamp': ..., 'dataset': [...]}}.
        """

        def _blockInfoGenerator(blockList):
            # Adds one block entry per item of blockList to ``data``,
            # grouping the blocks under their dataset entry.
            for block in blockList:
                if isinstance(block, dict):
                    block = block['Name']
                dataset = self.dataBlocks.getDatasetName(block)
                # TODO needs to add correct file numbers
                datasetSelected = None
                for dataItem in data['phedex']['dataset']:
                    if dataItem['name'] == dataset:
                        datasetSelected = dataItem
                        break
                if datasetSelected is None:
                    datasetSelected = {'name': dataset, 'files': FILES_PER_DATASET,
                                       'block': []}
                    data['phedex']['dataset'].append(datasetSelected)

                subs = [{'node': 'T2_XX_SiteA_MSS', 'custodial': 'n', 'suspend_until': None,
                         'level': 'dataset', 'move': 'n', 'request': '47983',
                         'time_created': '1232989000', 'priority': 'low',
                         'time_update': None, 'node_id': '781',
                         'suspended': 'n', 'group': None}]
                if dataset in self.subRequests:
                    subs.extend(self.subRequests[dataset])
                # Drop block-level subscriptions by building a new list:
                # removing from a list while iterating over it skips the
                # element that follows every removed one.
                datasetSelected['subscription'] = [sub for sub in subs if sub['level'] != 'block']

                locations = self.dataBlocks.getLocation(block)
                datasetSelected['block'].append(
                    {"bytes": "10438786614", "files": FILES_PER_BLOCK, "is_open": "n", "name": block,
                     "id": "454370",
                     "subscription": [{'node': x + '_MSS', "suspended": "n"} for x in locations]
                    })

        data = {'phedex': {"request_timestamp": 1254850198.15418,
                           'dataset': []}}

        # Different structure depending on whether we ask for dataset or blocks
        if args.get('dataset'):
            _blockInfoGenerator(self.dataBlocks.getBlocks(args['dataset']))
        elif args.get('block'):
            _blockInfoGenerator(args['block'])
        elif args.get('group'):
            _blockInfoGenerator(self.dataBlocks.getBlocks('/a/b-%s/c' % args['group']))

        return data

    def getRequestList(self, **kwargs):
        """
        _getRequestList_
        Emulated request list, for now it does nothing
        """

        goldenResponse = {"phedex": {"request": [], "request_timestamp": 1368636296.94707,
                                     "request_version": "2.3.15-comp", "request_call": "requestlist",
                                     "call_time": 0.34183, "request_date": "2013-05-15 16:44:56 UTC"}}
        return goldenResponse

    def __getattr__(self, item):
        """
        __getattr__ gets called in case lookup of the actual method fails. We use this to return data based on
        a lookup table

        :param item: The method name the user is trying to call
        :return: The generic lookup function
        """
        # NOTE(review): genericLookup reads self.url, but __init__ only sets
        # self.dbsUrl. Unless a caller assigns ``url``, that attribute access
        # lands here again and yields a function, so the lookup below ends in
        # the KeyError branch - confirm where ``url`` is meant to come from.

        def genericLookup(*args, **kwargs):
            """
            This function returns the mocked DBS data

            :param args: positional arguments it was called with
            :param kwargs: named arguments it was called with
            :return: the dictionary that DBS would have returned
            """

            if kwargs:
                # items() exists on Python 2 and 3 (iteritems() does not) and
                # sorts to the same list, so the signature string is unchanged.
                signature = '%s:%s' % (item, sorted(kwargs.items()))
            else:
                signature = item

            try:
                if MOCK_DATA[self.url][signature] == 'Raises HTTPError':
                    raise HTTPError
                else:
                    return MOCK_DATA[self.url][signature]
            except KeyError:
                raise KeyError("PhEDEx mock API could not return data for method %s, args=%s, and kwargs=%s (URL %s)." %
                               (item, args, kwargs, self.url))

        return genericLookup

Exemplo n.º 10

0

Exibir arquivo

Arquivo: PhEDEx.py Projeto: ticoann/WMCore

class PhEDEx(dict):
    """
    In-memory PhEDEx emulator.

    Block data comes from a DataBlockGenerator; subscription requests made
    through subscribe() are kept in ``subRequests`` keyed by dataset name.
    """
    def __init__(self, *args, **kwargs):
        """Set up the emulator state; the arguments are not used."""
        # add the end point so that the existence check does not fail.
        self['endpoint'] = "phedex_emulator"
        self.dataBlocks = DataBlockGenerator()
        self.subRequests = {}

    def injectBlocks(self, node, xmlData, verbose = 0, strict = 1):

        """
        do nothing don't inject block.
        """

        return None

    def getNodeSE(self, value):
        """Return a fixed dummy storage element name, whatever ``value`` is."""
        return 'dummy.se.from.emulator'

    def subscribe(self, subscription, xmlData):
        """
        Store the subscription information in the object,
        tests can retrieve it and verify it

        The request is recorded under the name of every <dataset> element
        found in ``xmlData``; XML without any <dataset> records nothing.
        """

        args = {'node': list(subscription.nodes)}

        document = parseString(xmlData)
        datasets = document.getElementsByTagName("dataset")

        args['data'] = xmlData
        args['level'] = subscription.level
        args['priority'] = subscription.priority
        args['move'] = subscription.move
        args['static'] = subscription.static
        args['custodial'] = subscription.custodial
        args['group'] = subscription.group
        args['request_only'] = subscription.request_only

        # Register per dataset: previously only the last dataset was stored
        # and XML without a dataset raised a NameError.
        for dataset in datasets:
            datasetName = dataset.getAttribute("name")
            self.subRequests.setdefault(datasetName, []).append(args)

        return

    def getReplicaInfoForFiles(self, **args):
        """
        _getReplicaInfoForFiles_
        TODO: Need to be implemented correctly,
        Currently not used

        Retrieve file replica information from PhEDEx.

        block          block name, with '*' wildcards, can be multiple (*).  required when no lfn is specified.
        node           node name, can be multiple (*)
        se             storage element name, can be multiple (*)
        update_since   unix timestamp, only return replicas updated since this
                    time
        create_since   unix timestamp, only return replicas created since this
                    time
        complete       y or n. if y, return only file replicas from complete block
                    replicas.  if n only return file replicas from incomplete block
                    replicas.  default is to return either.
        dist_complete  y or n.  if y, return only file replicas from blocks
                    where all file replicas are available at some node. if
                    n, return only file replicas from blocks which have
                    file replicas not available at any node.  default is
                    to return either.
        subscribed     y or n, filter for subscription. default is to return either.
        custodial      y or n. filter for custodial responsibility.  default is
                    to return either.
        group          group name.  default is to return replicas for any group.
        lfn            logical file name
        """
        return None

    def getNodeMap(self):
        """
        _getNodeMap_

        Retrieve information about nodes known to this PhEDEx instance.  Each
        node entry will have the following keys:
          name       - PhEDEx node name
          se         - Storage element name
          kind       - Node type, e.g. 'Disk' or 'MSS'
          technology - Node technology, e.g. 'Castor'
          id         - Node id

        Return some MSS, Buffer and Disk nodes
        """

        # (name, kind, se, technology); ids are assigned 1..N in this order.
        nodes = (
            ("T1_US_FNAL_MSS", "MSS", "cmssrm.fnal.gov", "dCache"),
            ("T1_US_FNAL_Buffer", "Buffer", "cmssrm.fnal.gov", "dCache"),
            ("T1_UK_RAL_MSS", "MSS", "srm-cms.gridpp.rl.ac.uk", "Castor"),
            ("T1_UK_RAL_Buffer", "Buffer", "srm-cms.gridpp.rl.ac.uk", "Castor"),
            ("T1_UK_RAL_Disk", "Disk", "srm-cms-disk.gridpp.rl.ac.uk", "Disk"),
            ("T2_CH_CERN", "Disk", "srm-eoscms.cern.ch", "Disk"),
            ("T3_CO_Uniandes", "Disk", "moboro.uniandes.edu.co", "DPM"),
        )
        nodeList = [{"name": name, "kind": kind, "se": se,
                     "technology": technology, "id": nodeId}
                    for nodeId, (name, kind, se, technology) in enumerate(nodes, 1)]
        return {"phedex": {"node": nodeList}}

    def getReplicaInfoForBlocks(self, **args):
        """
        Where are blocks located

        Expects ``args['block']`` to be an iterable of block names. Every
        location reported by the generator gets the suffix '_MSS'.
        """
        data = {"phedex": {"request_timestamp": 1254762796.13538, "block": []}}
        blocks = data['phedex']['block']
        for block in args['block']:
            files = self.dataBlocks.getFiles(block)
            locations = self.dataBlocks.getLocation(block)
            blocks.append({"files": len(files), "name": block,
                           'replica': [{'node': x + '_MSS'} for x in locations]})
        return data

    def subscriptions(self, **args):
        """
        Where is data subscribed - for now just replicate blockreplicas

        ``args['dataset']`` (an iterable of dataset names) is used when
        truthy, otherwise ``args['block']`` (an iterable of block names or
        block dictionaries). Returns
        {'phedex': {'request_timestamp': ..., 'dataset': [...]}} with one
        entry per dataset and the blocks grouped beneath it.
        """
        def _blockInfoGenerator(blockList):
            # Adds one block entry per item of blockList to ``data``.
            for block in blockList:
                if isinstance(block, dict):
                    block = block['Name']
                dataset = self.dataBlocks.getDatasetName(block)
                # TODO needs to add correct file numbers

                # Reuse the dataset entry when it is already in the response
                # and create it only when it is missing. The old test
                # ("not datasetList or find") did the opposite for every
                # block after the first one.
                datasetSelected = None
                for dataItem in data['phedex']['dataset']:
                    if dataItem['name'] == dataset:
                        datasetSelected = dataItem
                        break
                if datasetSelected is None:
                    datasetSelected = {'name': dataset, 'files': filesInDataset,
                                       'block': []}
                    data['phedex']['dataset'].append(datasetSelected)

                datasetSelected['subscription'] = [
                    {'node': 'T2_XX_SiteA_MSS', 'custodial': 'n', 'suspend_until': None,
                     'level': 'dataset', 'move': 'n', 'request': '47983',
                     'time_created': '1232989000', 'priority': 'low',
                     'time_update': None, 'node_id': '781',
                     'suspended': 'n', 'group': None}]

                locations = self.dataBlocks.getLocation(block)
                datasetSelected['block'].append(
                    {"bytes": "10438786614",
                     "files": filesInBlock,
                     "is_open": "n",
                     "name": block,
                     "id": "454370",
                     "subscription": [{'node': x + '_MSS', "suspended": "n"}
                                      for x in locations]})

        data = {'phedex': {"request_timestamp": 1254850198.15418,
                           'dataset': []}}
        # different structure depending on whether we ask for dataset or blocks

        if args.get('dataset'):
            for dataset in args['dataset']:
                _blockInfoGenerator(self.dataBlocks.getBlocks(dataset))
        elif args.get('block'):
            _blockInfoGenerator(args['block'])

        return data

    def getSubscriptionMapping(self, *dataItems, **kwargs):
        """
        Similar basic functionality as self.subscriptions()
        however: dataItems may be a combination of blocks or datasets and
        kwargs is passed to PhEDEx; output is parsed and returned in the form
        { 'dataItem1' : [Node1, Node2] } where dataItem is a block or dataset

        The following cases are handled:
          o Input is a block and subscription is a dataset
          o Input is a block and subscription is a block
          o Input is a dataset and subscription is a dataset

        Not supported:
          o Input is a dataset but only block subscriptions exist
        """
        from collections import defaultdict
        result = defaultdict(set)
        kwargs.setdefault('suspended', 'n') # require active subscription

        # Hard to query all at once in one GET call, POST not cacheable
        # hence, query individually - use httplib2 caching to protect service
        for item in set(dataItems): # force unique items
            datasetName = item.split('#')[0]

            # First query for a dataset level subscription (most common)
            # this returns block level subscriptions also.
            # Rely on httplib2 caching to not resend on every block in dataset
            kwargs['dataset'], kwargs['block'] = [datasetName], []
            response = self.subscriptions(**kwargs)['phedex']

            # iterate over response as can't jump to specific datasets
            for dset in response['dataset']:
                if dset['name'] != datasetName:
                    continue
                if 'subscription' in dset:
                    # dataset level subscription
                    nodes = [x['node'] for x in dset['subscription']
                             if x['suspended'] == 'n']
                    result[item].update(nodes)

                # if we have a block we must check for block level subscription also
                # combine with original query when can give both dataset and block
                if '#' in item and 'block' in dset:
                    for block in dset['block']:
                        if block['name'] == item:
                            nodes = [x['node'] for x in block['subscription']
                                     if x['suspended'] == 'n']
                            result[item].update(nodes)
                            break
        return result


    def emulator(self):
        """Return a fixed marker string identifying this class as the emulator."""
        return "PhEDEx emulator ...."

Exemplo n.º 11

0

Exibir arquivo

Arquivo: DBSReader.py Projeto: AndrewLevin/WMCore

 def __init__(self, url, **contact):
     """
     Create the block generator and a mock DBS API configured for ``url``.

     ``contact`` keyword arguments are accepted but not used here.
     """
     self.dataBlocks = DataBlockGenerator()
     self.dbs = _MockDBSApi({"url": url,
                             "level": 'ERROR',
                             "version": 'DBS_2_0_9'})

Exemplo n.º 12

0

Exibir arquivo

Arquivo: DBSReader.py Projeto: AndrewLevin/WMCore

class DBSReader:
    """
    Mock up dbs access

    Every query is answered from ``self.dataBlocks`` rather than a DBS
    server.
    """

    def __init__(self, url, **contact):
        """Create the fake data generator and the mocked DBS API handle."""
        self.dataBlocks = DataBlockGenerator()
        apiConfig = {"url": url, "level": 'ERROR', "version": 'DBS_2_0_9'}
        self.dbs = _MockDBSApi(apiConfig)

    def getFileBlocksInfo(self, dataset, onlyClosedBlocks = True,
                          blockName = '*', locations = True):
        """
        Return the fake block dictionaries of ``dataset``.

        ``blockName`` selects a single block by name, ``'*'`` selects all.
        ``onlyClosedBlocks`` is accepted but not used.  When ``locations``
        is true every returned block gets a ``StorageElementList`` entry.

        Raises DBSReaderError when nothing matches and no specific block
        was requested.
        """
        selected = []
        for candidate in self.dataBlocks.getBlocks(dataset):
            if candidate['Name'] == blockName or blockName == '*':
                selected.append(candidate)

        if not selected:
            # Weird error handling follows, this is what dbs does:
            # a specific block that is missing gives [], a wildcard query
            # with no blocks raises a DbsBadRequest error.
            if blockName == '*':
                raise DBSReaderError('DbsBadRequest: DBS Server Raised An Error')
            return []

        if locations:
            for entry in selected:
                sites = self.listFileBlockLocation(entry['Name'])
                entry['StorageElementList'] = [{'Role': '', 'Name': site}
                                               for site in sites]
        return selected

    def lfnsInBlock(self, fileBlockName):
        """
        _lfnsInBlock_
        Get a fake list of LFNs for the block
        """
        lfns = []
        for fileInfo in self.listFilesInBlock(fileBlockName):
            lfns.append(fileInfo['LogicalFileName'])
        return lfns

    def listFileBlocks(self, dataset, onlyClosedBlocks = False,
                       blockName = '*'):
        """Get fake block names (``onlyClosedBlocks`` is not forwarded)."""
        infos = self.getFileBlocksInfo(dataset, onlyClosedBlocks=False,
                                       blockName=blockName,
                                       locations=False)
        return [info['Name'] for info in infos]

    def listOpenFileBlocks(self, dataset):
        """
        _listOpenFileBlocks_

        Retrieve a list of open fileblock names for a dataset
        """
        openNames = []
        for info in self.getFileBlocksInfo(dataset, onlyClosedBlocks=False,
                                           locations=False):
            if str(info['OpenForWriting']) == '1':
                openNames.append(info['Name'])
        return openNames

    def listFileBlockLocation(self, block):
        """Fake locations"""
        sites = self.dataBlocks.getLocation(block)
        return sites

    def listFilesInBlock(self, fileBlockName):
        """Fake files"""
        files = self.dataBlocks.getFiles(fileBlockName)
        return files

    def listFilesInBlockWithParents(self, block):
        """Fake files, requested from the generator with its second argument set to True."""
        files = self.dataBlocks.getFiles(block, True)
        return files

    def getFileBlock(self, block):
        """Return block + locations"""
        details = {}
        details["StorageElements"] = self.listFileBlockLocation(block)
        details["Files"] = self.listFilesInBlock(block)
        details["IsOpen"] = self.dataBlocks._openForWriting()
        return {block: details}

    def getFileBlockWithParents(self, fileBlockName):
        """
        _getFileBlockWithParents_

        return a dictionary:
        { blockName: {
             "StorageElements" : [<se list>],
             "Files" : dictionaries representing each file,
             "IsOpen" : value reported by the data generator
             }
        }
        """
        details = {}
        details["StorageElements"] = self.listFileBlockLocation(fileBlockName)
        details["Files"] = self.listFilesInBlockWithParents(fileBlockName)
        details["IsOpen"] = self.dataBlocks._openForWriting()
        return {fileBlockName: details}

    def listRuns(self, dataset = None, block = None):
        """
        Return the distinct run numbers of a block or of a whole dataset.

        ``block`` takes precedence over ``dataset``; None is returned when
        neither is given.
        """
        def runsOfBlock(name):
            found = set()
            for fileInfo in self.dataBlocks.getFiles(name):
                found = found.union([lumi['RunNumber']
                                     for lumi in fileInfo['LumiList']])
            return list(found)

        if block:
            return runsOfBlock(block)
        if not dataset:
            return None

        allRuns = set()
        for blockInfo in self.dataBlocks.getBlocks(dataset):
            allRuns = allRuns.union(runsOfBlock(blockInfo['Name']))
        return list(allRuns)

    def listRunLumis(self, dataset = None, block = None):
        """
        Return a {run number: number of LumiList entries} mapping.

        ``block`` takes precedence over ``dataset``; None is returned when
        neither is given.
        """
        def lumiCountsOfBlock(name):
            counts = {}
            for fileInfo in self.dataBlocks.getFiles(name):
                for lumi in fileInfo['LumiList']:
                    run = lumi['RunNumber']
                    counts[run] = counts.get(run, 0) + 1
            return counts

        if block:
            return lumiCountsOfBlock(block)
        if not dataset:
            return None

        merged = {}
        for blockInfo in self.dataBlocks.getBlocks(dataset):
            perBlock = lumiCountsOfBlock(blockInfo['Name'])
            for run in perBlock:
                merged[run] = merged.get(run, 0) + perBlock[run]
        return merged

    def getDBSSummaryInfo(self, dataset=None, block=None):
        """
        Dataset summary

        Block-level figures are filled in first; when ``dataset`` is given
        and has blocks, the event/file/lumi counts and ``path`` are then
        overwritten with dataset-level figures.  All counts are strings.

        Raises DBSReaderError when nothing could be summarised.
        """
        def lumiSectionsOf(blockId):
            sections = set()
            for fileInfo in self.dataBlocks.getFiles(blockId):
                for lumi in fileInfo['LumiList']:
                    sections.add(lumi['LumiSectionNumber'])
            return sections

        summary = {}
        if block:
            eventTotal = sum([f['NumberOfEvents']
                              for f in self.dataBlocks.getFiles(block)])
            summary['NumberOfEvents'] = str(eventTotal)
            summary['NumberOfFiles'] = str(len(self.dataBlocks.getFiles(block)))
            summary['NumberOfLumis'] = str(len(lumiSectionsOf(block)))
            summary['path'] = dataset
            summary['block'] = block
            if self.dataBlocks._openForWriting():
                summary['OpenForWriting'] = '1'
            else:
                summary['OpenForWriting'] = '0'

        if dataset and self.dataBlocks.getBlocks(dataset):
            eventTotal = sum([b['NumberOfEvents']
                              for b in self.dataBlocks.getBlocks(dataset)])
            summary['NumberOfEvents'] = str(eventTotal)
            fileTotal = sum([b['NumberOfFiles']
                             for b in self.dataBlocks.getBlocks(dataset)])
            summary['NumberOfFiles'] = str(fileTotal)

            datasetLumis = set()
            for blockInfo in self.dataBlocks.getBlocks(dataset):
                datasetLumis = datasetLumis.union(lumiSectionsOf(blockInfo['Name']))
            summary['NumberOfLumis'] = str(len(datasetLumis))
            summary['path'] = dataset

        # Weird error handling follows, this is what dbs does
        if not summary:
            raise DBSReaderError('DbsConnectionError: Database exception,Invalid parameters')
        return summary

    def listBlockParents(self, block):
        """Return what the data generator reports as parent of ``block``."""
        parents = self.dataBlocks.getParentBlock(block, 1)
        return parents

    def listDatasetLocation(self, dataset):
        """
        _listDatasetLocation_

        List the SEs where there is at least a block of the given
        dataset.
        """
        blockInfos = self.getFileBlocksInfo(dataset, onlyClosedBlocks=False,
                                            blockName='*', locations=True)

        seNames = set()
        for info in blockInfos:
            seNames |= set([se['Name'] for se in info['StorageElementList']])
        return list(seNames)

Exemplo n.º 13

0

Exibir arquivo

Arquivo: MockRucioApi.py Projeto: ramonbrugman/WMCore

 def __init__(self, acct, hostUrl=None, authUrl=None, configDict=None):
     """
     Announce the mock on stdout and set up its in-memory state.

     ``acct``, ``hostUrl`` and ``authUrl`` are only echoed in the banner;
     ``configDict`` is normalised to a dict but not used afterwards.
     """
     banner = "Using MockRucioApi: acct={}, url={}, authUrl={}"
     print(banner.format(acct, hostUrl, authUrl))
     if not configDict:
         configDict = {}
     self.dataBlocks = DataBlockGenerator()
     self.subRequests = {}

Exemplo n.º 14

0

Exibir arquivo

 def __init__(self, dict=None, responseType="json", secure=True):
     """
     Set up the fake data generator and an empty subscription store.

     ``dict`` is normalised to a mapping but not used afterwards;
     ``responseType`` and ``secure`` are accepted but not used.
     """
     if not dict:
         dict = {}
     self.dataBlocks = DataBlockGenerator()
     self.subRequests = {}

Exemplo n.º 15

0

Exibir arquivo

class MockPhEDExApi(object):
    """
    Version of Services/PhEDEx intended to be used with mock or unittest.mock
    """

    def __init__(self,
                 dict=None,
                 responseType="json",
                 logger=None,
                 dbsUrl='https://cmsweb.cern.ch/dbs/prod/global/DBSReader'):
        """
        Announce the mock on stdout and set up its in-memory state.

        Args:
            dict: accepted but not used
            responseType: accepted but not used
            logger: accepted but not used
            dbsUrl: stored as ``self.dbsUrl``
        """
        print("Using MockPhEDExApi")
        self.dbsUrl = dbsUrl
        self.dataBlocks = DataBlockGenerator()
        self.subRequests = {}

    def sitesByBlock(self, block):
        """
        Centralize the algorithm to decide where a block is based on the hash name

        Args:
            block: the name of the block

        Returns:
            sites: a fake list of sites where the data is

        """
        bucket = hash(block) % 3
        if bucket == 0:
            return ['T2_XX_SiteA']
        if bucket == 1:
            return ['T2_XX_SiteA', 'T2_XX_SiteB']
        return ['T2_XX_SiteA', 'T2_XX_SiteB', 'T2_XX_SiteC']

    def getReplicaPhEDExNodesForBlocks(self,
                                       block=None,
                                       dataset=None,
                                       complete='y'):
        """
        Return fake replica locations for a dataset or for a list of blocks.

        Args:
            block: iterable of block names; ``None`` is treated as empty
            dataset: the name of the dataset (or a list whose first item is used)
            complete: accepted but not used

        Returns:
            a fake dict of block name -> list of fake sites
        """
        if isinstance(dataset, list):
            dataset = dataset[0]  # Dataset is a list in these tests
        if dataset:
            # TODO: Generalize this and maybe move dataset detection into sitesByBlock
            placeholderBlock = '%s#0fcb2b12-d27e-11e0-91b1-003048caaace' % dataset
            if dataset == PILEUP_DATASET:
                return {placeholderBlock: ['T2_XX_SiteA', 'T2_XX_SiteB', 'T2_XX_SiteC']}
            try:
                reader = DBS3Reader(PROD_DBS)
                reader.checkDatasetPath(dataset)
                blocks = reader.dbs.listBlocks(dataset=dataset)
                singleBlock = blocks[0]['block_name']
                return {singleBlock: self.sitesByBlock(singleBlock)}
            except DBSReaderError:
                return {placeholderBlock: []}

        replicas = {}
        # ``block`` defaults to None; iterating None would raise TypeError.
        for oneBlock in block or []:
            if oneBlock.split('#')[0] == PILEUP_DATASET:
                # Pileup is at a single site
                sites = ['T2_XX_SiteC']
            else:
                sites = self.sitesByBlock(block=oneBlock)
            _BLOCK_LOCATIONS[oneBlock] = sites
            replicas[oneBlock] = sites
        return replicas

    def getReplicaInfoForBlocks(self, **args):
        """
        Where are blocks located

        Reads the block names from ``args['block']`` and reports one fake
        file per block, replicated at the sites chosen by sitesByBlock.
        """
        blocks = []
        for block in args['block']:
            sites = self.sitesByBlock(block=block)
            blocks.append({
                'files': 1,
                'name': block,
                'replica': [{'node': site} for site in sites]
            })
        return {"phedex": {"request_timestamp": 1254762796.13538, "block": blocks}}

    def getSubscriptionMapping(self, *dataItems, **kwargs):
        """
        Fake version of the existing PhEDEx method

        Returns a dict mapping every distinct data item to the sites chosen
        by sitesByBlock; ``kwargs`` is accepted but not used.
        """
        locationMap = {}
        for dataItem in set(dataItems):  # force unique items
            locationMap[dataItem] = self.sitesByBlock(block=dataItem)
        return locationMap

    def getNodeMap(self):
        """
        Return a fixed list of fake PhEDEx nodes under ``['phedex']['node']``.

        Each node has ``name``, ``kind``, ``se``, ``technology`` and a
        1-based ``id`` following the order of the table below.
        """
        nodeTable = (
            ("T1_US_FNAL_MSS", "MSS", "cmssrm.fnal.gov", "dCache"),
            ("T1_US_FNAL_Buffer", "Buffer", "cmssrm.fnal.gov", "dCache"),
            ("T0_CH_CERN_MSS", "MSS", "srm-cms.cern.ch", "Castor"),
            ("T0_CH_CERN_Buffer", "Buffer", "srm-cms.cern.ch", "Castor"),
            ("T1_UK_RAL_MSS", "MSS", "srm-cms.gridpp.rl.ac.uk", "Castor"),
            ("T1_UK_RAL_Buffer", "Buffer", "srm-cms.gridpp.rl.ac.uk", "Castor"),
            ("T1_UK_RAL_Disk", "Disk", "srm-cms-disk.gridpp.rl.ac.uk", "Disk"),
            ("T2_CH_CERN", "Disk", "srm-eoscms.cern.ch", "Disk"),
            ("T3_CO_Uniandes", "Disk", "moboro.uniandes.edu.co", "DPM"),
        )
        nodes = []
        for nodeId, (name, kind, se, technology) in enumerate(nodeTable, 1):
            nodes.append({
                "name": name,
                "kind": kind,
                "se": se,
                "technology": technology,
                "id": nodeId
            })
        return {"phedex": {"node": nodes}}

    def subscriptions(self, **args):
        """
        Where is data subscribed - for now just replicate blockreplicas

        The blocks to report are selected by the first non-empty one of
        ``args['dataset']``, ``args['block']`` or ``args['group']``.  With
        none of them the returned dataset list is empty.
        """
        data = {
            'phedex': {
                "request_timestamp": 1254850198.15418,
                'dataset': []
            }
        }
        datasetList = data['phedex']['dataset']

        def _blockInfoGenerator(blockList):
            for block in blockList:
                if isinstance(block, dict):
                    block = block['Name']
                dataset = self.dataBlocks.getDatasetName(block)

                # Reuse the entry of a dataset that was already reported
                datasetSelected = None
                for dataItem in datasetList:
                    if dataItem['name'] == dataset:
                        datasetSelected = dataItem
                        break
                if datasetSelected is None:
                    # TODO needs to add correct file numbers
                    datasetSelected = {
                        'name': dataset,
                        'files': FILES_PER_DATASET,
                        'block': []
                    }
                    datasetList.append(datasetSelected)

                subs = [{
                    'node': 'T2_XX_SiteA_MSS',
                    'custodial': 'n',
                    'suspend_until': None,
                    'level': 'dataset',
                    'move': 'n',
                    'request': '47983',
                    'time_created': '1232989000',
                    'priority': 'low',
                    'time_update': None,
                    'node_id': '781',
                    'suspended': 'n',
                    'group': None
                }]
                if dataset in self.subRequests:
                    subs.extend(self.subRequests[dataset])
                # Keep only non-block-level entries.  Build a new list:
                # removing from a list while iterating it skips the element
                # after every removal.
                datasetSelected['subscription'] = [
                    sub for sub in subs if sub['level'] != 'block'
                ]

                locations = self.dataBlocks.getLocation(block)
                datasetSelected['block'].append({
                    "bytes": "10438786614",
                    "files": FILES_PER_BLOCK,
                    "is_open": "n",
                    "name": block,
                    "id": "454370",
                    "subscription": [{
                        'node': x + '_MSS',
                        "suspended": "n"
                    } for x in locations]
                })

        # Different structure depending on whether we ask for dataset or blocks
        if args.get('dataset'):
            _blockInfoGenerator(self.dataBlocks.getBlocks(args['dataset']))
        elif args.get('block'):
            _blockInfoGenerator(args['block'])
        elif args.get('group'):
            _blockInfoGenerator(
                self.dataBlocks.getBlocks('/a/b-%s/c' % args['group']))

        return data

    def getRequestList(self, **kwargs):
        """
        _getRequestList_
        Emulated request list, for now it does nothing
        """
        goldenResponse = {
            "phedex": {
                "request": [],
                "request_timestamp": 1368636296.94707,
                "request_version": "2.3.15-comp",
                "request_call": "requestlist",
                "call_time": 0.34183,
                "request_date": "2013-05-15 16:44:56 UTC"
            }
        }
        return goldenResponse

    def __getattr__(self, item):
        """
        __getattr__ gets called in case lookup of the actual method fails. We use this to return data based on
        a lookup table

        :param item: The method name the user is trying to call
        :return: The generic lookup function
        """
        def genericLookup(*args, **kwargs):
            """
            This function returns the mocked data from the lookup table

            :param args: positional arguments it was called with
            :param kwargs: named arguments it was called with
            :return: the canned value stored for this call signature
            """
            if kwargs:
                # items() exists on Python 2 and 3 dicts; iteritems() does not
                signature = '%s:%s' % (item, sorted(kwargs.items()))
            else:
                signature = item

            # NOTE(review): __init__ stores ``dbsUrl`` and never sets ``url``;
            # unless ``url`` is assigned elsewhere, ``self.url`` is resolved by
            # this very __getattr__ and is a function, not a URL - confirm.
            try:
                canned = MOCK_DATA[self.url][signature]
            except KeyError:
                raise KeyError(
                    "PhEDEx mock API could not return data for method %s, args=%s, and kwargs=%s (URL %s)."
                    % (item, args, kwargs, self.url))
            if canned == 'Raises HTTPError':
                raise HTTPError
            return canned

        return genericLookup

Exemplo n.º 16

0

Exibir arquivo

Arquivo: PhEDEx.py Projeto: PerilousApricot/CRAB2

class PhEDEx:
    """
    Emulated PhEDEx service that answers from generated (fake) block data.
    """
    def __init__(self, *args, **kwargs):
        """Announce the emulator on stdout and create the fake data source."""
        # The parenthesised single-argument form prints the same text whether
        # ``print`` is a statement (Python 2) or a function (Python 3).
        print("Using PhEDEx Emulator ....")
        self.dataBlocks = DataBlockGenerator()

    def getReplicaInfoForBlocks(self, **args):
        """
        Where are blocks located

        Reads the block names from ``args['block']`` and returns one entry
        per block with its file count and one replica per location.
        """
        # The response is built once, outside the loop, so every requested
        # block is reported and an empty request still returns a response.
        blocks = []
        for block in args['block']:
            files = self.dataBlocks.getFiles(block)
            locations = self.dataBlocks.getLocation(block)
            blocks.append({"files": len(files), "name": block,
                           'replica': [{'se': x} for x in locations]})
        return {"phedex": {"request_timestamp": 1254762796.13538,
                           "block": blocks}}

    def subscriptions(self, **args):
        """
        Where is data subscribed - for now just replicate blockreplicas

        ``args['dataset']`` (a list of dataset names) is used when non-empty,
        otherwise ``args['block']`` (a list of block names).  With neither,
        the response carries an empty dataset list.
        """
        data = {'phedex': {"request_timestamp": 1254850198.15418,
                           'dataset': []}}
        datasets = data['phedex']['dataset']

        # different structure depending on whether we ask for dataset or blocks
        if args.get('dataset'):
            for dataset in args['dataset']:
                # TODO needs to add correct file numbers
                # FIXME: Take from self.locations
                datasets.append({
                    'name': dataset, 'files': 5,
                    'subscription': [{
                        'node': 'SiteA', 'custodial': 'n', 'suspend_until': None,
                        'level': 'dataset', 'move': 'n', 'request': '47983',
                        'time_created': '1232989000', 'priority': 'low',
                        'time_update': None, 'node_id': '781',
                        'suspended': 'n', 'group': None}]})
        elif args.get('block'):
            for block in args['block']:
                # Look up the dataset of *this* block (not the literal
                # string 'block') and report the block under its own name.
                dataset = self.dataBlocks.getDataset(block)
                locations = self.dataBlocks.getLocation(block)
                # TODO needs to add correct file numbers
                datasets.append({
                    'name': dataset, 'files': 5,
                    'block': [{
                        "bytes": "10438786614", "files": "5", "is_open": "n",
                        "name": block, "id": "454370",
                        "subscription": [{'node': x} for x in locations]}]})
        return data

    def emulator(self):
        """Return the constant marker string of this emulator."""
        return "PhEDEx emulator ...."

Exemplo n.º 17

0

Exibir arquivo

Arquivo: PhEDEx.py Projeto: ticoann/WMCore

class PhEDEx(dict):
    """
    Emulated PhEDEx service that answers from generated (fake) block data
    and records subscription requests in memory.
    """
    def __init__(self, *args, **kwargs):
        """Set the emulator endpoint and create the in-memory state."""
        # add the end point to prevent the existence check fails.
        self['endpoint'] = "phedex_emulator"
        self.dataBlocks = DataBlockGenerator()
        self.subRequests = {}

    def injectBlocks(self, node, xmlData, verbose=0, strict=1):
        """
        do nothing don't inject block.
        """
        return None

    def getNodeSE(self, value):
        """Return the same dummy storage element name for any node."""
        return 'dummy.se.from.emulator'

    def subscribe(self, subscription, xmlData):
        """
        Store the subscription information in the object,
        tests can retrieve it and verify it

        The request is filed in ``self.subRequests`` under the name of the
        last ``<dataset>`` element found in ``xmlData``.
        """
        document = parseString(xmlData)
        for dataset in document.getElementsByTagName("dataset"):
            datasetName = dataset.getAttribute("name")

        if datasetName not in self.subRequests:
            self.subRequests[datasetName] = []

        args = {
            'node': list(subscription.nodes),
            'data': xmlData,
            'level': subscription.level,
            'priority': subscription.priority,
            'move': subscription.move,
            'static': subscription.static,
            'custodial': subscription.custodial,
            'group': subscription.group,
            'request_only': subscription.request_only,
        }
        self.subRequests[datasetName].append(args)

        return

    def getReplicaInfoForFiles(self, **args):
        """
        _getReplicaInfoForFiles_
        TODO: Need to be implemented correctly,
        Currently not used

        Retrieve file replica information from PhEDEx.

        block          block name, with '*' wildcards, can be multiple (*).  required when no lfn is specified.
        node           node name, can be multiple (*)
        se             storage element name, can be multiple (*)
        update_since   unix timestamp, only return replicas updated since this
                    time
        create_since   unix timestamp, only return replicas created since this
                    time
        complete       y or n. if y, return only file replicas from complete block
                    replicas.  if n only return file replicas from incomplete block
                    replicas.  default is to return either.
        dist_complete  y or n.  if y, return only file replicas from blocks
                    where all file replicas are available at some node. if
                    n, return only file replicas from blocks which have
                    file replicas not available at any node.  default is
                    to return either.
        subscribed     y or n, filter for subscription. default is to return either.
        custodial      y or n. filter for custodial responsibility.  default is
                    to return either.
        group          group name.  default is to return replicas for any group.
        lfn            logical file name
        """
        return None

    def getNodeMap(self):
        """
        _getNodeMap_

        Retrieve information about nodes known to this PhEDEx instance.  Each
        node entry will have the following keys:
          name       - PhEDEx node name
          se         - Storage element name
          kind       - Node type, e.g. 'Disk' or 'MSS'
          technology - Node technology, e.g. 'Castor'
          id         - Node id

        Return some MSS, Buffer and Disk nodes
        """
        nodeTable = (
            ("T1_US_FNAL_MSS", "MSS", "cmssrm.fnal.gov", "dCache"),
            ("T1_US_FNAL_Buffer", "Buffer", "cmssrm.fnal.gov", "dCache"),
            ("T1_UK_RAL_MSS", "MSS", "srm-cms.gridpp.rl.ac.uk", "Castor"),
            ("T1_UK_RAL_Buffer", "Buffer", "srm-cms.gridpp.rl.ac.uk", "Castor"),
            ("T1_UK_RAL_Disk", "Disk", "srm-cms-disk.gridpp.rl.ac.uk", "Disk"),
            ("T2_CH_CERN", "Disk", "srm-eoscms.cern.ch", "Disk"),
            ("T3_CO_Uniandes", "Disk", "moboro.uniandes.edu.co", "DPM"),
        )
        nodes = []
        # ids are 1-based and follow the order of the table
        for nodeId, (name, kind, se, technology) in enumerate(nodeTable, 1):
            nodes.append({
                "name": name,
                "kind": kind,
                "se": se,
                "technology": technology,
                "id": nodeId
            })
        return {"phedex": {"node": nodes}}

    def getReplicaInfoForBlocks(self, **args):
        """
        Where are blocks located

        Reads the block names from ``args['block']``; every location of a
        block is reported as a replica node with ``_MSS`` appended.
        """
        blocks = []
        for block in args['block']:
            files = self.dataBlocks.getFiles(block)
            locations = self.dataBlocks.getLocation(block)
            blocks.append({
                "files": len(files),
                "name": block,
                'replica': [{'node': x + '_MSS'} for x in locations]
            })
        return {"phedex": {"request_timestamp": 1254762796.13538,
                           "block": blocks}}

    def subscriptions(self, **args):
        """
        Where is data subscribed - for now just replicate blockreplicas

        ``args['dataset']`` (a list of dataset names) is used when non-empty,
        otherwise ``args['block']`` (a list of block names or block dicts).
        """
        data = {
            'phedex': {
                "request_timestamp": 1254850198.15418,
                'dataset': []
            }
        }
        datasetList = data['phedex']['dataset']

        def _blockInfoGenerator(blockList):
            for block in blockList:
                if isinstance(block, dict):
                    block = block['Name']
                dataset = self.dataBlocks.getDatasetName(block)

                # Find the entry of this dataset if it was already reported.
                datasetSelected = None
                for dataItem in datasetList:
                    if dataItem['name'] == dataset:
                        datasetSelected = dataItem
                        break
                # Create a new entry only when the dataset is NOT there yet;
                # creating one when it was found would duplicate it, and
                # skipping creation when it was missing would attach the
                # block to the previously processed dataset.
                if datasetSelected is None:
                    # TODO needs to add correct file numbers
                    datasetSelected = {
                        'name': dataset,
                        'files': filesInDataset,
                        'block': []
                    }
                    datasetList.append(datasetSelected)

                datasetSelected['subscription'] = [{
                    'node': 'T2_XX_SiteA_MSS',
                    'custodial': 'n',
                    'suspend_until': None,
                    'level': 'dataset',
                    'move': 'n',
                    'request': '47983',
                    'time_created': '1232989000',
                    'priority': 'low',
                    'time_update': None,
                    'node_id': '781',
                    'suspended': 'n',
                    'group': None
                }]

                locations = self.dataBlocks.getLocation(block)
                datasetSelected['block'].append({
                    "bytes": "10438786614",
                    "files": filesInBlock,
                    "is_open": "n",
                    "name": block,
                    "id": "454370",
                    "subscription": [{
                        'node': x + '_MSS',
                        "suspended": "n"
                    } for x in locations]
                })

        # different structure depending on whether we ask for dataset or blocks
        if args.get('dataset'):
            for dataset in args['dataset']:
                _blockInfoGenerator(self.dataBlocks.getBlocks(dataset))
        elif args.get('block'):
            _blockInfoGenerator(args['block'])

        return data

    def getSubscriptionMapping(self, *dataItems, **kwargs):
        """
        Similar basic functionality as self.subscriptions()
        however: dataItems may be a combination of blocks or datasets and
        kwargs is passed to PhEDEx; output is parsed and returned in the form
        { 'dataItem1' : [Node1, Node2] } where dataItem is a block or dataset

        The following cases are handled:
          o Input is a block and subscription is a dataset
          o Input is a block and subscription is a block
          o Input is a dataset and subscription is a dataset

        Not supported:
          o Input is a dataset but only block subscriptions exist
        """
        from collections import defaultdict
        result = defaultdict(set)
        kwargs.setdefault('suspended', 'n')  # require active subscription

        # Hard to query all at once in one GET call, POST not cacheable
        # hence, query individually - use httplib2 caching to protect service
        for item in set(dataItems):  # force unique items
            datasetName = item.split('#')[0]

            # First query for a dataset level subscription (most common)
            # this returns block level subscriptions also.
            kwargs['dataset'], kwargs['block'] = [datasetName], []
            response = self.subscriptions(**kwargs)['phedex']

            # iterate over response as can't jump to specific datasets
            for dset in response['dataset']:
                if dset['name'] != datasetName:
                    continue
                if 'subscription' in dset:
                    # dataset level subscription
                    result[item].update(
                        x['node'] for x in dset['subscription']
                        if x['suspended'] == 'n')

                # if we have a block we must check for block level subscription also
                if '#' in item and 'block' in dset:
                    for block in dset['block']:
                        if block['name'] == item:
                            result[item].update(
                                x['node'] for x in block['subscription']
                                if x['suspended'] == 'n')
                            break
        return result

    def emulator(self):
        """Return the constant marker string of this emulator."""
        return "PhEDEx emulator ...."

Exemplo n.º 18

0

Exibir arquivo

Arquivo: DBSReader.py Projeto: prozober/WMCore

 def __init__(self, url, **contact):
     """
     Build the fake block generator and the mocked DBS API object.

     The API argument mapping carries ``url`` plus fixed ``level`` and
     ``version`` values; ``contact`` keyword arguments are not used.
     """
     self.dataBlocks = DataBlockGenerator()
     mockArgs = {}
     mockArgs["url"] = url
     mockArgs["level"] = 'ERROR'
     mockArgs["version"] = 'DBS_2_0_9'
     self.dbs = _MockDBSApi(mockArgs)

Exemplo n.º 19

0

Exibir arquivo

Arquivo: DBSReader.py Projeto: prozober/WMCore

class DBSReader:
    """
    Mock up dbs access

    Every answer is derived from a DataBlockGenerator (self.dataBlocks)
    or from the mock DBS API (self.dbs) rather than from a real server.
    """
    def __init__(self, url, **contact):
        """
        Build the emulated reader.

        url is handed to the mock DBS API; extra keyword arguments are
        accepted but not used here.
        """
        self.dataBlocks = DataBlockGenerator()
        args = { "url" : url, "level" : 'ERROR', "version" : 'DBS_2_0_9'}
        self.dbs = _MockDBSApi(args)

    def getFileBlocksInfo(self, dataset, onlyClosedBlocks = True,
                          blockName = '*', locations = True):
        """
        Fake block info

        Returns the generated block dictionaries of dataset, restricted
        to blockName unless it is '*'.  When locations is true each block
        dictionary is extended in place with a 'PhEDExNodeList' entry.
        onlyClosedBlocks is accepted but not used for filtering.

        Raises DBSReaderError when nothing matches and no specific block
        was requested.
        """
        blocks = [x for x in self.dataBlocks.getBlocks(dataset)
                  if x['Name'] == blockName or blockName == '*']
        if not blocks:
            # Weird error handling follows, this is what dbs does:
            # If block specified, return [], else raise DbsBadRequest error
            if blockName != '*':
                return []
            raise DBSReaderError('DbsBadRequest: DBS Server Raised An Error')
        if locations:
            for block in blocks:
                block['PhEDExNodeList'] = [
                    {'Role' : '', 'Name' : x}
                    for x in self.listFileBlockLocation(block['Name'])
                ]
        return blocks

    def lfnsInBlock(self, fileBlockName):
        """
        _lfnsInBlock_
        Get a fake list of LFNs for the block
        """
        files = self.listFilesInBlock(fileBlockName)
        return [x['LogicalFileName'] for x in files]

    def listFileBlocks(self, dataset, onlyClosedBlocks = False,
                       blockName = '*'):
        """
        Get fake block names

        onlyClosedBlocks is accepted for interface compatibility; the
        lookup is always done with onlyClosedBlocks = False.
        """
        return [x['Name'] for x in self.getFileBlocksInfo(dataset, onlyClosedBlocks = False,
                                                          blockName = blockName,
                                                          locations = False)]

    def listOpenFileBlocks(self, dataset):
        """
        _listOpenFileBlocks_

        Retrieve a list of open fileblock names for a dataset, i.e. the
        blocks whose 'OpenForWriting' value stringifies to '1'.
        """
        return [x['Name'] for x in self.getFileBlocksInfo(dataset, onlyClosedBlocks = False,
                                                          locations = False)
                if str(x['OpenForWriting']) == '1']

    def listFileBlockLocation(self, block):
        """Fake locations"""
        return self.dataBlocks.getLocation(block)

    def listFilesInBlock(self, fileBlockName):
        """Fake files"""
        return self.dataBlocks.getFiles(fileBlockName)

    def listFilesInBlockWithParents(self, block):
        """Fake files, asking the generator to include parent information."""
        return self.dataBlocks.getFiles(block, True)

    def getFileBlock(self, block):
        """Return block + locations"""
        result = { block : {
            "PhEDExNodeNames" : self.listFileBlockLocation(block),
            "Files" : self.listFilesInBlock(block),
            "IsOpen" : self.dataBlocks._openForWriting(),
            }
                 }
        return result

    def getFileBlockWithParents(self, fileBlockName):
        """
        _getFileBlockWithParents_

        return a dictionary:
        { blockName: {
             "PhEDExNodeNames" : [<pnn list>],
             "Files" : dictionaries representing each file,
             "IsOpen" : value reported by the block generator
             }
        }

        The files are requested with parent information.
        """
        result = { fileBlockName: {
            "PhEDExNodeNames" : self.listFileBlockLocation(fileBlockName),
            "Files" : self.listFilesInBlockWithParents(fileBlockName),
            "IsOpen" : self.dataBlocks._openForWriting(),
            }
                 }
        return result

    def listRuns(self, dataset = None, block = None):
        """
        List the distinct run numbers of a block or of a whole dataset.

        block takes precedence over dataset.  Returns a list without
        duplicates (in no particular order), or None when neither
        argument is given.
        """
        def getRunsFromBlock(b):
            results = set()
            for x in self.dataBlocks.getFiles(b):
                results.update(y['RunNumber'] for y in x['LumiList'])
            return list(results)

        if block:
            return getRunsFromBlock(block)
        if dataset:
            runs = set()
            for blockInfo in self.dataBlocks.getBlocks(dataset):
                runs.update(getRunsFromBlock(blockInfo['Name']))
            return list(runs)
        return None

    def listRunLumis(self, dataset = None, block = None):
        """
        Map every run number of a block or dataset to None.

        block takes precedence over dataset.  The values are always None
        (to match DBS3).  Returns None when neither argument is given.
        """
        def getRunsFromBlock(b):
            results = {}
            for x in self.dataBlocks.getFiles(b):
                for y in x['LumiList']:
                    results[y['RunNumber']] = None  # To match DBS3
            return results

        if block:
            return getRunsFromBlock(block)
        if dataset:
            runs = {}
            for blockInfo in self.dataBlocks.getBlocks(dataset):
                # every value is None, so a plain update merges the runs
                runs.update(getRunsFromBlock(blockInfo['Name']))
            return runs
        return None


    def getDBSSummaryInfo(self, dataset=None, block=None):
        """
        Dataset summary

        Returns a dictionary of stringified totals ('NumberOfEvents',
        'NumberOfFiles', 'NumberOfLumis') plus 'path'.  For a block the
        result also carries 'block' and 'OpenForWriting'.  When both
        arguments are given and the dataset has blocks, the dataset
        totals overwrite the block totals.

        Raises DBSReaderError when nothing could be summarised.
        """

        def countLumis(files):
            # Sum over ALL lumi entries; the previous "=+" kept only the
            # last entry's count.
            # assumes each 'LumiSectionNumber' is a sized collection, as
            # len() is applied to it
            return sum(len(lumi['LumiSectionNumber'])
                       for fileInfo in files
                       for lumi in fileInfo['LumiList'])

        result = {}
        if block:
            files = self.dataBlocks.getFiles(block)
            result['NumberOfEvents'] = str(sum([x['NumberOfEvents'] for x in files]))
            result['NumberOfFiles'] = str(len(files))
            result['NumberOfLumis'] = str(countLumis(files))

            result['path'] = dataset
            result['block'] = block
            result['OpenForWriting'] = '1' if self.dataBlocks._openForWriting() else '0'

        if dataset:
            blocks = self.dataBlocks.getBlocks(dataset)
            if blocks:
                result['NumberOfEvents'] = str(sum([x['NumberOfEvents'] for x in blocks]))
                result['NumberOfFiles'] = str(sum([x['NumberOfFiles'] for x in blocks]))
                result['NumberOfLumis'] = str(sum([x['NumberOfLumis'] for x in blocks]))
                result['path'] = dataset

        # Weird error handling follows, this is what dbs does
        if not result:
            raise DBSReaderError('DbsConnectionError: Database exception,Invalid parameters')
        return result

    def listBlockParents(self, block):
        """Return what the block generator reports as parent of block (called with 1 as second argument)."""
        return self.dataBlocks.getParentBlock(block, 1)

    def listDatasetLocation(self, dataset):
        """
        _listDatasetLocation_

        List the location names where there is at least a block of the
        given dataset, taken from each block's 'PhEDExNodeList'.
        """
        blocks = self.getFileBlocksInfo(dataset, onlyClosedBlocks = False,
                                        blockName = '*', locations = True)

        result = set()
        for block in blocks:
            result.update(x['Name'] for x in block['PhEDExNodeList'])

        return list(result)

    def getFileListByDataset(self, dataset, detail=True):
        """Return the mock DBS API file array for dataset; detail is accepted but not used."""
        return self.dbs.listFileArray(dataset)

Exemplo n.º 20

0

Exibir arquivo

Arquivo: DBSReader.py Projeto: PerilousApricot/CRAB2

 def __init__(self, *args, **kwargs):
     """
     Announce that the DBS emulator is in use and create the block
     generator.  Positional and keyword arguments are accepted but
     not used here.
     """
     # Parenthesised single-argument form prints the same text on
     # Python 2 and is valid syntax on Python 3.
     print("Using DBS Emulator ...")
     self.dataBlocks = DataBlockGenerator()

Exemplo n.º 21

0

Exibir arquivo

Arquivo: MockPhEDExApi.py Projeto: rishiloyola/WMCore

 def __init__(self, dict=None, responseType="json", secure=True):
     """
     Create the mock API with a fresh block generator and an empty
     subscription-request table.  responseType and secure are
     accepted but not used here.
     """
     if not dict:
         dict = {}
     self.dataBlocks = DataBlockGenerator()
     self.subRequests = {}

Exemplo n.º 22

0

Exibir arquivo

class DBSReader:
    """
    Mock up dbs access
    """
    def __init__(self, url, **contact):
        """
        Build the emulated reader.

        url is handed to the mock DBS API; extra keyword arguments are
        accepted but not used here.
        """
        self.dataBlocks = DataBlockGenerator()
        apiSettings = {
            "url" : url,
            "level" : 'ERROR',
            "version" : 'DBS_2_0_9',
        }
        self.dbs = _MockDBSApi(apiSettings)
        
    def getFileBlocksInfo(self, dataset, onlyClosedBlocks = True,
                          blockName = '*', locations = True):

        """Fake block info"""
        blocks = [x for x in self.dataBlocks.getBlocks(dataset)
                if x['Name'] == blockName or blockName == '*']
        if not blocks:
            # Weird error handling follows, this is what dbs does:
            # If block specified, return [], else raise DbsBadRequest error
            if blockName != '*':
                return []
            else:
                raise DBSReaderError('DbsBadRequest: DBS Server Raised An Error')
        if locations:
            for block in blocks:
                block['StorageElementList'] = [{'Role' : '', 'Name' : x} for x in \
                                               self.listFileBlockLocation(block['Name'])]
        return blocks

    def listFileBlocks(self, dataset, onlyClosedBlocks = False,
                       blockName = '*'):
        """Get fake block names"""
        return [x['Name'] for x in self.getFileBlocksInfo(dataset, onlyClosedBlocks = False,
                                                          blockName = blockName,
                                                          locations = False)]

    def listFileBlockLocation(self, block):
        """Fake locations"""
        return self.dataBlocks.getLocation(block)

    def listFilesInBlock(self, block):
        """Fake files"""
        return self.dataBlocks.getFiles(block)

    def listFilesInBlockWithParents(self, block):
        return self.dataBlocks.getFiles(block, True)

    def getFileBlock(self, block):
        """Return block + locations"""
        result = { block : {
            "StorageElements" : self.listFileBlockLocation(block),
            "Files" : self.listFilesInBlock(block),
            "IsOpen" : False,
            }
                }
        return result

    def getFileBlockWithParents(self, fileBlockName):
        """
        _getFileBlockWithParents_

        return a dictionary:
        { blockName: {
             "StorageElements" : [<se list>],
             "Files" : dictionaries representing each file
             }
        }

        files

        """

        result = { fileBlockName: {
            "StorageElements" : self.listFileBlockLocation(fileBlockName),
            "Files" : self.listFilesInBlockWithParents(fileBlockName),
            "IsOpen" : False,

            }
                   }
        return result

    def listRuns(self, dataset = None, block = None):
        def getRunsFromBlock(b):
            results = []
            for x in self.dataBlocks.getFiles(b):
                results.extend([y['RunNumber'] for y in x['LumiList']])
            return results

        if block:
            return getRunsFromBlock(block)
        if dataset:
            runs = []
            for block in self.dataBlocks.getBlocks(dataset):
                runs.extend(getRunsFromBlock(block['Name']))
            return runs
        return None


    def getDBSSummaryInfo(self, dataset=None, block=None):

        """Dataset summary"""
        def getLumisectionsInBlock(b):
            lumis = set()
            for file in self.dataBlocks.getFiles(b):
                for x in file['LumiList']:
                    lumis.add(x['LumiSectionNumber'])
            return lumis

        result = {}
        if block:
            result['NumberOfEvents'] = sum([x['NumberOfEvents']
                                for x in self.dataBlocks.getFiles(block)])
            result['NumberOfFiles'] = len(self.dataBlocks.getFiles(block))

            result['NumberOfLumis'] = len(getLumisectionsInBlock(block))

            result['path'] = dataset
            result['block'] = block

        if dataset:
            if self.dataBlocks.getBlocks(dataset):
                result['NumberOfEvents'] = sum([x['NumberOfEvents']
                                    for x in self.dataBlocks.getBlocks(dataset)])
                result['NumberOfFiles'] = sum([x['NumberOfFiles']
                                    for x in self.dataBlocks.getBlocks(dataset)])
                lumis = set()
                for b in self.dataBlocks.getBlocks(dataset):
                    lumis = lumis.union(getLumisectionsInBlock(b['Name']))

                result['NumberOfLumis'] = len(lumis)
                result['path'] = dataset

        # Weird error handling follows, this is what dbs does
        if not result:
            raise DBSReaderError('DbsConnectionError: Database exception,Invalid parameters')
        return result

    def listBlockParents(self, block):
        return self.dataBlocks.getParentBlock(block, 1)