# Example 1
class MockPhEDExApi(object):
    """
    Version of Services/PhEDEx intended to be used with mock or unittest.mock

    Returns deterministic fake replica and subscription data so unit tests
    do not need a live PhEDEx service.
    """

    def __init__(self, dict=None, responseType="json", logger=None,
                 dbsUrl='https://cmsweb.cern.ch/dbs/prod/global/DBSReader'):
        # NOTE: `dict` shadows the builtin; the name is kept for signature
        # compatibility with the real service class.
        print("Using MockPhEDExApi")
        self.dbsUrl = dbsUrl
        dict = dict or {}
        self.dataBlocks = DataBlockGenerator()
        self.subRequests = {}

    def sitesByBlock(self, block):
        """
        Centralize the algorithm to decide where a block is based on the hash name

        Args:
            block: the name of the block

        Returns:
            sites: a fake list of sites where the data is

        """
        # Bucket on the hash so the same block name always maps to the same
        # fake site list within one interpreter run (str hashing may vary
        # across runs via PYTHONHASHSEED).
        if hash(block) % 3 == 0:
            sites = ['T2_XX_SiteA']
        elif hash(block) % 3 == 1:
            sites = ['T2_XX_SiteA', 'T2_XX_SiteB']
        else:
            sites = ['T2_XX_SiteA', 'T2_XX_SiteB', 'T2_XX_SiteC']

        return sites

    def getReplicaPhEDExNodesForBlocks(self, block=None, dataset=None, complete='y'):
        """
        Fake lookup of which PhEDEx nodes host the given blocks or dataset.

        Args:
            block: the name(s) of the block(s)
            dataset: the name of the dataset (takes precedence over block)
            complete: unused; kept for interface compatibility

        Returns:
            a fake dict mapping block names to the sites they are at
        """
        if isinstance(dataset, list):
            dataset = dataset[0]  # Dataset is a list in these tests
        if dataset:
            # TODO: Generalize this and maybe move dataset detection into sitesByBlock
            if dataset == PILEUP_DATASET:
                return {
                    '%s#0fcb2b12-d27e-11e0-91b1-003048caaace' % dataset: ['T2_XX_SiteA', 'T2_XX_SiteB', 'T2_XX_SiteC']}
            else:
                try:
                    DBS3Reader(PROD_DBS).checkDatasetPath(dataset)
                    blocks = DBS3Reader(PROD_DBS).dbs.listBlocks(dataset=dataset)
                    singleBlock = blocks[0]['block_name']
                    return {singleBlock: self.sitesByBlock(singleBlock)}
                except DBSReaderError:
                    # Unknown dataset: report a synthetic block with no sites
                    return {'%s#0fcb2b12-d27e-11e0-91b1-003048caaace' % dataset: []}

        replicas = {}
        for oneBlock in block:
            if oneBlock.split('#')[0] == PILEUP_DATASET:
                # Pileup is at a single site
                sites = ['T2_XX_SiteC']
                _BLOCK_LOCATIONS[oneBlock] = sites
            else:
                sites = self.sitesByBlock(block=oneBlock)
                _BLOCK_LOCATIONS[oneBlock] = sites
            replicas.update({oneBlock: sites})
        return replicas

    def getReplicaInfoForBlocks(self, **args):
        """
        Where are blocks located

        Expects args['block'] to be an iterable of block names; returns a
        PhEDEx-shaped dict with one replica entry per fake site.
        """

        data = {"phedex": {"request_timestamp": 1254762796.13538, "block": []}}

        for block in args['block']:
            blocks = data['phedex']['block']
            sites = self.sitesByBlock(block=block)
            blocks.append({'files': 1, 'name': block, 'replica': [{'node': x} for x in sites]})
        return data

    def getSubscriptionMapping(self, *dataItems, **kwargs):
        """
        Fake version of the existing PhEDEx method

        Returns:
            dict mapping each unique data item to its fake site list
        """

        dataItems = list(set(dataItems))  # force unique items
        locationMap = {}

        for dataItem in dataItems:
            sites = self.sitesByBlock(block=dataItem)
            locationMap.update({dataItem: sites})

        return locationMap

    def getNodeMap(self):
        """
        Return a fixed PhEDEx-shaped node map with nine fake nodes.
        """

        nodeMappings = {"phedex": {"node": []}}

        nodes = [{"name": "T1_US_FNAL_MSS", "kind": "MSS", "se": "cmssrm.fnal.gov", "technology": "dCache", "id": 1},
                 {"name": "T1_US_FNAL_Buffer", "kind": "Buffer", "se": "cmssrm.fnal.gov", "technology": "dCache",
                  "id": 2},
                 {"name": "T0_CH_CERN_MSS", "kind": "MSS", "se": "srm-cms.cern.ch", "technology": "Castor", "id": 3},
                 {"name": "T0_CH_CERN_Buffer", "kind": "Buffer", "se": "srm-cms.cern.ch", "technology": "Castor",
                  "id": 4},
                 {"name": "T1_UK_RAL_MSS", "kind": "MSS", "se": "srm-cms.gridpp.rl.ac.uk", "technology": "Castor",
                  "id": 5},
                 {"name": "T1_UK_RAL_Buffer", "kind": "Buffer", "se": "srm-cms.gridpp.rl.ac.uk", "technology": "Castor",
                  "id": 6},
                 {"name": "T1_UK_RAL_Disk", "kind": "Disk", "se": "srm-cms-disk.gridpp.rl.ac.uk", "technology": "Disk",
                  "id": 7},
                 {"name": "T2_CH_CERN", "kind": "Disk", "se": "srm-eoscms.cern.ch", "technology": "Disk", "id": 8},
                 {"name": "T3_CO_Uniandes", "kind": "Disk", "se": "moboro.uniandes.edu.co", "technology": "DPM",
                  "id": 9}
                ]

        for node in nodes:
            nodeMappings["phedex"]["node"].append(node)

        return nodeMappings

    def subscriptions(self, **args):
        """
        Where is data subscribed - for now just replicate blockreplicas

        Accepts 'dataset', 'block' or 'group' keyword arguments and returns a
        PhEDEx-shaped subscription structure built from the fake data blocks.
        """

        def _blockInfoGenerator(blockList):
            # Fill `data` (closure) with one dataset entry per distinct
            # dataset and one block entry per block in blockList.
            for block in blockList:
                if isinstance(block, dict):
                    block = block['Name']
                dataset = self.dataBlocks.getDatasetName(block)
                # TODO needs to add correct file numbers
                datasetList = data['phedex']['dataset']
                find = False
                for dataItem in datasetList:
                    if dataItem['name'] == dataset:
                        datasetSelected = dataItem
                        find = True
                        break
                if not find:
                    data['phedex']['dataset'].append({'name': dataset, 'files': FILES_PER_DATASET,
                                                      'block': []})

                    datasetSelected = data['phedex']['dataset'][-1]
                subs = []
                subs.append({'node': 'T2_XX_SiteA_MSS', 'custodial': 'n', 'suspend_until': None,
                             'level': 'dataset', 'move': 'n', 'request': '47983',
                             'time_created': '1232989000', 'priority': 'low',
                             'time_update': None, 'node_id': '781',
                             'suspended': 'n', 'group': None})

                if dataset in self.subRequests:
                    subs.extend(self.subRequests[dataset])
                # Keep only dataset-level subscriptions.  The original code
                # removed items from `subs` while iterating over it, which
                # skips the element following each removed one.
                subs = [sub for sub in subs if sub['level'] != 'block']
                datasetSelected['subscription'] = subs

                blocks = datasetSelected['block']
                locations = self.dataBlocks.getLocation(block)

                blocks.append({"bytes": "10438786614", "files": FILES_PER_BLOCK, "is_open": "n", "name": block,
                               "id": "454370",
                               "subscription": [{'node': x + '_MSS', "suspended": "n"} for x in locations]
                              })

        data = {'phedex': {"request_timestamp": 1254850198.15418,
                           'dataset': []}}

        # Different structure depending on whether we ask for dataset or blocks
        if 'dataset' in args and args['dataset']:
            blockList = self.dataBlocks.getBlocks(args['dataset'])
            _blockInfoGenerator(blockList)
        elif 'block' in args and args['block']:
            _blockInfoGenerator(args['block'])
        elif 'group' in args and args['group']:
            blockList = self.dataBlocks.getBlocks('/a/b-%s/c' % args['group'])
            _blockInfoGenerator(blockList)

        return data

    def getRequestList(self, **kwargs):
        """
        _getRequestList_
        Emulated request list, for now it does nothing
        """

        goldenResponse = {"phedex": {"request": [], "request_timestamp": 1368636296.94707,
                                     "request_version": "2.3.15-comp", "request_call": "requestlist",
                                     "call_time": 0.34183, "request_date": "2013-05-15 16:44:56 UTC"}}
        return goldenResponse

    def __getattr__(self, item):
        """
        __getattr__ gets called in case lookup of the actual method fails. We use this to return data based on
        a lookup table

        :param item: The method name the user is trying to call
        :return: The generic lookup function
        """

        def genericLookup(*args, **kwargs):
            """
            This function returns the mocked DBS data

            :param args: positional arguments it was called with
            :param kwargs: named arguments it was called with
            :return: the dictionary that DBS would have returned
            """

            if kwargs:
                # iteritems() is Python 2 only; items() works on both.
                signature = '%s:%s' % (item, sorted(kwargs.items()))
            else:
                signature = item

            # NOTE(review): self.url is never assigned in this class's
            # __init__ — presumably set elsewhere; confirm before relying
            # on genericLookup.
            try:
                if MOCK_DATA[self.url][signature] == 'Raises HTTPError':
                    raise HTTPError
                else:
                    return MOCK_DATA[self.url][signature]
            except KeyError:
                raise KeyError("PhEDEx mock API could not return data for method %s, args=%s, and kwargs=%s (URL %s)." %
                               (item, args, kwargs, self.url))

        return genericLookup
# Example 2
class MockPhEDExApi(object):
    """
    Version of Services/PhEDEx intended to be used with mock or unittest.mock

    Returns deterministic fake replica and subscription data so unit tests
    do not need a live PhEDEx service.
    """
    def __init__(self,
                 dict=None,
                 responseType="json",
                 logger=None,
                 dbsUrl='https://cmsweb.cern.ch/dbs/prod/global/DBSReader'):
        # NOTE: `dict` shadows the builtin; the name is kept for signature
        # compatibility with the real service class.
        print("Using MockPhEDExApi")
        self.dbsUrl = dbsUrl
        dict = dict or {}
        self.dataBlocks = DataBlockGenerator()
        self.subRequests = {}

    def sitesByBlock(self, block):
        """
        Centralize the algorithm to decide where a block is based on the hash name

        Args:
            block: the name of the block

        Returns:
            sites: a fake list of sites where the data is

        """
        # Bucket on the hash so the same block name always maps to the same
        # fake site list within one interpreter run.
        if hash(block) % 3 == 0:
            sites = ['T2_XX_SiteA']
        elif hash(block) % 3 == 1:
            sites = ['T2_XX_SiteA', 'T2_XX_SiteB']
        else:
            sites = ['T2_XX_SiteA', 'T2_XX_SiteB', 'T2_XX_SiteC']

        return sites

    def getReplicaPhEDExNodesForBlocks(self,
                                       block=None,
                                       dataset=None,
                                       complete='y'):
        """
        Fake lookup of which PhEDEx nodes host the given blocks or dataset.

        Args:
            block: the name(s) of the block(s)
            dataset: the name of the dataset (takes precedence over block)
            complete: unused; kept for interface compatibility

        Returns:
            a fake dict mapping block names to the sites they are at
        """
        if isinstance(dataset, list):
            dataset = dataset[0]  # Dataset is a list in these tests
        if dataset:
            # TODO: Generalize this and maybe move dataset detection into sitesByBlock
            if dataset == PILEUP_DATASET:
                return {
                    '%s#0fcb2b12-d27e-11e0-91b1-003048caaace' % dataset:
                    ['T2_XX_SiteA', 'T2_XX_SiteB', 'T2_XX_SiteC']
                }
            else:
                try:
                    DBS3Reader(PROD_DBS).checkDatasetPath(dataset)
                    blocks = DBS3Reader(PROD_DBS).dbs.listBlocks(
                        dataset=dataset)
                    singleBlock = blocks[0]['block_name']
                    return {singleBlock: self.sitesByBlock(singleBlock)}
                except DBSReaderError:
                    # Unknown dataset: report a synthetic block with no sites
                    return {
                        '%s#0fcb2b12-d27e-11e0-91b1-003048caaace' % dataset:
                        []
                    }

        replicas = {}
        for oneBlock in block:
            if oneBlock.split('#')[0] == PILEUP_DATASET:
                # Pileup is at a single site
                sites = ['T2_XX_SiteC']
                _BLOCK_LOCATIONS[oneBlock] = sites
            else:
                sites = self.sitesByBlock(block=oneBlock)
                _BLOCK_LOCATIONS[oneBlock] = sites
            replicas.update({oneBlock: sites})
        return replicas

    def getReplicaInfoForBlocks(self, **args):
        """
        Where are blocks located

        Expects args['block'] to be an iterable of block names; returns a
        PhEDEx-shaped dict with one replica entry per fake site.
        """

        data = {"phedex": {"request_timestamp": 1254762796.13538, "block": []}}

        for block in args['block']:
            blocks = data['phedex']['block']
            sites = self.sitesByBlock(block=block)
            blocks.append({
                'files': 1,
                'name': block,
                'replica': [{'node': x} for x in sites]
            })
        return data

    def getSubscriptionMapping(self, *dataItems, **kwargs):
        """
        Fake version of the existing PhEDEx method

        Returns:
            dict mapping each unique data item to its fake site list
        """

        dataItems = list(set(dataItems))  # force unique items
        locationMap = {}

        for dataItem in dataItems:
            sites = self.sitesByBlock(block=dataItem)
            locationMap.update({dataItem: sites})

        return locationMap

    def getNodeMap(self):
        """
        Return a fixed PhEDEx-shaped node map with nine fake nodes.
        """

        nodeMappings = {"phedex": {"node": []}}

        nodes = [{
            "name": "T1_US_FNAL_MSS",
            "kind": "MSS",
            "se": "cmssrm.fnal.gov",
            "technology": "dCache",
            "id": 1
        }, {
            "name": "T1_US_FNAL_Buffer",
            "kind": "Buffer",
            "se": "cmssrm.fnal.gov",
            "technology": "dCache",
            "id": 2
        }, {
            "name": "T0_CH_CERN_MSS",
            "kind": "MSS",
            "se": "srm-cms.cern.ch",
            "technology": "Castor",
            "id": 3
        }, {
            "name": "T0_CH_CERN_Buffer",
            "kind": "Buffer",
            "se": "srm-cms.cern.ch",
            "technology": "Castor",
            "id": 4
        }, {
            "name": "T1_UK_RAL_MSS",
            "kind": "MSS",
            "se": "srm-cms.gridpp.rl.ac.uk",
            "technology": "Castor",
            "id": 5
        }, {
            "name": "T1_UK_RAL_Buffer",
            "kind": "Buffer",
            "se": "srm-cms.gridpp.rl.ac.uk",
            "technology": "Castor",
            "id": 6
        }, {
            "name": "T1_UK_RAL_Disk",
            "kind": "Disk",
            "se": "srm-cms-disk.gridpp.rl.ac.uk",
            "technology": "Disk",
            "id": 7
        }, {
            "name": "T2_CH_CERN",
            "kind": "Disk",
            "se": "srm-eoscms.cern.ch",
            "technology": "Disk",
            "id": 8
        }, {
            "name": "T3_CO_Uniandes",
            "kind": "Disk",
            "se": "moboro.uniandes.edu.co",
            "technology": "DPM",
            "id": 9
        }]

        for node in nodes:
            nodeMappings["phedex"]["node"].append(node)

        return nodeMappings

    def subscriptions(self, **args):
        """
        Where is data subscribed - for now just replicate blockreplicas

        Accepts 'dataset', 'block' or 'group' keyword arguments and returns a
        PhEDEx-shaped subscription structure built from the fake data blocks.
        """
        def _blockInfoGenerator(blockList):
            # Fill `data` (closure) with one dataset entry per distinct
            # dataset and one block entry per block in blockList.
            for block in blockList:
                if isinstance(block, dict):
                    block = block['Name']
                dataset = self.dataBlocks.getDatasetName(block)
                # TODO needs to add correct file numbers
                datasetList = data['phedex']['dataset']
                find = False
                for dataItem in datasetList:
                    if dataItem['name'] == dataset:
                        datasetSelected = dataItem
                        find = True
                        break
                if not find:
                    data['phedex']['dataset'].append({
                        'name': dataset,
                        'files': FILES_PER_DATASET,
                        'block': []
                    })

                    datasetSelected = data['phedex']['dataset'][-1]
                subs = []
                subs.append({
                    'node': 'T2_XX_SiteA_MSS',
                    'custodial': 'n',
                    'suspend_until': None,
                    'level': 'dataset',
                    'move': 'n',
                    'request': '47983',
                    'time_created': '1232989000',
                    'priority': 'low',
                    'time_update': None,
                    'node_id': '781',
                    'suspended': 'n',
                    'group': None
                })

                if dataset in self.subRequests:
                    subs.extend(self.subRequests[dataset])
                # Keep only dataset-level subscriptions.  The original code
                # removed items from `subs` while iterating over it, which
                # skips the element following each removed one.
                subs = [sub for sub in subs if sub['level'] != 'block']
                datasetSelected['subscription'] = subs

                blocks = datasetSelected['block']
                locations = self.dataBlocks.getLocation(block)

                blocks.append({
                    "bytes": "10438786614",
                    "files": FILES_PER_BLOCK,
                    "is_open": "n",
                    "name": block,
                    "id": "454370",
                    "subscription": [{
                        'node': x + '_MSS',
                        "suspended": "n"
                    } for x in locations]
                })

        data = {
            'phedex': {
                "request_timestamp": 1254850198.15418,
                'dataset': []
            }
        }

        # Different structure depending on whether we ask for dataset or blocks
        if 'dataset' in args and args['dataset']:
            blockList = self.dataBlocks.getBlocks(args['dataset'])
            _blockInfoGenerator(blockList)
        elif 'block' in args and args['block']:
            _blockInfoGenerator(args['block'])
        elif 'group' in args and args['group']:
            blockList = self.dataBlocks.getBlocks('/a/b-%s/c' % args['group'])
            _blockInfoGenerator(blockList)

        return data

    def getRequestList(self, **kwargs):
        """
        _getRequestList_
        Emulated request list, for now it does nothing
        """

        goldenResponse = {
            "phedex": {
                "request": [],
                "request_timestamp": 1368636296.94707,
                "request_version": "2.3.15-comp",
                "request_call": "requestlist",
                "call_time": 0.34183,
                "request_date": "2013-05-15 16:44:56 UTC"
            }
        }
        return goldenResponse

    def __getattr__(self, item):
        """
        __getattr__ gets called in case lookup of the actual method fails. We use this to return data based on
        a lookup table

        :param item: The method name the user is trying to call
        :return: The generic lookup function
        """
        def genericLookup(*args, **kwargs):
            """
            This function returns the mocked DBS data

            :param args: positional arguments it was called with
            :param kwargs: named arguments it was called with
            :return: the dictionary that DBS would have returned
            """

            if kwargs:
                # iteritems() is Python 2 only; items() works on both.
                signature = '%s:%s' % (item, sorted(kwargs.items()))
            else:
                signature = item

            # NOTE(review): self.url is never assigned in this class's
            # __init__ — presumably set elsewhere; confirm before relying
            # on genericLookup.
            try:
                if MOCK_DATA[self.url][signature] == 'Raises HTTPError':
                    raise HTTPError
                else:
                    return MOCK_DATA[self.url][signature]
            except KeyError:
                raise KeyError(
                    "PhEDEx mock API could not return data for method %s, args=%s, and kwargs=%s (URL %s)."
                    % (item, args, kwargs, self.url))

        return genericLookup
# Example 3
class PhEDEx(dict):
    """
    Emulated PhEDEx service for unit tests.

    Subclasses dict so configuration-style lookups such as self['endpoint']
    keep working on the emulator instance.
    """
    def __init__(self, *args, **kwargs):
        # add the end point to prevent the existence check fails.
        self['endpoint'] = "phedex_emulator"
        self.dataBlocks = DataBlockGenerator()
        self.subRequests = {}

    def injectBlocks(self, node, xmlData, verbose=0, strict=1):
        """
        do nothing don't inject block.
        """

        return None

    def getNodeSE(self, value):
        """Return a fixed dummy storage-element name for any node."""
        return 'dummy.se.from.emulator'

    def subscribe(self, subscription, xmlData):
        """
        Store the subscription information in the object,
        tests can retrieve it and verify it

        Args:
            subscription: object carrying nodes/level/priority/... attributes
            xmlData: PhEDEx data-service XML naming the dataset(s)
        """

        args = {}
        args['node'] = list(subscription.nodes)
        args['data'] = xmlData
        args['level'] = subscription.level
        args['priority'] = subscription.priority
        args['move'] = subscription.move
        args['static'] = subscription.static
        args['custodial'] = subscription.custodial
        args['group'] = subscription.group
        args['request_only'] = subscription.request_only

        # Register the request under every dataset named in the XML.  The
        # original only remembered the last dataset and raised NameError
        # when the document contained no <dataset> elements.
        document = parseString(xmlData)
        for dataset in document.getElementsByTagName("dataset"):
            datasetName = dataset.getAttribute("name")
            self.subRequests.setdefault(datasetName, []).append(args)

        return

    def getReplicaInfoForFiles(self, **args):
        """
        _getReplicaInfoForFiles_
        TODO: Need to be implemented correctly,
        Currently not used

        Retrieve file replica information from PhEDEx.

        block          block name, with '*' wildcards, can be multiple (*).  required when no lfn is specified.
        node           node name, can be multiple (*)
        se             storage element name, can be multiple (*)
        update_since   unix timestamp, only return replicas updated since this
                    time
        create_since   unix timestamp, only return replicas created since this
                    time
        complete       y or n. if y, return only file replicas from complete block
                    replicas.  if n only return file replicas from incomplete block
                    replicas.  default is to return either.
        dist_complete  y or n.  if y, return only file replicas from blocks
                    where all file replicas are available at some node. if
                    n, return only file replicas from blocks which have
                    file replicas not available at any node.  default is
                    to return either.
        subscribed     y or n, filter for subscription. default is to return either.
        custodial      y or n. filter for custodial responsibility.  default is
                    to return either.
        group          group name.  default is to return replicas for any group.
        lfn            logical file nam
        """
        return None

    def getNodeMap(self):
        """
        _getNodeMap_

        Retrieve information about nodes known to this PhEDEx instance.  Each
        node entry will have the following keys:
          name       - PhEDEx node name
          se         - Storage element name
          kind       - Node type, e.g. 'Disk' or 'MSS'
          technology - Node technology, e.g. 'Castor'
          id         - Node id

        Return some MSS, Buffer and Disk nodes
        """

        nodeMappings = {"phedex": {"node": []}}

        nodes = [{"name": "T1_US_FNAL_MSS", "kind": "MSS",
                  "se": "cmssrm.fnal.gov", "technology": "dCache", "id": 1},
                 {"name": "T1_US_FNAL_Buffer", "kind": "Buffer",
                  "se": "cmssrm.fnal.gov", "technology": "dCache", "id": 2},
                 {"name": "T1_UK_RAL_MSS", "kind": "MSS",
                  "se": "srm-cms.gridpp.rl.ac.uk", "technology": "Castor", "id": 3},
                 {"name": "T1_UK_RAL_Buffer", "kind": "Buffer",
                  "se": "srm-cms.gridpp.rl.ac.uk", "technology": "Castor", "id": 4},
                 {"name": "T1_UK_RAL_Disk", "kind": "Disk",
                  "se": "srm-cms-disk.gridpp.rl.ac.uk", "technology": "Disk", "id": 5},
                 {"name": "T2_CH_CERN", "kind": "Disk",
                  "se": "srm-eoscms.cern.ch", "technology": "Disk", "id": 6},
                 {"name": "T3_CO_Uniandes", "kind": "Disk",
                  "se": "moboro.uniandes.edu.co", "technology": "DPM", "id": 7}]
        for node in nodes:
            nodeMappings["phedex"]["node"].append(node)
        return nodeMappings

    def getReplicaInfoForBlocks(self, **args):
        """
        Where are blocks located
        """
        data = {"phedex": {"request_timestamp": 1254762796.13538, "block": []}}
        for block in args['block']:
            blocks = data['phedex']['block']
            files = self.dataBlocks.getFiles(block)
            locations = self.dataBlocks.getLocation(block)
            blocks.append({"files": len(files), "name": block,
                           'replica': [{'node': x + '_MSS'} for x in locations]})
        return data

    def subscriptions(self, **args):
        """
        Where is data subscribed - for now just replicate blockreplicas
        """
        def _blockInfoGenerator(blockList):
            # Fill `data` (closure) with one dataset entry per distinct
            # dataset and one block entry per block in blockList.
            for block in blockList:
                if isinstance(block, dict):
                    block = block['Name']
                dataset = self.dataBlocks.getDatasetName(block)
                # TODO needs to add correct file numbers
                datasetList = data['phedex']['dataset']
                find = False
                for dataItem in datasetList:
                    if dataItem['name'] == dataset:
                        datasetSelected = dataItem
                        find = True
                        break

                if not find:
                    # The original condition `if not datasetList or find:`
                    # duplicated datasets already present and left
                    # datasetSelected unbound when the dataset was missing.
                    data['phedex']['dataset'].append({'name': dataset, 'files': filesInDataset,
                                                      'block': []})
                    datasetSelected = data['phedex']['dataset'][-1]
                subs = []
                subs.append({'node': 'T2_XX_SiteA_MSS', 'custodial': 'n', 'suspend_until': None,
                             'level': 'dataset', 'move': 'n', 'request': '47983',
                             'time_created': '1232989000', 'priority': 'low',
                             'time_update': None, 'node_id': '781',
                             'suspended': 'n', 'group': None})
                datasetSelected['subscription'] = subs

                blocks = datasetSelected['block']
                locations = self.dataBlocks.getLocation(block)

                blocks.append({"bytes": "10438786614",
                               "files": filesInBlock,
                               "is_open": "n",
                               "name": block,
                               "id": "454370",
                               "subscription": [{'node': x + '_MSS', "suspended": "n"} for x in locations]
                              })

        data = {'phedex': {"request_timestamp": 1254850198.15418,
                           'dataset': []}}
        # different structure depending on whether we ask for dataset or blocks
        # NOTE: dict.has_key() was removed in Python 3; use `in` instead.
        if 'dataset' in args and args['dataset']:
            for dataset in args['dataset']:
                blockList = self.dataBlocks.getBlocks(dataset)
                _blockInfoGenerator(blockList)
        elif 'block' in args and args['block']:
            _blockInfoGenerator(args['block'])

        return data

    def getSubscriptionMapping(self, *dataItems, **kwargs):
        """
        Similar basic functionality as self.subscriptions()
        however: dataItems may be a combination of blocks or datasets and
        kwargs is passed to PhEDEx; output is parsed and returned in the form
        { 'dataItem1' : [Node1, Node2] } where dataItem is a block or dataset

        The following cases are handled:
          o Input is a block and subscription is a dataset
          o Input is a block and subscription is a block
          o Input is a dataset and subscription is a dataset

        Not supported:
          o Input is a dataset but only block subscriptions exist
        """
        from collections import defaultdict
        result = defaultdict(set)
        kwargs.setdefault('suspended', 'n')  # require active subscription

        dataItems = list(set(dataItems))  # force unique items

        # Hard to query all at once in one GET call, POST not cacheable
        # hence, query individually - use httplib2 caching to protect service
        for item in dataItems:

            # First query for a dataset level subscription (most common)
            # this returns block level subscriptions also.
            # Rely on httplib2 caching to not resend on every block in dataset
            kwargs['dataset'], kwargs['block'] = [item.split('#')[0]], []
            response = self.subscriptions(**kwargs)['phedex']

            # iterate over response as can't jump to specific datasets
            for dset in response['dataset']:
                if dset['name'] != item.split('#')[0]:
                    continue
                if 'subscription' in dset:
                    # dataset level subscription
                    nodes = [x['node'] for x in dset['subscription']
                             if x['suspended'] == 'n']
                    result[item].update(nodes)

                # if we have a block we must check for block level subscription also
                # combine with original query when can give both dataset and block
                if item.find('#') > -1 and 'block' in dset:
                    for block in dset['block']:
                        if block['name'] == item:
                            nodes = [x['node'] for x in block['subscription']
                                     if x['suspended'] == 'n']
                            result[item].update(nodes)
                            break
        return result

    def emulator(self):
        """Identify this object as the emulator, for diagnostics."""
        return "PhEDEx emulator ...."
# Example 4
class PhEDEx(dict):
    """
    Dict-backed PhEDEx service emulator for tests.

    Stores a fake endpoint so existence checks on the service succeed, and
    records subscription requests so tests can inspect them.
    """
    def __init__(self, *args, **kwargs):
        # add the end point to prevent the existence check fails.
        self['endpoint'] = "phedex_emulator"
        # Subscriptions recorded by subscribe(), keyed by dataset name.
        self.subRequests = {}
        # Source of canned block/file/location data for the emulator.
        self.dataBlocks = DataBlockGenerator()

    def injectBlocks(self, node, xmlData, verbose=0, strict=1):
        """
        Emulated no-op: nothing is injected into PhEDEx.

        Args:
            node: target node name (ignored)
            xmlData: injection XML (ignored)
            verbose: ignored, kept for interface compatibility
            strict: ignored, kept for interface compatibility

        Returns:
            None, unconditionally.
        """
        return None

    def getNodeSE(self, value):
        """Return a fixed dummy storage-element name for any node name."""
        dummySE = 'dummy.se.from.emulator'
        return dummySE

    def subscribe(self, subscription, xmlData):
        """
        Store the subscription information in the object,
        tests can retrieve it and verify it
        """

        args = {}

        args['node'] = []
        for node in subscription.nodes:
            args['node'].append(node)

        document = parseString(xmlData)
        datasets = document.getElementsByTagName("dataset")
        for dataset in datasets:
            datasetName = dataset.getAttribute("name")

        if datasetName not in self.subRequests:
            self.subRequests[datasetName] = []

        args['data'] = xmlData
        args['level'] = subscription.level
        args['priority'] = subscription.priority
        args['move'] = subscription.move
        args['static'] = subscription.static
        args['custodial'] = subscription.custodial
        args['group'] = subscription.group
        args['request_only'] = subscription.request_only

        self.subRequests[datasetName].append(args)

        return

    def getReplicaInfoForFiles(self, **args):
        """
        _getReplicaInfoForFiles_

        Emulated stub for the PhEDEx file-replica lookup.

        TODO: Needs to be implemented correctly; currently not used.

        Accepts the same keyword filters as the real service but ignores
        them all:
            block          block name, '*' wildcards allowed, may be multiple;
                           required when no lfn is given
            node           node name, may be multiple
            se             storage element name, may be multiple
            update_since   unix timestamp; only replicas updated since then
            create_since   unix timestamp; only replicas created since then
            complete       'y'/'n': only file replicas from complete /
                           incomplete block replicas (default: either)
            dist_complete  'y'/'n': only file replicas from blocks fully
                           available at some node / not (default: either)
            subscribed     'y'/'n' subscription filter (default: either)
            custodial      'y'/'n' custodial filter (default: either)
            group          group name (default: any group)
            lfn            logical file name

        Returns:
            None, always.
        """
        return None

    def getNodeMap(self):
        """
        _getNodeMap_

        Return canned information about nodes known to this emulated PhEDEx
        instance.  Each node entry has the keys:
          name       - PhEDEx node name
          se         - Storage element name
          kind       - Node type, e.g. 'Disk' or 'MSS'
          technology - Node technology, e.g. 'Castor'
          id         - Node id

        A mixture of MSS, Buffer and Disk nodes is returned.
        """
        # (name, kind, se, technology) in id order; ids are assigned 1..N.
        canned = [
            ("T1_US_FNAL_MSS", "MSS", "cmssrm.fnal.gov", "dCache"),
            ("T1_US_FNAL_Buffer", "Buffer", "cmssrm.fnal.gov", "dCache"),
            ("T1_UK_RAL_MSS", "MSS", "srm-cms.gridpp.rl.ac.uk", "Castor"),
            ("T1_UK_RAL_Buffer", "Buffer", "srm-cms.gridpp.rl.ac.uk", "Castor"),
            ("T1_UK_RAL_Disk", "Disk", "srm-cms-disk.gridpp.rl.ac.uk", "Disk"),
            ("T2_CH_CERN", "Disk", "srm-eoscms.cern.ch", "Disk"),
            ("T3_CO_Uniandes", "Disk", "moboro.uniandes.edu.co", "DPM"),
        ]
        nodes = [{"name": name,
                  "kind": kind,
                  "se": se,
                  "technology": technology,
                  "id": nodeId}
                 for nodeId, (name, kind, se, technology)
                 in enumerate(canned, 1)]
        return {"phedex": {"node": nodes}}

    def getReplicaInfoForBlocks(self, **args):
        """
        Where are blocks located
        """
        data = {"phedex": {"request_timestamp": 1254762796.13538, "block": []}}
        for block in args['block']:
            blocks = data['phedex']['block']
            files = self.dataBlocks.getFiles(block)
            locations = self.dataBlocks.getLocation(block)
            blocks.append({
                "files": len(files),
                "name": block,
                'replica': [{
                    'node': x + '_MSS'
                } for x in locations]
            })
        return data

    def subscriptions(self, **args):
        """
        Where is data subscribed - for now just replicate blockreplicas.

        Keyword Args:
            dataset: list of dataset names to report on (takes precedence)
            block: list of block names, or dicts carrying a 'Name' key

        Returns:
            dict in PhEDEx JSON form:
            {'phedex': {'request_timestamp': ..., 'dataset': [...]}}
            with one entry per dataset; each entry carries a canned
            dataset-level subscription and one block entry (with
            per-location block-level subscriptions) per requested block.
        """
        data = {
            'phedex': {
                "request_timestamp": 1254850198.15418,
                'dataset': []
            }
        }

        def _blockInfoGenerator(blockList):
            # Accumulate dataset/block info for each block into 'data'.
            for block in blockList:
                if isinstance(block, dict):
                    block = block['Name']
                dataset = self.dataBlocks.getDatasetName(block)

                # Reuse the dataset entry if an earlier block already created
                # it.  BUGFIX: the original test was inverted
                # ("if not datasetList or find"), which appended a duplicate
                # dataset entry when one already existed and left
                # datasetSelected unbound when a second, different dataset
                # was encountered.
                datasetSelected = None
                for dataItem in data['phedex']['dataset']:
                    if dataItem['name'] == dataset:
                        datasetSelected = dataItem
                        break

                if datasetSelected is None:
                    # TODO needs to add correct file numbers
                    # NOTE(review): filesInDataset / filesInBlock are assumed
                    # to be module-level constants defined elsewhere in this
                    # file - confirm.
                    data['phedex']['dataset'].append({
                        'name': dataset,
                        'files': filesInDataset,
                        'block': []
                    })
                    datasetSelected = data['phedex']['dataset'][-1]

                # Canned dataset-level subscription.
                datasetSelected['subscription'] = [{
                    'node': 'T2_XX_SiteA_MSS',
                    'custodial': 'n',
                    'suspend_until': None,
                    'level': 'dataset',
                    'move': 'n',
                    'request': '47983',
                    'time_created': '1232989000',
                    'priority': 'low',
                    'time_update': None,
                    'node_id': '781',
                    'suspended': 'n',
                    'group': None
                }]

                # Canned block entry: one block-level subscription per
                # location reported by the data-block generator.
                locations = self.dataBlocks.getLocation(block)
                datasetSelected['block'].append({
                    "bytes": "10438786614",
                    "files": filesInBlock,
                    "is_open": "n",
                    "name": block,
                    "id": "454370",
                    "subscription": [{'node': x + '_MSS', "suspended": "n"}
                                     for x in locations]
                })

        # different structure depending on whether we ask for dataset or
        # blocks; dict.has_key() was removed in Python 3, so rely on the
        # truthiness of .get() instead.
        if args.get('dataset'):
            for dataset in args['dataset']:
                _blockInfoGenerator(self.dataBlocks.getBlocks(dataset))
        elif args.get('block'):
            _blockInfoGenerator(args['block'])

        return data

    def getSubscriptionMapping(self, *dataItems, **kwargs):
        """
        Similar basic functionality as self.subscriptions()
        however: dataItems may be a combination of blocks or datasets and
        kwargs is passed to PhEDEx; output is parsed and returned in the form
        { 'dataItem1' : [Node1, Node2] } where dataItem is a block or dataset

        The following cases are handled:
          o Input is a block and subscription is a dataset
          o Input is a block and subscription is a block
          o Input is a dataset and subscription is a dataset

        Not supported:
          o Input is a dataset but only block subscriptions exist
        """
        from collections import defaultdict
        result = defaultdict(set)
        kwargs.setdefault('suspended', 'n')  # require active subscription

        dataItems = list(set(dataItems))  # force unique items

        # Hard to query all at once in one GET call, POST not cacheable
        # hence, query individually - use httplib2 caching to protect service
        for item in dataItems:

            # First query for a dataset level subscription (most common)
            # this returns block level subscriptions also.
            # Rely on httplib2 caching to not resend on every block in dataset
            kwargs['dataset'], kwargs['block'] = [item.split('#')[0]], []
            response = self.subscriptions(**kwargs)['phedex']

            # iterate over response as can't jump to specific datasets
            for dset in response['dataset']:
                if dset['name'] != item.split('#')[0]:
                    continue
                if dset.has_key('subscription'):
                    # dataset level subscription
                    nodes = [
                        x['node'] for x in dset['subscription']
                        if x['suspended'] == 'n'
                    ]
                    result[item].update(nodes)

                #if we have a block we must check for block level subscription also
                # combine with original query when can give both dataset and block
                if item.find('#') > -1 and dset.has_key('block'):
                    for block in dset['block']:
                        if block['name'] == item:
                            nodes = [
                                x['node'] for x in block['subscription']
                                if x['suspended'] == 'n'
                            ]
                            result[item].update(nodes)
                            break
        return result

    def emulator(self):
        """Identify this stand-in object as the PhEDEx emulator."""
        return "PhEDEx emulator ...."