def __init__(self, *args, **kwargs):
    # Add the endpoint to prevent the existence check from failing.
    self['endpoint'] = "phedex_emulator"
    self.dataBlocks = DataBlockGenerator()
    self.subRequests = {}
    self.deletionRequests = {}
    self.deletionRequestId = 0
def listFiles(self, datasetPath, retriveList):
    # NOTE: the 'retriveList' spelling mirrors the DBS API signature being emulated.
    res = []
    dbg = DataBlockGenerator()
    for block in dbg.getBlocks(datasetPath):
        files = dbg.getFiles(block['Name'])
        for f in files:
            f['Block'] = block
            res.append(f)
    return res
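# Usage sketch (not part of the original source): flattening a dataset into
# per-file records with listFiles(). The host emulator object and the dataset
# path are hypothetical stand-ins; 'LogicalFileName' and 'Name' are keys the
# block generator populates.
#
#     reader = SomeDBSEmulator()
#     for f in reader.listFiles('/Primary/Processed-v1/TIER', retriveList=[]):
#         print(f['LogicalFileName'], f['Block']['Name'])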
def __init__(self, dict=None, responseType="json", logger=None,
             dbsUrl='https://cmsweb.cern.ch/dbs/prod/global/DBSReader'):
    print("Using MockPhEDExApi")
    self.dbsUrl = dbsUrl
    dict = dict or {}
    self.dataBlocks = DataBlockGenerator()
    self.subRequests = {}
class DBSReader:
    """
    Mock up dbs access
    """
    def __init__(self, *args, **kwargs):
        print("Using DBS Emulator ...")
        self.dataBlocks = DataBlockGenerator()

    def getFileBlocksInfo(self, dataset, onlyClosedBlocks=True):
        """Fake block info"""
        return self.dataBlocks.getBlocks(dataset)

    def listFileBlockLocation(self, block):
        """Fake locations"""
        return self.dataBlocks.getLocation(block)

    def listFilesInBlock(self, block):
        """Fake files"""
        return self.dataBlocks.getFiles(block)

    def getFileBlock(self, block):
        """Return block + locations"""
        result = {block: {"StorageElements": self.listFileBlockLocation(block),
                          "Files": self.listFilesInBlock(block),
                          "IsOpen": False,
                          }
                  }
        return result

    def getDatasetInfo(self, dataset):
        """Dataset summary"""
        result = {}
        result['number_of_events'] = sum([x['NumberOfEvents'] for x in self.dataBlocks.getBlocks(dataset)])
        result['number_of_files'] = sum([x['NumberOfFiles'] for x in self.dataBlocks.getBlocks(dataset)])
        result['path'] = dataset
        return result
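# Usage sketch (not part of the original source): a minimal check of the mock,
# assuming DataBlockGenerator fabricates blocks for any dataset path; the path
# below is hypothetical.
def _demoGetDatasetInfo():
    reader = DBSReader()
    summary = reader.getDatasetInfo('/Primary/Processed-v1/TIER')
    # number_of_events / number_of_files are sums over the generated blocks
    print(summary['number_of_events'], summary['number_of_files'], summary['path'])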
class DBSReader:
    """
    Mock up dbs access
    """
    def __init__(self, url, **contact):
        self.dataBlocks = DataBlockGenerator()
        args = {"url": url, "level": 'ERROR', "version": 'DBS_2_0_9'}
        self.dbs = _MockDBSApi(args)

    def getFileBlocksInfo(self, dataset, onlyClosedBlocks=True, blockName='*', locations=True):
        """Fake block info"""
        blocks = [x for x in self.dataBlocks.getBlocks(dataset)
                  if x['Name'] == blockName or blockName == '*']
        if locations:
            for block in blocks:
                block['StorageElementList'] = [{'Role': '', 'Name': x} for x in
                                               self.listFileBlockLocation(block['Name'])]
        return blocks

    def listFileBlockLocation(self, block):
        """Fake locations"""
        return self.dataBlocks.getLocation(block)

    def listFilesInBlock(self, block):
        """Fake files"""
        return self.dataBlocks.getFiles(block)

    def listFilesInBlockWithParents(self, block):
        return self.dataBlocks.getFiles(block, True)

    def getFileBlock(self, block):
        """Return block + locations"""
        result = {block: {"StorageElements": self.listFileBlockLocation(block),
                          "Files": self.listFilesInBlock(block),
                          "IsOpen": False,
                          }
                  }
        return result

    def getFileBlockWithParents(self, fileBlockName):
        """
        _getFileBlockWithParents_

        return a dictionary:
        { blockName: {
            "StorageElements" : [<se list>],
            "Files" : dictionaries representing each file
            }
        }
        """
        result = {fileBlockName: {"StorageElements": self.listFileBlockLocation(fileBlockName),
                                  "Files": self.listFilesInBlockWithParents(fileBlockName),
                                  "IsOpen": False,
                                  }
                  }
        return result

    def listRuns(self, dataset=None, block=None):
        def getRunsFromBlock(b):
            results = []
            for x in self.dataBlocks.getFiles(b):
                results.extend([y['RunNumber'] for y in x['LumiList']])
            return results

        if block:
            return getRunsFromBlock(block)
        if dataset:
            runs = []
            for block in self.dataBlocks.getBlocks(dataset):
                runs.extend(getRunsFromBlock(block['Name']))
            return runs
        return None

    def getDBSSummaryInfo(self, dataset=None, block=None):
        """Dataset summary"""
        def getLumisectionsInBlock(b):
            lumis = set()
            for fileInfo in self.dataBlocks.getFiles(b):
                for x in fileInfo['LumiList']:
                    lumis.add(x['LumiSectionNumber'])
            return lumis

        result = {}
        if block:
            result['NumberOfEvents'] = sum([x['NumberOfEvents'] for x in self.dataBlocks.getFiles(block)])
            result['NumberOfFiles'] = len(self.dataBlocks.getFiles(block))
            result['NumberOfLumis'] = len(getLumisectionsInBlock(block))
            result['path'] = dataset
        if dataset:
            if self.dataBlocks.getBlocks(dataset):
                result['NumberOfEvents'] = sum([x['NumberOfEvents'] for x in self.dataBlocks.getBlocks(dataset)])
                result['NumberOfFiles'] = sum([x['NumberOfFiles'] for x in self.dataBlocks.getBlocks(dataset)])
                lumis = set()
                for b in self.dataBlocks.getBlocks(dataset):
                    lumis = lumis.union(getLumisectionsInBlock(b['Name']))
                result['NumberOfLumis'] = len(lumis)
                result['path'] = dataset
        return result

    def listBlockParents(self, block):
        return self.dataBlocks.getParentBlock(block, 1)
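# Usage sketch (not part of the original source): shows the StorageElementList
# decoration added when locations=True. The URL and dataset path are
# hypothetical.
def _demoBlockLocations():
    reader = DBSReader('http://localhost/dbs')
    for blockInfo in reader.getFileBlocksInfo('/Primary/Processed-v1/TIER'):
        seNames = [se['Name'] for se in blockInfo['StorageElementList']]
        print(blockInfo['Name'], seNames)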
class MockPhEDExApi(object):
    """
    Version of Services/PhEDEx intended to be used with mock or unittest.mock
    """

    def __init__(self, dict=None, responseType="json", logger=None,
                 dbsUrl='https://cmsweb.cern.ch/dbs/prod/global/DBSReader'):
        print("Using MockPhEDExApi")
        self.dbsUrl = dbsUrl
        dict = dict or {}
        self.dataBlocks = DataBlockGenerator()
        self.subRequests = {}

    def sitesByBlock(self, block):
        """
        Centralize the algorithm to decide where a block is, based on the hash
        of the block name.

        Args:
            block: the name of the block

        Returns:
            sites: a fake list of sites where the data is
        """
        if hash(block) % 3 == 0:
            sites = ['T2_XX_SiteA']
        elif hash(block) % 3 == 1:
            sites = ['T2_XX_SiteA', 'T2_XX_SiteB']
        else:
            sites = ['T2_XX_SiteA', 'T2_XX_SiteB', 'T2_XX_SiteC']
        return sites

    def getReplicaPhEDExNodesForBlocks(self, block=None, dataset=None, complete='y'):
        """
        Args:
            block: the name of the block
            dataset: the name of the dataset
            complete: ??

        Returns:
            a fake list of blocks and the fake sites they are at
        """
        if isinstance(dataset, list):
            dataset = dataset[0]  # Dataset is a list in these tests
        if dataset:
            # TODO: Generalize this and maybe move dataset detection into sitesByBlock
            if dataset == PILEUP_DATASET:
                return {'%s#0fcb2b12-d27e-11e0-91b1-003048caaace' % dataset:
                        ['T2_XX_SiteA', 'T2_XX_SiteB', 'T2_XX_SiteC']}
            else:
                try:
                    DBS3Reader(PROD_DBS).checkDatasetPath(dataset)
                    blocks = DBS3Reader(PROD_DBS).dbs.listBlocks(dataset=dataset)
                    singleBlock = blocks[0]['block_name']
                    return {singleBlock: self.sitesByBlock(singleBlock)}
                except DBSReaderError:
                    return {'%s#0fcb2b12-d27e-11e0-91b1-003048caaace' % dataset: []}

        replicas = {}
        for oneBlock in block:
            if oneBlock.split('#')[0] == PILEUP_DATASET:
                # Pileup is at a single site
                sites = ['T2_XX_SiteC']
                _BLOCK_LOCATIONS[oneBlock] = sites
            else:
                sites = self.sitesByBlock(block=oneBlock)
                _BLOCK_LOCATIONS[oneBlock] = sites
            replicas.update({oneBlock: sites})
        return replicas

    def getReplicaInfoForBlocks(self, **args):
        """
        Where are blocks located
        """
        data = {"phedex": {"request_timestamp": 1254762796.13538, "block": []}}
        for block in args['block']:
            blocks = data['phedex']['block']
            sites = self.sitesByBlock(block=block)
            blocks.append({'files': 1, 'name': block,
                           'replica': [{'node': x} for x in sites]})
        return data

    def getSubscriptionMapping(self, *dataItems, **kwargs):
        """
        Fake version of the existing PhEDEx method
        """
        dataItems = list(set(dataItems))  # force unique items
        locationMap = {}
        for dataItem in dataItems:
            sites = self.sitesByBlock(block=dataItem)
            locationMap.update({dataItem: sites})
        return locationMap

    def getNodeMap(self):
        nodeMappings = {"phedex": {"node": []}}
        nodes = [{"name": "T1_US_FNAL_MSS", "kind": "MSS",
                  "se": "cmssrm.fnal.gov", "technology": "dCache", "id": 1},
                 {"name": "T1_US_FNAL_Buffer", "kind": "Buffer",
                  "se": "cmssrm.fnal.gov", "technology": "dCache", "id": 2},
                 {"name": "T0_CH_CERN_MSS", "kind": "MSS",
                  "se": "srm-cms.cern.ch", "technology": "Castor", "id": 3},
                 {"name": "T0_CH_CERN_Buffer", "kind": "Buffer",
                  "se": "srm-cms.cern.ch", "technology": "Castor", "id": 4},
                 {"name": "T1_UK_RAL_MSS", "kind": "MSS",
                  "se": "srm-cms.gridpp.rl.ac.uk", "technology": "Castor", "id": 5},
                 {"name": "T1_UK_RAL_Buffer", "kind": "Buffer",
                  "se": "srm-cms.gridpp.rl.ac.uk", "technology": "Castor", "id": 6},
                 {"name": "T1_UK_RAL_Disk", "kind": "Disk",
                  "se": "srm-cms-disk.gridpp.rl.ac.uk", "technology": "Disk", "id": 7},
                 {"name": "T2_CH_CERN", "kind": "Disk",
                  "se": "srm-eoscms.cern.ch", "technology": "Disk", "id": 8},
                 {"name": "T3_CO_Uniandes", "kind": "Disk",
                  "se": "moboro.uniandes.edu.co", "technology": "DPM", "id": 9}]
        for node in nodes:
            nodeMappings["phedex"]["node"].append(node)
        return nodeMappings

    def subscriptions(self, **args):
        """
        Where is data subscribed - for now just replicate blockreplicas
        """
        def _blockInfoGenerator(blockList):
            for block in blockList:
                if isinstance(block, dict):
                    block = block['Name']
                dataset = self.dataBlocks.getDatasetName(block)
                # TODO needs to add correct file numbers
                datasetList = data['phedex']['dataset']
                find = False
                for dataItem in datasetList:
                    if dataItem['name'] == dataset:
                        datasetSelected = dataItem
                        find = True
                        break
                if not find:
                    data['phedex']['dataset'].append({'name': dataset,
                                                      'files': FILES_PER_DATASET,
                                                      'block': []})
                    datasetSelected = data['phedex']['dataset'][-1]

                subs = []
                subs.append({'node': 'T2_XX_SiteA_MSS', 'custodial': 'n',
                             'suspend_until': None, 'level': 'dataset',
                             'move': 'n', 'request': '47983',
                             'time_created': '1232989000', 'priority': 'low',
                             'time_update': None, 'node_id': '781',
                             'suspended': 'n', 'group': None})
                if dataset in self.subRequests:
                    subs.extend(self.subRequests[dataset])
                datasetSelected['subscription'] = subs
                # Drop block-level entries from the dataset-level listing;
                # iterate over a copy so in-place removal is safe.
                for sub in subs[:]:
                    if sub['level'] == 'block':
                        subs.remove(sub)

                blocks = datasetSelected['block']
                locations = self.dataBlocks.getLocation(block)
                blocks.append({"bytes": "10438786614", "files": FILES_PER_BLOCK,
                               "is_open": "n", "name": block, "id": "454370",
                               "subscription": [{'node': x + '_MSS', "suspended": "n"}
                                                for x in locations]})

        data = {'phedex': {"request_timestamp": 1254850198.15418, 'dataset': []}}
        # Different structure depending on whether we ask for dataset or blocks
        if 'dataset' in args and args['dataset']:
            blockList = self.dataBlocks.getBlocks(args['dataset'])
            _blockInfoGenerator(blockList)
        elif 'block' in args and args['block']:
            _blockInfoGenerator(args['block'])
        elif 'group' in args and args['group']:
            blockList = self.dataBlocks.getBlocks('/a/b-%s/c' % args['group'])
            _blockInfoGenerator(blockList)
        return data

    def getRequestList(self, **kwargs):
        """
        _getRequestList_

        Emulated request list; for now it does nothing.
        """
        goldenResponse = {"phedex": {"request": [],
                                     "request_timestamp": 1368636296.94707,
                                     "request_version": "2.3.15-comp",
                                     "request_call": "requestlist",
                                     "call_time": 0.34183,
                                     "request_date": "2013-05-15 16:44:56 UTC"}}
        return goldenResponse

    def __getattr__(self, item):
        """
        __getattr__ gets called in case lookup of the actual method fails.
        We use this to return data based on a lookup table.

        :param item: The method name the user is trying to call
        :return: The generic lookup function
        """
        def genericLookup(*args, **kwargs):
            """
            This function returns the mocked PhEDEx data.

            :param args: positional arguments it was called with
            :param kwargs: named arguments it was called with
            :return: the dictionary that PhEDEx would have returned
            """
            if kwargs:
                signature = '%s:%s' % (item, sorted(kwargs.items()))
            else:
                signature = item
            try:
                if MOCK_DATA[self.url][signature] == 'Raises HTTPError':
                    raise HTTPError
                else:
                    return MOCK_DATA[self.url][signature]
            except KeyError:
                raise KeyError("PhEDEx mock API could not return data for method %s, "
                               "args=%s, and kwargs=%s (URL %s)." %
                               (item, args, kwargs, self.url))

        return genericLookup
class PhEDEx(dict):
    """
    PhEDEx emulator
    """
    def __init__(self, *args, **kwargs):
        # Add the endpoint to prevent the existence check from failing.
        self['endpoint'] = "phedex_emulator"
        self.dataBlocks = DataBlockGenerator()
        self.subRequests = {}

    def injectBlocks(self, node, xmlData, verbose=0, strict=1):
        """
        Do nothing; don't inject the block.
        """
        return None

    def getNodeSE(self, value):
        return 'dummy.se.from.emulator'

    def subscribe(self, subscription, xmlData):
        """
        Store the subscription information in the object;
        tests can retrieve it and verify it.
        """
        args = {}
        args['node'] = []
        for node in subscription.nodes:
            args['node'].append(node)

        document = parseString(xmlData)
        datasets = document.getElementsByTagName("dataset")
        for dataset in datasets:
            datasetName = dataset.getAttribute("name")
            if datasetName not in self.subRequests:
                self.subRequests[datasetName] = []

        args['data'] = xmlData
        args['level'] = subscription.level
        args['priority'] = subscription.priority
        args['move'] = subscription.move
        args['static'] = subscription.static
        args['custodial'] = subscription.custodial
        args['group'] = subscription.group
        args['request_only'] = subscription.request_only

        self.subRequests[datasetName].append(args)
        return

    def getReplicaInfoForFiles(self, **args):
        """
        _getReplicaInfoForFiles_

        TODO: Needs to be implemented correctly; currently not used.

        Retrieve file replica information from PhEDEx.

        block          block name, with '*' wildcards, can be multiple (*).
                       required when no lfn is specified.
        node           node name, can be multiple (*)
        se             storage element name, can be multiple (*)
        update_since   unix timestamp, only return replicas updated since this time
        create_since   unix timestamp, only return replicas created since this time
        complete       y or n. if y, return only file replicas from complete block
                       replicas. if n, only return file replicas from incomplete
                       block replicas. default is to return either.
        dist_complete  y or n. if y, return only file replicas from blocks where
                       all file replicas are available at some node. if n, return
                       only file replicas from blocks which have file replicas not
                       available at any node. default is to return either.
        subscribed     y or n, filter for subscription. default is to return either.
        custodial      y or n. filter for custodial responsibility. default is to
                       return either.
        group          group name. default is to return replicas for any group.
        lfn            logical file name
        """
        return None

    def getNodeMap(self):
        """
        _getNodeMap_

        Retrieve information about nodes known to this PhEDEx instance.
        Each node entry will have the following keys:
          name       - PhEDEx node name
          se         - Storage element name
          kind       - Node type, e.g. 'Disk' or 'MSS'
          technology - Node technology, e.g. 'Castor'
          id         - Node id

        Return some MSS, Buffer and Disk nodes.
        """
        nodeMappings = {"phedex": {"node": []}}
        nodes = [{"name": "T1_US_FNAL_MSS", "kind": "MSS",
                  "se": "cmssrm.fnal.gov", "technology": "dCache", "id": 1},
                 {"name": "T1_US_FNAL_Buffer", "kind": "Buffer",
                  "se": "cmssrm.fnal.gov", "technology": "dCache", "id": 2},
                 {"name": "T1_UK_RAL_MSS", "kind": "MSS",
                  "se": "srm-cms.gridpp.rl.ac.uk", "technology": "Castor", "id": 3},
                 {"name": "T1_UK_RAL_Buffer", "kind": "Buffer",
                  "se": "srm-cms.gridpp.rl.ac.uk", "technology": "Castor", "id": 4},
                 {"name": "T1_UK_RAL_Disk", "kind": "Disk",
                  "se": "srm-cms-disk.gridpp.rl.ac.uk", "technology": "Disk", "id": 5},
                 {"name": "T2_CH_CERN", "kind": "Disk",
                  "se": "srm-eoscms.cern.ch", "technology": "Disk", "id": 6},
                 {"name": "T3_CO_Uniandes", "kind": "Disk",
                  "se": "moboro.uniandes.edu.co", "technology": "DPM", "id": 7}]
        nodeMappings["phedex"]["node"].extend(nodes)
        return nodeMappings

    def getReplicaInfoForBlocks(self, **args):
        """
        Where are blocks located
        """
        data = {"phedex": {"request_timestamp": 1254762796.13538, "block": []}}
        for block in args['block']:
            blocks = data['phedex']['block']
            files = self.dataBlocks.getFiles(block)
            locations = self.dataBlocks.getLocation(block)
            blocks.append({"files": len(files), "name": block,
                           'replica': [{'node': x + '_MSS'} for x in locations]})
        return data

    def subscriptions(self, **args):
        """
        Where is data subscribed - for now just replicate blockreplicas
        """
        def _blockInfoGenerator(blockList):
            for block in blockList:
                if isinstance(block, dict):
                    block = block['Name']
                dataset = self.dataBlocks.getDatasetName(block)
                # TODO needs to add correct file numbers
                datasetList = data['phedex']['dataset']
                find = False
                for dataItem in datasetList:
                    if dataItem['name'] == dataset:
                        datasetSelected = dataItem
                        find = True
                        break
                if not find:  # fixed: was 'if not datasetList or find:'
                    # filesInDataset is assumed to be a module-level constant
                    # of this emulator.
                    data['phedex']['dataset'].append({'name': dataset,
                                                      'files': filesInDataset,
                                                      'block': []})
                    datasetSelected = data['phedex']['dataset'][-1]

                subs = []
                subs.append({'node': 'T2_XX_SiteA_MSS', 'custodial': 'n',
                             'suspend_until': None, 'level': 'dataset',
                             'move': 'n', 'request': '47983',
                             'time_created': '1232989000', 'priority': 'low',
                             'time_update': None, 'node_id': '781',
                             'suspended': 'n', 'group': None})
                datasetSelected['subscription'] = subs

                blocks = datasetSelected['block']
                locations = self.dataBlocks.getLocation(block)
                # filesInBlock is assumed to be a module-level constant of this emulator.
                blocks.append({"bytes": "10438786614", "files": filesInBlock,
                               "is_open": "n", "name": block, "id": "454370",
                               "subscription": [{'node': x + '_MSS', "suspended": "n"}
                                                for x in locations]})

        data = {'phedex': {"request_timestamp": 1254850198.15418, 'dataset': []}}
        # Different structure depending on whether we ask for dataset or blocks
        if 'dataset' in args and args['dataset']:
            for dataset in args['dataset']:
                blockList = self.dataBlocks.getBlocks(dataset)
                _blockInfoGenerator(blockList)
        elif 'block' in args and args['block']:
            _blockInfoGenerator(args['block'])
        return data

    def getSubscriptionMapping(self, *dataItems, **kwargs):
        """
        Similar basic functionality as self.subscriptions(), however:
        dataItems may be a combination of blocks or datasets, and kwargs is
        passed to PhEDEx; output is parsed and returned in the form
        { 'dataItem1' : [Node1, Node2] } where dataItem is a block or dataset.

        The following cases are handled:
          o Input is a block and subscription is a dataset
          o Input is a block and subscription is a block
          o Input is a dataset and subscription is a dataset

        Not supported:
          o Input is a dataset but only block subscriptions exist
        """
        from collections import defaultdict
        result = defaultdict(set)
        kwargs.setdefault('suspended', 'n')  # require active subscription
        dataItems = list(set(dataItems))  # force unique items

        # Hard to query all at once in one GET call, and POST is not cacheable;
        # hence, query individually and rely on httplib2 caching to protect the
        # service.
        for item in dataItems:
            # First query for a dataset-level subscription (most common);
            # this returns block-level subscriptions also.
            kwargs['dataset'], kwargs['block'] = [item.split('#')[0]], []
            response = self.subscriptions(**kwargs)['phedex']

            # Iterate over the response, as we can't jump to specific datasets
            for dset in response['dataset']:
                if dset['name'] != item.split('#')[0]:
                    continue
                if 'subscription' in dset:
                    # dataset-level subscription
                    nodes = [x['node'] for x in dset['subscription']
                             if x['suspended'] == 'n']
                    result[item].update(nodes)
                # If we have a block, we must check for a block-level
                # subscription also; combine with the original query when we
                # can give both dataset and block.
                if item.find('#') > -1 and 'block' in dset:
                    for block in dset['block']:
                        if block['name'] == item:
                            nodes = [x['node'] for x in block['subscription']
                                     if x['suspended'] == 'n']
                            result[item].update(nodes)
                            break
        return result

    def emulator(self):
        return "PhEDEx emulator ...."
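# Usage sketch (not part of the original source): subscribe() only records what
# it was given, so a test can inspect subRequests afterwards. SimpleNamespace
# stands in for the real subscription object; the XML payload and dataset name
# are hypothetical.
def _demoSubscribe():
    from types import SimpleNamespace
    phedex = PhEDEx()
    sub = SimpleNamespace(nodes=['T2_XX_SiteA'], level='dataset', priority='low',
                          move='n', static='n', custodial='n', group=None,
                          request_only='y')
    xml = '<data><dbs><dataset name="/Primary/Processed-v1/TIER"/></dbs></data>'
    phedex.subscribe(sub, xml)
    print(phedex.subRequests['/Primary/Processed-v1/TIER'][0]['node'])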
def __init__(self, url, **contact):
    self.dataBlocks = DataBlockGenerator()
    args = {"url": url, "level": 'ERROR', "version": 'DBS_2_0_9'}
    self.dbs = _MockDBSApi(args)
class DBSReader:
    """
    Mock up dbs access
    """
    def __init__(self, url, **contact):
        self.dataBlocks = DataBlockGenerator()
        args = {"url": url, "level": 'ERROR', "version": 'DBS_2_0_9'}
        self.dbs = _MockDBSApi(args)

    def getFileBlocksInfo(self, dataset, onlyClosedBlocks=True, blockName='*', locations=True):
        """Fake block info"""
        blocks = [x for x in self.dataBlocks.getBlocks(dataset)
                  if x['Name'] == blockName or blockName == '*']
        if not blocks:
            # Weird error handling follows, this is what dbs does:
            # If block specified, return [], else raise DbsBadRequest error
            if blockName != '*':
                return []
            else:
                raise DBSReaderError('DbsBadRequest: DBS Server Raised An Error')
        if locations:
            for block in blocks:
                block['StorageElementList'] = [{'Role': '', 'Name': x} for x in
                                               self.listFileBlockLocation(block['Name'])]
        return blocks

    def lfnsInBlock(self, fileBlockName):
        """
        _lfnsInBlock_

        Get a fake list of LFNs for the block
        """
        files = self.listFilesInBlock(fileBlockName)
        return [x['LogicalFileName'] for x in files]

    def listFileBlocks(self, dataset, onlyClosedBlocks=False, blockName='*'):
        """Get fake block names"""
        return [x['Name'] for x in self.getFileBlocksInfo(dataset, onlyClosedBlocks=False,
                                                          blockName=blockName,
                                                          locations=False)]

    def listOpenFileBlocks(self, dataset):
        """
        _listOpenFileBlocks_

        Retrieve a list of open fileblock names for a dataset
        """
        return [x['Name'] for x in self.getFileBlocksInfo(dataset, onlyClosedBlocks=False,
                                                          locations=False)
                if str(x['OpenForWriting']) == '1']

    def listFileBlockLocation(self, block):
        """Fake locations"""
        return self.dataBlocks.getLocation(block)

    def listFilesInBlock(self, fileBlockName):
        """Fake files"""
        return self.dataBlocks.getFiles(fileBlockName)

    def listFilesInBlockWithParents(self, block):
        return self.dataBlocks.getFiles(block, True)

    def getFileBlock(self, block):
        """Return block + locations"""
        result = {block: {"StorageElements": self.listFileBlockLocation(block),
                          "Files": self.listFilesInBlock(block),
                          "IsOpen": self.dataBlocks._openForWriting(),
                          }
                  }
        return result

    def getFileBlockWithParents(self, fileBlockName):
        """
        _getFileBlockWithParents_

        return a dictionary:
        { blockName: {
            "StorageElements" : [<se list>],
            "Files" : dictionaries representing each file
            }
        }
        """
        result = {fileBlockName: {"StorageElements": self.listFileBlockLocation(fileBlockName),
                                  "Files": self.listFilesInBlockWithParents(fileBlockName),
                                  "IsOpen": self.dataBlocks._openForWriting(),
                                  }
                  }
        return result

    def listRuns(self, dataset=None, block=None):
        def getRunsFromBlock(b):
            results = set()
            for x in self.dataBlocks.getFiles(b):
                results = results.union([y['RunNumber'] for y in x['LumiList']])
            return list(results)

        if block:
            return getRunsFromBlock(block)
        if dataset:
            runs = set()
            for block in self.dataBlocks.getBlocks(dataset):
                runs = runs.union(getRunsFromBlock(block['Name']))
            return list(runs)
        return None

    def listRunLumis(self, dataset=None, block=None):
        def getRunsFromBlock(b):
            results = {}
            for x in self.dataBlocks.getFiles(b):
                for y in x['LumiList']:
                    if y['RunNumber'] not in results:
                        results[y['RunNumber']] = 0
                    results[y['RunNumber']] += 1
            return results

        if block:
            return getRunsFromBlock(block)
        if dataset:
            runs = {}
            for block in self.dataBlocks.getBlocks(dataset):
                updateRuns = getRunsFromBlock(block['Name'])
                for run in updateRuns:
                    if run not in runs:
                        runs[run] = 0
                    runs[run] += updateRuns[run]
            return runs
        return None

    def getDBSSummaryInfo(self, dataset=None, block=None):
        """Dataset summary"""
        def getLumisectionsInBlock(b):
            lumis = set()
            for fileInfo in self.dataBlocks.getFiles(b):
                for x in fileInfo['LumiList']:
                    lumis.add(x['LumiSectionNumber'])
            return lumis

        result = {}
        if block:
            result['NumberOfEvents'] = str(sum([x['NumberOfEvents'] for x in self.dataBlocks.getFiles(block)]))
            result['NumberOfFiles'] = str(len(self.dataBlocks.getFiles(block)))
            result['NumberOfLumis'] = str(len(getLumisectionsInBlock(block)))
            result['path'] = dataset
            result['block'] = block
            result['OpenForWriting'] = '1' if self.dataBlocks._openForWriting() else '0'
        if dataset:
            if self.dataBlocks.getBlocks(dataset):
                result['NumberOfEvents'] = str(sum([x['NumberOfEvents'] for x in self.dataBlocks.getBlocks(dataset)]))
                result['NumberOfFiles'] = str(sum([x['NumberOfFiles'] for x in self.dataBlocks.getBlocks(dataset)]))
                lumis = set()
                for b in self.dataBlocks.getBlocks(dataset):
                    lumis = lumis.union(getLumisectionsInBlock(b['Name']))
                result['NumberOfLumis'] = str(len(lumis))
                result['path'] = dataset
        # Weird error handling follows, this is what dbs does
        if not result:
            raise DBSReaderError('DbsConnectionError: Database exception,Invalid parameters')
        return result

    def listBlockParents(self, block):
        return self.dataBlocks.getParentBlock(block, 1)

    def listDatasetLocation(self, dataset):
        """
        _listDatasetLocation_

        List the SEs where there is at least a block of the given dataset.
        """
        blocks = self.getFileBlocksInfo(dataset, onlyClosedBlocks=False,
                                        blockName='*', locations=True)
        result = set()
        for block in blocks:
            result |= set([x['Name'] for x in block['StorageElementList']])
        return list(result)
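# Usage sketch (not part of the original source): this variant returns counts
# as strings, mirroring the real DBS response. The URL and dataset path are
# hypothetical.
def _demoDatasetSummary():
    reader = DBSReader('http://localhost/dbs')
    summary = reader.getDBSSummaryInfo(dataset='/Primary/Processed-v1/TIER')
    # keys: NumberOfEvents, NumberOfFiles, NumberOfLumis (all strings), path
    print(summary)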
def __init__(self, acct, hostUrl=None, authUrl=None, configDict=None):
    print("Using MockRucioApi: acct={}, url={}, authUrl={}".format(acct, hostUrl, authUrl))
    configDict = configDict or {}
    self.dataBlocks = DataBlockGenerator()
    self.subRequests = {}
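# Usage sketch (not part of the original source): the mock only needs an
# account name; host/auth URLs may stay None. The account string below is
# hypothetical.
#
#     rucio = MockRucioApi('wmcore_testing')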
def __init__(self, dict=None, responseType="json", secure=True):
    dict = dict or {}
    self.dataBlocks = DataBlockGenerator()
    self.subRequests = {}
class PhEDEx:
    """
    PhEDEx emulator
    """
    def __init__(self, *args, **kwargs):
        print("Using PhEDEx Emulator ....")
        self.dataBlocks = DataBlockGenerator()

    def getReplicaInfoForBlocks(self, **args):
        """
        Where are blocks located
        """
        data = {"phedex": {"request_timestamp": 1254762796.13538, "block": []}}
        for block in args['block']:
            blocks = data['phedex']['block']
            files = self.dataBlocks.getFiles(block)
            locations = self.dataBlocks.getLocation(block)
            blocks.append({"files": len(files), "name": block,
                           'replica': [{'se': x} for x in locations]})
        return data

    def subscriptions(self, **args):
        """
        Where is data subscribed - for now just replicate blockreplicas
        """
        data = {'phedex': {"request_timestamp": 1254850198.15418, 'dataset': []}}
        # Different structure depending on whether we ask for dataset or blocks
        if 'dataset' in args and args['dataset']:
            for dataset in args['dataset']:
                # TODO needs to add correct file numbers
                data['phedex']['dataset'].append({'name': dataset, 'files': 5,
                                                  'subscription': []})
                subs = data['phedex']['dataset'][-1]['subscription']
                # FIXME: Take from self.locations
                subs.append({'node': 'SiteA', 'custodial': 'n', 'suspend_until': None,
                             'level': 'dataset', 'move': 'n', 'request': '47983',
                             'time_created': '1232989000', 'priority': 'low',
                             'time_update': None, 'node_id': '781',
                             'suspended': 'n', 'group': None})
            return data
        elif 'block' in args and args['block']:
            for block in args['block']:
                dataset = self.dataBlocks.getDataset(block)  # fixed: was the literal string 'block'
                # TODO needs to add correct file numbers
                data['phedex']['dataset'].append({'name': dataset, 'files': 5,
                                                  'block': []})
                blocks = data['phedex']['dataset'][-1]['block']
                locations = self.dataBlocks.getLocation(block)
                blocks.append({"bytes": "10438786614", "files": "5", "is_open": "n",
                               "name": block,  # fixed: was args['block'], the whole list
                               "id": "454370",
                               "subscription": [{'node': x} for x in locations]})
            return data

    def emulator(self):
        return "PhEDEx emulator ...."
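# Usage sketch (not part of the original source): this older emulator reports
# replicas under an 'se' key rather than 'node'. The block name below is
# hypothetical.
def _demoOldReplicaInfo():
    phedex = PhEDEx()
    data = phedex.getReplicaInfoForBlocks(block=['/Primary/Processed-v1/TIER#abc-123'])
    for blockEntry in data['phedex']['block']:
        print(blockEntry['name'], [r['se'] for r in blockEntry['replica']])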
class DBSReader:
    """
    Mock up dbs access
    """
    def __init__(self, url, **contact):
        self.dataBlocks = DataBlockGenerator()
        args = {"url": url, "level": 'ERROR', "version": 'DBS_2_0_9'}
        self.dbs = _MockDBSApi(args)

    def getFileBlocksInfo(self, dataset, onlyClosedBlocks=True, blockName='*', locations=True):
        """Fake block info"""
        blocks = [x for x in self.dataBlocks.getBlocks(dataset)
                  if x['Name'] == blockName or blockName == '*']
        if not blocks:
            # Weird error handling follows, this is what dbs does:
            # If block specified, return [], else raise DbsBadRequest error
            if blockName != '*':
                return []
            else:
                raise DBSReaderError('DbsBadRequest: DBS Server Raised An Error')
        if locations:
            for block in blocks:
                block['PhEDExNodeList'] = [{'Role': '', 'Name': x} for x in
                                           self.listFileBlockLocation(block['Name'])]
        return blocks

    def lfnsInBlock(self, fileBlockName):
        """
        _lfnsInBlock_

        Get a fake list of LFNs for the block
        """
        files = self.listFilesInBlock(fileBlockName)
        return [x['LogicalFileName'] for x in files]

    def listFileBlocks(self, dataset, onlyClosedBlocks=False, blockName='*'):
        """Get fake block names"""
        return [x['Name'] for x in self.getFileBlocksInfo(dataset, onlyClosedBlocks=False,
                                                          blockName=blockName,
                                                          locations=False)]

    def listOpenFileBlocks(self, dataset):
        """
        _listOpenFileBlocks_

        Retrieve a list of open fileblock names for a dataset
        """
        return [x['Name'] for x in self.getFileBlocksInfo(dataset, onlyClosedBlocks=False,
                                                          locations=False)
                if str(x['OpenForWriting']) == '1']

    def listFileBlockLocation(self, block):
        """Fake locations"""
        return self.dataBlocks.getLocation(block)

    def listFilesInBlock(self, fileBlockName):
        """Fake files"""
        return self.dataBlocks.getFiles(fileBlockName)

    def listFilesInBlockWithParents(self, block):
        return self.dataBlocks.getFiles(block, True)

    def getFileBlock(self, block):
        """Return block + locations"""
        result = {block: {"PhEDExNodeNames": self.listFileBlockLocation(block),
                          "Files": self.listFilesInBlock(block),
                          "IsOpen": self.dataBlocks._openForWriting(),
                          }
                  }
        return result

    def getFileBlockWithParents(self, fileBlockName):
        """
        _getFileBlockWithParents_

        return a dictionary:
        { blockName: {
            "PhEDExNodeNames" : [<pnn list>],
            "Files" : dictionaries representing each file
            }
        }
        """
        result = {fileBlockName: {"PhEDExNodeNames": self.listFileBlockLocation(fileBlockName),
                                  "Files": self.listFilesInBlockWithParents(fileBlockName),
                                  "IsOpen": self.dataBlocks._openForWriting(),
                                  }
                  }
        return result

    def listRuns(self, dataset=None, block=None):
        def getRunsFromBlock(b):
            results = set()
            for x in self.dataBlocks.getFiles(b):
                results = results.union([y['RunNumber'] for y in x['LumiList']])
            return list(results)

        if block:
            return getRunsFromBlock(block)
        if dataset:
            runs = set()
            for block in self.dataBlocks.getBlocks(dataset):
                runs = runs.union(getRunsFromBlock(block['Name']))
            return list(runs)
        return None

    def listRunLumis(self, dataset=None, block=None):
        def getRunsFromBlock(b):
            results = {}
            for x in self.dataBlocks.getFiles(b):
                for y in x['LumiList']:
                    if y['RunNumber'] not in results:
                        results[y['RunNumber']] = 0
                    results[y['RunNumber']] = None  # To match DBS3
            return results

        if block:
            return getRunsFromBlock(block)
        if dataset:
            runs = {}
            for block in self.dataBlocks.getBlocks(dataset):
                updateRuns = getRunsFromBlock(block['Name'])
                for run in updateRuns:
                    if run not in runs:
                        runs[run] = 0
                    runs[run] = None  # To match DBS3
            return runs
        return None

    def getDBSSummaryInfo(self, dataset=None, block=None):
        """Dataset summary"""
        def getLumisectionsInBlock(b):
            lumis = 0
            for fileInfo in self.dataBlocks.getFiles(b):
                for x in fileInfo['LumiList']:
                    lumis += len(x['LumiSectionNumber'])  # fixed: was 'lumis =+ ...'
            return lumis

        result = {}
        if block:
            result['NumberOfEvents'] = str(sum([x['NumberOfEvents'] for x in self.dataBlocks.getFiles(block)]))
            result['NumberOfFiles'] = str(len(self.dataBlocks.getFiles(block)))
            result['NumberOfLumis'] = str(getLumisectionsInBlock(block))
            result['path'] = dataset
            result['block'] = block
            result['OpenForWriting'] = '1' if self.dataBlocks._openForWriting() else '0'
        if dataset:
            if self.dataBlocks.getBlocks(dataset):
                result['NumberOfEvents'] = str(sum([x['NumberOfEvents'] for x in self.dataBlocks.getBlocks(dataset)]))
                result['NumberOfFiles'] = str(sum([x['NumberOfFiles'] for x in self.dataBlocks.getBlocks(dataset)]))
                lumis = 0
                for b in self.dataBlocks.getBlocks(dataset):
                    lumis += b['NumberOfLumis']
                result['NumberOfLumis'] = str(lumis)
                result['path'] = dataset
        # Weird error handling follows, this is what dbs does
        if not result:
            raise DBSReaderError('DbsConnectionError: Database exception,Invalid parameters')
        return result

    def listBlockParents(self, block):
        return self.dataBlocks.getParentBlock(block, 1)

    def listDatasetLocation(self, dataset):
        """
        _listDatasetLocation_

        List the SEs where there is at least a block of the given dataset.
        """
        blocks = self.getFileBlocksInfo(dataset, onlyClosedBlocks=False,
                                        blockName='*', locations=True)
        result = set()
        for block in blocks:
            result |= set([x['Name'] for x in block['PhEDExNodeList']])
        return list(result)

    def getFileListByDataset(self, dataset, detail=True):
        return self.dbs.listFileArray(dataset)
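# Usage sketch (not part of the original source): in this variant
# listRunLumis() maps every run to None to match the DBS3 response shape. The
# URL and dataset path are hypothetical.
def _demoRunLumisShape():
    reader = DBSReader('http://localhost/dbs')
    runs = reader.listRunLumis(dataset='/Primary/Processed-v1/TIER')
    assert all(v is None for v in runs.values())
    print(sorted(runs))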
class DBSReader:
    """
    Mock up dbs access
    """
    def __init__(self, url, **contact):
        self.dataBlocks = DataBlockGenerator()
        args = {"url": url, "level": 'ERROR', "version": 'DBS_2_0_9'}
        self.dbs = _MockDBSApi(args)

    def getFileBlocksInfo(self, dataset, onlyClosedBlocks=True, blockName='*', locations=True):
        """Fake block info"""
        blocks = [x for x in self.dataBlocks.getBlocks(dataset)
                  if x['Name'] == blockName or blockName == '*']
        if not blocks:
            # Weird error handling follows, this is what dbs does:
            # If block specified, return [], else raise DbsBadRequest error
            if blockName != '*':
                return []
            else:
                raise DBSReaderError('DbsBadRequest: DBS Server Raised An Error')
        if locations:
            for block in blocks:
                block['StorageElementList'] = [{'Role': '', 'Name': x} for x in
                                               self.listFileBlockLocation(block['Name'])]
        return blocks

    def listFileBlocks(self, dataset, onlyClosedBlocks=False, blockName='*'):
        """Get fake block names"""
        return [x['Name'] for x in self.getFileBlocksInfo(dataset, onlyClosedBlocks=False,
                                                          blockName=blockName,
                                                          locations=False)]

    def listFileBlockLocation(self, block):
        """Fake locations"""
        return self.dataBlocks.getLocation(block)

    def listFilesInBlock(self, block):
        """Fake files"""
        return self.dataBlocks.getFiles(block)

    def listFilesInBlockWithParents(self, block):
        return self.dataBlocks.getFiles(block, True)

    def getFileBlock(self, block):
        """Return block + locations"""
        result = {block: {"StorageElements": self.listFileBlockLocation(block),
                          "Files": self.listFilesInBlock(block),
                          "IsOpen": False,
                          }
                  }
        return result

    def getFileBlockWithParents(self, fileBlockName):
        """
        _getFileBlockWithParents_

        return a dictionary:
        { blockName: {
            "StorageElements" : [<se list>],
            "Files" : dictionaries representing each file
            }
        }
        """
        result = {fileBlockName: {"StorageElements": self.listFileBlockLocation(fileBlockName),
                                  "Files": self.listFilesInBlockWithParents(fileBlockName),
                                  "IsOpen": False,
                                  }
                  }
        return result

    def listRuns(self, dataset=None, block=None):
        def getRunsFromBlock(b):
            results = []
            for x in self.dataBlocks.getFiles(b):
                results.extend([y['RunNumber'] for y in x['LumiList']])
            return results

        if block:
            return getRunsFromBlock(block)
        if dataset:
            runs = []
            for block in self.dataBlocks.getBlocks(dataset):
                runs.extend(getRunsFromBlock(block['Name']))
            return runs
        return None

    def getDBSSummaryInfo(self, dataset=None, block=None):
        """Dataset summary"""
        def getLumisectionsInBlock(b):
            lumis = set()
            for fileInfo in self.dataBlocks.getFiles(b):
                for x in fileInfo['LumiList']:
                    lumis.add(x['LumiSectionNumber'])
            return lumis

        result = {}
        if block:
            result['NumberOfEvents'] = sum([x['NumberOfEvents'] for x in self.dataBlocks.getFiles(block)])
            result['NumberOfFiles'] = len(self.dataBlocks.getFiles(block))
            result['NumberOfLumis'] = len(getLumisectionsInBlock(block))
            result['path'] = dataset
            result['block'] = block
        if dataset:
            if self.dataBlocks.getBlocks(dataset):
                result['NumberOfEvents'] = sum([x['NumberOfEvents'] for x in self.dataBlocks.getBlocks(dataset)])
                result['NumberOfFiles'] = sum([x['NumberOfFiles'] for x in self.dataBlocks.getBlocks(dataset)])
                lumis = set()
                for b in self.dataBlocks.getBlocks(dataset):
                    lumis = lumis.union(getLumisectionsInBlock(b['Name']))
                result['NumberOfLumis'] = len(lumis)
                result['path'] = dataset
        # Weird error handling follows, this is what dbs does
        if not result:
            raise DBSReaderError('DbsConnectionError: Database exception,Invalid parameters')
        return result

    def listBlockParents(self, block):
        return self.dataBlocks.getParentBlock(block, 1)
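# Usage sketch (not part of the original source): mirrors the "weird" DBS error
# handling in getFileBlocksInfo(): a block filter that matches nothing returns
# [], whereas DBSReaderError is raised only when no blocks exist for the
# default blockName='*'. The URL and names below are hypothetical.
def _demoBlockNameFilter():
    reader = DBSReader('http://localhost/dbs')
    empty = reader.getFileBlocksInfo('/Primary/Processed-v1/TIER',
                                     blockName='/Primary/Processed-v1/TIER#no-such-block')
    print(empty)  # -> []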