def getFileBlocksInfo(self, dataset, onlyClosedBlocks=False, blockName=None, locations=True):
    """
    Return the detailed block records of a dataset.

    :param dataset: dataset path, validated through checkDatasetPath
    :param onlyClosedBlocks: when true, drop blocks whose 'OpenForWriting' is "1"
    :param blockName: optional block name forwarded to DBS as block_name
    :param locations: when true, attach a 'PhEDExNodeList' entry to every block
    :return: list of block dictionaries as produced by remapDBS3Keys
    :raises DBSReaderError: if the DBS call fails, or if nothing matched and
        no blockName was given
    """
    self.checkDatasetPath(dataset)

    query = {'dataset': dataset, 'detail': True}
    if blockName:
        query['block_name'] = blockName

    try:
        rawBlocks = self.dbs.listBlocks(**query)
    except Exception as ex:
        msg = "Error in DBSReader.getFileBlocksInfo(%s)\n" % dataset
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    blocks = []
    for rawBlock in rawBlocks:
        blocks.append(remapDBS3Keys(rawBlock, stringify=True, block_name='Name'))

    # only raise if blockName not specified - mimic dbs2 error handling
    if not (blocks or blockName):
        raise DBSReaderError("DBSReader.getFileBlocksInfo(%s, %s): No matching data" % (dataset, blockName))

    if locations:
        for block in blocks:
            nodeNames = self.listFileBlockLocation(block['Name'])
            block['PhEDExNodeList'] = [{'Name': nodeName} for nodeName in nodeNames]

    if not onlyClosedBlocks:
        return blocks
    return [block for block in blocks if str(block['OpenForWriting']) != "1"]
def listFilesInBlock(self, fileBlockName, lumis=True, validFileOnly=1):
    """
    _listFilesInBlock_

    List the files of the named block, each passed through remapDBS3Keys.

    TODO: lumis can be false when lumi splitting is not required.
    However WMBSHelper expects file['LumiList'] to get the run number, so for
    now it is always true. Clean this up when dbs2 is completely deprecated;
    calling lumis just for the run number is expensive.

    :param fileBlockName: name of the block to query
    :param lumis: when true, add a "LumiList" entry to every file
    :param validFileOnly: forwarded to DBS and to _getLumiList
    :return: list of remapped file dictionaries
    :raises DBSReaderError: if the block does not exist or the DBS call fails
    """
    if not self.blockExists(fileBlockName):
        raise DBSReaderError("DBSReader.listFilesInBlock(%s): No matching data" % fileBlockName)

    try:
        dbsFiles = self.dbs.listFileArray(block_name=fileBlockName,
                                          validFileOnly=validFileOnly,
                                          detail=True)
    except dbsClientException as ex:
        msg = "Error in DBSReader.listFilesInBlock(%s)\n" % fileBlockName
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    # The lumi lookup is only performed when the caller asked for lumis.
    lumisByLFN = None
    if lumis:
        lumisByLFN = self._getLumiList(blockName=fileBlockName, validFileOnly=validFileOnly)

    remapped = []
    for dbsFile in dbsFiles:
        if lumis:
            dbsFile["LumiList"] = lumisByLFN[dbsFile['logical_file_name']]
        remapped.append(remapDBS3Keys(dbsFile, stringify=True))
    return remapped
def getDBSSummaryInfo(self, dataset=None, block=None):
    """
    Get the file summary of a block or of a dataset.

    :param dataset: dataset path; validated with checkDatasetPath when given
    :param block: block name; when set the summary is requested for the
        block, otherwise for the dataset
    :return: the first summary record passed through remapDBS3Keys, extended
        with 'path' (the dataset or '') and 'block' (the block or '')
    :raises DBSReaderError: if the DBS call fails, or if the summary is empty
        or its first record has no 'file_size'
    """
    # FIXME: Doesnt raise exceptions on missing data as old api did
    if dataset:
        self.checkDatasetPath(dataset)
    try:
        if block:
            summary = self.dbs.listFileSummaries(block_name=block, validFileOnly=1)
        else:
            # dataset case: dataset shouldn't be None
            summary = self.dbs.listFileSummaries(dataset=dataset, validFileOnly=1)
    except Exception as ex:
        msg = "Error in DBSReader.getDBSSummaryInfo(%s, %s)\n" % (dataset, block)
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    # A missing 'file_size' appears to indicate a missing dataset.
    if not summary or summary[0].get('file_size') is None:
        # Report this method's own name so the error can be traced to its origin.
        msg = "DBSReader.getDBSSummaryInfo(%s, %s): No matching data"
        raise DBSReaderError(msg % (dataset, block))

    result = remapDBS3Keys(summary[0], stringify=True)
    result['path'] = dataset if dataset else ''
    result['block'] = block if block else ''
    return result
def listFilesInBlockWithParents(self, fileBlockName, lumis=True):
    """
    _listFilesInBlockWithParents_

    Get the files of the named block, each extended with a "ParentList"
    holding the remapped records of its parent files.

    TODO: lumis can be false when lumi splitting is not required.
    However WMBSHelper expects file['LumiList'] to get the run number, so for
    now it is always true.

    :param fileBlockName: name of the block to query
    :param lumis: when true, add "LumiList" to the files and to their parents
    :return: the list returned by listFilesInBlock, with "ParentList" added
    :raises DBSReaderError: if the block does not exist or the initial DBS
        queries raise dbsClientException
    """
    if not self.blockExists(fileBlockName):
        msg = "DBSReader.listFilesInBlockWithParents(%s): No matching data"
        raise DBSReaderError(msg % fileBlockName)

    try:
        # TODO: shoud we get only valid block for this?
        files = self.dbs.listFileParents(block_name=fileBlockName)
        fileDetails = self.listFilesInBlock(fileBlockName, lumis)
    except dbsClientException as ex:
        msg = "Error in "
        msg += "DBSReader.listFilesInBlockWithParents(%s)\n" % (fileBlockName,)
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    childByParents = defaultdict(list)
    for f in files:
        # Probably a child can have more than 1 parent file
        for fp in f['parent_logical_file_name']:
            childByParents[fp].append(f['logical_file_name'])

    # Snapshot the parent LFNs as a real list: on Python 3 dict.keys() is a
    # view that cannot be sliced, and childByParents (a defaultdict) may gain
    # keys when it is indexed further down.
    parentsLFNs = list(childByParents)

    parentFilesDetail = []
    # TODO: slicing parentLFNs until the DBS api is handling that.
    # Remove slicing if the DBS api handles it.
    for pLFNs in slicedIterator(parentsLFNs, 50):
        parentFilesDetail.extend(self.dbs.listFileArray(logical_file_name=pLFNs, detail=True))

    if lumis:
        parentLumis = self._getLumiList(lfns=parentsLFNs)

    parentsByLFN = defaultdict(list)
    for pf in parentFilesDetail:
        parentLFN = pf['logical_file_name']
        dbsFile = remapDBS3Keys(pf, stringify=True)
        if lumis:
            dbsFile["LumiList"] = parentLumis[parentLFN]

        # Attach this parent record to every child that references it.
        for childLFN in childByParents[parentLFN]:
            parentsByLFN[childLFN].append(dbsFile)

    for fileInfo in fileDetails:
        fileInfo["ParentList"] = parentsByLFN[fileInfo['logical_file_name']]

    return fileDetails
def genericLookup(self, *args, **kwargs):
    """
    This function returns the mocked DBS data

    The lookup key is self.item alone when no keyword arguments are given,
    otherwise '<item>:<sorted list of (name, value) pairs>'.

    :param args: positional arguments it was called with (only reported in
        the error message)
    :param kwargs: named arguments it was called with
    :return: the dictionary that DBS would have returned
    :raises DBSReaderError: if self.url has no entry in mockData
    :raises HTTPError: if the stored value is the marker 'Raises HTTPError'
    :raises KeyError: if no data is stored for the computed signature
    """
    if self.url not in mockData:
        raise DBSReaderError("Mock DBS emulator knows nothing about instance %s" % self.url)

    if kwargs:
        # items() works on both Python 2 and 3; sorted() yields the same
        # list of pairs that iteritems() did.
        signature = '%s:%s' % (self.item, sorted(kwargs.items()))
    else:
        signature = self.item

    try:
        if mockData[self.url][signature] == 'Raises HTTPError':
            raise HTTPError('http:/dbs.mock.fail', 400,
                            'MockDBS is raising an exception in place of DBS',
                            'Dummy header', 'Dummy body')
        else:
            return mockData[self.url][signature]
    except KeyError:
        raise KeyError("DBS mock API could not return data for method %s, args=%s, and kwargs=%s (URL %s)." %
                       (self.item, args, kwargs, self.url))
class DBS3Reader:
    """
    _DBSReader_

    General API for reading data from DBS
    """

    def __init__(self, url, **contact):
        """
        Create the DBS and DLS client objects.

        :param url: DBS url handed to DbsApi
        :param contact: extra keyword arguments forwarded to DbsApi
        :raises DBSReaderError: if DbsApi raises DbsException or the DLS
            client raises DlsApiError
        """
        # instantiate dbs api object
        try:
            self.dbs = DbsApi(url, **contact)
        except DbsException as ex:
            msg = "Error in DBSReader with DbsApi\n"
            msg += "%s\n" % formatEx(ex)
            raise DBSReaderError(msg)

        # setup DLS api
        dlsType = 'DLS_TYPE_PHEDEX'
        dlsUrl = 'https://cmsweb.cern.ch/phedex/datasvc/xml/prod'
        try:
            self.dls = dlsClient.getDlsApi(dls_type=dlsType, dls_endpoint=dlsUrl, version='')
        except DlsApiError as ex:
            msg = "Error in DBSReader with DlsApi\n"
            msg += "%s\n" % str(ex)
            raise DBSReaderError(msg)
def _getLumiList(self, blockName=None, lfns=None, validFileOnly=1):
    """
    Build a mapping of logical file name to its run/lumi entries.

    currently only take one lfn but dbs api need be updated

    :param blockName: block to query; takes precedence over lfns
    :param lfns: logical file names, queried in groups of 50 via grouper
    :param validFileOnly: forwarded to DBS for the block query only
    :return: dict mapping each logical file name to a list of
        {"RunNumber": ..., "LumiSectionNumber": ...} dictionaries; an empty
        dict when neither blockName nor lfns is given
    :raises DBSReaderError: if DBS raises dbsClientException
    """
    try:
        if blockName:
            lumiLists = self.dbs.listFileLumis(block_name=blockName, validFileOnly=validFileOnly)
        elif lfns:
            lumiLists = []
            for slfn in grouper(lfns, 50):
                lumiLists.extend(self.dbs.listFileLumiArray(logical_file_name=slfn))
        else:
            # Nothing to query: without this branch lumiLists would be
            # unbound below and the call would fail with UnboundLocalError.
            return {}
    except dbsClientException as ex:
        msg = "Error in "
        msg += "DBSReader.listFileLumiArray(%s)\n" % lfns
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    lumiDict = {}
    for lumisItem in lumiLists:
        item = {}
        item["RunNumber"] = lumisItem['run_num']
        item['LumiSectionNumber'] = lumisItem['lumi_section_num']
        lumiDict.setdefault(lumisItem['logical_file_name'], []).append(item)
    return lumiDict
def _getLumiList(self, blockName=None, lfns=None, validFileOnly=1):
    """
    Collect run/lumi information per logical file name.

    currently only take one lfn but dbs api need be updated

    :param blockName: block to query; checked before lfns
    :param lfns: logical file names, queried in groups of 50 via grouper
    :param validFileOnly: forwarded to DBS for the block query only
    :return: dict mapping logical file name to a list of dictionaries with
        "RunNumber", "LumiSectionNumber" and, when DBS supplies a non-None
        event_count, "EventCount"
    :raises DBSReaderError: if DBS raises dbsClientException
    """
    try:
        if blockName:
            records = self.dbs.listFileLumis(block_name=blockName, validFileOnly=validFileOnly)
        elif lfns:
            records = []
            for lfnGroup in grouper(lfns, 50):
                records.extend(self.dbs.listFileLumiArray(logical_file_name=lfnGroup))
        else:
            # shouldn't call this with both blockName and lfns empty
            # but still returns empty dict for that case
            return {}
    except dbsClientException as ex:
        msg = "Error in "
        msg += "DBSReader.listFileLumiArray(%s)\n" % lfns
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    lumisByFile = {}
    for record in records:
        perFile = lumisByFile.setdefault(record['logical_file_name'], [])
        entry = {"RunNumber": record['run_num'],
                 'LumiSectionNumber': record['lumi_section_num']}
        eventCount = record.get('event_count', None)
        if eventCount is not None:
            entry['EventCount'] = eventCount
        perFile.append(entry)
    # TODO: add key for lumi and event pair.
    return lumisByFile
def listFilesInBlockWithParents(self, fileBlockName):
    """
    _listFilesInBlockWithParents_

    Get a list of files in the named fileblock including
    the parents of that file.

    :param fileBlockName: name of the block passed to dbs.listFiles
    :raises DBSReaderError: if the DBS call raises DbsException

    NOTE(review): the visible definition ends after the query, so the
    fetched list is not returned here - confirm against the full file.
    """
    try:
        files = self.dbs.listFiles(
            "",  # path
            "",  # primary
            "",  # processed
            [],  # tier_list
            "",  # analysisDataset
            fileBlockName,
            details="True",
            retriveList=['retrive_parent'])
    # "as" form is valid on Python 2.6+ and Python 3, unlike "except X, ex".
    except DbsException as ex:
        msg = "Error in "
        msg += "DBSReader.listFilesInBlockWithParents(%s)\n" % (fileBlockName,)
        msg += "%s\n" % formatEx(ex)
        raise DBSReaderError(msg)
def getFileBlock(self, fileBlockName):
    """
    _getFileBlock_

    Describe one block as a single-entry dictionary:

    { blockName: { "StorageElements" : <listFileBlockLocation result>,
                   "Files" : <listFilesInBlock result>,
                   "IsOpen" : <blockIsOpen result> } }

    :param fileBlockName: name of the block to describe
    :raises DBSReaderError: if blockExists reports the block as missing
    """
    if self.blockExists(fileBlockName):
        storageElements = self.listFileBlockLocation(fileBlockName)
        blockFiles = self.listFilesInBlock(fileBlockName)
        openFlag = self.blockIsOpen(fileBlockName)
        details = {"StorageElements": storageElements,
                   "Files": blockFiles,
                   "IsOpen": openFlag}
        return {fileBlockName: details}

    raise DBSReaderError("DBSReader.getFileBlock(%s): No matching data" % fileBlockName)
def getDBSSummaryInfo(self, dataset=None, block=None):
    """
    Get the file summary of a block or of a dataset.

    :param dataset: dataset path; validated with checkDatasetPath when given
    :param block: block name; when set the summary is requested for the
        block, otherwise for the dataset
    :return: {} when DBS returns no summary; otherwise the first summary
        record passed through remapDBS3Keys, extended with 'path' and 'block'
    :raises DBSReaderError: if the DBS call fails
    """
    if dataset:
        self.checkDatasetPath(dataset)

    # The block takes precedence over the dataset when both are given.
    if block:
        query = {'block_name': block, 'validFileOnly': 1}
    else:
        query = {'dataset': dataset, 'validFileOnly': 1}

    try:
        summaries = self.dbs.listFileSummaries(**query)
    except Exception as ex:
        msg = "Error in DBSReader.getDBSSummaryInfo(%s, %s)\n" % (dataset, block)
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    if not summaries:
        # missing data or all files invalid
        return {}

    info = remapDBS3Keys(summaries[0], stringify=True)
    info['path'] = dataset or ''
    info['block'] = block or ''
    return info
def genericLookup(self, *args, **kwargs):
    """
    This function returns the mocked DBS data

    Keyword values are first passed through encodeUnicodeToBytesConditional
    (element by element for lists and tuples) with condition=PY2; the lookup
    key is then self.item, or '<item>:<sorted keyword pairs>' when keyword
    arguments are present.

    :param args: positional arguments it was called with
    :param kwargs: named arguments it was called with
    :return: the dictionary that DBS would have returned
    :raises DBSReaderError: if self.url has no entry in mockData
    :raises HTTPError: if the stored value is the marker 'Raises HTTPError'
    :raises KeyError: if no data is stored for the computed signature
    """
    if self.url not in mockData:
        raise DBSReaderError("Mock DBS emulator knows nothing about instance %s" % self.url)

    if not kwargs:
        signature = self.item
    else:
        for name, value in list(kwargs.items()):
            if isinstance(value, (list, tuple)):
                kwargs[name] = [encodeUnicodeToBytesConditional(element, condition=PY2)
                                for element in value]
            else:
                kwargs[name] = encodeUnicodeToBytesConditional(value, condition=PY2)
        signature = '%s:%s' % (self.item, sorted(viewitems(kwargs)))

    try:
        if mockData[self.url][signature] == 'Raises HTTPError':
            raise HTTPError('http:/dbs.mock.fail', 400,
                            'MockDBS is raising an exception in place of DBS',
                            'Dummy header', 'Dummy body')
        return mockData[self.url][signature]
    except KeyError:
        details = (self.item, args, kwargs, self.url, signature)
        raise KeyError("DBS mock API could not return data for method %s, args=%s, and kwargs=%s (URL %s) (Signature: %s)" % details)
def getFileBlock(self, fileBlockName, dbsOnly=False):
    """
    _getFileBlock_

    dbsOnly flag is mostly meant for StoreResults, since there is no
    data in TMDB.

    Describe one block as a single-entry dictionary:

    { blockName: { "PhEDExNodeNames" : <listFileBlockLocation result>,
                   "Files" : <listFilesInBlock result>,
                   "IsOpen" : <blockIsOpen result> } }

    :param fileBlockName: name of the block to describe
    :param dbsOnly: forwarded to listFileBlockLocation
    :raises DBSReaderError: if blockExists reports the block as missing
    """
    # Pointless code in python3
    if isinstance(fileBlockName, str):
        fileBlockName = unicode(fileBlockName)

    if self.blockExists(fileBlockName):
        nodeNames = self.listFileBlockLocation(fileBlockName, dbsOnly)
        blockFiles = self.listFilesInBlock(fileBlockName)
        openFlag = self.blockIsOpen(fileBlockName)
        details = {"PhEDExNodeNames": nodeNames,
                   "Files": blockFiles,
                   "IsOpen": openFlag}
        return {fileBlockName: details}

    raise DBSReaderError("DBSReader.getFileBlock(%s): No matching data" % fileBlockName)
def listRunLumis(self, dataset=None, block=None):
    """
    It gets a list of DBSRun objects and returns the number of lumisections per run

    DbsRun (RunNumber,
            NumberOfEvents,
            NumberOfLumiSections,
            TotalLuminosity,
            StoreNumber,
            StartOfRungetLong,
            EndOfRun,
            CreationDate,
            CreatedBy,
            LastModificationDate,
            LastModifiedBy
            )

    :param dataset: dataset passed to dbs.listRuns when no block is given
    :param block: block passed to dbs.listRuns; takes precedence over dataset
    :raises DBSReaderError: if the DBS call raises DbsException

    NOTE(review): the visible definition ends after the query, so nothing
    is returned here - confirm against the full file.
    """
    try:
        if block:
            results = self.dbs.listRuns(block=block)
        else:
            results = self.dbs.listRuns(dataset=dataset)
    # "as" form is valid on Python 2.6+ and Python 3, unlike "except X, ex".
    except DbsException as ex:
        msg = "Error in DBSReader.listRuns(%s, %s)\n" % (dataset, block)
        msg += "%s\n" % formatEx(ex)
        raise DBSReaderError(msg)
def __init__(self, url, **contact):
    """
    Create the DBS client and pick the matching DLS client.

    :param url: DBS url; also used as the DLS endpoint for non-global instances
    :param contact: extra settings merged over the default DbsApi arguments
        ("url", "level" = 'ERROR', "version" = '')
    :raises DBSReaderError: if the DLS client raises DlsApiError
    """
    args = {"url": url, "level": 'ERROR', "version": ''}
    args.update(contact)
    # The DbsException wrapper around this call was commented out in the
    # original, so DbsApi errors propagate unchanged.
    self.dbs = DbsApi(args)

    # setup DLS api - with either dbs or phedex depending on dbs instance
    if url.count('cmsdbsprod.cern.ch/cms_dbs_prod_global') or \
            self.dbs.getServerInfo()['InstanceName'] == 'GLOBAL':
        dlsType = 'DLS_TYPE_PHEDEX'
        dlsUrl = 'https://cmsweb.cern.ch/phedex/datasvc/xml/prod'
    else:
        dlsType = 'DLS_TYPE_DBS'
        dlsUrl = url

    try:
        self.dls = dlsClient.getDlsApi(dls_type=dlsType, dls_endpoint=dlsUrl, version=args['version'])
    # "as" form is valid on Python 2.6+ and Python 3, unlike "except X, ex".
    except DlsApiError as ex:
        msg = "Error in DBSReader with DlsApi\n"
        msg += "%s\n" % str(ex)
        raise DBSReaderError(msg)
def getFileBlock(self, fileBlockName):
    """
    _getFileBlock_

    Describe one block as a single-entry dictionary:

    { blockName: { "PhEDExNodeNames" : <listFileBlockLocation result>,
                   "Files" : <listFilesInBlock result>,
                   "IsOpen" : <blockIsOpen result> } }

    :param fileBlockName: name of the block to describe
    :raises DBSReaderError: if blockExists reports the block as missing
    """
    # Pointless code in python3
    if isinstance(fileBlockName, str):
        fileBlockName = unicode(fileBlockName)

    if self.blockExists(fileBlockName):
        nodeNames = self.listFileBlockLocation(fileBlockName)
        blockFiles = self.listFilesInBlock(fileBlockName)
        openFlag = self.blockIsOpen(fileBlockName)
        details = {"PhEDExNodeNames": nodeNames,
                   "Files": blockFiles,
                   "IsOpen": openFlag}
        return {fileBlockName: details}

    raise DBSReaderError("DBSReader.getFileBlock(%s): No matching data" % fileBlockName)
def getFileBlockWithParents(self, fileBlockName):
    """
    _getFileBlockWithParents_

    Describe one block, with parentage-aware file records, as a
    single-entry dictionary:

    { blockName: { "StorageElements" : <listFileBlockLocation result>,
                   "Files" : <listFilesInBlockWithParents result>,
                   "IsOpen" : <blockIsOpen result> } }

    :param fileBlockName: name of the block to describe
    :raises DBSReaderError: if blockExists reports the block as missing
    """
    if self.blockExists(fileBlockName):
        storageElements = self.listFileBlockLocation(fileBlockName)
        filesWithParents = self.listFilesInBlockWithParents(fileBlockName)
        openFlag = self.blockIsOpen(fileBlockName)
        details = {"StorageElements": storageElements,
                   "Files": filesWithParents,
                   "IsOpen": openFlag}
        return {fileBlockName: details}

    raise DBSReaderError("DBSReader.getFileBlockWithParents(%s): No matching data" % fileBlockName)
def listFileBlocks(self, dataset, onlyClosedBlocks=False, blockName=None):
    """
    _listFileBlocks_

    Retrieve the block names of a dataset.

    :param dataset: dataset path, validated through checkDatasetPath
    :param onlyClosedBlocks: when true, request detailed records and drop
        blocks whose 'open_for_writing' is "1"
    :param blockName: optional block name forwarded to DBS as block_name
    :return: list of block names
    :raises DBSReaderError: if DBS raises dbsClientException
    """
    self.checkDatasetPath(dataset)

    # Detailed records are only needed to inspect open_for_writing.
    query = {'dataset': dataset, 'detail': bool(onlyClosedBlocks)}
    if blockName:
        query['block_name'] = blockName

    try:
        blocks = self.dbs.listBlocks(**query)
    except dbsClientException as ex:
        msg = "Error in DBSReader.listFileBlocks(%s)\n" % dataset
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    if not onlyClosedBlocks:
        return [entry['block_name'] for entry in blocks]

    closedNames = []
    for entry in blocks:
        if str(entry['open_for_writing']) != "1":
            closedNames.append(entry['block_name'])
    return closedNames
def listDatasetLocation(self, datasetName):
    """
    _listDatasetLocation_

    List the origin SEs where there is at least a block of the given
    dataset.

    :param datasetName: dataset path, validated through checkDatasetPath
    :return: list of distinct origin site names, without 'UNKNOWN' and None;
        the order is not defined
    :raises DBSReaderError: if DBS raises dbsClientException
    """
    self.checkDatasetPath(datasetName)

    try:
        blocksInfo = self.dbs.listBlockOrigin(dataset=datasetName)
    except dbsClientException as ex:
        msg = "Error in DBSReader: dbsApi.listBlockOrigin(dataset=%s)\n" % datasetName
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    if not blocksInfo:  # no data location from dbs
        return list()

    locations = set()
    for blockInfo in blocksInfo:
        origin = blockInfo['origin_site_name']
        if isinstance(origin, (list, tuple, set, frozenset)):
            locations.update(origin)
        else:
            # A single site name must be added whole: set.update() on a
            # string would insert its individual characters, and on None
            # it would raise TypeError.
            locations.add(origin)

    locations.difference_update(['UNKNOWN', None])  # remove entry when SE name is 'UNKNOWN'
    return list(locations)
def getFileBlockWithParents(self, fileBlockName):
    """
    _getFileBlockWithParents_

    Describe one block, with parentage-aware file records, as a
    single-entry dictionary:

    { blockName: { "PhEDExNodeNames" : <listFileBlockLocation result>,
                   "Files" : <listFilesInBlockWithParents result>,
                   "IsOpen" : <blockIsOpen result> } }

    :param fileBlockName: name of the block to describe
    :raises DBSReaderError: if blockExists reports the block as missing
    """
    if isinstance(fileBlockName, str):
        fileBlockName = unicode(fileBlockName)

    if self.blockExists(fileBlockName):
        nodeNames = self.listFileBlockLocation(fileBlockName)
        filesWithParents = self.listFilesInBlockWithParents(fileBlockName)
        openFlag = self.blockIsOpen(fileBlockName)
        details = {"PhEDExNodeNames": nodeNames,
                   "Files": filesWithParents,
                   "IsOpen": openFlag}
        return {fileBlockName: details}

    raise DBSReaderError("DBSReader.getFileBlockWithParents(%s): No matching data" % fileBlockName)
def listRuns(self, dataset=None, block=None):
    """
    Collect the run numbers of a block or of a dataset.

    it gets list of DbsRun object but for our purpose
    only list of number is collected.

    DbsRun (RunNumber,
            NumberOfEvents,
            NumberOfLumiSections,
            TotalLuminosity,
            StoreNumber,
            StartOfRungetLong,
            EndOfRun,
            CreationDate,
            CreatedBy,
            LastModificationDate,
            LastModifiedBy
            )

    :param dataset: dataset passed to dbs.listRuns when no block is given
    :param block: block passed to dbs.listRuns as block_name; takes
        precedence over dataset
    :return: flat list built from the 'run_num' entry of every result
    :raises DBSReaderError: if DBS raises dbsClientException
    """
    if block:
        query = {'block_name': block}
    else:
        query = {'dataset': dataset}

    try:
        results = self.dbs.listRuns(**query)
    except dbsClientException as ex:
        msg = "Error in DBSReader.listRuns(%s, %s)\n" % (dataset, block)
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    return [runNumber for entry in results for runNumber in entry['run_num']]
def checkDatasetPath(self, pathName):
    """
    _checkDatasetPath_

    Reject an empty or missing dataset path.

    :param pathName: dataset path to check
    :raises DBSReaderError: if pathName is "" or None
    """
    rejected = ("", None)
    if pathName not in rejected:
        return
    raise DBSReaderError("Invalid Dataset Path name: => %s <=" % pathName)
def checkDatasetPath(self, pathName):
    """
    _checkDatasetPath_

    Reject an empty or missing dataset path, then ask DBS whether the
    dataset exists (any dataset_access_type).

    :param pathName: dataset path to check
    :raises DBSReaderError: if pathName is "" or None, if DBS returns no
        matching dataset, or if the lookup raises dbsClientException or
        HTTPError
    """
    rejected = ("", None)
    if pathName in rejected:
        raise DBSReaderError("Invalid Dataset Path name: => %s <=" % pathName)

    try:
        matches = self.dbs.listDatasets(dataset=pathName, dataset_access_type='*')
        if len(matches) == 0:
            raise DBSReaderError("Dataset %s doesn't exist in DBS %s" % (pathName, self.dbsURL))
    except (dbsClientException, HTTPError) as ex:
        msg = "Error in "
        msg += "DBSReader.checkDatasetPath(%s)\n" % pathName
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
    return
def __init__(self, url, **contact):
    """
    Create the DBS client object.

    :param url: DBS url handed to DbsApi
    :param contact: extra keyword arguments forwarded to DbsApi
    :raises DBSReaderError: if DbsApi raises DbsException
    """
    # instantiate dbs api object
    try:
        self.dbs = DbsApi(url, **contact)
    # "as" form is valid on Python 2.6+ and Python 3, unlike "except X, ex".
    except DbsException as ex:
        msg = "Error in DBSReader with DbsApi\n"
        msg += "%s\n" % formatEx(ex)
        raise DBSReaderError(msg)
def lfnsInBlock(self, fileBlockName):
    """
    _lfnsInBlock_

    LFN list only for block, details = False => faster query

    :param fileBlockName: name of the block to query
    :raises DBSReaderError: if the block does not exist or the DBS call
        raises DbsException

    NOTE(review): the visible definition ends after the query, so the
    fetched list is not returned here - confirm against the full file.
    """
    if not self.blockExists(fileBlockName):
        msg = "DBSReader.lfnsInBlock(%s): No matching data"
        raise DBSReaderError(msg % fileBlockName)

    try:
        files = self.dbs.listFiles(block_name=fileBlockName, detail=False)
    # "as" form is valid on Python 2.6+ and Python 3, unlike "except X, ex".
    except DbsException as ex:
        msg = "Error in "
        msg += "DBSReader.listFilesInBlock(%s)\n" % fileBlockName
        msg += "%s\n" % formatEx(ex)
        raise DBSReaderError(msg)
def getFileBlocksInfo(self, dataset, onlyClosedBlocks=False):
    """
    Query DBS for the blocks of a dataset.

    :param dataset: dataset path, validated through checkDatasetPath
    :param onlyClosedBlocks: not used in the visible part of this definition
    :raises DBSReaderError: if the DBS call raises DbsException

    NOTE(review): the visible definition ends after the query, so the
    fetched blocks are not returned here - confirm against the full file.
    """
    self.checkDatasetPath(dataset)
    try:
        blocks = self.dbs.listBlocks(dataset)
    # "as" form is valid on Python 2.6+ and Python 3, unlike "except X, ex".
    except DbsException as ex:
        msg = "Error in DBSReader.listFileBlocks(%s)\n" % dataset
        msg += "%s\n" % formatEx(ex)
        raise DBSReaderError(msg)
def lfnsInBlock(self, fileBlockName):
    """
    _lfnsInBlock_

    LFN list only for block, details = False => faster query

    :param fileBlockName: name of the block to query
    :return: whatever dbs.listFileArray returns for the block with
        validFileOnly=1 and detail=False
    :raises DBSReaderError: if the block does not exist or DBS raises
        dbsClientException
    """
    if not self.blockExists(fileBlockName):
        raise DBSReaderError("DBSReader.lfnsInBlock(%s): No matching data" % fileBlockName)

    try:
        return self.dbs.listFileArray(block_name=fileBlockName, validFileOnly=1, detail=False)
    except dbsClientException as ex:
        msg = "Error in DBSReader.listFilesInBlock(%s)\n" % fileBlockName
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
def __init__(self, url, logger=None, **contact):
    """
    Create the DBS client object and attach a logger.

    The url is stored in self.dbsURL with "cmsweb.cern.ch" replaced by
    "cmsweb-prod.cern.ch" before it is handed to DbsApi.

    :param url: DBS url
    :param logger: logger to use; defaults to one named after the class
    :param contact: extra keyword arguments forwarded to DbsApi
    :raises DBSReaderError: if DbsApi raises dbsClientException
    """
    # instantiate dbs api object
    self.dbsURL = url.replace("cmsweb.cern.ch", "cmsweb-prod.cern.ch")
    try:
        self.dbs = DbsApi(self.dbsURL, **contact)
    except dbsClientException as ex:
        msg = "Error in DBSReader with DbsApi\n"
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    if logger:
        self.logger = logger
    else:
        self.logger = logging.getLogger(self.__class__.__name__)
def listDatasetParents(self, childDataset):
    """
    List the parent datasets of the given child dataset.

    :param childDataset: dataset path passed to dbs.listDatasetParents
    :return: whatever dbs.listDatasetParents returns
    :raises DBSReaderError: if DBS raises dbsClientException
    """
    try:
        return self.dbs.listDatasetParents(dataset=childDataset)
    except dbsClientException as ex:
        msg = "Error in DBSReader.listDatasetParents(%s)\n" % childDataset
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
def checkDBSServer(self):
    """
    check whether dbs server is up and running

    :return: whatever dbs.serverinfo() returns, e.g.
        {"dbs_instance": "prod/global", "dbs_version": "3.3.144"}
    :raises DBSReaderError: if DBS raises dbsClientException
    """
    try:
        return self.dbs.serverinfo()
    except dbsClientException as ex:
        # One item per line: previously the URL and the formatted exception
        # ran together with no separator.
        msg = "Error in DBSReader.checkDBSServer()\n"
        msg += "DBS server is not up: %s\n" % self.dbsURL
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)