def getPileupLockedAndAvailable(self, container, account, scope="cms"):
    """
    Mock method to resolve where the pileup container (and all its blocks)
    is locked and available.

    :param container: container (dataset) DID name
    :param account: Rucio account name (accepted for API compatibility)
    :param scope: Rucio scope name, default "cms"
    :return: dict keyed by block name, with the list of sites for each block;
        empty dict if the DBS lookup fails
    :raises WMRucioException: if the DID is not a container
    """
    logging.info("%s: calling mock getPileupLockedAndAvailable", self.__class__.__name__)
    result = dict()
    if not self.isContainer(container):
        raise WMRucioException(
            "Pileup location needs to be resolved for a container DID type"
        )
    kwargs = dict(name=container, account=account, scope=scope)

    try:
        # instantiate a single reader instead of creating DBS3Reader twice
        dbsReader = DBS3Reader(PROD_DBS)
        dbsReader.checkDatasetPath(kwargs['name'])
        blocks = dbsReader.listFileBlocks(dataset=kwargs['name'])
        for block in blocks:
            result[block] = self.sitesByBlock(block)
    except DBSReaderError:
        logging.error("%s: Failed to fetch blocks from DBS", self.__class__.__name__)
    return result
def testListDatatiers(self):
    """
    listDatatiers returns all datatiers available
    """
    results = DBSReader.listDatatiers(self.endpoint)
    for tier in ('RAW', 'GEN-SIM-RECO', 'GEN-SIM'):
        self.assertIn(tier, results)
    self.assertNotIn('RAW-ALAN', results)
    # dbsUrl is mandatory
    with self.assertRaises(DBSReaderError):
        _ = DBSReader.listDatatiers()
    return
def testLfnsInBlock(self):
    """lfnsInBlock returns lfns in block"""
    self.dbs = DBSReader(self.endpoint)
    lfns = [entry['logical_file_name'] for entry in self.dbs.lfnsInBlock(BLOCK)]
    self.assertIn(FILE, lfns)
    # a non-existent block name must raise
    self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + 'asas')
def testListFileBlockLocation(self):
    """listFileBlockLocation returns block location"""
    WRONG_BLOCK = BLOCK[:-4] + 'abcd'
    BLOCK2 = '/HighPileUp/Run2011A-v1/RAW#6021175e-cbfb-11e0-80a9-003048caaace'
    DBS_BLOCK = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-' + \
                'ea0972193530f531086947d06eb0f121/USER#fb978442-a61b-413a-b4f4-526e6cdb142e'
    DBS_BLOCK2 = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-' + \
                 'ea0972193530f531086947d06eb0f121/USER#0b04d417-d734-4ef2-88b0-392c48254dab'
    self.dbs = DBSReader('https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader/')

    # single-block lookups
    self.assertTrue(self.dbs.listFileBlockLocation(BLOCK))
    # This block is only found on DBS
    self.assertTrue(self.dbs.listFileBlockLocation(DBS_BLOCK))
    # doesn't raise on non-existant block
    self.assertTrue(self.dbs.listFileBlockLocation(WRONG_BLOCK))

    # test bulk call: every pair must yield an entry per block
    bulkPairs = [
        [BLOCK, BLOCK2],            # two blocks in phedex
        [BLOCK, WRONG_BLOCK],       # one block in phedex one does not exist
        [BLOCK, DBS_BLOCK],         # one in phedex one in dbs
        [DBS_BLOCK, DBS_BLOCK2],    # two in dbs
        [DBS_BLOCK, WRONG_BLOCK],   # one in DBS and one does not exist
    ]
    for pair in bulkPairs:
        self.assertEqual(2, len(self.dbs.listFileBlockLocation(pair)))
def makePhEDExDrop(dbsUrl, datasetPath, *blockNames):
    """
    _makePhEDExDrop_

    Given a DBS Url, dataset name and list of blockNames,
    generate an XML structure for injection
    """
    from WMCore.Services.DBS.DBS3Reader import DBS3Reader

    reader = DBS3Reader(dbsUrl)
    spec = XMLInjectionSpec(dbsUrl)
    dataset = spec.getDataset(datasetPath)

    for blockName in blockNames:
        blockContent = reader.getFileBlock(blockName)
        # flag the fileblock open ("y") or closed ("n") based on DBS state
        openFlag = "y" if blockContent['IsOpen'] else "n"
        xmlBlock = dataset.getFileblock(blockName, openFlag)

        # Any Checksum from DBS is type cksum
        for fileInfo in blockContent[blockName]['Files']:
            checksums = {'cksum': fileInfo['Checksum']}
            if fileInfo.get('Adler32') not in (None, ''):
                checksums['adler32'] = fileInfo['Adler32']
            xmlBlock.addFile(fileInfo['LogicalFileName'], checksums, fileInfo['FileSize'])

    return spec.save()
def getFromDBS(dataset, logger):
    """
    Uses the WMCore DBS3Reader object to fetch all the blocks and files
    for a given container.
    Returns a dictionary key'ed by the block name, and an inner dictionary
    with the number of valid and invalid files. It also returns a total
    counter for the number of valid and invalid files in the dataset.
    """
    dbsReader = DBS3Reader(DBS_URL, logger)
    result = dict()
    dbsFilesCounter = Counter({'valid': 0, 'invalid': 0})
    for blockName in dbsReader.listFileBlocks(dataset):
        blockCounter = result.setdefault(blockName, Counter({'valid': 0, 'invalid': 0}))
        fileInfos = dbsReader.dbs.listFileArray(block_name=blockName, validFileOnly=0, detail=True)
        for fileInfo in fileInfos:
            # DBS marks file validity with an integer flag (1 == valid)
            status = 'valid' if fileInfo['is_file_valid'] == 1 else 'invalid'
            blockCounter[status] += 1
            dbsFilesCounter[status] += 1
    return result, dbsFilesCounter
def testGetDBSSummaryInfo(self):
    """getDBSSummaryInfo returns summary of dataset and block"""
    self.dbs = DBSReader(self.endpoint)

    dataset = self.dbs.getDBSSummaryInfo(DATASET)
    expectedDataset = {'path': DATASET, 'block': '', 'NumberOfEvents': 22075,
                       'NumberOfBlocks': 46, 'FileSize': 4001680824,
                       'file_size': 4001680824, 'NumberOfFiles': 49,
                       'NumberOfLumis': 7223}
    for key, expected in expectedDataset.items():
        self.assertEqual(dataset[key], expected)

    block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
    expectedBlock = {'path': DATASET, 'block': BLOCK, 'NumberOfEvents': 377,
                     'NumberOfBlocks': 1, 'FileSize': 150780132,
                     'file_size': 150780132, 'NumberOfFiles': 2,
                     'NumberOfLumis': 94}
    for key, expected in expectedBlock.items():
        self.assertEqual(block[key], expected)

    # unknown dataset or block must raise
    with self.assertRaises(DBSReaderError):
        self.dbs.getDBSSummaryInfo(DATASET + 'blah')
    with self.assertRaises(DBSReaderError):
        self.dbs.getDBSSummaryInfo(DATASET, BLOCK + 'asas')
def testlistDatasetFileDetails(self):
    """testlistDatasetFilesDetails returns lumis, events, and parents of a dataset"""
    TESTFILE = '/store/data/Run2011A/HighPileUp/RAW/v1/000/173/658/56484BAB-CBCB-E011-AF00-BCAEC518FF56.root'
    self.dbs = DBSReader(self.endpoint)
    details = self.dbs.listDatasetFileDetails(DATASET)
    self.assertEqual(len(details), 49)
    self.assertTrue(TESTFILE in details)
    self.assertEqual(details[TESTFILE]['NumberOfEvents'], 545)
    self.assertEqual(details[TESTFILE]['file_size'], 286021145)
    self.assertEqual(
        details[TESTFILE]['BlockName'],
        '/HighPileUp/Run2011A-v1/RAW#dd6e0796-cbcc-11e0-80a9-003048caaace')
    self.assertEqual(details[TESTFILE]['Md5'], 'NOTSET')
    self.assertEqual(details[TESTFILE]['md5'], 'NOTSET')
    self.assertEqual(details[TESTFILE]['Adler32'], 'a41a1446')
    self.assertEqual(details[TESTFILE]['adler32'], 'a41a1446')
    self.assertEqual(details[TESTFILE]['Checksum'], '22218315')
    self.assertEqual(details[TESTFILE]['check_sum'], '22218315')
    self.assertTrue(173658 in details[TESTFILE]['Lumis'])
    # the file covers the contiguous lumi sections 1..111 of run 173658;
    # range() is equivalent to (and clearer than) the old 111-element literal
    self.assertEqual(sorted(details[TESTFILE]['Lumis'][173658]), list(range(1, 112)))
def testListBlockParents(self):
    """listBlockParents returns block parents"""
    self.dbs = DBSReader(self.endpoint)
    parents = self.dbs.listBlockParents(BLOCK_WITH_PARENTS)
    # exactly one parent block is expected
    self.assertItemsEqual([PARENT_BLOCK], parents)
    # the parent block itself has no parents
    self.assertFalse(self.dbs.listBlockParents(PARENT_BLOCK))
def getReplicaPhEDExNodesForBlocks(self, block=None, dataset=None, complete='y'):
    """
    Mock replica lookup for blocks/datasets.

    Args:
        block: the name of the block
        dataset: the name of the dataset
        complete: ?? (accepted but unused by this mock)

    Returns:
        a fake list of blocks and the fakes sites they are at
    """
    if isinstance(dataset, list):
        dataset = dataset[0]  # Dataset is a list in these tests
    if dataset:
        # TODO: Generalize this and maybe move dataset detection into sitesByBlock
        if dataset == PILEUP_DATASET:
            # canned response: one fabricated block at three fake sites
            return {
                '%s#0fcb2b12-d27e-11e0-91b1-003048caaace' % dataset:
                    ['T2_XX_SiteA', 'T2_XX_SiteB', 'T2_XX_SiteC']
            }
        else:
            try:
                # validate the dataset, then report the location of its first block only
                DBS3Reader(PROD_DBS).checkDatasetPath(dataset)
                blocks = DBS3Reader(PROD_DBS).dbs.listBlocks(dataset=dataset)
                singleBlock = blocks[0]['block_name']
                return {singleBlock: self.sitesByBlock(singleBlock)}
            except DBSReaderError:
                # unknown dataset: fabricated block name with no locations
                return {
                    '%s#0fcb2b12-d27e-11e0-91b1-003048caaace' % dataset: []
                }
    # block-mode lookup; NOTE: also records results in the module-level
    # _BLOCK_LOCATIONS cache as a side effect
    replicas = {}
    for oneBlock in block:
        if oneBlock.split('#')[0] == PILEUP_DATASET:
            # Pileup is at a single site
            sites = ['T2_XX_SiteC']
            _BLOCK_LOCATIONS[oneBlock] = sites
        else:
            sites = self.sitesByBlock(block=oneBlock)
            _BLOCK_LOCATIONS[oneBlock] = sites
        replicas.update({oneBlock: sites})
    return replicas
def _queryAndCompareWithDBS(self, pileupDict, defaultArguments, dbsUrl):
    """
    pileupDict is a Python dictionary containing particular pileup
    configuration information. Query DBS on given dataset contained
    now in both input defaultArguments as well as in the pileupDict
    and compare values.
    """
    reader = DBS3Reader(dbsUrl)
    phedex = PhEDEx()
    inputArgs = defaultArguments["PileupConfig"]

    # both sides must describe the same set of pileup types
    self.assertEqual(len(inputArgs), len(pileupDict),
                     "Number of pileup types different.")
    for pileupType in inputArgs:
        m = ("pileup type '%s' not in PileupFetcher-produced pileup "
             "configuration: '%s'" % (pileupType, pileupDict))
        self.assertTrue(pileupType in pileupDict, m)

    # now query DBS for compare actual results on files lists for each
    # pileup type and dataset and location (storage element names)
    # pileupDict is saved in the file and now comparing items of this
    # configuration with actual DBS results, the structure of pileupDict:
    #     {"pileupTypeA": {"BlockA": {"FileList": [], "PhEDExNodeNames": []},
    #                      "BlockB": {"FileList": [], "PhEDExNodeNames": []}, ....}
    for pileupType, datasets in inputArgs.items():
        # this is from the pileup configuration produced by PileupFetcher
        blockDict = pileupDict[pileupType]

        for dataset in datasets:
            dbsFileBlocks = reader.listFileBlocks(dataset=dataset)
            blocksLocation = phedex.getReplicaPhEDExNodesForBlocks(dataset=dataset,
                                                                   complete='y')
            for dbsFileBlockName in dbsFileBlocks:
                fileList = []
                pnns = set()
                # collect the PhEDEx node names (PNNs) for this block
                for pnn in blocksLocation[dbsFileBlockName]:
                    pnns.add(pnn)
                # now get list of files in the block
                dbsFiles = reader.listFilesInBlock(dbsFileBlockName)
                for dbsFile in dbsFiles:
                    fileList.append(dbsFile["LogicalFileName"])
                # now compare the sets:
                m = ("PNNs don't agree for pileup type '%s', "
                     "dataset '%s' in configuration: '%s'" % (pileupType, dataset, pileupDict))
                self.assertEqual(set(blockDict[dbsFileBlockName]["PhEDExNodeNames"]), pnns, m)

                m = ("FileList don't agree for pileup type '%s', dataset '%s' "
                     " in configuration: '%s'" % (pileupType, dataset, pileupDict))
                storedFileList = [item['logical_file_name'] for item in blockDict[dbsFileBlockName]["FileList"]]
                self.assertItemsEqual(storedFileList, fileList, m)
def testListFilesInBlock(self):
    """listFilesInBlock returns files in block"""
    self.dbs = DBSReader(self.endpoint)
    blockFiles = [entry['LogicalFileName'] for entry in self.dbs.listFilesInBlock(BLOCK)]
    self.assertIn(FILE, blockFiles)
    # bogus block suffix must raise
    self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock, DATASET + '#blah')
def testListProcessedDatasets(self):
    """listProcessedDatasets returns known processed datasets"""
    self.dbs = DBSReader(self.endpoint)
    datasets = self.dbs.listProcessedDatasets('Jet', 'RAW')
    for expected in ('Run2011A-v1', 'Run2011B-v1'):
        self.assertIn(expected, datasets)
    # unknown tier or primary dataset yields an empty result
    self.assertFalse(self.dbs.listProcessedDatasets('Jet', 'blah'))
    self.assertFalse(self.dbs.listProcessedDatasets('blah', 'RAW'))
def testGetFileBlock(self):
    """getFileBlock returns block"""
    self.dbs = DBSReader(self.endpoint)
    blockInfo = self.dbs.getFileBlock(BLOCK)
    self.assertEqual(len(blockInfo), 1)
    # the single entry is keyed by the block name and lists its files
    self.assertEqual(2, len(blockInfo[BLOCK]['Files']))
    self.assertRaises(DBSReaderError, self.dbs.getFileBlock, BLOCK + 'asas')
def testGetFileBlockWithParents(self):
    """getFileBlockWithParents returns block and parents"""
    self.dbs = DBSReader(self.endpoint)
    blockInfo = self.dbs.getFileBlockWithParents(BLOCK_WITH_PARENTS)
    self.assertEqual(len(blockInfo), 1)
    # first file of the block must carry the expected parent LFN
    firstFile = blockInfo[BLOCK_WITH_PARENTS]['Files'][0]
    self.assertEqual(PARENT_FILE, firstFile['ParentList'][0]['LogicalFileName'])
    self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents,
                      BLOCK + 'asas')
def testListBlockParents(self):
    """listBlockParents returns block parents"""
    self.dbs = DBSReader(self.endpoint)
    parents = self.dbs.listBlockParents(BLOCK_WITH_PARENTS)
    self.assertEqual(1, len(parents))
    firstParent = parents[0]
    self.assertEqual(PARENT_BLOCK, firstParent['Name'])
    # parent block must be located somewhere
    self.assertTrue(firstParent['PhEDExNodeList'])
    # the parent block itself has no parents
    self.assertFalse(self.dbs.listBlockParents(PARENT_BLOCK))
def checkDBSUrl(dbsUrl):
    """
    Verify that the given DBS instance is alive by calling its serverinfo API.

    :param dbsUrl: DBS server URL; a falsy value skips the check
    :return: True when the server responds (or when no URL is given)
    :raises WMWorkloadToolsException: if the server cannot be contacted
    """
    if dbsUrl:
        try:
            DBS3Reader(dbsUrl).dbs.serverinfo()
        except Exception:
            # was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt; narrow it to Exception
            raise WMWorkloadToolsException("DBS is not responding: %s" % dbsUrl)
    return True
def testListFileBlocks(self):
    """listFileBlocks returns block names in dataset"""
    self.dbs = DBSReader(self.endpoint)
    allBlocks = self.dbs.listFileBlocks(DATASET)
    self.assertIn(BLOCK, allBlocks)
    # block is closed
    closedBlock = self.dbs.listFileBlocks(DATASET, blockName=BLOCK,
                                          onlyClosedBlocks=True)[0]
    self.assertEqual(closedBlock, BLOCK)
    self.assertIn(BLOCK, closedBlock)
def testlistRuns(self):
    """listRuns returns known runs"""
    self.dbs = DBSReader(self.endpoint)
    # dataset-level lookup: many runs
    datasetRuns = self.dbs.listRuns(dataset=DATASET)
    self.assertEqual(46, len(datasetRuns))
    self.assertIn(174074, datasetRuns)
    # block-level lookup: exactly one run
    blockRuns = self.dbs.listRuns(block=BLOCK)
    self.assertEqual(1, len(blockRuns))
    self.assertEqual([173657], blockRuns)
def testMatchProcessedDatasets(self):
    """
    matchProcessedDatasets returns known processed datasets
    """
    self.dbs = DBSReader(self.endpoint)
    matches = self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v1')
    self.assertEqual(1, len(matches))
    self.assertEqual(['/Jet/Run2011A-v1/RAW'], matches[0]['PathList'])
    self.assertEqual('Run2011A-v1', matches[0]['Name'])
    # non-existent processed dataset yields no match
    self.assertFalse(self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v666'))
def testListPrimaryDatasets(self):
    """
    listPrimaryDatasets returns known primary datasets
    """
    self.dbs = DBSReader(self.endpoint)
    results = self.dbs.listPrimaryDatasets('Jet*')
    for primary in ('Jet', 'JetMET', 'JetMETTau'):
        self.assertIn(primary, results)
    # unknown pattern yields an empty result
    self.assertFalse(self.dbs.listPrimaryDatasets('DoesntExist'))
    return
def _validateDatatier(datatier, dbsUrl):
    """
    _validateDatatier_

    Provided a list of datatiers extracted from the outputDatasets, checks
    whether they all exist in DBS already.
    """
    knownTiers = set(DBSReader.listDatatiers(dbsUrl))
    badTiers = list(set(datatier) - knownTiers)
    if badTiers:
        raise InvalidSpecParameterValue("Bad datatier(s): %s not available in DBS." % badTiers)
def testListDatatiers(self):
    """
    listDatatiers returns all datatiers available
    """
    self.dbs = DBSReader(self.endpoint)
    results = self.dbs.listDatatiers()
    for tier in ('RAW', 'GEN-SIM-RECO', 'GEN-SIM'):
        self.assertIn(tier, results)
    self.assertNotIn('RAW-ALAN', results)
    return
def testlistRunLumis(self):
    """listRunLumis returns known runs and lumicounts (None for DBS3)"""
    self.dbs = DBSReader(self.endpoint)
    # dataset-level lookup
    datasetRuns = self.dbs.listRunLumis(dataset=DATASET)
    self.assertEqual(46, len(datasetRuns))
    self.assertIn(173692, datasetRuns)
    # DBS3 does not provide lumi counts, so the value is always None
    self.assertIsNone(datasetRuns[173692])
    # block-level lookup
    blockRuns = self.dbs.listRunLumis(block=BLOCK)
    self.assertEqual(1, len(blockRuns))
    self.assertIn(173657, blockRuns)
    self.assertIsNone(blockRuns[173657])
def validateDatatier(self, datatier, dbsUrl):
    """
    _validateDatatier_

    Provided a list of datatiers extracted from the outputDatasets, checks
    whether they all exist in DBS already.
    """
    knownTiers = set(DBSReader.listDatatiers(dbsUrl))
    badTiers = list(set(datatier) - knownTiers)
    if badTiers:
        raise cherrypy.HTTPError(400, "Bad datatier(s): %s not available in DBS." % badTiers)
def DBSReader(endpoint, **kwargs):
    """Function to find and instantiate desired DBSReader object"""
    try:
        reader = DBS3Reader(endpoint, **kwargs)
        # if this doesn't throw endpoint is dbs3
        reader.dbs.serverinfo()
    except Exception as ex:
        msg = 'Instantiating DBS3Reader failed with %s\n' % str(ex)
        raise DBSReaderError("Can't contact DBS at %s, got errors %s" % (endpoint, msg))
    return reader
def _validateInputDataset(arguments):
    """
    Validate that the InputDataset in the request arguments exists in the
    DBS instance given by DbsUrl; no-op when either argument is missing.

    :param arguments: request arguments dictionary
    :raises WMSpecFactoryException: if the dataset path is invalid in DBS
    """
    inputdataset = arguments.get("InputDataset", None)
    dbsURL = arguments.get("DbsUrl", None)
    # identity comparison with None is the Python idiom (was `!= None`)
    if inputdataset is not None and dbsURL is not None:
        # import DBS3Reader here, since Runtime code import this module and worker node doesn't have dbs3 client
        from WMCore.Services.DBS.DBS3Reader import DBS3Reader
        from WMCore.Services.DBS.DBSErrors import DBSReaderError
        try:
            DBS3Reader(dbsURL).checkDatasetPath(inputdataset)
        except DBSReaderError as ex:
            # we need to Wrap the exception to WMSpecFactoryException to be caught in reqmgr validation
            raise WMSpecFactoryException(str(ex))
    return
def testListFilesInBlockWithParents(self):
    """listFilesInBlockWithParents gets files with parents for a block"""
    childBlock = '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0'
    self.dbs = DBSReader(self.endpoint)
    files = self.dbs.listFilesInBlockWithParents(childBlock)
    self.assertEqual(4, len(files))
    # block name is reported under both the DBS and the legacy key
    self.assertEqual(childBlock, files[0]['block_name'])
    self.assertEqual(childBlock, files[0]['BlockName'])
    self.assertEqual(
        '/store/data/Commissioning2015/Cosmics/RAW/v1/000/238/545/00000/1043E89F-2DCF-E411-9CAE-02163E013751.root',
        files[0]['ParentList'][0]['LogicalFileName'])
    self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents,
                      BLOCK + 'asas')
def getContainerLockedAndAvailable(self, **kwargs):
    """
    Mock the method to discover where container data is locked and available.
    Note that, by default, it will not return any Tape RSEs.

    :param kwargs: must contain a 'name' key with the container DID name;
        'scope' defaults to "cms"
    :return: a unique list of RSEs
    :raises WMRucioException: if no DID name is provided
    """
    logging.info("%s: Calling mock getContainerLockedAndAvailable", self.__class__.__name__)
    if 'name' not in kwargs:
        raise WMRucioException(
            "A DID name must be provided to the getContainerLockedAndAvailable API"
        )
    kwargs.setdefault("scope", "cms")

    if kwargs['name'] == PILEUP_DATASET:
        return ['T2_XX_SiteA', 'T2_XX_SiteB', 'T2_XX_SiteC']

    try:
        # instantiate a single reader instead of creating DBS3Reader twice
        dbsReader = DBS3Reader(PROD_DBS)
        dbsReader.checkDatasetPath(kwargs['name'])
        blocks = dbsReader.dbs.listBlocks(dataset=kwargs['name'])
        # location of the first block stands in for the whole container
        singleBlock = blocks[0]['block_name']
        return self.sitesByBlock(singleBlock)
    except DBSReaderError:
        return []
def validateInputDatasSetAndParentFlag(arguments):
    """
    Validate the combination of InputDataset and IncludeParents arguments:
    when IncludeParents is set, the input dataset must exist and have parents
    in DBS; otherwise fall back to plain input dataset validation.

    :param arguments: request arguments dictionary
    :raises WMSpecFactoryException: on any inconsistency
    """
    inputdataset = arguments.get("InputDataset", None)
    if strToBool(arguments.get("IncludeParents", False)):
        # identity comparison with None is the Python idiom (was `== None`)
        if inputdataset is None:
            msg = "IncludeParent flag is True but there is no inputdataset"
            raise WMSpecFactoryException(msg)
        dbsURL = arguments.get("DbsUrl", None)
        if dbsURL is not None:
            # import DBS3Reader here, since Runtime code import this module and worker node doesn't have dbs3 client
            from WMCore.Services.DBS.DBS3Reader import DBS3Reader
            result = DBS3Reader(dbsURL).listDatasetParents(inputdataset)
            if len(result) == 0:
                msg = "IncludeParent flag is True but inputdataset %s doesn't have parents" % (inputdataset)
                raise WMSpecFactoryException(msg)
    else:
        _validateInputDataset(arguments)
    return
def testGetFileBlocksInfo(self):
    """getFileBlocksInfo returns block info, including location lookup"""
    self.dbs = DBSReader(self.endpoint)
    blocks = self.dbs.getFileBlocksInfo(DATASET)
    blockInfo = self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK)
    self.assertEqual(1, len(blockInfo))
    blockInfo = blockInfo[0]
    self.assertEqual(46, len(blocks))
    self.assertIn(blockInfo['Name'], [item['Name'] for item in blocks])
    self.assertEqual(BLOCK, blockInfo['Name'])
    self.assertEqual(0, blockInfo['OpenForWriting'])
    self.assertEqual(150780132, blockInfo['BlockSize'])
    self.assertEqual(2, blockInfo['NumberOfFiles'])
    # possibly fragile but assume block located at least at cern
    self.assertTrue(blockInfo['PhEDExNodeList'])
    # weird error handling - depends on whether block or dataset is missing
    with self.assertRaises(DBSReaderError):
        self.dbs.getFileBlocksInfo(DATASET + 'blah')
    with self.assertRaises(DBSReaderError):
        self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK + 'asas')
def testlistDatasetFileDetails(self):
    """testlistDatasetFilesDetails returns lumis, events, and parents of a dataset"""
    TESTFILE = '/store/data/Run2011A/HighPileUp/RAW/v1/000/173/658/56484BAB-CBCB-E011-AF00-BCAEC518FF56.root'
    self.dbs = DBSReader(self.endpoint)
    details = self.dbs.listDatasetFileDetails(DATASET)
    self.assertEqual(len(details), 49)
    self.assertTrue(TESTFILE in details)
    self.assertEqual(details[TESTFILE]['NumberOfEvents'], 545)
    self.assertEqual(details[TESTFILE]['file_size'], 286021145)
    self.assertEqual(details[TESTFILE]['BlockName'],
                     '/HighPileUp/Run2011A-v1/RAW#dd6e0796-cbcc-11e0-80a9-003048caaace')
    self.assertEqual(details[TESTFILE]['Md5'], 'NOTSET')
    self.assertEqual(details[TESTFILE]['md5'], 'NOTSET')
    self.assertEqual(details[TESTFILE]['Adler32'], 'a41a1446')
    self.assertEqual(details[TESTFILE]['adler32'], 'a41a1446')
    self.assertEqual(details[TESTFILE]['Checksum'], '22218315')
    self.assertEqual(details[TESTFILE]['check_sum'], '22218315')
    self.assertTrue(173658 in details[TESTFILE]['Lumis'])
    # the file covers the contiguous lumi sections 1..111 of run 173658;
    # range() is equivalent to (and clearer than) the old 111-element literal
    self.assertEqual(sorted(details[TESTFILE]['Lumis'][173658]), list(range(1, 112)))
def testlistDatasetFiles(self):
    """listDatasetFiles returns files in dataset"""
    self.dbs = DBSReader(self.endpoint)
    datasetFiles = self.dbs.listDatasetFiles(DATASET)
    self.assertEqual(49, len(datasetFiles))
    self.assertIn(FILE, datasetFiles)
def testListOpenFileBlocks(self):
    """listOpenFileBlocks finds open blocks"""
    # hard to find a dataset with open blocks, so don't bother
    self.dbs = DBSReader(self.endpoint)
    openBlocks = self.dbs.listOpenFileBlocks(DATASET)
    self.assertFalse(openBlocks)
def testBlockToDatasetPath(self):
    """blockToDatasetPath extracts path from block name"""
    self.dbs = DBSReader(self.endpoint)
    path = self.dbs.blockToDatasetPath(BLOCK)
    self.assertEqual(path, DATASET)
    # unknown block must raise
    self.assertRaises(DBSReaderError, self.dbs.blockToDatasetPath, BLOCK + 'asas')
def testBlockIsOpen(self):
    """blockIsOpen checks if a block is open"""
    self.dbs = DBSReader(self.endpoint)
    # the reference block is closed
    self.assertFalse(self.dbs.blockIsOpen(BLOCK))
def testBlockExists(self):
    """blockExists returns existence of blocks"""
    self.dbs = DBSReader(self.endpoint)
    self.assertTrue(self.dbs.blockExists(BLOCK))
    # a made-up block name must raise
    self.assertRaises(DBSReaderError, self.dbs.blockExists,
                      DATASET + '#somethingelse')
def testGetFiles(self):
    """getFiles returns files in dataset"""
    self.dbs = DBSReader(self.endpoint)
    datasetFiles = self.dbs.getFiles(DATASET)
    self.assertEqual(len(datasetFiles), 46)
def testListFilesInBlock(self):
    """listFilesInBlock returns files in block"""
    self.dbs = DBSReader(self.endpoint)
    lfnList = [entry['LogicalFileName'] for entry in self.dbs.listFilesInBlock(BLOCK)]
    self.assertIn(FILE, lfnList)
    # bogus block suffix must raise
    self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock, DATASET + '#blah')
class DBSReaderTest(EmulatedUnitTestCase):
    """
    Test suite for the DBSReader service wrapper, exercised against the
    production global DBS reader endpoint (through the emulated test layer).
    """

    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API to point at the test server.
        """
        # endpoint used by every test; self.dbs is (re)created per test
        self.endpoint = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
        self.dbs = None
        super(DBSReaderTest, self).setUp()
        return

    def tearDown(self):
        """
        _tearDown_
        """
        super(DBSReaderTest, self).tearDown()
        return

    def testListDatatiers(self):
        """
        listDatatiers returns all datatiers available
        """
        results = DBSReader.listDatatiers(self.endpoint)
        self.assertTrue('RAW' in results)
        self.assertTrue('GEN-SIM-RECO' in results)
        self.assertTrue('GEN-SIM' in results)
        self.assertFalse('RAW-ALAN' in results)
        # dbsUrl is mandatory
        with self.assertRaises(DBSReaderError):
            _ = DBSReader.listDatatiers()
        return

    def testListPrimaryDatasets(self):
        """
        listPrimaryDatasets returns known primary datasets
        """
        self.dbs = DBSReader(self.endpoint)
        results = self.dbs.listPrimaryDatasets('Jet*')
        self.assertTrue('Jet' in results)
        self.assertTrue('JetMET' in results)
        self.assertTrue('JetMETTau' in results)
        self.assertFalse(self.dbs.listPrimaryDatasets('DoesntExist'))
        return

    def testMatchProcessedDatasets(self):
        """
        matchProcessedDatasets returns known processed datasets
        """
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v1')
        self.assertEqual(1, len(dataset))
        self.assertEqual(['/Jet/Run2011A-v1/RAW'], dataset[0]['PathList'])
        self.assertEqual('Run2011A-v1', dataset[0]['Name'])
        self.assertFalse(self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v666'))

    def testlistRuns(self):
        """listRuns returns known runs"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRuns(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(174074 in runs)
        runs = self.dbs.listRuns(block=BLOCK)
        self.assertEqual(1, len(runs))
        self.assertEqual([173657], runs)

    def testlistRunLumis(self):
        """listRunLumis returns known runs and lumicounts (None for DBS3)"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRunLumis(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(173692 in runs)
        # DBS3 does not report lumi counts, so every value is None
        self.assertEqual(runs[173692], None)
        runs = self.dbs.listRunLumis(block=BLOCK)
        self.assertEqual(1, len(runs))
        self.assertTrue(173657 in runs)
        self.assertEqual(runs[173657], None)

    def testListProcessedDatasets(self):
        """listProcessedDatasets returns known processed datasets"""
        self.dbs = DBSReader(self.endpoint)
        datasets = self.dbs.listProcessedDatasets('Jet', 'RAW')
        self.assertTrue('Run2011A-v1' in datasets)
        self.assertTrue('Run2011B-v1' in datasets)
        self.assertFalse(self.dbs.listProcessedDatasets('Jet', 'blah'))
        self.assertFalse(self.dbs.listProcessedDatasets('blah', 'RAW'))

    def testlistDatasetFiles(self):
        """listDatasetFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listDatasetFiles(DATASET)
        self.assertEqual(49, len(files))
        self.assertTrue(FILE in files)

    def testlistDatasetFileDetails(self):
        """testlistDatasetFilesDetails returns lumis, events, and parents of a dataset"""
        TESTFILE = '/store/data/Run2011A/HighPileUp/RAW/v1/000/173/658/56484BAB-CBCB-E011-AF00-BCAEC518FF56.root'
        self.dbs = DBSReader(self.endpoint)
        details = self.dbs.listDatasetFileDetails(DATASET)
        self.assertEqual(len(details), 49)
        self.assertTrue(TESTFILE in details)
        self.assertEqual(details[TESTFILE]['NumberOfEvents'], 545)
        self.assertEqual(details[TESTFILE]['file_size'], 286021145)
        self.assertEqual(details[TESTFILE]['BlockName'],
                         '/HighPileUp/Run2011A-v1/RAW#dd6e0796-cbcc-11e0-80a9-003048caaace')
        self.assertEqual(details[TESTFILE]['Md5'], 'NOTSET')
        self.assertEqual(details[TESTFILE]['md5'], 'NOTSET')
        self.assertEqual(details[TESTFILE]['Adler32'], 'a41a1446')
        self.assertEqual(details[TESTFILE]['adler32'], 'a41a1446')
        self.assertEqual(details[TESTFILE]['Checksum'], '22218315')
        self.assertEqual(details[TESTFILE]['check_sum'], '22218315')
        self.assertTrue(173658 in details[TESTFILE]['Lumis'])
        # lumi sections 1..111 of run 173658
        self.assertEqual(sorted(details[TESTFILE]['Lumis'][173658]),
                         [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
                          21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
                          41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
                          61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
                          81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
                          101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111])

    def testGetDBSSummaryInfo(self):
        """getDBSSummaryInfo returns summary of dataset and block"""
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.getDBSSummaryInfo(DATASET)
        self.assertEqual(dataset['path'], DATASET)
        self.assertEqual(dataset['block'], '')
        self.assertEqual(dataset['NumberOfEvents'], 22075)
        self.assertEqual(dataset['NumberOfBlocks'], 46)
        self.assertEqual(dataset['FileSize'], 4001680824)
        self.assertEqual(dataset['file_size'], 4001680824)
        self.assertEqual(dataset['NumberOfFiles'], 49)
        self.assertEqual(dataset['NumberOfLumis'], 7223)

        block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
        self.assertEqual(block['path'], DATASET)
        self.assertEqual(block['block'], BLOCK)
        self.assertEqual(block['NumberOfEvents'], 377)
        self.assertEqual(block['NumberOfBlocks'], 1)
        self.assertEqual(block['FileSize'], 150780132)
        self.assertEqual(block['file_size'], 150780132)
        self.assertEqual(block['NumberOfFiles'], 2)
        self.assertEqual(block['NumberOfLumis'], 94)

        with self.assertRaises(DBSReaderError):
            self.dbs.getDBSSummaryInfo(DATASET + 'blah')
        with self.assertRaises(DBSReaderError):
            self.dbs.getDBSSummaryInfo(DATASET, BLOCK + 'asas')

    def testGetFileBlocksInfo(self):
        """getFileBlocksInfo returns block info, including location lookup"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.getFileBlocksInfo(DATASET)
        block = self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK)
        self.assertEqual(1, len(block))
        block = block[0]
        self.assertEqual(46, len(blocks))
        self.assertTrue(block['Name'] in [x['Name'] for x in blocks])
        self.assertEqual(BLOCK, block['Name'])
        self.assertEqual(0, block['OpenForWriting'])
        self.assertEqual(150780132, block['BlockSize'])
        self.assertEqual(2, block['NumberOfFiles'])
        # possibly fragile but assume block located at least at cern
        self.assertTrue(block['PhEDExNodeList'])

        # weird error handling - depends on whether block or dataset is missing
        with self.assertRaises(DBSReaderError):
            self.dbs.getFileBlocksInfo(DATASET + 'blah')
        with self.assertRaises(DBSReaderError):
            self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK + 'asas')

    def testListFileBlocks(self):
        """listFileBlocks returns block names in dataset"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.listFileBlocks(DATASET)
        self.assertTrue(BLOCK in blocks)
        # block is closed
        block = self.dbs.listFileBlocks(DATASET, blockName=BLOCK, onlyClosedBlocks=True)[0]
        self.assertEqual(block, BLOCK)
        self.assertTrue(BLOCK in block)

    def testListOpenFileBlocks(self):
        """listOpenFileBlocks finds open blocks"""
        # hard to find a dataset with open blocks, so don't bother
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.listOpenFileBlocks(DATASET))

    def testBlockExists(self):
        """blockExists returns existence of blocks"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(self.dbs.blockExists(BLOCK))
        self.assertRaises(DBSReaderError, self.dbs.blockExists, DATASET + '#somethingelse')

    def testListFilesInBlock(self):
        """listFilesInBlock returns files in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(FILE in [x['LogicalFileName'] for x in self.dbs.listFilesInBlock(BLOCK)])
        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock, DATASET + '#blah')

    def testListFilesInBlockWithParents(self):
        """listFilesInBlockWithParents gets files with parents for a block"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listFilesInBlockWithParents(
            '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0')
        self.assertEqual(4, len(files))
        # block name is reported under both keys
        self.assertEqual('/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0',
                         files[0]['block_name'])
        self.assertEqual('/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0',
                         files[0]['BlockName'])
        self.assertEqual(
            '/store/data/Commissioning2015/Cosmics/RAW/v1/000/238/545/00000/1043E89F-2DCF-E411-9CAE-02163E013751.root',
            files[0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents, BLOCK + 'asas')

    def testLfnsInBlock(self):
        """lfnsInBlock returns lfns in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(FILE in [x['logical_file_name'] for x in self.dbs.lfnsInBlock(BLOCK)])
        self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + 'asas')

    def testListFileBlockLocation(self):
        """listFileBlockLocation returns block location"""
        WRONG_BLOCK = BLOCK[:-4] + 'abcd'
        BLOCK2 = '/HighPileUp/Run2011A-v1/RAW#6021175e-cbfb-11e0-80a9-003048caaace'
        DBS_BLOCK = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-' + \
                    'ea0972193530f531086947d06eb0f121/USER#fb978442-a61b-413a-b4f4-526e6cdb142e'
        DBS_BLOCK2 = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-' + \
                     'ea0972193530f531086947d06eb0f121/USER#0b04d417-d734-4ef2-88b0-392c48254dab'
        self.dbs = DBSReader('https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader/')

        self.assertTrue(self.dbs.listFileBlockLocation(BLOCK))
        # This block is only found on DBS
        self.assertTrue(self.dbs.listFileBlockLocation(DBS_BLOCK))
        # doesn't raise on non-existant block
        self.assertTrue(self.dbs.listFileBlockLocation(WRONG_BLOCK))
        # test bulk call:
        ## two blocks in phedex
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK, BLOCK2])))
        ## one block in phedex one does not exist
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK, WRONG_BLOCK])))
        ## one in phedex one in dbs
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK, DBS_BLOCK])))
        ## two in dbs
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([DBS_BLOCK, DBS_BLOCK2])))
        ## one in DBS and one does not exist
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([DBS_BLOCK, WRONG_BLOCK])))

    def testGetFileBlock(self):
        """getFileBlock returns block"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlock(BLOCK)
        self.assertEqual(len(block), 1)
        block = block[BLOCK]
        self.assertEqual(2, len(block['Files']))

        self.assertRaises(DBSReaderError, self.dbs.getFileBlock, BLOCK + 'asas')

    def testGetFileBlockWithParents(self):
        """getFileBlockWithParents returns block and parents"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlockWithParents(BLOCK_WITH_PARENTS)
        self.assertEqual(len(block), 1)
        block = block[BLOCK_WITH_PARENTS]
        self.assertEqual(PARENT_FILE, block['Files'][0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents, BLOCK + 'asas')

    def testGetFiles(self):
        """getFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.getFiles(DATASET)
        self.assertEqual(len(files), 46)

    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        self.dbs = DBSReader(self.endpoint)
        parents = self.dbs.listBlockParents(BLOCK_WITH_PARENTS)
        self.assertEqual(1, len(parents))
        self.assertEqual(PARENT_BLOCK, parents[0]['Name'])
        self.assertTrue(parents[0]['PhEDExNodeList'])
        self.assertFalse(self.dbs.listBlockParents(PARENT_BLOCK))

    def testBlockIsOpen(self):
        """blockIsOpen checks if a block is open"""
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.blockIsOpen(BLOCK))

    def testBlockToDatasetPath(self):
        """blockToDatasetPath extracts path from block name"""
        self.dbs = DBSReader(self.endpoint)
        self.assertEqual(self.dbs.blockToDatasetPath(BLOCK), DATASET)
        self.assertRaises(DBSReaderError, self.dbs.blockToDatasetPath, BLOCK + 'asas')
def testLfnsInBlock(self):
    """lfnsInBlock returns lfns in block"""
    self.dbs = DBSReader(self.endpoint)
    blockLfns = [entry['logical_file_name'] for entry in self.dbs.lfnsInBlock(BLOCK)]
    self.assertIn(FILE, blockLfns)
    self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + 'asas')