def makePhEDExDrop(dbsUrl, datasetPath, *blockNames):
    """
    _makePhEDExDrop_

    Build a PhEDEx injection XML document for the given dataset and
    file blocks, reading block contents from the DBS instance at dbsUrl.

    For each named block, the block's open/closed status and file list
    (LFN, checksums, size) are fetched from DBS and added to an
    XMLInjectionSpec, which is then serialized to a pretty-printed
    XML string.

    Returns the XML document as a string.
    """
    injectionSpec = XMLInjectionSpec(dbsUrl, datasetPath)
    dbs = DBSReader(dbsUrl)

    for blockName in blockNames:
        blockData = dbs.getFileBlock(blockName)
        # PhEDEx expects "y"/"n" for the block open flag.
        openFlag = "y" if dbs.blockIsOpen(blockName) else "n"
        fileblock = injectionSpec.getFileblock(blockName, openFlag)

        for fileInfo in blockData[blockName]['Files']:
            checksums = {'cksum': fileInfo['Checksum']}
            adler = fileInfo.get('Adler32')
            # Only include adler32 when DBS actually provides a value.
            if adler not in (None, ''):
                checksums['adler32'] = adler
            fileblock.addFile(fileInfo['LogicalFileName'], checksums,
                              fileInfo['FileSize'])

    document = injectionSpec.save()
    return document.makeDOMElement().toprettyxml()
def createJobSplitter(dataset, dbsUrl, onlyClosedBlocks=False,
                      siteWhitelist=None, blockWhitelist=None,
                      withParents=False):
    """
    _createJobSplitter_

    Instantiate a JobSplitter instance for the dataset provided and
    populate it with details from DBS.

    Args:
        dataset: dataset path used to look up file blocks in DBS.
        dbsUrl: URL of the DBS instance to query.
        onlyClosedBlocks: passed through to DBSReader.listFileBlocks.
        siteWhitelist: optional list of sites; blocks with no location
            in this list are skipped, and kept blocks are restricted to
            the matching locations.
        blockWhitelist: optional list of block names; blocks not in the
            list are skipped.
        withParents: when true, parent LFNs are fetched and attached to
            each file.

    Returns:
        A populated JobSplitter instance.

    NOTE(review): the file contains a second, near-identical definition
    of createJobSplitter which shadows this one at import time — the
    duplicate should probably be removed; confirm which copy is current.
    """
    # Avoid mutable default arguments: normalize None to empty lists.
    siteWhitelist = [] if siteWhitelist is None else siteWhitelist
    blockWhitelist = [] if blockWhitelist is None else blockWhitelist

    reader = DBSReader(dbsUrl)
    result = JobSplitter(dataset)

    filterSites = len(siteWhitelist) > 0
    filterBlocks = len(blockWhitelist) > 0

    for blockName in reader.listFileBlocks(dataset, onlyClosedBlocks):
        locations = reader.listFileBlockLocation(blockName)

        if filterBlocks:
            if blockName not in blockWhitelist:
                # Lazy %-args so the message is only built when DEBUG is on.
                logging.debug(
                    "Excluding block %s based on block whitelist: %s\n",
                    blockName, blockWhitelist)
                continue

        if filterSites:
            # List comprehension instead of filter(): on Python 3 filter()
            # returns an iterator, so the original len(filter(...)) would
            # raise TypeError.
            siteMatches = [x for x in siteWhitelist if x in locations]
            if not siteMatches:
                logging.debug(
                    "Excluding block %s based on sites: %s \n",
                    blockName, locations)
                continue
            else:
                locations = siteMatches

        newBlock = result.newFileblock(blockName, *locations)

        if withParents:
            blockData = reader.getFileBlockWithParents(blockName)[blockName]
        else:
            blockData = reader.getFileBlock(blockName)[blockName]

        totalEvents = 0
        fileList = set()
        for fileInfo in blockData["Files"]:
            totalEvents += fileInfo["NumberOfEvents"]
            fileList.add(fileInfo["LogicalFileName"])
            if withParents:
                parList = [x["LogicalFileName"]
                           for x in fileInfo["ParentList"]]
                newBlock.addFile(fileInfo["LogicalFileName"],
                                 fileInfo["NumberOfEvents"], parList)
            else:
                newBlock.addFile(fileInfo["LogicalFileName"],
                                 fileInfo["NumberOfEvents"])

        logging.debug("Block %s contains %s events in %s files",
                      blockName, totalEvents, len(fileList))

    return result
def createJobSplitter(dataset, dbsUrl, onlyClosedBlocks=False,
                      siteWhitelist=None, blockWhitelist=None,
                      withParents=False):
    """
    _createJobSplitter_

    Instantiate a JobSplitter instance for the dataset provided and
    populate it with details from DBS.

    Args:
        dataset: dataset path used to look up file blocks in DBS.
        dbsUrl: URL of the DBS instance to query.
        onlyClosedBlocks: passed through to DBSReader.listFileBlocks.
        siteWhitelist: optional list of sites; blocks with no location
            in this list are skipped, and kept blocks are restricted to
            the matching locations.
        blockWhitelist: optional list of block names; blocks not in the
            list are skipped.
        withParents: when true, parent LFNs are fetched and attached to
            each file.

    Returns:
        A populated JobSplitter instance.

    NOTE(review): this is a duplicate of an earlier createJobSplitter
    definition in the same file; this later copy is the one Python keeps.
    One of the two should be deleted — confirm which is current.
    """
    # Avoid mutable default arguments: normalize None to empty lists.
    siteWhitelist = [] if siteWhitelist is None else siteWhitelist
    blockWhitelist = [] if blockWhitelist is None else blockWhitelist

    reader = DBSReader(dbsUrl)
    result = JobSplitter(dataset)

    filterSites = len(siteWhitelist) > 0
    filterBlocks = len(blockWhitelist) > 0

    for blockName in reader.listFileBlocks(dataset, onlyClosedBlocks):
        locations = reader.listFileBlockLocation(blockName)

        if filterBlocks:
            if blockName not in blockWhitelist:
                # Lazy %-args so the message is only built when DEBUG is on.
                logging.debug(
                    "Excluding block %s based on block whitelist: %s\n",
                    blockName, blockWhitelist)
                continue

        if filterSites:
            # List comprehension instead of filter(): on Python 3 filter()
            # returns an iterator, so the original len(filter(...)) would
            # raise TypeError.
            siteMatches = [x for x in siteWhitelist if x in locations]
            if not siteMatches:
                logging.debug(
                    "Excluding block %s based on sites: %s \n",
                    blockName, locations)
                continue
            else:
                locations = siteMatches

        newBlock = result.newFileblock(blockName, *locations)

        if withParents:
            blockData = reader.getFileBlockWithParents(blockName)[blockName]
        else:
            blockData = reader.getFileBlock(blockName)[blockName]

        totalEvents = 0
        fileList = set()
        for fileInfo in blockData['Files']:
            totalEvents += fileInfo['NumberOfEvents']
            fileList.add(fileInfo['LogicalFileName'])
            if withParents:
                parList = [x['LogicalFileName']
                           for x in fileInfo['ParentList']]
                newBlock.addFile(fileInfo['LogicalFileName'],
                                 fileInfo['NumberOfEvents'], parList)
            else:
                newBlock.addFile(fileInfo['LogicalFileName'],
                                 fileInfo['NumberOfEvents'])

        logging.debug("Block %s contains %s events in %s files",
                      blockName, totalEvents, len(fileList))

    return result