def lumi_calc(opts, workDir, jobList, splitter):
	"""Collect, merge and report the lumi sections processed by the given jobs.

	Reads per-job lumi/event information via process_jobs, merges adjacent lumi
	ranges per sample and - depending on opts - prints event statistics,
	writes a JSON file per sample and/or prints a grid-control lumi filter.
	"""
	(lumiDict, readDict, writeDict) = process_jobs(opts, workDir, jobList, splitter)
	activity = utils.ActivityLog('Simplifying lumi sections')
	lumis = {}
	# Turn every (run, lumi) pair into a degenerate range so mergeLumi can fuse them
	for sample in lumiDict:
		for run in lumiDict[sample]:
			for lumi in lumiDict[sample][run]:
				lumis.setdefault(sample, []).append(([run, lumi], [run, lumi]))
	for sample in lumiDict:
		lumis[sample] = mergeLumi(lumis[sample])
	activity.finish()
	for sample, lumi_list in lumis.items():
		print('Sample: %s' % sample)
		if opts.job_events:
			print('=========================================')
			# %12s because readDict.get(sample) may be None for unseen samples
			print('Number of events processed: %12s' % readDict.get(sample))
			print(' Number of events written: %12d' % sum(writeDict.get(sample, {}).values()))
			if writeDict.get(sample, None):
				sys.stdout.write('\n')
				head = [(0, ' Output filename'), (1, 'Events')]
				utils.printTabular(head, lmap(lambda pfn: {0: pfn, 1: writeDict[sample][pfn]}, writeDict[sample]))
		if opts.job_json:
			json_fn = os.path.join(opts.output_dir or workDir, 'processed_%s.json' % sample)
			# Fix: close the output file deterministically (was leaked before)
			with open(json_fn, 'w') as json_fp:
				outputJSON(lumi_list, json_fp)
			print('Saved processed lumi sections in ' + json_fn)
		if opts.job_gc:
			sys.stdout.write('\n')
			print('List of processed lumisections:')
			print('-----------------------------------------')
			outputGC(lumi_list)
		sys.stdout.write('\n')
def list_datasets(blocks):
	"""Print a summary table with entry/block/file counts per dataset plus a 'Sum' row."""
	# Add some enums for consistent access to info dicts
	DataProvider.NFiles = -1
	DataProvider.NBlocks = -2
	print('')
	infos = {}
	order = []
	infosum = {DataProvider.Dataset: 'Sum'}
	for block in blocks:
		dsName = block.get(DataProvider.Dataset, '')
		if not infos.get(dsName, None):
			order.append(dsName)
			infos[dsName] = {DataProvider.Dataset: block[DataProvider.Dataset]}
		# Accumulate counters both for this dataset and for the grand total
		for target in (infos[dsName], infosum):
			target[DataProvider.NBlocks] = target.get(DataProvider.NBlocks, 0) + 1
			target[DataProvider.NFiles] = target.get(DataProvider.NFiles, 0) + len(block[DataProvider.FileList])
			target[DataProvider.NEntries] = target.get(DataProvider.NEntries, 0) + block[DataProvider.NEntries]
	head = [(DataProvider.Dataset, 'Dataset'), (DataProvider.NEntries, '#Events'),
		(DataProvider.NBlocks, '#Blocks'), (DataProvider.NFiles, '#Files')]
	utils.printTabular(head, lmap(lambda x: infos[x], order) + ['=', infosum])
def list_files(datasets, blocks):
	"""Print the file table of each block, prefixed with the dataset name when several datasets are listed."""
	print('')
	file_columns = [(DataProvider.URL, 'Filename'), (DataProvider.NEntries, 'Events')]
	for blockInfo in blocks:
		if len(datasets) > 1:
			print('Dataset: %s' % blockInfo[DataProvider.Dataset])
		print('Blockname: %s' % blockInfo[DataProvider.BlockName])
		utils.printTabular(file_columns, blockInfo[DataProvider.FileList])
		print('')
def displayList(clsList):
	"""Print a name-sorted table of plugin entries; add an alias column if any entry has an alias."""
	header = [('Name', 'Name')]
	fmtString = 'l'
	# Only show the alias column when at least one entry actually carries an alias
	if any(entry['Alias'] for entry in clsList):
		header.append(('Alias', 'Alias'))
		fmtString = 'rl'
	utils.printTabular(header, sorted(clsList, key = lambda x: x['Name'].lower()), fmtString = fmtString)
def list_parameters(opts, psource):
	"""Print a table of parameter space points, optionally collapsed by identical stored values.

	opts.collapse == 0: one row per job.
	opts.collapse == 1: rows grouped by dataset nickname (DATASETSPLIT removed).
	opts.collapse == 2: rows additionally summarize the number of distinct nicknames.
	"""
	(result, needGCParam) = get_parameters(opts, psource)
	enabledOutput = opts.output.split(',')
	# Restrict output to the requested keys (empty opts.output means "all keys")
	output = lfilter(lambda k: not opts.output or k in enabledOutput, psource.getJobKeys())
	stored = lfilter(lambda k: k.untracked == False, output)
	untracked = lfilter(lambda k: k.untracked == True, output)

	if opts.collapse > 0:
		result_old = result
		result = {}
		result_nicks = {}
		head = [('COLLATE_JOBS', '# of jobs')]
		if 'DATASETSPLIT' in stored:
			stored.remove('DATASETSPLIT')
			if opts.collapse == 1:
				stored.append('DATASETNICK')
				head.append(('DATASETNICK', 'DATASETNICK'))
			elif opts.collapse == 2:
				head.append(('COLLATE_NICK', '# of nicks'))
		for pset in result_old:
			if ('DATASETSPLIT' in pset) and (opts.collapse == 1):
				pset.pop('DATASETSPLIT')
			nickname = None
			if ('DATASETNICK' in pset) and (opts.collapse == 2):
				nickname = pset.pop('DATASETNICK')
			# Group parameter sets by the hash of their stored values;
			# keys are coerced with str() before the lookup here
			h = md5_hex(repr(lmap(lambda key: pset.get(str(key)), stored)))
			result.setdefault(h, []).append(pset)
			result_nicks.setdefault(h, set()).add(nickname)

		def doCollate(h):
			# Use the first parameter set of each group as the representative row
			tmp = result[h][0]
			tmp['COLLATE_JOBS'] = len(result[h])
			tmp['COLLATE_NICK'] = len(result_nicks[h])
			return tmp
		result = lmap(doCollate, result)
	else:
		head = [('GC_JOB_ID', '#')]
		if needGCParam:
			head.append(('GC_PARAM', 'GC_PARAM'))
	if opts.active:
		head.append((ParameterInfo.ACTIVE, 'ACTIVE'))
	if opts.visible:
		# Explicit column selection overrides the stored keys
		stored = opts.visible.split(',')
	head.extend(sorted(izip(stored, stored)))
	if opts.untracked:
		head.extend(sorted(imap(lambda n: (n, '(%s)' % n),
			ifilter(lambda n: n not in ['GC_PARAM', 'GC_JOB_ID'], untracked))))
	utils.vprint('')
	utils.printTabular(head, result)
def displayList(clsList):
	"""Render the given plugin entries as a table sorted case-insensitively by name.

	The alias column (and right-aligned name column) only appears when some
	entry defines a non-empty alias.
	"""
	show_alias = False
	for item in clsList:
		if item['Alias']:
			show_alias = True
			break
	if show_alias:
		columns, layout = [('Name', 'Name'), ('Alias', 'Alias')], 'rl'
	else:
		columns, layout = [('Name', 'Name')], 'l'
	rows = sorted(clsList, key=lambda x: x['Name'].lower())
	utils.printTabular(columns, rows, fmtString=layout)
def main(opts, args):
	"""Print the nickname assigned by the configured NickNameProducer for each dataset path."""
	if len(args) == 0:
		utils.exitWithUsage('Dataset path not specified!')
	dataset_path = args[0]
	# A wildcard pattern is expanded through a throwaway DBS3 provider instance
	if '*' in dataset_path:
		provider = Plugin.createInstance('DBS3Provider', getConfig(), dataset_path, None)
		candidates = provider.getCMSDatasetsImpl(dataset_path)
	else:
		candidates = [dataset_path]
	producer = Plugin.getClass('NickNameProducer').createInstance(opts.producer, getConfig())
	rows = lmap(lambda ds: {0: producer.getName('', ds, None), 1: ds}, candidates)
	utils.printTabular([(0, 'Nickname'), (1, 'Dataset')], rows, 'll')
def list_parameters(opts, psource):
	"""Print a table of parameter space points, optionally collapsed by identical stored values.

	opts.collapse == 0: one row per job.
	opts.collapse == 1: rows grouped by dataset nickname (DATASETSPLIT removed).
	opts.collapse == 2: rows additionally summarize the number of distinct nicknames.
	"""
	(result, needGCParam) = get_parameters(opts, psource)
	enabledOutput = opts.output.split(',')
	# Restrict output to the requested keys (empty opts.output means "all keys")
	output = lfilter(lambda k: not opts.output or k in enabledOutput, psource.getJobKeys())
	stored = lfilter(lambda k: k.untracked == False, output)
	untracked = lfilter(lambda k: k.untracked == True, output)

	if opts.collapse > 0:
		result_old = result
		result = {}
		result_nicks = {}
		head = [('COLLATE_JOBS', '# of jobs')]
		if 'DATASETSPLIT' in stored:
			stored.remove('DATASETSPLIT')
			if opts.collapse == 1:
				stored.append('DATASETNICK')
				head.append(('DATASETNICK', 'DATASETNICK'))
			elif opts.collapse == 2:
				head.append(('COLLATE_NICK', '# of nicks'))
		for pset in result_old:
			if ('DATASETSPLIT' in pset) and (opts.collapse == 1):
				pset.pop('DATASETSPLIT')
			nickname = None
			if ('DATASETNICK' in pset) and (opts.collapse == 2):
				nickname = pset.pop('DATASETNICK')
			# Group parameter sets by the hash of their stored values
			# NOTE(review): this variant hashes pset.get(key) with the raw key;
			# a sibling variant in this file uses pset.get(str(key)) - confirm
			# which lookup is intended
			h = md5_hex(repr(lmap(pset.get, stored)))
			result.setdefault(h, []).append(pset)
			result_nicks.setdefault(h, set()).add(nickname)

		def doCollate(h):
			# Use the first parameter set of each group as the representative row
			tmp = result[h][0]
			tmp['COLLATE_JOBS'] = len(result[h])
			tmp['COLLATE_NICK'] = len(result_nicks[h])
			return tmp
		result = lmap(doCollate, result)
	else:
		head = [('GC_JOB_ID', '#')]
		if needGCParam:
			head.append(('GC_PARAM', 'GC_PARAM'))
	if opts.active:
		head.append((ParameterInfo.ACTIVE, 'ACTIVE'))
	if opts.visible:
		# Explicit column selection overrides the stored keys
		stored = opts.visible.split(',')
	head.extend(sorted(izip(stored, stored)))
	if opts.untracked:
		head.extend(sorted(imap(lambda n: (n, '(%s)' % n),
			ifilter(lambda n: n not in ['GC_PARAM', 'GC_JOB_ID'], untracked))))
	utils.vprint('')
	utils.printTabular(head, result)
def main(opts, args):
	"""Resolve the dataset path argument and tabulate each dataset with its nickname."""
	if len(args) == 0:
		utils.exitWithUsage('Dataset path not specified!')
	datasetPath = args[0]
	wildcard = '*' in datasetPath
	if wildcard:
		# Expand the pattern into concrete dataset paths via DBS3
		dbs3 = Plugin.createInstance('DBS3Provider', getConfig(), datasetPath, None)
		toProcess = dbs3.getCMSDatasetsImpl(datasetPath)
	else:
		toProcess = [datasetPath]
	nickProducer = Plugin.getClass('NickNameProducer').createInstance(opts.producer, getConfig())
	def makeRow(ds):
		return {0: nickProducer.getName('', ds, None), 1: ds}
	utils.printTabular([(0, 'Nickname'), (1, 'Dataset')], lmap(makeRow, toProcess), 'll')
def lumi_calc(opts, workDir, jobList, splitter):
	"""Collect, merge and report the lumi sections processed by the given jobs.

	Reads per-job lumi/event information via process_jobs, merges adjacent lumi
	ranges per sample and - depending on opts - prints event statistics,
	writes a JSON file per sample and/or prints a grid-control lumi filter.
	"""
	(lumiDict, readDict, writeDict) = process_jobs(opts, workDir, jobList, splitter)
	activity = utils.ActivityLog('Simplifying lumi sections')
	lumis = {}
	# Turn every (run, lumi) pair into a degenerate range so mergeLumi can fuse them
	for sample in lumiDict:
		for run in lumiDict[sample]:
			for lumi in lumiDict[sample][run]:
				lumis.setdefault(sample, []).append(([run, lumi], [run, lumi]))
	for sample in lumiDict:
		lumis[sample] = mergeLumi(lumis[sample])
	activity.finish()
	for sample, lumi_list in lumis.items():
		print('Sample: %s' % sample)
		if opts.job_events:
			print('=========================================')
			# %12s because readDict.get(sample) may be None for unseen samples
			print('Number of events processed: %12s' % readDict.get(sample))
			print(' Number of events written: %12d' % sum(writeDict.get(sample, {}).values()))
			if writeDict.get(sample, None):
				sys.stdout.write('\n')
				head = [(0, ' Output filename'), (1, 'Events')]
				utils.printTabular(head,
					lmap(lambda pfn: {0: pfn, 1: writeDict[sample][pfn]}, writeDict[sample]))
		if opts.job_json:
			json_fn = os.path.join(opts.output_dir or workDir, 'processed_%s.json' % sample)
			# Fix: close the output file deterministically (was leaked before)
			with open(json_fn, 'w') as json_fp:
				outputJSON(lumi_list, json_fp)
			print('Saved processed lumi sections in ' + json_fn)
		if opts.job_gc:
			sys.stdout.write('\n')
			print('List of processed lumisections:')
			print('-----------------------------------------')
			outputGC(lumi_list)
		sys.stdout.write('\n')
def list_datasets(blocks):
	"""Summarize all blocks per dataset (events, blocks, files) and append a total row."""
	# Add some enums for consistent access to info dicts
	DataProvider.NFiles = -1
	DataProvider.NBlocks = -2
	print('')
	per_dataset = {}
	seen_order = []
	totals = {DataProvider.Dataset: 'Sum'}

	def accumulate(target, block):
		# Bump the block/file/entry counters of one info dict by this block's numbers
		target[DataProvider.NBlocks] = target.get(DataProvider.NBlocks, 0) + 1
		target[DataProvider.NFiles] = target.get(DataProvider.NFiles, 0) + len(block[DataProvider.FileList])
		target[DataProvider.NEntries] = target.get(DataProvider.NEntries, 0) + block[DataProvider.NEntries]

	for block in blocks:
		dsName = block.get(DataProvider.Dataset, '')
		if not per_dataset.get(dsName, None):
			seen_order.append(dsName)
			per_dataset[dsName] = {DataProvider.Dataset: block[DataProvider.Dataset]}
		accumulate(per_dataset[dsName], block)
		accumulate(totals, block)

	columns = [(DataProvider.Dataset, 'Dataset'), (DataProvider.NEntries, '#Events'),
		(DataProvider.NBlocks, '#Blocks'), (DataProvider.NFiles, '#Files')]
	utils.printTabular(columns, lmap(lambda x: per_dataset[x], seen_order) + ['=', totals])
def list_blocks(blocks, headerbase):
	"""Print one table row per block: the base columns plus block name and event count."""
	print('')
	columns = list(headerbase)
	columns.append((DataProvider.BlockName, 'Block'))
	columns.append((DataProvider.NEntries, 'Events'))
	utils.printTabular(columns, blocks)
def main():
	"""Collect processed lumi sections from finished job outputs and/or convert
	lumi filter expressions on the command line (Python 2 script).

	Reads module-level opts/args/parameterized; writes JSON files and prints
	reports depending on the selected options.
	"""
	if opts.save_jobjson or opts.save_jobgc or opts.get_events:
		(workDir, nJobs, jobList) = getWorkJobs(args)
		(log, incomplete, splitter, splitInfo) = (None, False, None, {})
		(lumiDict, readDict, writeDict) = ({}, {}, {})
		try:
			splitter = DataSplitter.loadState(os.path.join(workDir, 'datamap.tar'))
		except Exception:
			# Dataset splitting info is optional - the output name is then
			# derived from the job output file below
			pass
		jobList = sorted(jobList)

		for jobNum in jobList:
			del log
			log = utils.ActivityLog('Reading job logs - [%d / %d]' % (jobNum, jobList[-1]))
			# Only jobs with exit code 0 are considered finished
			jobInfo = getJobInfo(workDir, jobNum, lambda retCode: retCode == 0)
			if not jobInfo:
				if not incomplete:
					print 'WARNING: Not all jobs have finished - results will be incomplete!'
					incomplete = True
				continue

			if not parameterized:
				if splitter:
					splitInfo = splitter.getSplitInfo(jobNum)
				outputName = splitInfo.get(DataSplitter.Nickname, splitInfo.get(DataSplitter.DatasetID, 0))
			else:
				# Derive a sample name from the job output file name
				outputName = jobInfo['file'].split()[2].replace("_%d_" % jobNum, '_').replace('/', '_').replace('__', '_')

			# Read framework report files to get number of events
			try:
				outputDir = os.path.join(workDir, 'output', 'job_' + str(jobNum))
				for fwkXML in getCMSSWInfo(os.path.join(outputDir, 'cmssw.dbs.tar.gz')):
					# Collect the (run, lumi) pairs reported by the framework
					for run in fwkXML.getElementsByTagName('Run'):
						for lumi in run.getElementsByTagName('LumiSection'):
							run_id = int(run.getAttribute('ID'))
							lumi_id = int(lumi.getAttribute('ID'))
							lumiDict.setdefault(outputName, {}).setdefault(run_id, set()).add(lumi_id)
					# Sum written events per output file (PFN)
					for outFile in fwkXML.getElementsByTagName('File'):
						pfn = outFile.getElementsByTagName('PFN')[0].childNodes[0].data
						if pfn not in writeDict.setdefault(outputName, {}):
							writeDict[outputName][pfn] = 0
						writeDict[outputName][pfn] += int(outFile.getElementsByTagName('TotalEvents')[0].childNodes[0].data)
					# Sum read events over all input files
					for inFile in fwkXML.getElementsByTagName('InputFile'):
						if outputName not in readDict:
							readDict[outputName] = 0
						readDict[outputName] += int(inFile.getElementsByTagName('EventsRead')[0].childNodes[0].data)
			except KeyboardInterrupt:
				sys.exit(os.EX_OK)
			except Exception:
				raise
				# NOTE(review): the statements below are unreachable after the
				# bare raise above - presumably the message/continue was meant
				# to run instead of re-raising; confirm intended behavior
				print 'Error while parsing framework output of job %s!' % jobNum
				continue
		del log

		log = utils.ActivityLog('Simplifying lumi sections')
		lumis = {}
		# Turn every (run, lumi) pair into a degenerate range for mergeLumi
		for sample in lumiDict:
			for run in lumiDict[sample]:
				for lumi in lumiDict[sample][run]:
					lumis.setdefault(sample, []).append(([run, lumi], [run, lumi]))
		for sample in lumiDict:
			lumis[sample] = mergeLumi(lumis[sample])
		del log

		# NOTE(review): the loop variable shadows the dict 'lumis'; safe in
		# Python 2 because items() returns a list, but fragile
		for sample, lumis in lumis.items():
			print 'Sample:', sample
			print '========================================='
			# NOTE(review): raises KeyError if no events were read for sample
			print 'Number of events processed: %12d' % readDict[sample]
			print ' Number of events written: %12d' % sum(writeDict.get(sample, {}).values())
			if writeDict.get(sample, None):
				print
				head = [(0, ' Output filename'), (1, 'Events')]
				utils.printTabular(head, map(lambda pfn: {0: pfn, 1: writeDict[sample][pfn]}, writeDict[sample]))
			if opts.save_jobjson:
				# NOTE(review): file handle is never closed explicitly
				outputJSON(lumis, open(os.path.join(workDir, 'processed_%s.json' % sample), 'w'))
				print 'Saved processed lumi sections in', os.path.join(workDir, 'processed_%s.json' % sample)
			if opts.save_jobgc:
				print
				print 'List of processed lumisections:'
				print '-----------------------------------------'
				outputGC(lumis)
			print

	###########################
	# Lumi filter manipulation
	###########################
	if opts.save_exprgc or opts.save_exprjson or opts.save_exprfull:
		if len(args) == 0:
			raise Exception('No arguments given!')
		try:
			lumis = parseLumiFilter(str.join(' ', args))
		except Exception:
			raise Exception('Could not parse: %s' % str.join(' ', args))
		if opts.save_exprgc:
			outputGC(lumis)
		if opts.save_exprjson:
			outputJSON(lumis)
		if opts.save_exprfull:
			result = {}
			for rlrange in lumis:
				start, end = rlrange
				# Full expansion only works for ranges within a single run
				assert(start[0] == end[0])
				# NOTE(review): list.extend returns None, so llist is always
				# None and unused - the extend itself is the intended effect
				llist = result.setdefault(start[0], []).extend(range(start[1], end[1] + 1))
			print result
def main():
	"""Test driver for parameter sources: builds a config from a file or from
	command line parameters, wires up an optional (possibly dummy) dataset
	parameter source, then lists, saves or inspects the resulting parameter
	space according to the module-level opts/args.
	"""
	# Set config based on settings from config file or command line
	configFile = None
	if os.path.exists(args[0]):
		configFile = args[0]
	config = getConfig(configFile, section = 'global')
	config.changeView(setSections = ['jobs']).set('nseeds', '1', '?=')
	configParameters = config.changeView(setSections = ['parameters'])
	if opts.parameters:
		utils.vprint('Provided options:')
		for p in opts.parameters:
			k, v = p.split('=', 1)
			# '\\n' on the command line becomes a real newline in the config
			configParameters.set(k.strip(), v.strip().replace('\\n', '\n'), '=')
			utils.vprint('\t%s: %s' % (k.strip(), v.strip()))
		utils.vprint('')
	if not os.path.exists(args[0]):
		# Without a config file, the arguments themselves are the parameter expression
		configParameters.set('parameters', str.join(' ', args).replace('\\n', '\n'))
	if opts.dataset:
		configParameters.set('default lookup', 'DATASETNICK')
#	configParameters.set('parameter adapter', 'BasicParameterAdapter', '=') # Don't track parameter changes
	if opts.verbosity > 2:
		config.changeView(setSections = None).write(sys.stdout)

	# Initialize ParameterFactory
	configTask = config.changeView(setSections = [config.get(['task', 'module'], 'DummyTask')])
	pm = config.getPlugin('parameter factory', 'SimpleParameterFactory', cls = ParameterFactory).getInstance()

	# Create dataset parameter source
	class DummySplitter:
		# Minimal stand-in for a DataSplitter with three fixed partitions
		def getMaxJobs(self):
			return 3
		def getSplitInfo(self, pNum):
			mkEntry = lambda ds, fl, n, nick: { DataSplitter.Dataset: ds, DataSplitter.Nickname: nick,
				DataSplitter.FileList: fl, DataSplitter.NEntries: n }
			# NOTE(review): rndStr is defined but never used
			rndStr = lambda: md5(str(random.random())).hexdigest()[:10]
			tmp = [ mkEntry('ds1', ['a', 'b'], 23, 'data_1'),
				mkEntry('ds1', ['1'], 42, 'data_1'),
				mkEntry('ds2', ['m', 'n'], 123, 'data_2'),
				mkEntry('ds2', ['x', 'y', 'z'], 987, 'data_3') ]
			return tmp[pNum]

	class DataSplitProcessorTest:
		# Maps split info entries onto job parameters for the test source
		def getKeys(self):
			return map(lambda k: ParameterMetadata(k, untracked=True),
				['DATASETINFO', 'DATASETID', 'DATASETPATH', 'DATASETBLOCK', 'DATASETNICK'])
		def process(self, pNum, splitInfo, result):
			result.update({
				'DATASETINFO': '',
				'DATASETID': splitInfo.get(DataSplitter.DatasetID, None),
				'DATASETPATH': splitInfo.get(DataSplitter.Dataset, None),
				'DATASETBLOCK': splitInfo.get(DataSplitter.BlockName, None),
				'DATASETNICK': splitInfo.get(DataSplitter.Nickname, None),
				'DATASETSPLIT': pNum,
			})

	if opts.dataset.lower() == 'true':
		utils.vprint('Registering dummy data provider data')
		dataSplitter = DummySplitter()
	elif opts.dataset:
		dataSplitter = DataSplitter.loadState(opts.dataset)

	if opts.dataset:
		DataParameterSource.datasetsAvailable['data'] = DataParameterSource(
			config.getWorkPath(), 'data', None, dataSplitter, DataSplitProcessorTest())
	psource = pm.getSource(config)

	if opts.forceiv:
		# Force an intervention: redo job 1, disable job 0
		for dp in DataParameterSource.datasetSources:
			dp.intervention = (set([1]), set([0]), True)

	if opts.listparams:
		result = []
		needGCParam = False
		if psource.getMaxJobs() != None:
			countActive = 0
			for jobNum in range(psource.getMaxJobs()):
				info = psource.getJobInfo(jobNum)
				if info[ParameterInfo.ACTIVE]:
					countActive += 1
				if opts.inactive or info[ParameterInfo.ACTIVE]:
					if not info[ParameterInfo.ACTIVE]:
						info['GC_PARAM'] = 'N/A'
					# GC_PARAM column is only shown when it differs from the job number
					if str(info['GC_PARAM']) != str(jobNum):
						needGCParam = True
					result.append(info)
			if opts.displaymode == 'parseable':
				utils.vprint('Count,%d,%d' % (countActive, psource.getMaxJobs()))
			else:
				utils.vprint('Number of parameter points: %d' % psource.getMaxJobs())
				if countActive != psource.getMaxJobs():
					utils.vprint('Number of active parameter points: %d' % countActive)
		else:
			# Unbounded parameter space - sample an arbitrary point
			result.append(psource.getJobInfo(123))
		enabledOutput = opts.output.split(',')
		output = filter(lambda k: not opts.output or k in enabledOutput, psource.getJobKeys())
		stored = filter(lambda k: k.untracked == False, output)
		untracked = filter(lambda k: k.untracked == True, output)

		if opts.collapse > 0:
			# Group jobs with identical stored parameter values into single rows
			result_old = result
			result = {}
			result_nicks = {}
			head = [('COLLATE_JOBS', '# of jobs')]
			if 'DATASETSPLIT' in stored:
				stored.remove('DATASETSPLIT')
				if (opts.collapse == 1):
					stored.append('DATASETNICK')
					head.append(('DATASETNICK', 'DATASETNICK'))
				elif opts.collapse == 2:
					head.append(('COLLATE_NICK', '# of nicks'))
			for pset in result_old:
				if ('DATASETSPLIT' in pset) and (opts.collapse == 1):
					pset.pop('DATASETSPLIT')
				nickname = None
				if ('DATASETNICK' in pset) and (opts.collapse == 2):
					nickname = pset.pop('DATASETNICK')
				h = md5(repr(map(lambda key: pset.get(key), stored))).hexdigest()
				result.setdefault(h, []).append(pset)
				result_nicks.setdefault(h, set()).add(nickname)

			def doCollate(h):
				# The first parameter set of each group represents the row
				tmp = result[h][0]
				tmp['COLLATE_JOBS'] = len(result[h])
				tmp['COLLATE_NICK'] = len(result_nicks[h])
				return tmp
			result = map(doCollate, result)
		else:
			head = [('GC_JOB_ID', '#')]
			if needGCParam:
				head.append(('GC_PARAM', 'GC_PARAM'))
		if opts.active:
			head.append((ParameterInfo.ACTIVE, 'ACTIVE'))
		if opts.visible:
			# Explicit column selection overrides the stored keys
			stored = opts.visible.split(',')
		head.extend(sorted(zip(stored, stored)))
		if opts.untracked:
			head.extend(sorted(map(lambda n: (n, '(%s)' % n),
				filter(lambda n: n not in ['GC_PARAM', 'GC_JOB_ID'], untracked))))
		utils.vprint('')
		utils.printTabular(head, result)

	if opts.save:
		utils.vprint('')
		ParameterSource.getClass('GCDumpParameterSource').write(opts.save, psource)
		utils.vprint('Parameter information saved to ./%s' % opts.save)

	if opts.intervention:
		utils.vprint('')
		tmp = psource.getJobIntervention()
		if tmp:
			if opts.displaymode == 'parseable':
				utils.vprint('R: %s' % str.join(',', map(str, tmp[0])))
				utils.vprint('D: %s' % str.join(',', map(str, tmp[1])))
			else:
				utils.vprint(' Redo: %r' % tmp[0])
				utils.vprint('Disable: %r' % tmp[1])
		else:
			if opts.displaymode == 'parseable':
				utils.vprint('NOINT')
			else:
				utils.vprint('No intervention')
def main():
	"""Dataset inspection tool: loads the dataset given in args[0] through a
	DataProvider and, depending on the module-level opts, prints config
	entries, dataset/block/file tables, metadata, storage locations, a
	summary line per block, or saves the dataset state to a file.
	"""
	dataset = args[0].strip()
	# Relax provider-side filtering so the full dataset content is shown
	cfgSettings = {'dbs blacklist T1 *': 'False', 'remove empty blocks *': 'False',
		'remove empty files *': 'False', 'location format *': opts.locationfmt,
		'nickname check collision *': 'False'}
	if opts.metadata or opts.blockmetadata:
		cfgSettings['lumi filter *'] = '-'
		cfgSettings['keep lumi metadata *'] = 'True'

	config = getConfig(configFile = opts.settings, configDict = {'dataset': cfgSettings})

	# A local file is read as a dataset listing; otherwise query the configured provider
	if os.path.exists(dataset):
		provider = DataProvider.getInstance('ListProvider', config, dataset, None)
	else:
		provider = DataProvider.create(config, dataset, opts.provider)
	blocks = provider.getBlocks()
	if len(blocks) == 0:
		raise DatasetError('No blocks!')

	datasets = set(map(lambda x: x[DataProvider.Dataset], blocks))
	# With multiple datasets (or in info mode) every table needs a dataset column
	if len(datasets) > 1 or opts.info:
		headerbase = [(DataProvider.Dataset, 'Dataset')]
	else:
		print('Dataset: %s' % blocks[0][DataProvider.Dataset])
		headerbase = []

	if opts.configentry:
		# Emit 'dataset =' configuration entries with nicknames
		print('')
		print('dataset =')
		infos = {}
		order = []
		maxnick = 5
		for block in blocks:
			dsName = block[DataProvider.Dataset]
			if not infos.get(dsName, None):
				order.append(dsName)
				infos[dsName] = dict([(DataProvider.Dataset, dsName)])
				if DataProvider.Nickname not in block and opts.confignick:
					try:
						# Default nickname: second path component of the dataset name
						if '/' in dsName:
							block[DataProvider.Nickname] = dsName.lstrip('/').split('/')[1]
						else:
							block[DataProvider.Nickname] = dsName
					except Exception:
						pass
				if DataProvider.Nickname not in block and opts.confignick:
					block[DataProvider.Nickname] = np.getName(None, dsName, block)
				if DataProvider.Nickname in block:
					nick = block[DataProvider.Nickname]
					infos[dsName][DataProvider.Nickname] = nick
					maxnick = max(maxnick, len(nick))
				if len(block[DataProvider.FileList]):
					infos[dsName][DataProvider.URL] = block[DataProvider.FileList][0][DataProvider.URL]
		for dsID, dsName in enumerate(order):
			info = infos[dsName]
			short = DataProvider.providers.get(provider.__class__.__name__, provider.__class__.__name__)
			nickname = info.get(DataProvider.Nickname, 'nick%d' % dsID).rjust(maxnick)
			filterExpr = utils.QM(short == 'list', ' %% %s' % info[DataProvider.Dataset], '')
			print('\t%s : %s : %s%s' % (nickname, short, provider._datasetExpr, filterExpr))

	if opts.listdatasets:
		# Add some enums for consistent access to info dicts
		DataProvider.NFiles = -1
		DataProvider.NBlocks = -2
		print('')
		infos = {}
		order = []
		infosum = {DataProvider.Dataset : 'Sum'}
		for block in blocks:
			dsName = block.get(DataProvider.Dataset, '')
			if not infos.get(dsName, None):
				order.append(dsName)
				infos[dsName] = {DataProvider.Dataset: block[DataProvider.Dataset]}
			def updateInfos(target):
				# Accumulate block/file/entry counters for one info dict
				target[DataProvider.NBlocks] = target.get(DataProvider.NBlocks, 0) + 1
				target[DataProvider.NFiles] = target.get(DataProvider.NFiles, 0) + len(block[DataProvider.FileList])
				target[DataProvider.NEntries] = target.get(DataProvider.NEntries, 0) + block[DataProvider.NEntries]
			updateInfos(infos[dsName])
			updateInfos(infosum)
		head = [(DataProvider.Dataset, 'Dataset'), (DataProvider.NEntries, '#Events'),
			(DataProvider.NBlocks, '#Blocks'), (DataProvider.NFiles, '#Files')]
		utils.printTabular(head, map(lambda x: infos[x], order) + ['=', infosum])

	if opts.listblocks:
		print('')
		utils.printTabular(headerbase + [(DataProvider.BlockName, 'Block'), (DataProvider.NEntries, 'Events')], blocks)

	if opts.listfiles:
		print('')
		for block in blocks:
			if len(datasets) > 1:
				print('Dataset: %s' % block[DataProvider.Dataset])
			print('Blockname: %s' % block[DataProvider.BlockName])
			utils.printTabular([(DataProvider.URL, 'Filename'), (DataProvider.NEntries, 'Events')], block[DataProvider.FileList])
			print('')

	def printMetadata(src, maxlen):
		# Print key/value pairs; long values are truncated to 200 characters
		for (mk, mv) in src:
			if len(str(mv)) > 200:
				mv = '<metadata entry size: %s> %s...' % (len(str(mv)), repr(mv)[:200])
			print('\t%s: %s' % (mk.rjust(maxlen), mv))
		if src:
			print('')

	if opts.metadata and not opts.save:
		print('')
		for block in blocks:
			if len(datasets) > 1:
				print('Dataset: %s' % block[DataProvider.Dataset])
			print('Blockname: %s' % block[DataProvider.BlockName])
			mk_len = max(map(len, block.get(DataProvider.Metadata, [''])))
			for f in block[DataProvider.FileList]:
				print('%s [%d events]' % (f[DataProvider.URL], f[DataProvider.NEntries]))
				printMetadata(zip(block.get(DataProvider.Metadata, []), f.get(DataProvider.Metadata, [])), mk_len)
			print('')

	if opts.blockmetadata and not opts.save:
		for block in blocks:
			if len(datasets) > 1:
				print('Dataset: %s' % block[DataProvider.Dataset])
			print('Blockname: %s' % block[DataProvider.BlockName])
			mkdict = lambda x: dict(zip(block[DataProvider.Metadata], x[DataProvider.Metadata]))
			# Start from the first file's metadata and keep only entries common to all files
			metadata = utils.QM(block[DataProvider.FileList], mkdict(block[DataProvider.FileList][0]), {})
			for fileInfo in block[DataProvider.FileList]:
				utils.intersectDict(metadata, mkdict(fileInfo))
			printMetadata(metadata.items(), max(map(len, metadata.keys())))

	if opts.liststorage:
		print('')
		infos = {}
		print('Storage elements:')
		for block in blocks:
			dsName = block[DataProvider.Dataset]
			if len(headerbase) > 0:
				print('Dataset: %s' % dsName)
			if block.get(DataProvider.BlockName, None):
				print('Blockname: %s' % block[DataProvider.BlockName])
			# None = unconstrained, [] = located nowhere, otherwise list the SEs
			if block[DataProvider.Locations] == None:
				print('\tNo location contraint specified')
			elif block[DataProvider.Locations] == []:
				print('\tNot located at anywhere')
			else:
				for se in block[DataProvider.Locations]:
					print('\t%s' % se)
			print('')

	if opts.info:
		# One machine-readable line per block with a running event total
		evSum = 0
		for block in blocks:
			blockId = '%s %s' % (block.get(DataProvider.Dataset, '-'), block.get(DataProvider.BlockName, '-'))
			blockStorage = '-'
			if block.get(DataProvider.Locations, None):
				blockStorage = str.join(',', block.get(DataProvider.Locations, '-'))
			evSum += block.get(DataProvider.NEntries, 0)
			print('%s %s %d %d' % (blockId, blockStorage, block.get(DataProvider.NEntries, 0), evSum))

	if opts.save:
		print('')
		blocks = provider.getBlocks()
		if opts.sort:
			blocks.sort(key = lambda b: b[DataProvider.Dataset] + '#' + b[DataProvider.BlockName])
			for b in blocks:
				b[DataProvider.FileList].sort(key = lambda fi: fi[DataProvider.URL])
		provider.saveState(opts.save, blocks)
		print('Dataset information saved to ./%s' % opts.save)