import os
import sys

# Helper classes and functions used below (JobInfoProcessor, FileInfoProcessor,
# getWorkJobs, OutputFileInfo, getJobInfo, getCMSSWInfo, DataSplitter, utils,
# mergeLumi, parseLumiFilter, outputJSON, outputGC as well as opts/args/parameterized)
# are provided by the surrounding grid-control support code.

def main():
    jip = JobInfoProcessor()
    fip = FileInfoProcessor()
    (workDir, nJobs, jobList) = getWorkJobs(sys.argv[1:])
    for jobNum in sorted(jobList):
        outputDir = os.path.join(workDir, 'output', 'job_%d' % jobNum)
        # Only list output files of jobs that finished with exit code 0
        if jip.process(outputDir)[1] == 0:
            for fileInfo in fip.process(outputDir):
                # Strip the protocol prefix to get the plain storage path
                pathSE = fileInfo[OutputFileInfo.Path].replace('file://', '').replace('dir://', '')
                print('%s %s/%s' % (fileInfo[OutputFileInfo.Hash], pathSE, fileInfo[OutputFileInfo.NameDest]))
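# Illustrative sketch (not part of the original script): main() above prints one
# line per file in the form '<hash> <storage path>/<file name>'.  A downstream
# consumer could parse those lines back into (hash, path) pairs like this; the
# helper name parse_file_list is hypothetical and only assumes the output
# format shown above.
def parse_file_list(lines):
    entries = []
    for line in lines:
        line = line.strip()
        if not line:
            continue
        # The hash is the first whitespace-separated token, the rest is the full path
        (fileHash, fullPath) = line.split(' ', 1)
        entries.append((fileHash, fullPath))
    return entries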
def main():
    # Lumi accounting based on the job output (opts, args and parameterized are
    # set up by the surrounding option parsing code)
    if opts.save_jobjson or opts.save_jobgc or opts.get_events:
        (workDir, nJobs, jobList) = getWorkJobs(args)
        (log, incomplete, splitter, splitInfo) = (None, False, None, {})
        (lumiDict, readDict, writeDict) = ({}, {}, {})
        try:
            splitter = DataSplitter.loadState(os.path.join(workDir, 'datamap.tar'))
        except Exception:
            pass
        jobList = sorted(jobList)

        for jobNum in jobList:
            del log
            log = utils.ActivityLog('Reading job logs - [%d / %d]' % (jobNum, jobList[-1]))
            jobInfo = getJobInfo(workDir, jobNum, lambda retCode: retCode == 0)
            if not jobInfo:
                if not incomplete:
                    print 'WARNING: Not all jobs have finished - results will be incomplete!'
                    incomplete = True
                continue

            if not parameterized:
                if splitter:
                    splitInfo = splitter.getSplitInfo(jobNum)
                outputName = splitInfo.get(DataSplitter.Nickname, splitInfo.get(DataSplitter.DatasetID, 0))
            else:
                outputName = jobInfo['file'].split()[2].replace('_%d_' % jobNum, '_').replace('/', '_').replace('__', '_')

            # Read framework report files to get number of events
            try:
                outputDir = os.path.join(workDir, 'output', 'job_' + str(jobNum))
                for fwkXML in getCMSSWInfo(os.path.join(outputDir, 'cmssw.dbs.tar.gz')):
                    # Collect processed lumi sections per run
                    for run in fwkXML.getElementsByTagName('Run'):
                        for lumi in run.getElementsByTagName('LumiSection'):
                            run_id = int(run.getAttribute('ID'))
                            lumi_id = int(lumi.getAttribute('ID'))
                            lumiDict.setdefault(outputName, {}).setdefault(run_id, set()).add(lumi_id)
                    # Count events written per output file
                    for outFile in fwkXML.getElementsByTagName('File'):
                        pfn = outFile.getElementsByTagName('PFN')[0].childNodes[0].data
                        if pfn not in writeDict.setdefault(outputName, {}):
                            writeDict[outputName][pfn] = 0
                        writeDict[outputName][pfn] += int(outFile.getElementsByTagName('TotalEvents')[0].childNodes[0].data)
                    # Count events read from the input files
                    for inFile in fwkXML.getElementsByTagName('InputFile'):
                        if outputName not in readDict:
                            readDict[outputName] = 0
                        readDict[outputName] += int(inFile.getElementsByTagName('EventsRead')[0].childNodes[0].data)
            except KeyboardInterrupt:
                sys.exit(os.EX_OK)
            except Exception:
                print 'Error while parsing framework output of job %s!' % jobNum
                continue

        del log
        log = utils.ActivityLog('Simplifying lumi sections')
        lumis = {}
        # Turn each processed lumi section into a degenerate ([run, lumi], [run, lumi]) range ...
        for sample in lumiDict:
            for run in lumiDict[sample]:
                for lumi in lumiDict[sample][run]:
                    lumis.setdefault(sample, []).append(([run, lumi], [run, lumi]))
        # ... and merge adjacent ranges per sample
        for sample in lumiDict:
            lumis[sample] = mergeLumi(lumis[sample])
        del log

        for sample, sampleLumis in lumis.items():
            print 'Sample:', sample
            print '========================================='
            print 'Number of events processed: %12d' % readDict[sample]
            print ' Number of events written: %12d' % sum(writeDict.get(sample, {}).values())
            if writeDict.get(sample, None):
                print
                head = [(0, ' Output filename'), (1, 'Events')]
                utils.printTabular(head, map(lambda pfn: {0: pfn, 1: writeDict[sample][pfn]}, writeDict[sample]))
            if opts.save_jobjson:
                outputJSON(sampleLumis, open(os.path.join(workDir, 'processed_%s.json' % sample), 'w'))
                print 'Saved processed lumi sections in', os.path.join(workDir, 'processed_%s.json' % sample)
            if opts.save_jobgc:
                print
                print 'List of processed lumisections:'
                print '-----------------------------------------'
                outputGC(sampleLumis)
            print

    ###########################
    # Lumi filter manipulation
    ###########################
    if opts.save_exprgc or opts.save_exprjson or opts.save_exprfull:
        if len(args) == 0:
            raise Exception('No arguments given!')
        try:
            lumis = parseLumiFilter(str.join(' ', args))
        except Exception:
            raise Exception('Could not parse: %s' % str.join(' ', args))

        if opts.save_exprgc:
            outputGC(lumis)
        if opts.save_exprjson:
            outputJSON(lumis)
        if opts.save_exprfull:
            result = {}
            for rlrange in lumis:
                (start, end) = rlrange
                assert start[0] == end[0]  # each range must stay within a single run
                # Expand the range into an explicit list of lumi sections for this run
                result.setdefault(start[0], []).extend(range(start[1], end[1] + 1))
            print result
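# Illustrative sketch (an assumption, not the actual grid-control mergeLumi):
# the loop above builds one degenerate ([run, lumi], [run, lumi]) range per
# processed lumi section and then merges contiguous ranges within the same run.
# A minimal merge of such (start, end) pairs could look like this:
def merge_lumi_ranges(ranges):
    # ranges: list of ([run, lumi], [run, lumi]) pairs
    ranges = sorted(ranges, key=lambda r: (r[0][0], r[0][1]))
    merged = []
    for (start, end) in ranges:
        if merged and merged[-1][1][0] == start[0] and merged[-1][1][1] + 1 >= start[1]:
            # Same run and contiguous/overlapping lumi numbers -> extend the last range
            merged[-1][1][1] = max(merged[-1][1][1], end[1])
        else:
            merged.append([list(start), list(end)])
    return merged

# Example:
#   merge_lumi_ranges([([1, 1], [1, 1]), ([1, 2], [1, 2]), ([1, 5], [1, 5])])
#   -> [[[1, 1], [1, 2]], [[1, 5], [1, 5]]]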