def runPlot(opts): lepton = opts.lepton batchMode = opts.batch inputDir = opts.input_dir outputDir = opts.output_dir verbose = opts.verbose debug = opts.debug dataset.Dataset.verbose_parsing = True if debug else False groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir) regions = regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions) inputDir = outputDir+'/'+lepton+'/histos' outputDir = outputDir+'/'+lepton+'/plots' mkdirIfNeeded(outputDir) histonames = dict((g.name, histonamesOneSample(g.name, variables_to_plot(), regions, leptonSources)) for g in groups) groups_to_stack = [g.name for g in groups if not g.is_data] if verbose: print 'groups being included in the compositions: ',groups_to_stack for region in regions: all_histos = dict([(g.name, rootUtils.fetchObjectsFromFile(os.path.join(inputDir, g.name+'_'+region+'.root'), histonames[g.name][region], verbose)) for g in groups]) for v in variables_to_plot(): histos = dict() for s in leptonSources: histos[s] = summedHisto(histos=[all_histos[g][v][s] for g in groups_to_stack], label='') histos['data'] = all_histos['data'][v]['Unknown'] plotStackedHistos(histos=histos, datakey='data', stackkeys=leptonSources, outputDir=outputDir+'/'+region, region=region, colors=fakeu.colorsFillSources(), verbose=verbose) return
def runPlot(opts): lepton = opts.lepton batchMode = opts.batch inputDir = opts.input_dir outputDir = opts.output_dir verbose = opts.verbose debug = opts.debug dataset.Dataset.verbose_parsing = True if debug else False groups = dataset.DatasetGroup.build_groups_from_files_in_dir( opts.samples_dir) regions = regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions) inputDir = outputDir + '/' + lepton + '/histos' outputDir = outputDir + '/' + lepton + '/plots' mkdirIfNeeded(outputDir) histonames = dict((g.name, histonamesOneSample(g.name, variables_to_plot(), regions, leptonSources)) for g in groups) groups_to_stack = [g.name for g in groups if not g.is_data] if verbose: print 'groups being included in the compositions: ', groups_to_stack for region in regions: all_histos = dict([(g.name, rootUtils.fetchObjectsFromFile( os.path.join(inputDir, g.name + '_' + region + '.root'), histonames[g.name][region], verbose)) for g in groups]) for v in variables_to_plot(): histos = dict() for s in leptonSources: histos[s] = summedHisto( histos=[all_histos[g][v][s] for g in groups_to_stack], label='') histos['data'] = all_histos['data'][v]['Unknown'] plotStackedHistos(histos=histos, datakey='data', stackkeys=leptonSources, outputDir=outputDir + '/' + region, region=region, colors=fakeu.colorsFillSources(), verbose=verbose) return
def main(): parser = optparse.OptionParser(usage=usage) parser.add_option('-i', '--input-dir', default='./out/fakerate') parser.add_option('-o', '--output-dir', default='./out/fakerate/efficiencies') parser.add_option('-l', '--lepton', default='el', help='either el or mu') parser.add_option('-m', '--mode', help='emu') parser.add_option('-t', '--tag', help='tag used to select the input files (e.g. Apr_04)') parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)') parser.add_option('-T', '--tight-def', help='on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.') parser.add_option('-v', '--verbose', action='store_true', default=False) (options, args) = parser.parse_args() inputDir = options.input_dir outputDir = options.output_dir lepton = options.lepton mode = options.mode tag = options.tag verbose = options.verbose if not tag : parser.error('tag is a required option') if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton) validModes = ['emu'] if mode not in validModes : parser.error("invalid mode %s"%mode) tupleStem, treeName = filter(lambda _: _[0]==mode, fakeu.tupleStemsAndNames)[0] templateInputFilename = "*_%(stem)s_tuple_%(tag)s.root" % {'tag':tag, 'stem':tupleStem} templateOutputFilename = "%(stem)s_%(l)s_eff.root" % {'stem':tupleStem.replace('tuple','histos'), 'l':lepton} outputFileName = os.path.join(outputDir, templateOutputFilename) cacheFileName = outputFileName.replace('.root', '_'+mode+'_cache.root') doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName) onthefly_tight_def = eval(options.tight_def) if options.tight_def else None # eval will take care of aborting on typos optionsToPrint = ['inputDir', 'outputDir', 'mode', 'tag', 'doFillHistograms', 'cacheFileName', 'onthefly_tight_def'] if verbose : print "working from %s"%os.getcwd() print "being called as : %s"%' '.join(os.sys.argv) print "options parsed:\n"+'\n'.join(["%s : %s"%(o, eval(o)) for o in optionsToPrint]) print 'input filenames: ',os.path.join(inputDir, templateInputFilename) # collect inputs tupleFilenames = glob.glob(os.path.join(inputDir, templateInputFilename)) samples = setSameGroupForAllData(fastSamplesFromFilenames(tupleFilenames, verbose)) samplesPerGroup = collections.defaultdict(list) filenamesPerGroup = collections.defaultdict(list) mkdirIfNeeded(outputDir) for s, f in zip(samples, tupleFilenames) : samplesPerGroup[s.group].append(s) filenamesPerGroup[s.group].append(f) vars = ['pt', 'pt_eta'] groups = [g for g in samplesPerGroup.keys() if g is not 'higgs'] if lepton=='el' : groups = [g for g in groups if g is not 'heavyflavor'] sourcesThisMode = ['real', 'conv', 'heavy', 'light', 'unknown'] if lepton=='el' else ['real', 'heavy', 'light', 'unknown'] #fill histos if doFillHistograms : start_time = time.clock() num_processed_entries = 0 histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, groups, sourcesThisMode, mode=mode) for group in groups: filenames = filenamesPerGroup[group] sources = histosPerGroupPerSource.keys() histosThisGroupPerSource = dict((s, histosPerGroupPerSource[s][group]) for s in sources) histosAnyGroupPerSource = dict((s, histosPerGroupPerSource[s]['anygroup']) for s in sources) if group!='data' else {} chain = r.TChain(treeName) [chain.Add(fn) for fn in filenames] if verbose: print "%s : %d entries"%(group, chain.GetEntries()) is_data = group in ['data'] print 'is_data ',is_data num_processed_entries += fillHistos(chain=chain, histosPerSource=histosThisGroupPerSource, histosPerSourceAnygroup=histosAnyGroupPerSource, lepton=lepton, onthefly_tight_def=onthefly_tight_def, verbose=verbose) writeHistos(cacheFileName, histosPerGroupPerSource, verbose) end_time = time.clock() delta_time = end_time - start_time one_minute = 60 if verbose: print ("processed {0:d} entries ".format(num_processed_entries) +"in "+("{0:d} min ".format(int(delta_time/60)) if delta_time>one_minute else "{0:.1f} s ".format(delta_time)) +"({0:.1f} kHz)".format(num_processed_entries/delta_time)) # plot histos histosPerGroupPerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(vars, groups, sourcesThisMode, mode), verbose) # effs = computeEfficiencies(histosPerGroupPerSource) # still [var][gr][source][l/t] for v in vars: varIs1D, varIs2D = v=='pt', v=='pt_eta' densThisSourceThisVar = dictSum(dict((s, histosPerGroupPerSource[v]['anygroup'][s]['loose']) for s in sourcesThisMode), {'data' : histosPerGroupPerSource[v]['data']['unknown']['loose']}) numsThisSourceThisVar = dictSum(dict((s, histosPerGroupPerSource[v]['anygroup'][s]['tight']) for s in sourcesThisMode), {'data' : histosPerGroupPerSource[v]['data']['unknown']['tight']}) if varIs1D: lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#varepsilon(T|L)' cname = 'stack_loose_'+lepton lT, lY = 'loose '+lepton+', denominator to #varepsilon(T|L)', '#varepsilon(T|L)' title = lT+' '+'anysource'+' '+lepton+';'+lX+';'+lY plotStackedHistosWithData(densThisSourceThisVar, outputDir, cname, title, colors=fakeu.colorsFillSources(), verbose=verbose) cname = 'stack_tight_'+lepton lT, lY = 'tight '+lepton+', numerator to #varepsilon(T|L)', '#varepsilon(T|L)' title = lT+' '+'anysource'+' '+lepton+';'+lX+';'+lY plotStackedHistosWithData(numsThisSourceThisVar, outputDir, cname, title, colors=fakeu.colorsFillSources(), verbose=verbose) for s in sourcesThisMode: for v in vars: groups = first(histosPerGroupPerSource).keys() varIs1D, varIs2D = v=='pt', v=='pt_eta' # effsThisSourceThisVar = dict((g, effs[v][g][s]) for g in groups) densThisSourceThisVar = dictSum(dict((g, histosPerGroupPerSource[v][g][s]['loose']) for g in groups if g not in ['anygroup','data']), {'data' : histosPerGroupPerSource[v]['data']['unknown']['loose']}) numsThisSourceThisVar = dictSum(dict((g, histosPerGroupPerSource[v][g]['unknown']['tight']) for g in groups if g not in ['anygroup','data']), {'data' : histosPerGroupPerSource[v]['data']['unknown']['tight']}) if varIs1D: # cname = 'eff_'+lepton+'_'+s lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#varepsilon(T|L)' # title = lT+' '+s+' '+lepton+';'+lX+';'+lY # zoomIn = True # fakeu.plot1dEfficiencies(effsThisSourceThisVar, cname, outputDir, title, zoomIn) cname = 'stack_loose_'+lepton+'_'+s lT, lY = 'loose '+lepton+', denominator to #varepsilon(T|L)', '#varepsilon(T|L)' title = lT+' '+s+' '+lepton+';'+lX+';'+lY plotStackedHistosWithData(densThisSourceThisVar, outputDir, cname, title, colors=SampleUtils.colors, verbose=verbose) cname = 'stack_tight_'+lepton+'_'+s lT, lY = 'tight '+lepton+', numerator to #varepsilon(T|L)', '#varepsilon(T|L)' title = lT+' '+s+' '+lepton+';'+lX+';'+lY plotStackedHistosWithData(numsThisSourceThisVar, outputDir, cname, title, colors=SampleUtils.colors, verbose=verbose) # elif varIs2D: # cname = 'eff_'+lepton+'_'+s # lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#eta' # title = lT+' '+s+' '+lepton+';'+lX+';'+lY # fakeu.plot2dEfficiencies(effsThisSourceThisVar, cname, outputDir, title, zoomIn=zoomIn) # writeHistos(outputFileName, effs, verbose) if verbose : print "saved scale factors to %s" % outputFileName
colors=SampleUtils.colors, verbose=verbose) # elif varIs2D: # cname = 'eff_'+lepton+'_'+s # lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#eta' # title = lT+' '+s+' '+lepton+';'+lX+';'+lY # fakeu.plot2dEfficiencies(effsThisSourceThisVar, cname, outputDir, title, zoomIn=zoomIn) # writeHistos(outputFileName, effs, verbose) if verbose : print "saved scale factors to %s" % outputFileName #___________________________________________________ leptonTypes = ['loose', 'tight'] leptonSources = [] colorsFillSources = fakeu.colorsFillSources() colorsLineSources = fakeu.colorsLineSources() markersSources = fakeu.markersSources() enum2source = fakeu.enum2source def fillHistos(chain, histosPerSource, histosPerSourceAnygroup={}, lepton='', onthefly_tight_def=None, verbose=False): """fill the histograms, returns the number of events processed. histosPerSource is required; histosPerSourceAnygroup is filled only when provided""" class Counters: # scope trick (otherwise unavailable within nested func nLoose, nTight = 0, 0 totWeightLoose, totWeightTight = 0.0, 0.0 def str(self): counterNames = ['nLoose', 'nTight', 'totWeightLoose', 'totWeightTight'] return ', '.join(["%s : %.1f"%(c, getattr(self, c)) for c in counterNames]) counters = Counters()
histos['data'] = all_histos['data'][v]['Unknown'] plotStackedHistos(histos=histos, datakey='data', stackkeys=leptonSources, outputDir=outputDir + '/' + region, region=region, colors=fakeu.colorsFillSources(), verbose=verbose) return #___________________________________________________ allLeptonSources = fakeu.allLeptonSources() leptonSources = fakeu.leptonSources() colorsFillSources = fakeu.colorsFillSources() colorsLineSources = fakeu.colorsLineSources() markersSources = fakeu.markersSources() enum2source = fakeu.enum2source def histoName(sample, var, selection, source): return 'h_' + sample + '_' + var + '_' + selection + '_' + source def histonamesOneSample(sample_name, variables, selections, sources): "dict of histogram names with keys [sel][var][source]" hn = histoName return dict([(se, dict([(v, dict([(so, hn(sample_name, v, se, so))
def main(): parser = optparse.OptionParser(usage=usage) parser.add_option('-i', '--input-dir', default='./out/fakerate') parser.add_option('-o', '--output-dir', default='./out/tight_variables_plots', help='dir for plots') parser.add_option('-l', '--lepton', default='el', help='either el or mu') parser.add_option('-r', '--region', help='one of the regions for which we saved the fake ntuples') parser.add_option('-t', '--tag', help='tag used to select the input files (e.g. Apr_04)') parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)') parser.add_option('-v', '--verbose', action='store_true', default=False) (options, args) = parser.parse_args() inputDir = options.input_dir outputDir = options.output_dir lepton = options.lepton region = options.region tag = options.tag verbose = options.verbose if not tag : parser.error('tag is a required option') if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton) filestems, treenames = utils.verticalSlice(fakeu.tupleStemsAndNames) regions = filestems assert region in regions,"invalid region '%s', must be one of %s"%(region, str(regions)) templateInputFilename = "*_%(region)s_tuple_%(tag)s.root" % {'tag':tag, 'region':region} templateOutputFilename = "%(region)s_%(l)s_tight_plots.root" % {'region':region, 'l':lepton} treeName = treenames[regions.index(region)] outputDir = outputDir+'/'+region+'/'+lepton # split the output in subdirectories, so we don't overwrite things mkdirIfNeeded(outputDir) outputFileName = os.path.join(outputDir, templateOutputFilename) cacheFileName = outputFileName.replace('.root', '_'+region+'_cache.root') doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName) optionsToPrint = ['inputDir', 'outputDir', 'region', 'tag', 'doFillHistograms'] if verbose : print "working from %s"%os.getcwd() print "being called as : %s"%' '.join(os.sys.argv) print "options:\n"+'\n'.join(["%s : %s"%(o, eval(o)) for o in optionsToPrint]) # collect inputs if verbose : print 'input files ',os.path.join(inputDir, templateInputFilename) tupleFilenames = glob.glob(os.path.join(inputDir, templateInputFilename)) samples = setSameGroupForAllData(fastSamplesFromFilenames(tupleFilenames, verbose)) if not samples : samples = [guessSampleFromFilename(f) for f in tupleFilenames] # if the fast guess didn't work, try the slow one samplesPerGroup = collections.defaultdict(list) filenamesPerGroup = collections.defaultdict(list) for s, f in zip(samples, tupleFilenames) : samplesPerGroup[s.group].append(s) filenamesPerGroup[s.group].append(f) vars = ['pt','eta','d0sig','z0SinTheta','etCone','ptCone','etConeCorr','ptConeCorr'] vars += ['relEtConeStd', 'relPtConeStd', 'relEtConeMod', 'relPtConeMod'] groups = samplesPerGroup.keys() sources = leptonSources #fill histos if doFillHistograms : lepLabel = "(probe %s)"%lepton histosPerGroup = bookHistosPerGroup(vars, groups, lepLabel=lepLabel) histosPerSource = bookHistosPerSource(vars, sources, lepLabel=lepLabel) for group in groups: isData = isDataSample(group) filenames = filenamesPerGroup[group] histosThisGroup = histosPerGroup[group] chain = r.TChain(treeName) [chain.Add(fn) for fn in filenames] print "%s : %d entries"%(group, chain.GetEntries()) fillHistos(chain, histosThisGroup, histosPerSource, isData, lepton, group, verbose) writeHistos(cacheFileName, {'perGroup':histosPerGroup, 'perSource':histosPerSource}, verbose) # compute scale factors histosPerGroup = fetchHistos(cacheFileName, histoNames(vars, groups), verbose) histosPerSource = fetchHistos(cacheFileName, histoNames(vars, sources), verbose) plotStackedHistos(histosPerGroup, outputDir+'/by_group', region, colors=SampleUtils.colors, verbose=verbose) plotStackedHistos(histosPerSource, outputDir+'/by_source', region, colors=fakeu.colorsFillSources(), verbose=verbose) plotIsoComparison(histosPerSource, outputDir+'/', region, lepton, verbose)