def main():
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-t', '--tag')
    parser.add_option('-i', '--input_dir')
    parser.add_option('-n', '--histoname')
    parser.add_option('-o', '--output_dir')
    parser.add_option('-v', '--verbose', action='store_true', default=False)
    (opts, args) = parser.parse_args()
    requiredOptions = ['tag', 'input_dir', 'histoname', 'output_dir']
    otherOptions = ['verbose']
    allOptions = requiredOptions + otherOptions
    def optIsNotSpecified(o):
        return not hasattr(opts, o) or getattr(opts, o) is None
    if any(optIsNotSpecified(o) for o in requiredOptions):
        parser.error('Missing required option')
    tag = opts.tag.strip('_')
    inputDirname = opts.input_dir
    inputDirname = inputDirname+'/' if not inputDirname.endswith('/') else inputDirname
    histoName = opts.histoname
    outputDirname = opts.output_dir
    outputDirname = outputDirname+'/' if not outputDirname.endswith('/') else outputDirname
    mkdirIfNeeded(outputDirname)
    verbose = opts.verbose
    if verbose:
        print ('\nUsing the following options:\n'
               +'\n'.join("%s : %s"%(o, str(getattr(opts, o))) for o in allOptions))
    inputFiles = getInputFiles(inputDirname, tag, verbose)
    assert all(f for f in inputFiles.values()), ("missing inputs: \n%s"
                                                 %'\n'.join(["%s : %s"%kv for kv in inputFiles.iteritems()]))
    histograms = getHistograms(inputFiles, histoName)
    can = r.TCanvas('can_'+histoName, histoName, 800, 600)
    draw(can, histograms, label=histoName)
    can.SaveAs(outputDirname+'/'+histoName+'.png')
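# --- hedged sketch (not part of the original scripts) ---
# mkdirIfNeeded and rmIfExists are imported from the repo's utils module and are not
# shown in this file; the minimal stand-ins below are assumptions inferred from the
# call sites above (mkdirIfNeeded returns the path, rmIfExists silently drops a file).
import os

def mkdirIfNeeded(dirname):
    "create dirname (including parents) if it is missing; return the path"
    if dirname and not os.path.isdir(dirname):
        os.makedirs(dirname)
    return dirname

def rmIfExists(filename):
    "remove filename if present, so that ROOT does not warn when overwriting it"
    if os.path.exists(filename):
        os.remove(filename)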
def runPlot(opts):
    lepton = opts.lepton
    batchMode = opts.batch
    inputDir = opts.input_dir
    outputDir = opts.output_dir
    verbose = opts.verbose
    debug = opts.debug
    dataset.Dataset.verbose_parsing = True if debug else False
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir)
    regions = regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions)
    inputDir = outputDir+'/'+lepton+'/histos'
    outputDir = outputDir+'/'+lepton+'/plots'
    mkdirIfNeeded(outputDir)
    histonames = dict((g.name, histonamesOneSample(g.name, variables_to_plot(), regions, leptonSources))
                      for g in groups)
    groups_to_stack = [g.name for g in groups if not g.is_data]
    if verbose:
        print 'groups being included in the compositions: ', groups_to_stack
    for region in regions:
        all_histos = dict([(g.name, rootUtils.fetchObjectsFromFile(os.path.join(inputDir, g.name+'_'+region+'.root'),
                                                                   histonames[g.name][region], verbose))
                           for g in groups])
        for v in variables_to_plot():
            histos = dict()
            for s in leptonSources:
                histos[s] = summedHisto(histos=[all_histos[g][v][s] for g in groups_to_stack], label='')
            histos['data'] = all_histos['data'][v]['Unknown']
            plotStackedHistos(histos=histos, datakey='data', stackkeys=leptonSources,
                              outputDir=outputDir+'/'+region, region=region,
                              colors=fakeu.colorsFillSources(), verbose=verbose)
    return
def plotStackedHistos(histos={}, datakey=None, stackkeys=[], outputDir='', region='', colors={}, verbose=False):
    "input: a dictionary of histos[group]"
    mkdirIfNeeded(outputDir)
    bkg_histos = dict([(k, h) for k, h in histos.iteritems() if k in stackkeys])
    tot_bkg = summedHisto(bkg_histos.values(), label='')
    err_band = None # tmp disable
    # err_band = buildErrBandGraph(tot_bkg, computeStatErr2(tot_bkg))
    empty_bkg = tot_bkg.Integral() == 0
    if empty_bkg:
        if verbose:
            print "empty backgrounds, skip %s" % tot_bkg.GetName()
        return
    histoname = tot_bkg.GetName()
    can = r.TCanvas('c_' + histoname, histoname, 800, 600)
    can.cd()
    pm = tot_bkg # pad master
    pm.SetStats(False)
    pm.Draw('axis')
    can.Update() # necessary to fool root's dumb object ownership
    stack = r.THStack('stack_' + tot_bkg.GetName(), '')
    can.Update()
    r.SetOwnership(stack, False)
    for s, h in bkg_histos.iteritems():
        h.SetFillColor(colors[s] if s in colors else r.kOrange)
        h.SetDrawOption('bar')
        h.SetDirectory(0)
        stack.Add(h)
    stack.Draw('hist same')
    # err_band.Draw('E2 same')
    data = histos[datakey] if datakey and datakey in histos else None
    if data and data.GetEntries():
        data.SetMarkerStyle(r.kFullDotLarge)
        data.Draw('p same')
        if verbose:
            print "data : nEntries {:.1f} totWeight {:.1f} ".format(data.GetEntries(), data.Integral())
    yMin, yMax = getMinMax([h for h in [tot_bkg, data, err_band] if h])
    # pm.SetMinimum(0.5)
    pm.SetMaximum(1.1 * yMax)
    can.Update()
    # can.SetLogy()
    topRightLabel(can, "#splitline{%s}{%s}" % (histoname, region), xpos=0.125, align=13)
    drawLegendWithDictKeys(can, dictSum(bkg_histos, {'stat err': err_band}), opt='f')
    can.RedrawAxis()
    can._stack = stack
    can._histos = [h for h in stack.GetHists()] + [data]
    can.Update()
    if verbose:
        print os.path.join(outputDir, histoname + '.png')
    can.SaveAs(os.path.join(outputDir, histoname + '.png'))
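# --- hedged sketch (not part of the original scripts) ---
# summedHisto and getMinMax are defined elsewhere in the repo; the versions below are
# assumptions reconstructed from the call sites (clone-and-add over a list of TH1, and
# a (min, max) scan used to set the pad range). Error-band graphs would need special
# handling in getMinMax; here only histograms are assumed.
def summedHisto(histos, label=''):
    "return a clone of the first histogram with all the others added to it"
    histos = [h for h in histos if h]
    if not histos:
        return None
    htot = histos[0].Clone(histos[0].GetName() + '_tot' + label)
    htot.SetDirectory(0)
    for h in histos[1:]:
        htot.Add(h)
    return htot

def getMinMax(histos):
    "overall (min, max) bin content across a list of TH1"
    return (min(h.GetMinimum() for h in histos),
            max(h.GetMaximum() for h in histos))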
def main():
    if len(sys.argv) != 3:
        print "Usage: {0} inputdir outputdir".format(sys.argv[0])
        return
    inputdir = sys.argv[1]
    outputdir = sys.argv[2]
    verbose = True
    if not os.path.exists(inputdir):
        print "missing input dir {0}".format(inputdir)
        return
    utils.mkdirIfNeeded(outputdir)
    fake = systUtils.Group('fake')
    fake.setHistosDir(inputdir)
    fake.setSyst() # reset to nominal (state is undetermined after 'explore')
    c = r.TCanvas('c', '')
    variables = ['mcollcoarse']
    for jetnojet in regions_to_plot().keys():
        for var in variables:
            sel_emu, sel_mue = regions_to_plot()[jetnojet]
            h_emu = fake.getHistogram(variable=var, selection=sel_emu, cacheIt=True)
            h_mue = fake.getHistogram(variable=var, selection=sel_mue, cacheIt=True)
            h_ratio = h_emu.Clone(h_emu.GetName().replace('emu', 'emu_over_mue'))
            h_ratio.Divide(h_mue)
            plot_emu_mue_with_ratio(canvas=c, h_mue=h_mue, h_emu=h_emu, h_ratio=h_ratio,
                                    filename=outputdir+'/'+var+'_'+jetnojet+'_emu_over_mue_wout_sys_err')
            h_with_totErrBand = {} # histo with stat+syst err (to get the correct error in the ratio)
            for sel in [sel_emu, sel_mue]:
                print ">>>plotting ", sel
                fake.setSystNominal()
                fake.setCurrentSelection(sel)
                fake.exploreAvailableSystematics(verbose)
                fakeSystematics = [s for s in fake.systematics if s != 'NOM']
                nominalHistoData = None
                nominalHistoFakeBkg = fake.getHistogram(variable=var, selection=sel, cacheIt=True)
                nominalHistosBkg = {'fake': nominalHistoFakeBkg} # was a set literal; a dict is what the other scripts use
                nominalHistoTotBkg = buildTotBackgroundHisto(histoFakeBkg=nominalHistoFakeBkg, histosSimBkgs={})
                statErrBand = buildStatisticalErrorBand(nominalHistoTotBkg)
                systErrBand = buildFakeSystematicErrorBand(fake=fake, nominalHistosSimBkg={},
                                                           variable=var, selection=sel,
                                                           variations=fakeSystematics, verbose=verbose)
                totErrBand = systUtils.addErrorBandsInQuadrature(statErrBand, systErrBand)
                # c.cd()
                # c.Clear()
                # nominalHistoFakeBkg.Draw()
                # totErrBand.Draw('E2 same')
                # totErrBand.SetFillStyle(3005)
                # for ext in ['png', 'eps']:
                #     c.SaveAs("{0}/{1}_{2}.{3}".format(outputdir, sel, var, ext))
                h_with_totErrBand[sel] = systUtils.setHistErrFromErrBand(nominalHistoFakeBkg, totErrBand)
            pprint.pprint(h_with_totErrBand)
            h_emu = [h for k, h in h_with_totErrBand.iteritems() if 'emu' in k][0]
            h_mue = [h for k, h in h_with_totErrBand.iteritems() if 'mue' in k][0]
            h_ratio = h_emu.Clone(h_mue.GetName().replace('emu', 'emu_over_mue'))
            h_ratio.Divide(h_mue)
            plot_emu_mue_with_ratio(canvas=c, h_mue=h_mue, h_emu=h_emu, h_ratio=h_ratio,
                                    filename=outputdir+'/'+var+'_'+jetnojet+'_emu_over_mue_with_sys_err')
    return
def main():
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-t', '--tag')
    parser.add_option('-i', '--input_dir')
    parser.add_option('-o', '--output_file')
    parser.add_option('-p', '--output_plot')
    parser.add_option('-v', '--verbose', action='store_true', default=False)
    (opts, args) = parser.parse_args()
    requiredOptions = ['tag', 'input_dir', 'output_file', 'output_plot']
    otherOptions = ['verbose']
    allOptions = requiredOptions + otherOptions
    def optIsNotSpecified(o):
        return not hasattr(opts, o) or getattr(opts, o) is None
    if any(optIsNotSpecified(o) for o in requiredOptions):
        parser.error('Missing required option')
    tag = opts.tag
    inputDirname = opts.input_dir
    outputFname = opts.output_file
    outputPlotDir = opts.output_plot
    verbose = opts.verbose
    if verbose:
        print '\nUsing the following options:\n'+'\n'.join("%s : %s"%(o, str(getattr(opts, o))) for o in allOptions)
    allInputFiles = getInputFiles(inputDirname, tag, verbose) # includes allBkg, which is used only for sys
    assert all(f for f in allInputFiles.values()), ("missing inputs: \n%s"%'\n'.join(["%s : %s"%kv for kv in allInputFiles.iteritems()]))
    mkdirIfNeeded(outputPlotDir)
    outputFile = r.TFile.Open(outputFname, 'recreate')
    inputFiles = dict((k, v) for k, v in allInputFiles.iteritems() if k in fakeProcesses())
    buildMuonRates    (inputFiles, outputFile, outputPlotDir, verbose)
    buildElectronRates(inputFiles, outputFile, outputPlotDir, verbose)
    buildSystematics  (allInputFiles['allBkg'], outputFile)
    outputFile.Close()
    if verbose:
        print "output saved to \n%s"%'\n'.join([outputFname, outputPlotDir])
def submit_batch_fill_job_per_group(group, opts):
    options_dict = vars(opts)
    group_name = group.name if hasattr(group, 'name') else group
    systematic = opts.syst if hasattr(opts, 'syst') and opts.syst else None
    verbose = opts.verbose
    options_dict['group'] = group_name
    options_with_value = dict((k, v) for k, v in options_dict.iteritems() if v and v is not True)
    # note to self: the line below assumes that the argument-less options have a default=False
    options_with_toggle = dict((k, v) for k, v in options_dict.iteritems() if v and v is True and k != "batch")
    def escape_regex(v):
        return v if v != '.*' else "'.*'"
    def back_to_dash(v):
        return v.replace('_', '-')
    cmd_line_options = ' '.join(["--%s %s"%(back_to_dash(k), escape_regex(str(v)))
                                 for k, v in options_with_value.iteritems()]
                                +["--%s"%back_to_dash(k) for k in options_with_toggle.keys()])
    template = 'batch/templates/plot_emu.sh'
    default_log_dir = opts.output_dir.replace('out/', 'log/')
    if default_log_dir.count('/histos') == 1:
        default_log_dir = default_log_dir.replace('/histos', '')
    log_dir = mkdirIfNeeded(opts.log_dir if opts.log_dir else default_log_dir)
    script_dir = mkdirIfNeeded('batch/plot_emu')
    script_name = os.path.join(script_dir, group_name+("_{0}".format(systematic) if systematic else '')+'.sh')
    log_name = log_dir+'/'+group_name+("_{0}".format(systematic) if systematic else '')+'.log'
    script_file = open(script_name, 'w')
    script_file.write(open(template).read()
                      .replace('%(opt)s', cmd_line_options)
                      .replace('%(logfile)s', log_name)
                      .replace('%(jobname)s', group_name)
                      .replace('%(queue)s', opts.queue))
    script_file.close()
    cmd = "sbatch %s"%script_name
    if verbose:
        print cmd
    out = getCommandOutput(cmd)
    if verbose:
        print out['stdout']
    if out['stderr']:
        print out['stderr']
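# --- hedged sketch (not part of the original scripts) ---
# getCommandOutput is assumed to be a thin subprocess wrapper returning a dict with
# 'stdout', 'stderr' and the return code, which is how the batch-submission functions
# above consume it.
import shlex
import subprocess

def getCommandOutput(command):
    "run a shell command and collect its output streams"
    p = subprocess.Popen(shlex.split(command),
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    return {'stdout': stdout, 'stderr': stderr, 'returncode': p.returncode}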
def main():
    options = parse_options()
    inputdf = options.input
    outdir = options.output_dir
    regexp = options.sample_regexp
    exclude = options.exclude_regexp
    tag = options.tag
    verbose = options.verbose
    debug = options.debug
    utils.mkdirIfNeeded(outdir)
    if debug:
        dataset.Dataset.verbose_parsing = True
    datasets = dataset.build_all_datasets_from_dir_or_file(inputdf)
    datasets = utils.filterWithRegexp(datasets, regexp, lambda _: _.name) if regexp else datasets
    datasets = utils.excludeWithRegexp(datasets, exclude, lambda _: _.name) if exclude else datasets
    counter = {'fail': 0, 'pass': 0}
    for d in datasets:
        outcome = 'pass' if d.build_filelist(gpatlas_dir(d, tag), outdir, verbose) else 'fail'
        counter[outcome] += 1
    if verbose:
        print "created %d filelists (%d failures)" % (counter['pass'], counter['fail'])
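# --- hedged sketch (not part of the original scripts) ---
# filterWithRegexp / excludeWithRegexp live in the repo's utils module; the assumed
# behaviour, inferred from the call sites above and in runFill, is to keep or drop
# items whose key (by default the item itself) matches a regular expression.
import re

def filterWithRegexp(items, regexp, func=lambda _: _):
    "keep items whose key matches regexp"
    return [i for i in items if re.search(regexp, func(i))]

def excludeWithRegexp(items, regexp, func=lambda _: _):
    "drop items whose key matches regexp"
    return [i for i in items if not re.search(regexp, func(i))]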
def plotPerSourceEff(histosPerVar={}, outputDir='', lepton='', region='', sample='', verbose=False, zoomIn=True): "plot efficiency for each source (and 'anysource') as a function of each var; expect histos[var][source][loose,tight]" variables = histosPerVar.keys() sources = [s for s in first(histosPerVar).keys() if s!='real'] # only fake eff really need a scale factor colors = colorsLineSources mkdirIfNeeded(outputDir) for var in filter(lambda x : x in ['pt1', 'eta1'], histosPerVar.keys()): histosPerSource = dict((s, histosPerVar[var][s]) for s in sources) canvasBasename = region+'_efficiency_'+lepton+'_'+var+("_%s"%sample if sample else '') missingSources = [s for s, h in histosPerSource.iteritems() if not h['loose'] or not h['tight']] if missingSources: if verbose : print "skip %s, missing histos for %s"%(var, str(missingSources)) continue anySourceLoose = summedHisto([h['loose'] for h in histosPerSource.values()]) anySourceTight = summedHisto([h['tight'] for h in histosPerSource.values()]) anySourceLoose.SetName(histoNamePerSource(var, 'any', 'loose', region)) anySourceTight.SetName(histoNamePerSource(var, 'any', 'tight', region)) histosPerSource['any'] = { 'loose' : anySourceLoose, 'tight' : anySourceTight } emptyBkg = anySourceLoose.Integral()==0 or anySourceTight.Integral()==0 if emptyBkg: if verbose : print "empty backgrounds, skip %s"%canvasBasename continue def computeEfficiencies(histosPerSource={}) : sources = histosPerSource.keys() num = dict((s, histosPerSource[s]['tight']) for s in sources) den = dict((s, histosPerSource[s]['loose']) for s in sources) eff = dict((s, h.Clone(h.GetName().replace('tight', 'tight_over_loose'))) for s, h in num.iteritems()) [eff[s].Divide(den[s]) for s in sources] return eff effs = computeEfficiencies(histosPerSource) can = r.TCanvas('c_'+canvasBasename, canvasBasename, 800, 600) can.cd() pm = first(effs) # pad master pm.SetStats(False) pm.Draw('axis') can.Update() for s, h in effs.iteritems() : h.SetMarkerColor(colors[s] if s in colors else r.kBlack) h.SetLineColor(h.GetMarkerColor()) h.SetLineWidth(2*h.GetLineWidth()) h.SetMarkerStyle(markersSources[s] if s in markersSources else r.kDot) h.Draw('ep same') h.SetDirectory(0) #pprint.pprint(effs) yMin, yMax = getMinMax(effs.values()) pm.SetMinimum(0.0) pm.SetMaximum(0.25 if yMax < 0.5 and zoomIn else 1.1) can.Update() topRightLabel(can, canvasBasename, xpos=0.125, align=13) drawLegendWithDictKeys(can, effs, opt='lp') can.RedrawAxis() can._histos = effs can.Update() outFname = os.path.join(outputDir, canvasBasename+'.png') utils.rmIfExists(outFname) can.SaveAs(outFname)
def plotStackedHistos(histosPerGroup={}, outputDir='', region='', verbose=False):
    groups = histosPerGroup.keys()
    variables = first(histosPerGroup).keys()
    leptonTypes = first(first(histosPerGroup)).keys()
    colors = getGroupColor()
    mkdirIfNeeded(outputDir)
    histosPerName = dict([(region+'_'+var+'_'+lt, # one canvas for each histo, so key with histoname w/out group
                           dict([(g, histosPerGroup[g][var][lt]) for g in groups]))
                          for var in variables for lt in leptonTypes])
    for histoname, histosPerGroup in histosPerName.iteritems():
        missingGroups = [g for g, h in histosPerGroup.iteritems() if not h]
        if missingGroups:
            if verbose:
                print "skip %s, missing histos for %s"%(histoname, str(missingGroups))
            continue
        bkgHistos = dict([(g, h) for g, h in histosPerGroup.iteritems() if g not in ['data', 'signal']])
        totBkg = summedHisto(bkgHistos.values())
        err_band = None # buildErrBandGraph(totBkg, computeStatErr2(totBkg))
        emptyBkg = totBkg.Integral() == 0
        if emptyBkg:
            if verbose:
                print "empty backgrounds, skip %s"%histoname
            continue
        can = r.TCanvas('c_'+histoname, histoname, 800, 600)
        can.cd()
        pm = totBkg # pad master
        pm.SetStats(False)
        pm.Draw('axis')
        can.Update() # necessary to fool root's dumb object ownership
        stack = r.THStack('stack_'+histoname, '')
        can.Update()
        r.SetOwnership(stack, False)
        for s, h in bkgHistos.iteritems():
            h.SetFillColor(colors[s] if s in colors else r.kOrange)
            h.SetDrawOption('bar')
            h.SetDirectory(0)
            stack.Add(h)
        stack.Draw('hist same')
        # err_band.Draw('E2 same')
        data = histosPerGroup['data']
        if data and data.GetEntries():
            data.SetMarkerStyle(r.kFullDotLarge)
            data.Draw('p same')
        # yMin, yMax = getMinMax([h for h in [totBkg, data, err_band] if h]) # fixme with err_band
        yMin, yMax = 0.0, data.GetMaximum()
        pm.SetMinimum(0.0)
        pm.SetMaximum(1.1*yMax)
        can.Update()
        topRightLabel(can, histoname, xpos=0.125, align=13)
        # drawLegendWithDictKeys(can, dictSum(bkgHistos, {'stat err':err_band}), opt='f')
        drawLegendWithDictKeys(can, bkgHistos, opt='f')
        can.RedrawAxis()
        can._stack = stack
        can._histos = [h for h in stack.GetHists()]+[data]
        can.Update()
        outFname = os.path.join(outputDir, histoname+'.png')
        utils.rmIfExists(outFname)
        can.SaveAs(outFname)
def plotStackedHistosSources(histosPerVar={}, outputDir='', region='', verbose=False):
    variables = histosPerVar.keys()
    sources = first(histosPerVar).keys()
    colors = colorsFillSources
    mkdirIfNeeded(outputDir)
    for var in variables:
        for lOrT in ['loose', 'tight']:
            histos = dict((s, histosPerVar[var][s][lOrT]) for s in sources)
            canvasBasename = region+'_region_'+var+'_'+lOrT
            missingSources = [s for s, h in histos.iteritems() if not h]
            if missingSources:
                if verbose:
                    print "skip %s, missing histos for %s"%(var, str(missingSources))
                continue
            totBkg = summedHisto(histos.values())
            err_band = None # buildErrBandGraph(totBkg, computeStatErr2(totBkg))
            emptyBkg = totBkg.Integral() == 0
            if emptyBkg:
                if verbose:
                    print "empty backgrounds, skip %s"%canvasBasename
                continue
            can = r.TCanvas('c_'+canvasBasename, canvasBasename, 800, 600)
            can.cd()
            pm = totBkg # pad master
            pm.SetStats(False)
            pm.Draw('axis')
            can.Update() # necessary to fool root's dumb object ownership
            stack = r.THStack('stack_'+canvasBasename, '')
            can.Update()
            r.SetOwnership(stack, False)
            for s, h in histos.iteritems():
                h.SetFillColor(colors[s] if s in colors else r.kOrange)
                h.SetDrawOption('bar')
                h.SetDirectory(0)
                stack.Add(h)
            stack.Draw('hist same')
            # err_band.Draw('E2 same')
            yMin, yMax = getMinMax([h for h in [totBkg, err_band] if h is not None])
            pm.SetMinimum(0.0)
            pm.SetMaximum(1.1*yMax)
            can.Update()
            topRightLabel(can, canvasBasename, xpos=0.125, align=13)
            # drawLegendWithDictKeys(can, dictSum(histos, {'stat err':err_band}), opt='f')
            drawLegendWithDictKeys(can, histos, opt='f')
            can.RedrawAxis()
            can._stack = stack
            can._histos = [h for h in stack.GetHists()]
            can.Update()
            outFname = os.path.join(outputDir, canvasBasename+'.png')
            utils.rmIfExists(outFname)
            can.SaveAs(outFname)
def plotStackedHistosWithData(histosPerGroup={}, outputDir='', canvasname='', canvastitle='', colors={}, verbose=False):
    "histosPerGroup[group], where group=data is treated as special"
    groups = histosPerGroup.keys()
    mkdirIfNeeded(outputDir)
    missingGroups = [g for g, h in histosPerGroup.iteritems() if not h]
    if missingGroups:
        if verbose:
            print "skip %s, missing histos for %s"%(canvasname, str(missingGroups)) # was 'histoname', which is not defined at this point
        return
    bkgHistos = dict([(g, h) for g, h in histosPerGroup.iteritems() if not isDataSample(g)])
    totBkg = summedHisto(bkgHistos.values())
    err_band = buildErrBandGraph(totBkg, computeStatErr2(totBkg))
    emptyBkg = totBkg.Integral() == 0
    histoname, region = totBkg.GetName(), 'emu' # tmp replacement vars, to be fixed
    if emptyBkg:
        if verbose:
            print "empty backgrounds, skip %s"%histoname
        return
    can = r.TCanvas(canvasname, canvastitle, 800, 600)
    can.cd()
    pm = totBkg # pad master
    pm.SetStats(False)
    pm.Draw('axis')
    can.Update() # necessary to fool root's dumb object ownership
    stack = r.THStack('stack_'+histoname, '')
    can.Update()
    r.SetOwnership(stack, False)
    for s, h in bkgHistos.iteritems():
        h.SetFillColor(colors[s] if s in colors else r.kOrange)
        h.SetDrawOption('bar')
        h.SetDirectory(0)
        stack.Add(h)
    stack.Draw('hist same')
    err_band.Draw('E2 same')
    data = histosPerGroup['data'] if 'data' in histosPerGroup else None
    if data and data.GetEntries():
        data.SetMarkerStyle(r.kFullDotLarge)
        data.Draw('p same')
        if verbose:
            print "integrals : {0} tot.bkg.: {1}, data: {2}".format(histoname, totBkg.Integral(), data.Integral())
    else:
        print "no data"
    yMin, yMax = getMinMax([h for h in [totBkg, data, err_band] if h])
    pm.SetMinimum(0.0)
    pm.SetMaximum(1.1*yMax)
    can.Update()
    topRightLabel(can, "#splitline{%s}{%s}"%(histoname, region), xpos=0.15, ypos=(1.0-0.5*can.GetTopMargin()), align=13)
    drawLegendWithDictKeys(can, dictSum(bkgHistos, {'stat err': err_band}), opt='f')
    can.RedrawAxis()
    can._stack = stack
    can._histos = [h for h in stack.GetHists()]+[data]
    can.Update()
    filename = os.path.join(outputDir, histoname+'.png')
    rmIfExists(filename)
    can.SaveAs(filename)
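# --- hedged sketch (not part of the original scripts) ---
# computeStatErr2 and buildErrBandGraph are defined elsewhere; the assumed behaviour,
# reconstructed from how they are used above, is a per-bin squared statistical error
# and a TGraphAsymmErrors drawn as the 'stat err' band on top of the total background.
import array
import ROOT as r

def computeStatErr2(nominal_histo):
    "per-bin squared statistical uncertainty (symmetric up/down)"
    bins = range(1, 1 + nominal_histo.GetNbinsX())
    err2 = dict((b, nominal_histo.GetBinError(b)**2) for b in bins)
    return {'up': err2, 'down': err2}

def buildErrBandGraph(histo_tot_bkg, err2s):
    "a TGraphAsymmErrors centred on the background prediction, with the stat errors"
    h = histo_tot_bkg
    bins = range(1, 1 + h.GetNbinsX())
    x = [h.GetBinCenter(b) for b in bins]
    y = [h.GetBinContent(b) for b in bins]
    ex = [0.5 * h.GetBinWidth(b) for b in bins]
    ey_lo = [err2s['down'][b]**0.5 for b in bins]
    ey_hi = [err2s['up'][b]**0.5 for b in bins]
    gr = r.TGraphAsymmErrors(len(bins),
                             array.array('d', x), array.array('d', y),
                             array.array('d', ex), array.array('d', ex),
                             array.array('d', ey_lo), array.array('d', ey_hi))
    gr.SetFillStyle(3005)
    return gr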
def submit_batch_fill_job_per_group_per_selection(group=None, selection='', opts=None):
    "if we are processing cached selections, we can submit one job per selection"
    options_dict = vars(opts)
    group_name = group.name if hasattr(group, 'name') else group
    systematic = opts.syst if hasattr(opts, 'syst') and opts.syst else None
    verbose = opts.verbose
    options_dict['group'] = group_name
    options_dict['region'] = selection
    options_dict['regions'] = None
    options_with_value = dict((k, v) for k, v in options_dict.iteritems() if v and v is not True)
    # note to self: the line below assumes that the argument-less options have a default=False
    options_with_toggle = dict((k, v) for k, v in options_dict.iteritems() if v and v is True and k != "batch")
    def escape_regex(v):
        return v if v != '.*' else "'.*'"
    def back_to_dash(v):
        return v.replace('_', '-')
    cmd_line_options = ' '.join(["--%s %s" % (back_to_dash(k), escape_regex(str(v)))
                                 for k, v in options_with_value.iteritems()]
                                + ["--%s" % back_to_dash(k) for k in options_with_toggle.keys()])
    template = 'batch/templates/plot_emu.sh'
    default_log_dir = opts.output_dir.replace('out/', 'log/')
    if default_log_dir.count('/histos') == 1:
        default_log_dir = default_log_dir.replace('/histos', '')
    log_dir = mkdirIfNeeded(opts.log_dir if opts.log_dir else default_log_dir)
    script_dir = mkdirIfNeeded('batch/plot_emu')
    script_name = os.path.join(script_dir,
                               group_name + '_' + selection + ("_{0}".format(systematic) if systematic else '') + '.sh')
    log_name = log_dir + '/' + group_name + '_' + selection + ("_{0}".format(systematic) if systematic else '') + '.log'
    script_file = open(script_name, 'w')
    script_file.write(open(template).read()
                      .replace('%(opt)s', cmd_line_options)
                      .replace('%(logfile)s', log_name)
                      .replace('%(jobname)s', group_name + '_' + selection)
                      .replace('%(queue)s', opts.queue))
    script_file.close()
    cmd = "sbatch %s" % script_name
    if verbose:
        print cmd
    out = getCommandOutput(cmd)
    if verbose:
        print out['stdout']
    if out['stderr']:
        print out['stderr']
def runFill(opts):
    batchMode = opts.batch
    inputFakeDir = opts.input_fake
    inputGenDir = opts.input_gen
    outputDir = opts.output_dir
    sysOption = opts.syst
    excludedSyst = opts.exclude
    verbose = opts.verbose
    if verbose:
        print "filling histos"
    mkdirIfNeeded(outputDir)
    systematics = ['NOM']
    anySys = sysOption is None
    if sysOption == 'fake' or anySys:
        systematics += systUtils.fakeSystVariations()
    if sysOption == 'object' or anySys:
        systematics += systUtils.mcObjectVariations()
    if sysOption == 'weight' or anySys:
        systematics += systUtils.mcWeightVariations()
    if sysOption and sysOption.count(','):
        systematics = [s for s in systUtils.getAllVariations() if s in sysOption.split(',')]
    elif sysOption in systUtils.getAllVariations():
        systematics = [sysOption]
    elif not anySys and len(systematics) == 1 and sysOption != 'NOM':
        raise ValueError("Invalid syst %s"%str(sysOption))
    if excludedSyst:
        systematics = [s for s in systematics if s not in filterWithRegexp(systematics, excludedSyst)]
    if verbose:
        print "about to loop over these systematics:\n %s"%str(systematics)
    for syst in systematics:
        if batchMode:
            newOptions  = " --input-gen %s"  % opts.input_gen
            newOptions += " --input-fake %s" % opts.input_fake
            newOptions += " --output-dir %s" % opts.output_dir
            newOptions += " --verbose %s"    % opts.verbose
            newOptions += " --syst %s"       % syst
            template = 'batch/templates/check_hft_fill.sh.template'
            script = "batch/hft_%s.sh"%syst
            scriptFile = open(script, 'w')
            scriptFile.write(open(template).read()
                             .replace('%(opt)s', newOptions)
                             .replace('%(logfile)s', 'log/hft/fill_'+syst+'.log')
                             .replace('%(jobname)s', 'fill_'+syst))
            scriptFile.close()
            cmd = "sbatch %s"%script
            if verbose:
                print cmd
            out = getCommandOutput(cmd)
            if verbose:
                print out['stdout']
            if out['stderr']:
                print out['stderr']
            continue
        if verbose:
            print '---- filling ', syst
        samplesPerGroup = allSamplesAllGroups()
        [s.setSyst(syst) for g, samples in samplesPerGroup.iteritems() for s in samples]
        counters, histos = countAndFillHistos(samplesPerGroup=samplesPerGroup, syst=syst,
                                              verbose=verbose, outdir=outputDir)
        printCounters(counters)
        saveHistos(samplesPerGroup, histos, outputDir, verbose)
def main():
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-t', '--tag')
    parser.add_option('-i', '--input_dir')
    parser.add_option('-c', '--input_iter')
    parser.add_option('-o', '--output_dir')
    parser.add_option('-O', '--output_file', help='store ratio histograms here')
    parser.add_option('-v', '--verbose', action='store_true', default=False)
    (opts, args) = parser.parse_args()
    requiredOptions = ['tag', 'input_dir', 'input_iter', 'output_dir']
    otherOptions = ['verbose']
    allOptions = requiredOptions + otherOptions
    def optIsNotSpecified(o):
        return not hasattr(opts, o) or getattr(opts, o) is None
    if any(optIsNotSpecified(o) for o in requiredOptions):
        parser.error('Missing required option')
    tag = opts.tag.strip('_')
    inputDirname = opts.input_dir
    fnameInputIter = opts.input_iter
    outputDirname = opts.output_dir
    outputDirname = outputDirname+'/' if not outputDirname.endswith('/') else outputDirname
    outputFilename = opts.output_file
    mkdirIfNeeded(outputDirname)
    verbose = opts.verbose
    if verbose:
        print ('\nUsing the following options:\n'
               +'\n'.join("%s : %s"%(o, str(getattr(opts, o))) for o in allOptions))
    inputDirname = inputDirname+'/' if not inputDirname.endswith('/') else inputDirname
    fileData = r.TFile.Open(inputDirname+'data_'       +tag+'.root')
    fileMc   = r.TFile.Open(inputDirname+'allBkg_'     +tag+'.root')
    fileHf   = r.TFile.Open(inputDirname+'heavyflavor_'+tag+'.root')
    fileIter = r.TFile.Open(fnameInputIter)
    assert fileData, "Missing input file data %s"%str(fileData)
    assert fileMc,   "Missing input file mc %s"%str(fileMc)
    assert fileHf,   "Missing input file hf %s"%str(fileHf)
    assert fileIter, "Missing input file iter %s"%str(fileIter)
    outputFile = r.TFile.Open(outputFilename, 'recreate') if outputFilename else None
    el_conv_sf   = computeAndPlotConvSf(fileData, fileMc, 'elec', 'all_l_pt', outputDirname, outputFile)
    el_qcd_sf    = computeAndPlotHfSf  (fileIter, fileHf, 'elec', 'all_l_pt', outputDirname, outputFile)
    mu_qcd_sf    = computeAndPlotHfSf  (fileIter, fileHf, 'muon', 'all_l_pt', outputDirname, outputFile)
    el_real_sf   = computeAndPlotRealSf(fileData, fileMc, 'elec', 'all_l_pt', outputDirname)
    mu_real_sf   = computeAndPlotRealSf(fileData, fileMc, 'muon', 'all_l_pt', outputDirname)
    el_conv_sf2d = computeAndPlotConvSf2d(fileData, fileMc, 'elec', 'all_l_pt', outputDirname)
    el_qcd_sf2d  = computeAndPlotHfSf2d  (fileIter, fileHf, 'elec', 'all_l_pt', outputDirname)
    mu_qcd_sf2d  = computeAndPlotHfSf2d  (fileIter, fileHf, 'muon', 'all_l_pt', outputDirname)
    print "# --- paste the lines below in buildWeightedMatrix.py ---"
    print "# %s, %s"%(tag, datetime.datetime.now())
    print "mu_qcdSF, mu_realSF = %s, %s"%(mu_qcd_sf, mu_real_sf)
    print "el_convSF, el_qcdSF, el_realSF = %s, %s, %s"%(el_conv_sf, el_qcd_sf, el_real_sf)
    if outputFile:
        outputFile.Close()
def plotStackedHistos(histos={}, datakey=None, stackkeys=[], outputDir='', region='', colors={}, verbose=False): "input: a dictionary of histos[group]" mkdirIfNeeded(outputDir) bkg_histos = dict([(k,h) for k,h in histos.iteritems() if k in stackkeys]) tot_bkg = summedHisto(bkg_histos.values(), label='') err_band = None # tmp disable # err_band = buildErrBandGraph(tot_bkg, computeStatErr2(tot_bkg)) empty_bkg = tot_bkg.Integral()==0 if empty_bkg: if verbose : print "empty backgrounds, skip %s"%tot_bkg.GetName() return histoname = tot_bkg.GetName() can = r.TCanvas('c_'+histoname, histoname, 800, 600) can.cd() pm = tot_bkg # pad master pm.SetStats(False) pm.Draw('axis') can.Update() # necessary to fool root's dumb object ownership stack = r.THStack('stack_'+tot_bkg.GetName(),'') can.Update() r.SetOwnership(stack, False) for s, h in bkg_histos.iteritems() : h.SetFillColor(colors[s] if s in colors else r.kOrange) h.SetDrawOption('bar') h.SetDirectory(0) stack.Add(h) stack.Draw('hist same') # err_band.Draw('E2 same') data = histos[datakey] if datakey and datakey in histos else None if data and data.GetEntries(): data.SetMarkerStyle(r.kFullDotLarge) data.Draw('p same') if verbose: print "data : nEntries {:.1f} totWeight {:.1f} ".format(data.GetEntries(), data.Integral()) yMin, yMax = getMinMax([h for h in [tot_bkg, data, err_band] if h]) # pm.SetMinimum(0.5) pm.SetMaximum(1.1*yMax) can.Update() # can.SetLogy() topRightLabel(can, "#splitline{%s}{%s}"%(histoname, region), xpos=0.125, align=13) drawLegendWithDictKeys(can, dictSum(bkg_histos, {'stat err':err_band}), opt='f') can.RedrawAxis() can._stack = stack can._histos = [h for h in stack.GetHists()]+[data] can.Update() if verbose : print os.path.join(outputDir, histoname+'.png') can.SaveAs(os.path.join(outputDir, histoname+'.png'))
def main():
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-t', '--tag')
    parser.add_option('-f', '--input_fake')
    parser.add_option('-i', '--input_dir')
    parser.add_option('-o', '--output_dir')
    parser.add_option('-v', '--verbose', action='store_true', default=False)
    (opts, args) = parser.parse_args()
    requiredOptions = ['tag', 'input_fake', 'input_dir', 'output_dir']
    otherOptions = ['verbose']
    allOptions = requiredOptions + otherOptions
    def optIsNotSpecified(o):
        return not hasattr(opts, o) or getattr(opts, o) is None
    if any(optIsNotSpecified(o) for o in requiredOptions):
        parser.error('Missing required option')
    tag = opts.tag.strip('_')
    inputFakeFile = opts.input_fake
    inputDirname = opts.input_dir
    outputDir = opts.output_dir
    outputDir = outputDir if outputDir.endswith('/') else outputDir+'/'
    verbose = opts.verbose
    if verbose:
        print '\nUsing the following options:\n'+'\n'.join("%s : %s"%(o, str(getattr(opts, o))) for o in allOptions)
    inputFiles = getInputFiles(inputDirname, tag, verbose)
    inputFiles[fakeSample()] = r.TFile.Open(inputFakeFile)
    assert all(f for f in inputFiles.values()), ("missing inputs: \n%s"%'\n'.join(["%s : %s"%kv for kv in inputFiles.iteritems()]))
    mkdirIfNeeded(outputDir)
    for region in ['cr8lptee', 'cr8lptmm', 'cr9lpt', 'sr8', 'sr9', 'srSsEwk', 'crSsEwkLoose']:
        for channel in ['ee', 'em', 'mm']:
            for varname in ['l0_pt', 'l1_pt', 'll_M', 'metrel', 'met', 'njets', 'nbjets']:
                histo_basename = region+'_'+channel+'_'+varname
                hists, err2s = buildHists(inputFiles, histo_basename)
                if not hists[dataSample()].GetEntries():
                    continue
                err_band   = buildErrBandGraph(hists['sm'], err2s)
                err_band_r = buildErrBandRatioGraph(err_band)
                can = r.TCanvas('can_'+histo_basename, histo_basename, 800, 600)
                botPad, topPad = buildBotTopPads(can)
                can.cd()
                topPad.Draw()
                drawTop(topPad, hists, err_band, (channel, region))
                can.cd()
                botPad.Draw()
                drawBot(botPad, hists[dataSample()], hists['sm'], err_band_r, xaxisLabel(varname))
                can.Update()
                outFilename = outputDir+histo_basename+'.png'
                rmIfExists(outFilename) # avoid root warnings
                can.SaveAs(outFilename)
    if verbose:
        print "output saved to \n%s"%outputDir
def submit_batch_fill_job_per_group(group, opts):
    options_dict = vars(opts)
    group_name = group.name if hasattr(group, 'name') else group
    verbose = opts.verbose
    options_dict['group'] = group_name
    options_with_value = dict((k, v) for k, v in options_dict.iteritems() if v and v is not True)
    # note to self: the line below assumes that the argument-less options have a default=False
    options_with_toggle = dict((k, v) for k, v in options_dict.iteritems() if v and v is True and k != "batch")
    def escape_regex(v):
        return v if v != '.*' else "'.*'"
    def back_to_dash(v):
        return v.replace('_', '-')
    cmd_line_options = ' '.join(["--%s %s" % (back_to_dash(k), escape_regex(str(v)))
                                 for k, v in options_with_value.iteritems()]
                                + ["--%s" % back_to_dash(k) for k in options_with_toggle.keys()]
                                + ['--just-fill'])
    template = 'batch/templates/plot_by_source.sh'
    default_log_dir = opts.output_dir.replace('out/', 'log/')
    if default_log_dir.count('/histos') == 1:
        default_log_dir = default_log_dir.replace('/histos', '')
    log_dir = mkdirIfNeeded(opts.log_dir if opts.log_dir else default_log_dir)
    script_dir = mkdirIfNeeded('batch/plot_by_source')
    script_name = os.path.join(script_dir, group_name + '.sh')
    log_name = log_dir + '/' + group_name + '.log'
    script_file = open(script_name, 'w')
    script_file.write(open(template).read()
                      .replace('%(opt)s', cmd_line_options)
                      .replace('%(logfile)s', log_name)
                      .replace('%(jobname)s', group_name)
                      .replace('%(queue)s', opts.queue))
    script_file.close()
    cmd = "sbatch %s" % script_name
    if verbose:
        print cmd
    out = getCommandOutput(cmd)
    if verbose:
        print out['stdout']
    if out['stderr']:
        print out['stderr']
def main():
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-t', '--tag')
    parser.add_option('-i', '--input_dir')
    parser.add_option('-f', '--input_fractions')
    parser.add_option('-o', '--output_file')
    parser.add_option('-p', '--output_plot')
    parser.add_option('-s', '--input-el-sf', default=[], action='append',
                      help='electron bin-by-bin scale factors (from compute_fake_el_scale_factor)')
    parser.add_option('-z', '--zoom-in', help='vertical axis efficiency plots')
    parser.add_option('-v', '--verbose', action='store_true', default=False)
    (opts, args) = parser.parse_args()
    requiredOptions = ['tag', 'input_dir', 'output_file', 'output_plot']
    otherOptions = ['verbose']
    allOptions = requiredOptions + otherOptions
    def optIsNotSpecified(o):
        return not hasattr(opts, o) or getattr(opts, o) is None
    if any(optIsNotSpecified(o) for o in requiredOptions):
        parser.error('Missing required option')
    tag = opts.tag
    inputDirname   = opts.input_dir
    inputFracFname = opts.input_fractions
    inputSfFnames  = opts.input_el_sf
    outputFname    = opts.output_file
    outputPlotDir  = opts.output_plot
    zoomIn = opts.zoom_in
    verbose = opts.verbose
    if verbose:
        print '\nUsing the following options:\n'+'\n'.join("%s : %s"%(o, str(getattr(opts, o))) for o in allOptions)
    allInputFiles = getInputFiles(inputDirname, tag, verbose) # includes allBkg, which is used only for sys
    assert all(f for f in allInputFiles.values()), ("missing inputs: \n%s"%'\n'.join(["%s : %s"%kv for kv in allInputFiles.iteritems()]))
    if inputSfFnames and any([not os.path.exists(f) for f in inputSfFnames]):
        parser.error("invalid electron sf file(s) %s"%inputSfFnames)
    outputPlotDir = outputPlotDir+'/' if not outputPlotDir.endswith('/') else outputPlotDir # the 'else' branch used to drop the path
    mkdirIfNeeded(outputPlotDir)
    outputFile = r.TFile.Open(outputFname, 'recreate')
    inputFiles = dict((k, v) for k, v in allInputFiles.iteritems() if k in fakeProcesses())
    inputFracFile = r.TFile.Open(inputFracFname) if inputFracFname else None
    if inputFracFname and not inputFracFile:
        parser.error("invalid fraction file %s"%inputFracFname)
    buildMuonRates    (inputFiles, outputFile, outputPlotDir, inputFracFile=inputFracFile,
                       verbose=verbose, zoomIn=zoomIn)
    buildElectronRates(inputFiles, outputFile, outputPlotDir, inputFracFile=inputFracFile,
                       inputElecSfFiles=inputSfFnames, verbose=verbose, zoomIn=zoomIn)
    buildSystematics  (allInputFiles['allBkg'], outputFile, verbose)
    outputFile.Close()
    if verbose:
        print "output saved to \n%s"%'\n'.join([outputFname, outputPlotDir])
def plotVar(bkgHistos, sigHistos, llnjvar, plotdir='./'):
    def preferredSignal(signals):
        pref = 'Herwigpp_sM_wA_noslep_notauhad_WH_2Lep_1'
        return pref if pref in signals else first(sorted(signals))
    signalSample = preferredSignal(sigHistos.keys())
    allHistos = bkgHistos.values() + [sigHistos[signalSample], ]
    allHistosEmpty = all([h.GetEntries() == 0 for h in allHistos])
    if allHistosEmpty:
        return
    can = r.TCanvas('can_'+llnjvar, llnjvar, 800, 800)
    botPad, topPad = buildBotTopPads(can, splitFraction=0.75, squeezeMargins=False)
    totBkg = summedHisto(bkgHistos.values())
    totBkg.SetDirectory(0)
    can._totBkg = totBkg
    can._histos = [bkgHistos, sigHistos]
    can.cd()
    botPad.Draw()
    drawBottom(botPad, totBkg, bkgHistos, sigHistos[signalSample], llnjvar)
    can.cd()
    topPad.Draw()
    drawTop(topPad, totBkg, sigHistos[signalSample])
    mkdirIfNeeded(plotdir)
    outFilename = plotdir+'/'+llnjvar+'.png'
    rmIfExists(outFilename) # avoid root warnings
    can.SaveAs(outFilename)
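# --- hedged sketch (not part of the original scripts) ---
# buildBotTopPads is defined elsewhere; the assumed behaviour, inferred from the two
# call sites above, is to split a canvas horizontally at 'splitFraction' of its height
# and return (botPad, topPad), optionally squeezing the facing margins so the pads
# share the x axis. The default splitFraction is a guess.
import ROOT as r

def buildBotTopPads(canvas, splitFraction=0.85, squeezeMargins=True):
    "split a canvas into a bottom and a top pad; the caller draws and fills them"
    canvas.cd()
    botPad = r.TPad(canvas.GetName()+'_bot', 'bot pad', 0.0, 0.0, 1.0, splitFraction)
    topPad = r.TPad(canvas.GetName()+'_top', 'top pad', 0.0, splitFraction, 1.0, 1.0)
    if squeezeMargins:
        botPad.SetTopMargin(0.0)
        topPad.SetBottomMargin(0.0)
    r.SetOwnership(botPad, False)
    r.SetOwnership(topPad, False)
    return botPad, topPad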
def plotPerSourceEff(histosPerVar={}, outputDir='', lepton='', region='', sample='', verbose=False, zoomIn=True): "plot efficiency for each source (and 'anysource') as a function of each var; expect histos[var][source][loose,tight]" variables = histosPerVar.keys() sources = [s for s in first(histosPerVar).keys() if s != 'real'] # only fake eff really need a scale factor colors = colorsLineSources mkdirIfNeeded(outputDir) for var in filter(lambda x: x in ['pt1', 'eta1'], histosPerVar.keys()): histosPerSource = dict((s, histosPerVar[var][s]) for s in sources) canvasBasename = region + '_efficiency_' + lepton + '_' + var + ( "_%s" % sample if sample else '') missingSources = [ s for s, h in histosPerSource.iteritems() if not h['loose'] or not h['tight'] ] if missingSources: if verbose: print "skip %s, missing histos for %s" % (var, str(missingSources)) continue anySourceLoose = summedHisto( [h['loose'] for h in histosPerSource.values()]) anySourceTight = summedHisto( [h['tight'] for h in histosPerSource.values()]) anySourceLoose.SetName(histoNamePerSource(var, 'any', 'loose', region)) anySourceTight.SetName(histoNamePerSource(var, 'any', 'tight', region)) histosPerSource['any'] = { 'loose': anySourceLoose, 'tight': anySourceTight } emptyBkg = anySourceLoose.Integral() == 0 or anySourceTight.Integral( ) == 0 if emptyBkg: if verbose: print "empty backgrounds, skip %s" % canvasBasename continue def computeEfficiencies(histosPerSource={}): sources = histosPerSource.keys() num = dict((s, histosPerSource[s]['tight']) for s in sources) den = dict((s, histosPerSource[s]['loose']) for s in sources) eff = dict( (s, h.Clone(h.GetName().replace('tight', 'tight_over_loose'))) for s, h in num.iteritems()) [eff[s].Divide(den[s]) for s in sources] return eff effs = computeEfficiencies(histosPerSource) can = r.TCanvas('c_' + canvasBasename, canvasBasename, 800, 600) can.cd() pm = first(effs) # pad master pm.SetStats(False) pm.Draw('axis') can.Update() for s, h in effs.iteritems(): h.SetMarkerColor(colors[s] if s in colors else r.kBlack) h.SetLineColor(h.GetMarkerColor()) h.SetLineWidth(2 * h.GetLineWidth()) h.SetMarkerStyle(markersSources[s] if s in markersSources else r.kDot) h.Draw('ep same') h.SetDirectory(0) #pprint.pprint(effs) yMin, yMax = getMinMax(effs.values()) pm.SetMinimum(0.0) pm.SetMaximum(0.25 if yMax < 0.5 and zoomIn else 1.1) can.Update() topRightLabel(can, canvasBasename, xpos=0.125, align=13) drawLegendWithDictKeys(can, effs, opt='lp') can.RedrawAxis() can._histos = effs can.Update() outFname = os.path.join(outputDir, canvasBasename + '.png') utils.rmIfExists(outFname) can.SaveAs(outFname)
def runPlot(opts):
    inputDir = opts.input_dir
    outputDir = opts.output_dir
    verbose = opts.verbose
    mkdirIfNeeded(outputDir)
    buildTotBkg = systUtils.buildTotBackgroundHisto
    buildStat = systUtils.buildStatisticalErrorBand
    buildSyst = systUtils.buildSystematicErrorBand
    selections = regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions)
    variables = variables_to_plot()
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir)
    groups.append(first([g for g in groups if g.is_data]).clone_data_as_fake())
    if not skip_charge_flip:
        groups.append(dataset.DatasetGroup.build_qflip_from_simulated_samples(groups))
    plot_groups = [systUtils.Group(g.name) for g in groups]
    sel_not_specified = len(regions_to_plot()) == len(selections)
    if sel_not_specified:
        selections = guess_available_selections_from_histofiles(inputDir, first(plot_groups), verbose)
    systematics_to_use = get_list_of_syst_to_fill(opts)
    for group in plot_groups:
        group.setCurrentSelection(first(selections))
        group.setHistosDir(inputDir).setCurrentSelection(first(selections))
        group.exploreAvailableSystematics(verbose)
        group.filterAndDropSystematics(systematics_to_use, opts.exclude, verbose)
    available_systematics = sorted(list(set([s for g in plot_groups for s in g.systematics])))
    systematics = [s for s in systematics_to_use if s in available_systematics]
    if verbose:
        print "using the following systematics : {0}".format(systematics)
        print "missing the following systematics : {0}".format([s for s in systematics_to_use if s not in available_systematics])
    fakeSystematics = [s for s in systematics if s in systUtils.fakeSystVariations()]
    mcSystematics = [s for s in systematics
                     if s in systUtils.mcObjectVariations() + systUtils.mcWeightVariations()]
    mkdirIfNeeded(outputDir)
    findByName = systUtils.findByName
    simBkgs = [g for g in plot_groups if g.isMcBkg]
    data = findByName(plot_groups, 'data')
    fake = findByName(plot_groups, 'fake')
    signal = findByName(plot_groups, 'signaltaumu')
    print 'names_stacked_groups to be improved'
    names_stacked_groups = [g.name for g in simBkgs + [fake]]
    for sel in selections:
        if verbose:
            print '-- plotting ', sel
        for var in variables:
            if verbose:
                print '---- plotting ', var
            print_summary_yield = var == 'onebin' # 'is' comparison replaced with equality
            for g in plot_groups:
                g.setSystNominal()
                g.setCurrentSelection(sel)
            nominalHistoData = data.getHistogram(variable=var, selection=sel, cacheIt=True)
            nominalHistoSign = signal.getHistogram(variable=var, selection=sel, cacheIt=True)
            nominalHistoFakeBkg = fake.getHistogram(variable=var, selection=sel, cacheIt=True)
            nominalHistosSimBkg = dict([(g.name, g.getHistogram(variable=var, selection=sel, cacheIt=True))
                                        for g in simBkgs])
            nominalHistosBkg = dict([('fake', nominalHistoFakeBkg)]
                                    + [(g, h) for g, h in nominalHistosSimBkg.iteritems()])
            nominalHistoTotBkg = buildTotBkg(histoFakeBkg=nominalHistoFakeBkg,
                                             histosSimBkgs=nominalHistosSimBkg)
            statErrBand = buildStat(nominalHistoTotBkg)
            systErrBand = buildSyst(fake=fake, simBkgs=simBkgs, variable=var, selection=sel,
                                    fakeVariations=fakeSystematics, mcVariations=mcSystematics,
                                    verbose=verbose, printYield=print_summary_yield)
            # if print_summary_yield:
            #     print_stat_syst_yield(fake=fake, variable=var, selection=sel, fakeVariations=fakeSystematics)
            plotHistos(histoData=nominalHistoData, histoSignal=nominalHistoSign,
                       histoTotBkg=nominalHistoTotBkg, histosBkg=nominalHistosBkg,
                       statErrBand=statErrBand, systErrBand=systErrBand,
                       stack_order=names_stacked_groups, topLabel=sel,
                       canvasName=(sel+'_'+var), outdir=outputDir, options=opts,
                       printYieldSummary=print_summary_yield)
    for group in plot_groups:
        group.printVariationsSummary()
def runFill(opts):
    batchMode = opts.batch
    inputFakeDir = opts.input_fake
    inputGenDir = opts.input_other
    outputDir = opts.output_dir
    verbose = opts.verbose
    debug = opts.debug
    blinded = not opts.unblind
    tightight = opts.require_tight_tight
    if debug:
        dataset.Dataset.verbose_parsing = True
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir)
    if not skip_charge_flip:
        groups.append(dataset.DatasetGroup.build_qflip_from_simulated_samples(groups))
    groups.append(first([g for g in groups if g.is_data]).clone_data_as_fake())
    groups = parse_group_option(opts, groups)
    if verbose:
        print '\n'.join("group {0} : {1} samples".format(g.name, len(g.datasets)) for g in groups)
    if debug:
        print '\n'.join("group {0} : {1} samples: {2}".format(g.name, len(g.datasets),
                                                              '\n\t'+'\n\t'.join(d.name for d in g.datasets))
                        for g in groups)
    if verbose:
        print "filling histos"
    # eval will take care of aborting on typos
    onthefly_tight_def = eval(opts.tight_def) if opts.tight_def else None
    mkdirIfNeeded(outputDir)
    systematics = get_list_of_syst_to_fill(opts)
    regions = regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions)
    if verbose:
        print "about to loop over these systematics:\n %s" % str(systematics)
    if verbose:
        print "about to loop over these regions:\n %s" % str(regions)
    if batchMode:
        for group in groups:
            for systematic in systematics:
                if systUtils.Group(group.name).isNeededForSys(systematic):
                    opts.syst = systematic
                    for selection in regions:
                        submit_batch_fill_job_per_group_per_selection(group=group, selection=selection, opts=opts)
    else:
        for group in groups:
            systematics = [s for s in systematics if systUtils.Group(group.name).isNeededForSys(s)]
            if not systematics:
                print "warning, empty syst list. You should have at least the nominal"
            for systematic in systematics:
                # note to self: here you will want to use a modified Sample.setHftInputDir
                # for now we just have the fake syst that are in the nominal tree
                tree_name = 'hlfv_tuple'
                chain = IndexedChain(tree_name)
                input_dir = opts.input_fake if group.name == 'fake' else opts.input_other
                for ds in group.datasets:
                    chain.Add(os.path.join(input_dir,
                                           systUtils.Sample(ds.name, group.name).setSyst(systematic).filename))
                if opts.verbose:
                    print "{0} : {1} entries from {2} samples".format(group.name, chain.GetEntries(), len(group.datasets))
                chain.cache_directory = os.path.abspath('./selection_cache/'+group.name+'/')
                tcuts = [r.TCut(reg, selection_formulas()[reg]) for reg in regions]
                chain.retrieve_entrylists(tcuts)
                counters_pre, histos_pre = dict(), dict()
                counters_npre, histos_npre = dict(), dict()
                cached_tcuts = [] if opts.disable_cache else chain.tcuts_with_existing_list()
                uncached_tcuts = tcuts if opts.disable_cache else chain.tcuts_without_existing_list()
                if verbose:
                    print 'filling cached cuts: ', ' '.join([c.GetName() for c in cached_tcuts])
                for cut in cached_tcuts:
                    chain.preselect(cut)
                    c_pre, h_pre = count_and_fill(chain=chain, sample=group.name, syst=systematic,
                                                  verbose=verbose, debug=debug, blinded=blinded,
                                                  onthefly_tight_def=onthefly_tight_def, tightight=tightight,
                                                  quicktest=opts.quick_test, cached_cut=cut)
                    out_filename = (systUtils.Group(group.name)
                                    .setSyst(systematic)
                                    .setHistosDir(outputDir)
                                    .setCurrentSelection(cut.GetName())).filenameHisto
                    writeObjectsToFile(out_filename, h_pre, verbose)
                    counters_pre = dictSum(counters_pre, c_pre)
                    histos_pre = dictSum(histos_pre, h_pre)
                if uncached_tcuts:
                    if verbose:
                        print 'filling uncached cuts: ', ' '.join([c.GetName() for c in uncached_tcuts])
                    counters_npre, histos_npre = count_and_fill(chain=chain, sample=group.name, syst=systematic,
                                                                verbose=verbose, debug=debug, blinded=blinded,
                                                                onthefly_tight_def=onthefly_tight_def,
                                                                tightight=tightight, quicktest=opts.quick_test,
                                                                noncached_cuts=uncached_tcuts)
                    for sel, histos in histos_npre.iteritems():
                        out_filename = (systUtils.Group(group.name)
                                        .setSyst(systematic)
                                        .setHistosDir(outputDir)
                                        .setCurrentSelection(sel)).filenameHisto
                        writeObjectsToFile(out_filename, histos, verbose)
                chain.save_lists()
def main():
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-i', '--input-dir', default='./out/fakerate')
    parser.add_option('-o', '--output-dir', default='./out/tight_variables_plots', help='dir for plots')
    parser.add_option('-l', '--lepton', default='el', help='either el or mu')
    parser.add_option('-r', '--region', help='one of the regions for which we saved the fake ntuples')
    parser.add_option('-t', '--tag', help='tag used to select the input files (e.g. Apr_04)')
    parser.add_option('-f', '--fill-histos', action='store_true', default=False,
                      help='force fill (default only if needed)')
    parser.add_option('-v', '--verbose', action='store_true', default=False)
    (options, args) = parser.parse_args()
    inputDir  = options.input_dir
    outputDir = options.output_dir
    lepton    = options.lepton
    region    = options.region
    tag       = options.tag
    verbose   = options.verbose
    if not tag:
        parser.error('tag is a required option')
    if lepton not in ['el', 'mu']:
        parser.error("invalid lepton '%s'"%lepton)
    filestems, treenames = utils.verticalSlice(fakeu.tupleStemsAndNames)
    regions = filestems
    assert region in regions, "invalid region '%s', must be one of %s"%(region, str(regions))
    templateInputFilename = "*_%(region)s_tuple_%(tag)s.root" % {'tag': tag, 'region': region}
    templateOutputFilename = "%(region)s_%(l)s_tight_plots.root" % {'region': region, 'l': lepton}
    treeName = treenames[regions.index(region)]
    outputDir = outputDir+'/'+region+'/'+lepton # split the output in subdirectories, so we don't overwrite things
    mkdirIfNeeded(outputDir)
    outputFileName = os.path.join(outputDir, templateOutputFilename)
    cacheFileName = outputFileName.replace('.root', '_'+region+'_cache.root')
    doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName)
    optionsToPrint = ['inputDir', 'outputDir', 'region', 'tag', 'doFillHistograms']
    if verbose:
        print "working from %s"%os.getcwd()
        print "being called as : %s"%' '.join(os.sys.argv)
        print "options:\n"+'\n'.join(["%s : %s"%(o, eval(o)) for o in optionsToPrint])
    # collect inputs
    if verbose:
        print 'input files ', os.path.join(inputDir, templateInputFilename)
    tupleFilenames = glob.glob(os.path.join(inputDir, templateInputFilename))
    samples = setSameGroupForAllData(fastSamplesFromFilenames(tupleFilenames, verbose))
    if not samples:
        samples = [guessSampleFromFilename(f) for f in tupleFilenames] # if the fast guess didn't work, try the slow one
    samplesPerGroup = collections.defaultdict(list)
    filenamesPerGroup = collections.defaultdict(list)
    for s, f in zip(samples, tupleFilenames):
        samplesPerGroup[s.group].append(s)
        filenamesPerGroup[s.group].append(f)
    vars = ['pt', 'eta', 'd0sig', 'z0SinTheta', 'etCone', 'ptCone', 'etConeCorr', 'ptConeCorr']
    vars += ['relEtConeStd', 'relPtConeStd', 'relEtConeMod', 'relPtConeMod']
    groups = samplesPerGroup.keys()
    sources = leptonSources
    # fill histos
    if doFillHistograms:
        lepLabel = "(probe %s)"%lepton
        histosPerGroup = bookHistosPerGroup(vars, groups, lepLabel=lepLabel)
        histosPerSource = bookHistosPerSource(vars, sources, lepLabel=lepLabel)
        for group in groups:
            isData = isDataSample(group)
            filenames = filenamesPerGroup[group]
            histosThisGroup = histosPerGroup[group]
            chain = r.TChain(treeName)
            [chain.Add(fn) for fn in filenames]
            print "%s : %d entries"%(group, chain.GetEntries())
            fillHistos(chain, histosThisGroup, histosPerSource, isData, lepton, group, verbose)
        writeHistos(cacheFileName, {'perGroup': histosPerGroup, 'perSource': histosPerSource}, verbose)
    # compute scale factors
    histosPerGroup = fetchHistos(cacheFileName, histoNames(vars, groups), verbose)
    histosPerSource = fetchHistos(cacheFileName, histoNames(vars, sources), verbose)
    plotStackedHistos(histosPerGroup, outputDir+'/by_group', region, colors=SampleUtils.colors, verbose=verbose)
    plotStackedHistos(histosPerSource, outputDir+'/by_source', region, colors=fakeu.colorsFillSources(), verbose=verbose)
    plotIsoComparison(histosPerSource, outputDir+'/', region, lepton, verbose)
def runPlot(opts) : inputDir = opts.input_dir outputDir = opts.output_dir verbose = opts.verbose mkdirIfNeeded(outputDir) buildTotBkg = systUtils.buildTotBackgroundHisto buildStat = systUtils.buildStatisticalErrorBand buildSyst = systUtils.buildSystematicErrorBand selections = regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions) variables = variables_to_plot() groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir) groups.append(first([g for g in groups if g.is_data]).clone_data_as_fake()) if not skip_charge_flip : groups.append(dataset.DatasetGroup.build_qflip_from_simulated_samples(groups)) plot_groups = [systUtils.Group(g.name) for g in groups] sel_not_specified = len(regions_to_plot())==len(selections) if sel_not_specified: selections = guess_available_selections_from_histofiles(inputDir, first(plot_groups), verbose) systematics_to_use = get_list_of_syst_to_fill(opts) for group in plot_groups : group.setCurrentSelection(first(selections)) group.setHistosDir(inputDir).setCurrentSelection(first(selections)) group.exploreAvailableSystematics(verbose) group.filterAndDropSystematics(systematics_to_use, opts.exclude, verbose) available_systematics = sorted(list(set([s for g in plot_groups for s in g.systematics]))) systematics = [s for s in systematics_to_use if s in available_systematics] if verbose : print "using the following systematics : {0}".format(systematics) print "missing the following systematics : {0}".format([s for s in systematics_to_use if s not in available_systematics]) fakeSystematics = [s for s in systematics if s in systUtils.fakeSystVariations()] mcSystematics = [s for s in systematics if s in systUtils.mcObjectVariations() + systUtils.mcWeightVariations()] mkdirIfNeeded(outputDir) findByName = systUtils.findByName simBkgs = [g for g in plot_groups if g.isMcBkg] data = findByName(plot_groups, 'data') fake = findByName(plot_groups, 'fake') signal = findByName(plot_groups, 'signaltaumu') print 'names_stacked_groups to be improved' names_stacked_groups = [g.name for g in simBkgs+[fake]] for sel in selections : if verbose : print '-- plotting ',sel for var in variables : if verbose : print '---- plotting ',var print_summary_yield = var is 'onebin' for g in plot_groups : g.setSystNominal() g.setCurrentSelection(sel) nominalHistoData = data.getHistogram(variable=var, selection=sel, cacheIt=True) nominalHistoSign = signal.getHistogram(variable=var, selection=sel, cacheIt=True) nominalHistoFakeBkg = fake.getHistogram(variable=var, selection=sel, cacheIt=True) nominalHistosSimBkg = dict([(g.name, g.getHistogram(variable=var, selection=sel, cacheIt=True)) for g in simBkgs]) nominalHistosBkg = dict([('fake', nominalHistoFakeBkg)] + [(g, h) for g, h in nominalHistosSimBkg.iteritems()]) nominalHistoTotBkg = buildTotBkg(histoFakeBkg=nominalHistoFakeBkg, histosSimBkgs=nominalHistosSimBkg) statErrBand = buildStat(nominalHistoTotBkg) systErrBand = buildSyst(fake=fake, simBkgs=simBkgs, variable=var, selection=sel, fakeVariations=fakeSystematics, mcVariations=mcSystematics, verbose=verbose, printYield=print_summary_yield) # if print_summary_yield: # print_stat_syst_yield(fake=fake, variable=var, selection=sel, fakeVariations=fakeSystematics) plotHistos(histoData=nominalHistoData, histoSignal=nominalHistoSign, histoTotBkg=nominalHistoTotBkg, histosBkg=nominalHistosBkg, statErrBand=statErrBand, systErrBand=systErrBand, stack_order=names_stacked_groups, topLabel=sel, canvasName=(sel+'_'+var), outdir=outputDir, options=opts, 
printYieldSummary=print_summary_yield)
for group in plot_groups : group.printVariationsSummary()
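# systUtils.buildTotBackgroundHisto is defined elsewhere in the package; the helper
# below is only a guess at its behaviour (clone the data-driven fake estimate and
# add every simulated background on top), kept here as a reading aid. The name and
# signature are assumptions, not the real implementation.
def buildTotBackgroundHisto_sketch(histoFakeBkg=None, histosSimBkgs={}):
    "hypothetical: total background = fake estimate + sum of simulated backgrounds"
    tot = histoFakeBkg.Clone(histoFakeBkg.GetName()+'_tot_bkg')
    tot.SetDirectory(0)  # detach from any open TFile
    for group_name, h in histosSimBkgs.items():
        tot.Add(h)
    return tot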
def main(): parser = optparse.OptionParser(usage=usage) parser.add_option("-n", "--n_iter", type="int", default=8) parser.add_option("-m", "--input_mc") parser.add_option("-d", "--input_data") parser.add_option("-o", "--output") parser.add_option("-p", "--plot", help="plot inputs") # todo: implement sanity plot vs. n_iter parser.add_option("-v", "--verbose", action="store_true", default=False) (opts, args) = parser.parse_args() requiredOptions = ["n_iter", "input_mc", "input_data", "output"] otherOptions = ["plot", "verbose"] allOptions = requiredOptions + otherOptions def optIsNotSpecified(o): return not hasattr(opts, o) or getattr(opts, o) is None if any(optIsNotSpecified(o) for o in requiredOptions): parser.error("Missing required option") nIter = opts.n_iter fnameInputMc = opts.input_mc fnameInputDa = opts.input_data fnameOutput = opts.output plotdir = opts.plot verbose = opts.verbose if verbose: print ( "\nUsing the following options:\n" + "\n".join("%s : %s" % (o, str(getattr(opts, o))) for o in allOptions) ) fileData = r.TFile.Open(fnameInputDa) fileMc = r.TFile.Open(fnameInputMc) if plotdir: mkdirIfNeeded(plotdir) assert fileData and fileMc, "Missing input files: data %s, mc %s" % (str(fileData), str(fileMc)) correctionHistos = {} for lep in ["muon", "elec"]: if verbose: print "Lepton: %s" % lep hRealDataCr = getNumDenHistos(fileData, lep + "_realCR_all_l_pt") hFakeDataLo = getNumDenHistos(fileData, lep + "_fakeHF_all_l_pt") hFakeDataHi = getNumDenHistos(fileData, lep + "_fakeHF_high_all_l_pt") hFakeMcLo = getNumDenHistos(fileMc, lep + "_fakeHF_all_l_pt") hFakeMcHi = getNumDenHistos(fileMc, lep + "_fakeHF_high_all_l_pt") if plotdir: hNumDen = [hFakeDataLo, hFakeDataHi, hFakeMcLo, hFakeMcHi] for nd in ["num", "den"]: plotHistos([h[nd] for h in hNumDen], "c_" + lep + "_" + nd, plotdir) plotHistosRatio(hNumDen, "c_" + lep + "_ratio", plotdir) h2dRealDataCr = getNumDenHistos(fileData, lep + "_realCR_all_l_pt_eta") h2dFakeDataLo = getNumDenHistos(fileData, lep + "_fakeHF_all_l_pt_eta") h2dFakeDataHi = getNumDenHistos(fileData, lep + "_fakeHF_high_all_l_pt_eta") h2dFakeMcLo = getNumDenHistos(fileMc, lep + "_fakeHF_all_l_pt_eta") h2dFakeMcHi = getNumDenHistos(fileMc, lep + "_fakeHF_high_all_l_pt_eta") def missingInputHisto(ndHistos): return any(not h for h in ndHistos.values()) histoCollToBeChecked = ["hRealDataCr", "hFakeDataLo", "hFakeDataHi", "hFakeMcLo", "hFakeMcHi"] missingHistos = dict( [(nhc, hp) for nhc, hp in [(hc, eval(hc)) for hc in histoCollToBeChecked] if missingInputHisto(hp)] ) for v in histoCollToBeChecked: print "entries 1d %s : num %d den %d (%s)" % ( v, eval(v)["num"].GetEntries(), eval(v)["den"].GetEntries(), str(eval(v)["den"]), ) histoCollToBeChecked = ["h2dRealDataCr", "h2dFakeDataLo", "h2dFakeDataHi", "h2dFakeMcLo", "h2dFakeMcHi"] missingHistos = dict( [(nhc, hp) for nhc, hp in [(hc, eval(hc)) for hc in histoCollToBeChecked] if missingInputHisto(hp)] ) for v in histoCollToBeChecked: print "entries 2d %s : num %d den %d (%s)" % ( v, eval(v)["num"].GetEntries(), eval(v)["den"].GetEntries(), str(eval(v)["den"]), ) print histoCollToBeChecked print missingHistos if len(missingHistos): print ( lep + " : missing histograms: \n" + "\n".join(["%s: num %s den %s" % (k, v["num"], v["den"]) for k, v in missingHistos.iteritems()]) ) continue correctionHistos[lep] = buildCorrectionHisto( hRealDataCr, hFakeDataLo, hFakeDataHi, hFakeMcLo, hFakeMcHi, nIter=nIter, verbose=verbose, histoname=lep + "_corHFRate", plotdir=plotdir, ) # here do the 2d ones print 10 * "--", " now doing the 
2d ones ", 10 * "--" dummy = h2dRealDataCr["num"] xAx, yAx = dummy.GetXaxis(), dummy.GetYaxis() print dummy.GetName(), ": bins (%d, %d)" % (dummy.GetNbinsX(), dummy.GetNbinsY()) nEtaBins = yAx.GetNbins() print "nEtaBins: ", nEtaBins xMin, xMax = xAx.GetXmin(), xAx.GetXmax() etaBins = range(1, 1 + nEtaBins) for eb in etaBins: def etaSlice(h, b, p): return h.ProjectionX(p + h.GetName() + "_eta%d" % b, b, b) # prefix needed to avoid overwriting hRealDataCr = dict((k, etaSlice(h, eb, "rdc")) for k, h in h2dRealDataCr.iteritems()) hFakeDataLo = dict((k, etaSlice(h, eb, "fdl")) for k, h in h2dFakeDataLo.iteritems()) hFakeDataHi = dict((k, etaSlice(h, eb, "fdh")) for k, h in h2dFakeDataHi.iteritems()) hFakeMcLo = dict((k, etaSlice(h, eb, "fml")) for k, h in h2dFakeMcLo.iteritems()) hFakeMcHi = dict((k, etaSlice(h, eb, "fmh")) for k, h in h2dFakeMcHi.iteritems()) print "eta bin ", eb for k, h in hFakeDataLo.iteritems(): print "fakeDataLo %s : %s" % (k, lf2s(getBinContents(h))) correctionHistos[lep + "_eta%d" % eb] = buildCorrectionHisto( hRealDataCr, hFakeDataLo, hFakeDataHi, hFakeMcLo, hFakeMcHi, nIter=nIter, verbose=verbose, histoname=lep + "_corHFRate" + "_eta_bin%d" % eb, ) correctionHistos[lep + "_eta"] = combineEtaSlices( template2d=h2dRealDataCr["num"], etaSlicedRates=dict((k, h) for k, h in correctionHistos.iteritems() if (lep + "_eta") in k), histoname=lep + "_corHFRate_eta", ) print 10 * "--", " done ", 10 * "--" if verbose: print "saving output to ", fnameOutput fileOut = r.TFile.Open(fnameOutput, "recreate") fileOut.cd() print "keys ", correctionHistos.keys() for l, h in correctionHistos.iteritems(): if verbose: print "%s : writing %s\n%s" % (l, h.GetName(), histo1dToTxt(h)) h.Write() fileOut.Close()
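# combineEtaSlices is defined elsewhere; a minimal sketch, assuming it packs the
# per-eta-bin 1D corrected rates back into the 2D (pt, eta) template. The body is
# guessed for illustration (simple string sort of the keys, so only valid for fewer
# than ten eta bins), not the actual code.
def combineEtaSlices_sketch(template2d=None, etaSlicedRates={}, histoname=''):
    h2 = template2d.Clone(histoname)
    h2.Reset()
    h2.SetDirectory(0)
    for eb, key in enumerate(sorted(etaSlicedRates.keys()), 1):  # one 1D slice per eta bin
        h1 = etaSlicedRates[key]
        for pb in range(1, 1+h2.GetNbinsX()):
            h2.SetBinContent(pb, eb, h1.GetBinContent(pb))
            h2.SetBinError(pb, eb, h1.GetBinError(pb))
    return h2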
def runFill(opts): lepton = opts.lepton batchMode = opts.batch inputDir = opts.input_dir outputDir = opts.output_dir verbose = opts.verbose debug = opts.debug dataset.Dataset.verbose_parsing = True if debug else False groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir) if opts.group : groups = [g for g in groups if g.name==opts.group] if verbose : print '\n'.join("group {0} : {1} samples".format(g.name, len(g.datasets)) for g in groups) if debug : print '\n'.join("group {0} : {1} samples: {2}".format(g.name, len(g.datasets), '\n\t'+'\n\t'.join(d.name for d in g.datasets)) for g in groups) if verbose : print "filling histos" outputDir = outputDir+'/'+lepton+'/histos' mkdirIfNeeded(outputDir) if batchMode: for group in groups: submit_batch_fill_job_per_group(group, opts) else: for group in groups: tree_name = 'hlfv_tuple' chain = IndexedChain(tree_name) for ds in group.datasets: chain.Add(os.path.join(inputDir, ds.name+'.root')) if opts.verbose: print "{0} : {1} entries from {2} samples".format(group.name, chain.GetEntries(), len(group.datasets)) chain.cache_directory = os.path.abspath('./selection_cache/'+group.name+'/') tcuts = [r.TCut(reg, selection_formulas()[reg]) for reg in regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions)] chain.retrieve_entrylists(tcuts) counters_pre, histos_pre = dict(), dict() counters_npre, histos_npre = dict(), dict() cached_tcuts = [] if opts.disable_cache else chain.tcuts_with_existing_list() uncached_tcuts = tcuts if opts.disable_cache else chain.tcuts_without_existing_list() print 'todo: skip cuts for which the histo files are there' if verbose : print 'filling cached cuts: ',' '.join([c.GetName() for c in cached_tcuts]) for cut in cached_tcuts: chain.preselect(cut) c_pre, h_pre = count_and_fill(chain=chain, opts=opts, group=group, cached_cut=cut) counters_pre = dictSum(counters_pre, c_pre) histos_pre = dictSum(histos_pre, h_pre) if verbose : print 'filling uncached cuts: ',' '.join([c.GetName() for c in uncached_tcuts]) if uncached_tcuts: counters_npre, histos_npre = count_and_fill(chain=chain, opts=opts, group=group, noncached_cuts=uncached_tcuts) chain.save_lists() all_histos = dictSum(histos_pre, histos_npre) for sel, histos in all_histos.iteritems(): # write histos for each sel to a separate file (finer granularity, better caching) out_filename = os.path.join(outputDir, group.name+'_'+sel+'.root') if verbose : print 'saving to ',out_filename writeObjectsToFile(out_filename, histos, verbose)
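# writeObjectsToFile comes from rootUtils; a sketch of the assumed behaviour
# (recreate the output file, write each histogram, close) for a flat
# {name : histogram} dictionary. Not the actual implementation.
import ROOT as r
def writeObjectsToFile_sketch(outputFileName, objects={}, verbose=False):
    out_file = r.TFile.Open(outputFileName, 'recreate')
    out_file.cd()
    for name, obj in objects.items():
        obj.Write(name)
    out_file.Close()
    if verbose:
        print("wrote %d objects to %s" % (len(objects), outputFileName))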
def main(): parser = optparse.OptionParser(usage=usage) parser.add_option('-i', '--input-dir', default='./out/fakerate') parser.add_option('-o', '--output-dir', default='./out/fakerate/efficiencies') parser.add_option('-l', '--lepton', default='el', help='either el or mu') parser.add_option('-m', '--mode', help='real, conv, hflf') parser.add_option('-t', '--tag', help='tag used to select the input files (e.g. Apr_04)') parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)') parser.add_option('-T', '--tight-def', help='on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.') parser.add_option('-v', '--verbose', action='store_true', default=False) (options, args) = parser.parse_args() inputDir = options.input_dir outputDir = options.output_dir lepton = options.lepton mode = options.mode tag = options.tag verbose = options.verbose if not tag : parser.error('tag is a required option') if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton) validModesEl = ['real', 'hflf'] + ['conv'] validModesMu = ['real', 'hflf'] if mode not in (validModesEl if lepton=='el' else validModesMu) : parser.error("invalid mode %s"%mode) tupleStem, treeName = {'conv' : ('mcconv_tuple', 'ConversionExtractionRegion'), 'hflf' : ('mcqcd_tuple', 'HfLfExtractionRegion'), 'real' : ('mcreal_tuple', 'RealExtractionRegion') }[mode] templateInputFilename = "*_%(stem)s_%(tag)s.root" % {'tag':tag, 'stem':tupleStem} templateOutputFilename = "%(stem)s_%(l)s_eff.root" % {'stem':tupleStem.replace('tuple','histos'), 'l':lepton} outputFileName = os.path.join(outputDir, templateOutputFilename) cacheFileName = outputFileName.replace('.root', '_'+mode+'_cache.root') doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName) onthefly_tight_def = eval(options.tight_def) if options.tight_def else None # eval will take care of aborting on typos optionsToPrint = ['inputDir', 'outputDir', 'mode', 'tag', 'doFillHistograms', 'cacheFileName', 'onthefly_tight_def'] if verbose : print "working from %s"%os.getcwd() print "being called as : %s"%' '.join(os.sys.argv) print "options parsed:\n"+'\n'.join(["%s : %s"%(o, eval(o)) for o in optionsToPrint]) # collect inputs print 'input filenames: ',os.path.join(inputDir, templateInputFilename) tupleFilenames = glob.glob(os.path.join(inputDir, templateInputFilename)) samples = setSameGroupForAllData(fastSamplesFromFilenames(tupleFilenames, verbose)) samplesPerGroup = collections.defaultdict(list) filenamesPerGroup = collections.defaultdict(list) mkdirIfNeeded(outputDir) for s, f in zip(samples, tupleFilenames) : samplesPerGroup[s.group].append(s) filenamesPerGroup[s.group].append(f) vars = ['pt', 'pt_eta'] groups = [g for g in samplesPerGroup.keys() if not isDataSample(g) and not g=='higgs'] if lepton=='el' : groups = [g for g in groups if g!='heavyflavor'] sourcesThisMode = {'real' : ['real'], # use same convention as in FakeLeptonSources.h 'conv' : ['conv'], 'hflf' : ['heavy', 'light', 'qcd'] }[mode] #fill histos if doFillHistograms : start_time = time.clock() num_processed_entries = 0 histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, groups, sourcesThisMode, mode=mode) for group in groups: filenames = filenamesPerGroup[group] histosThisGroupPerSource = dict((v, histosPerGroupPerSource[v][group]) for v in histosPerGroupPerSource.keys()) histosAnyGroupPerSource = dict((v, histosPerGroupPerSource[v]['anygroup']) for v in histosPerGroupPerSource.keys()) chain = 
r.TChain(treeName) [chain.Add(fn) for fn in filenames] if verbose: print "%s : %d entries"%(group, chain.GetEntries()) num_processed_entries += fillHistos(chain, histosThisGroupPerSource, histosAnyGroupPerSource, lepton, mode, onthefly_tight_def=onthefly_tight_def, verbose=verbose) writeHistos(cacheFileName, histosPerGroupPerSource, verbose) end_time = time.clock() delta_time = end_time - start_time if verbose: print ("processed {0:d} entries ".format(num_processed_entries) +"in "+("{0:d} min ".format(int(delta_time/60)) if delta_time>60 else "{0:.1f} s ".format(delta_time)) +"({0:.1f} kHz)".format(num_processed_entries/delta_time)) # compute efficiencies histosPerGroupPerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(vars, groups, sourcesThisMode, mode), verbose) effs = computeEfficiencies(histosPerGroupPerSource) # still [var][gr][source][l/t] for s in sourcesThisMode: for v in vars: groups = first(effs).keys() varIs1D, varIs2D = v=='pt', v=='pt_eta' effsThisSourceThisVar = dict((g, effs[v][g][s]) for g in groups) densThisSourceThisVar = dict((g, histosPerGroupPerSource[v][g][s]['loose']) for g in groups if g!='anygroup') numsThisSourceThisVar = dict((g, histosPerGroupPerSource[v][g][s]['tight']) for g in groups if g!='anygroup') if varIs1D: cname = 'eff_'+lepton+'_'+s lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#varepsilon(T|L)' title = lT+' '+s+' '+lepton+';'+lX+';'+lY zoomIn = True fakeu.plot1dEfficiencies(effsThisSourceThisVar, cname, outputDir, title, zoomIn) cname = 'stack_loose_'+lepton+'_'+s lT, lY = 'loose '+lepton+', denominator to #varepsilon(T|L)', '#varepsilon(T|L)' title = lT+' '+s+' '+lepton+';'+lX+';'+lY plotParametrizedFractions.plotStackedHistos(densThisSourceThisVar, cname, outputDir, title) cname = 'stack_tight_'+lepton+'_'+s lT, lY = 'tight '+lepton+', numerator to #varepsilon(T|L)', '#varepsilon(T|L)' title = lT+' '+s+' '+lepton+';'+lX+';'+lY plotParametrizedFractions.plotStackedHistos(numsThisSourceThisVar, cname, outputDir, title) elif varIs2D: cname = 'eff_'+lepton+'_'+s lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#eta' title = lT+' '+s+' '+lepton+';'+lX+';'+lY fakeu.plot2dEfficiencies(effsThisSourceThisVar, cname, outputDir, title, zoomIn=zoomIn) writeHistos(outputFileName, effs, verbose) if verbose : print "saved scale factors to %s" % outputFileName
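# computeEfficiencies is defined elsewhere; for each (variable, group, source) it
# presumably takes the ratio of the 'tight' to the 'loose' histogram. A sketch for
# one numerator/denominator pair, using TH1::Divide with the "B" option for
# binomial uncertainties (that choice is an assumption made here):
def efficiencyFromNumDen_sketch(hTight, hLoose, name='eff'):
    eff = hTight.Clone(name)
    eff.SetDirectory(0)
    eff.Divide(hTight, hLoose, 1.0, 1.0, 'B')  # binomial errors for eff(T|L)
    return eff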
def plotIsoComparison(histosPerSource={}, outputDir='', region='', lepton='', verbose=False): """ plot a comparison of eff(T|L) for real and for fake leptons vs. pt, where the numerator is one of the tight definitions """ var = 'pt' sources = histosPerSource.keys() lOrTOrTs = first(first(histosPerSource)).keys() histosPtPerSource = dict((s, dict((lt, histosPerSource[s][var][lt]) for lt in lOrTOrTs)) for s in sources) def buildTotFakeHistos(): "add up all the non-real (fake) sources" notRealSources = [s for s in sources if s!='real'] aSource = first(notRealSources) totFakeHistos = dict() for lt in ['loose', 'tight', 'tight_std', 'tight_minden', 'tight_tight']: template = histosPtPerSource[aSource][lt] h = template.Clone(template.GetName().replace(aSource, 'fake')) h.Reset() for s in sources : h.Add(histosPtPerSource[s][lt]) totFakeHistos[lt] = h return totFakeHistos histosPtPerSource['fake'] = buildTotFakeHistos() effReal_wh = rootUtils.buildRatioHistogram(histosPtPerSource['real']['tight' ], histosPtPerSource['real']['loose']) effReal_std = rootUtils.buildRatioHistogram(histosPtPerSource['real']['tight_std' ], histosPtPerSource['real']['loose']) effReal_minden = rootUtils.buildRatioHistogram(histosPtPerSource['real']['tight_minden'], histosPtPerSource['real']['loose']) effReal_tight = rootUtils.buildRatioHistogram(histosPtPerSource['real']['tight_tight' ], histosPtPerSource['real']['loose']) effFake_wh = rootUtils.buildRatioHistogram(histosPtPerSource['fake']['tight' ], histosPtPerSource['fake']['loose']) effFake_std = rootUtils.buildRatioHistogram(histosPtPerSource['fake']['tight_std' ], histosPtPerSource['fake']['loose']) effFake_minden = rootUtils.buildRatioHistogram(histosPtPerSource['fake']['tight_minden'], histosPtPerSource['fake']['loose']) effFake_tight = rootUtils.buildRatioHistogram(histosPtPerSource['fake']['tight_tight' ], histosPtPerSource['fake']['loose']) frameName, frameTitle = region+'_'+lepton, "fake and real efficiencies for %s in %s"%(lepton, region) can = r.TCanvas('c_'+frameName, frameTitle, 800, 600) can.cd() pm = effReal_wh pm.SetMinimum(0.0) pm.SetMaximum(1.1) pm.GetYaxis().SetTitle("#epsilon(T|L)") colorReal, colorFake = r.kBlue, r.kRed markerWh, markerStd, markerMinden, markerTight = r.kMultiply, r.kCircle, r.kOpenTriangleUp, r.kOpenSquare def setAttrs(h, mark, col): h.SetLineColor(col) h.SetMarkerColor(col) h.SetMarkerStyle(mark) setAttrs(effReal_wh, markerWh, colorReal) setAttrs(effReal_std, markerStd, colorReal) setAttrs(effReal_minden, markerMinden, colorReal) setAttrs(effReal_tight, markerTight, colorReal) setAttrs(effFake_wh, markerWh, colorFake) setAttrs(effFake_std, markerStd, colorFake) setAttrs(effFake_minden, markerMinden, colorFake) setAttrs(effFake_tight, markerTight, colorFake) pm.SetStats(0) pm.Draw('axis') #for h in [effReal_wh, effReal_std, effReal_tight, effFake_wh, effFake_std, effFake_tight]: for h in [effReal_wh, effReal_std, effReal_minden, effFake_wh, effFake_std, effFake_minden]: h.Draw('same') leg = rightLegend(can) leg.SetBorderSize(0) leg.AddEntry(r.TObject(), 'Real', '') leg.AddEntry(effReal_std, 'std iso', 'lp') #leg.AddEntry(effReal_tight, 'tight iso', 'lp') leg.AddEntry(effReal_minden,'minden iso', 'lp') leg.AddEntry(effReal_wh, 'wh iso', 'lp') leg.AddEntry(r.TObject(), 'Fake', '') leg.AddEntry(effFake_std, 'std iso', 'lp') #leg.AddEntry(effFake_tight, 'tight iso', 'lp') leg.AddEntry(effFake_minden,'minden iso', 'lp') leg.AddEntry(effFake_wh, ' wh iso', 'lp') leg.Draw() topRightLabel(can, "#splitline{%s}{%s}"%(lepton, region), 
xpos=0.125, align=13)
can.RedrawAxis()
can._histos = [effReal_wh, effReal_std, effFake_wh, effFake_std]
can.Update()
mkdirIfNeeded(outputDir)
can.SaveAs(os.path.join(outputDir, frameTitle+'.png'))
def main(): parser = optparse.OptionParser(usage=usage) parser.add_option('-i', '--input-dir', default='./out/fakerate') parser.add_option('-c', '--comp-histos', help='output from compute_fake_compositions.py') parser.add_option('-e', '--eff-histos', default=[], action='append', help='output files from compute_eff_from_ntuple.py') parser.add_option('-r', '--region', help='where we have the compositions, and want the fake matrix, e.g. ssinc1j, emu') parser.add_option('-s', '--scale-factors', default=[], action='append', help='bin-by-bin data/mc from compute_fake_scale_factor') parser.add_option('-o', '--output-dir', default='./out/fake_weighted_average', help='dir for plots') parser.add_option('-l', '--lepton', default='el', help='either el or mu') parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)') parser.add_option('--also-anygroup', action='store_true', help='also build matrix without compositions,' ' to evaluate the systematic uncertainty on the composition') parser.add_option('-v', '--verbose', action='store_true', default=False) (options, args) = parser.parse_args() inputDir = options.input_dir compFname = options.comp_histos effFnames = options.eff_histos region = options.region sfFnames = options.scale_factors outputDir = options.output_dir lepton = options.lepton verbose = options.verbose if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton) if region not in ['emu', 'ssinc', 'ssinc1j','razor0j'] : parser.error("invalid region '%s'"%region) if not compFname or not os.path.exists(compFname) : parser.error("invalid composition file '%s'"%compFname) if not effFnames or not all(os.path.exists(f) for f in effFnames) : parser.error("invalid efficiency file '%s'"%str(effFnames)) if not sfFnames or not all(os.path.exists(f) for f in sfFnames) : # parser.error("invalid electron sf file(s) %s"%str(sfFnames)) print "missing sf files, using flat scale factors"# do not crash, fall back on flat scale factors optionsToPrint = ['inputDir', 'outputDir'] if verbose : print "working from %s"%os.getcwd() print "being called as : %s"%' '.join(os.sys.argv) print "options parsed:\n"+'\n'.join(["%s : %s"%(o, eval(o)) for o in optionsToPrint]) mkdirIfNeeded(outputDir) # collect inputs regions = [region, ] groups=['diboson', 'heavyflavor', 'ttbar', 'wjets', 'zjets'] if lepton=='el' : groups = filter(lambda _ : _!='heavyflavor', groups) # note this must be in sync with compute_fake_compositions; TODO: implement a way to get the groups from the available histos compositions = fetchCompositionHistos(compFname, lepton, groups, regions, verbose) # [var][group][reg][orig], note here orig=[conv,heavy,light] # pprint.pprint(compositions) efficiencies = fetchEffienciesHistos(effFnames, lepton, groups, verbose) # [var][group][orig], note here orig=[conv,heavy,light,qcd] # pprint.pprint(efficiencies) scale_factor_histos = fetchSfHistos(sfFnames, lepton, verbose) convSF_vs_eta = scale_factor_histos['conv'] if lepton=='el' else None qcdSF_vs_eta = scale_factor_histos['hflf'] if 'hflf' in scale_factor_histos else None # if verbose: # print "convSF: "+("vs. eta {0}".format(getBinContents(convSF_vs_eta)) if convSF_vs_eta else el_convSF) # print "qcdSF: "+("vs. 
eta {0}".format(getBinContents(qcdSF_vs_eta)) if qcdSF_vs_eta else el_qcdSF) def scale_factor_to_str(sf): if 'vs_eta' in sf: return '['+', '.join("%.3f" % _ for _ in sf['vs_eta'])+']' else : return "%.3f" % sf['flat'] if lepton=='el': scaleFactors = {'conv' : {'flat' : el_convSF, 'vs_eta' : convSF_vs_eta}, 'heavy' : {'flat' : el_qcdSF, 'vs_eta' : qcdSF_vs_eta} } if verbose : print_scale_factor_dict(scaleFactors) scaleFakeEfficiencies(efficiencies, scaleFactors) elif lepton=='mu': scaleFactors = {'heavy' : {'flat' : mu_qcdSF, 'vs_eta' : qcdSF_vs_eta} } if verbose : print_scale_factor_dict(scaleFactors) scaleFakeEfficiencies(efficiencies, scaleFactors) # for now compute the weighted avg only for 'ssinc1j' avgEfficiencies = dict() for reg in first(first(compositions)).keys(): avgEfficiencies[reg] = dict() for var in ['pt', 'pt_eta']: is1D = var=='pt' lT = "%s #varepsilon(T|L) fake %s"%(reg, lepton) lX = 'p_{T} [GeV]' lY = '#varepsilon(T|L)' if is1D else '#eta' hname = "%(lep)s_fake_%(var)s_%(reg)s"%{'lep':lepton, 'var':var, 'reg':reg} htitle = lT+';'+lX+';'+lY groups = first(compositions).keys() origins = first(first(first(compositions))).keys() if verbose : print 'origins :',origins,'\n' + 'groups :',groups histosEff = dict((group+'_'+orig, efficiencies[var][group] [orig]) for group in groups for orig in origins) histosComp = dict((group+'_'+orig, compositions[var][group][reg][orig]) for group in groups for orig in origins) avgEff = weightedAverage(histosEff, histosComp, hname, htitle, verbose) avgEfficiencies[reg][var] = avgEff if is1D: fakeu.plot1dEfficiencies({reg : avgEff}, 'eff1d_'+lepton+'_fake_'+reg, outputDir, htitle, zoomIn=True) else: fakeu.plot2dEfficiencies({reg : avgEff}, 'eff2d_'+lepton+'_fake_'+reg, outputDir, htitle, zoomIn=True) writeHistos(os.path.join(outputDir,'fake_matrices_'+lepton+'.root'), avgEfficiencies, verbose) if options.also_anygroup:# test with the group-independent efficiencies print 'fetchCompositionHistos ',compFname compositions = fetchCompositionHistos(compFname, lepton, ['anygroup'], verbose) pprint.pprint(compositions) print 'fetchEffienciesHistos ',effFnames efficiencies = fetchEffienciesHistos(effFnames, lepton, ['anygroup'], verbose) avgEfficiencies = dict() for reg in first(first(compositions)).keys(): avgEfficiencies[reg] = dict() for var in ['pt', 'pt_eta']: is1D = var=='pt' lT = "%s #varepsilon(T|L) fake %s"%(reg, lepton) lX = 'p_{T} [GeV]' lY = '#varepsilon(T|L)' if is1D else '#eta' hname = "%(lep)s_fake_%(var)s_%(reg)s"%{'lep':lepton, 'var':var, 'reg':reg} htitle = lT+';'+lX+';'+lY groups = first(compositions).keys() origins = first(first(first(compositions))).keys() if verbose : print 'origins :',origins,'\n' + 'groups :',groups histosEff = dict((group+'_'+orig, efficiencies[var][group] [orig]) for group in groups for orig in origins) histosComp = dict((group+'_'+orig, compositions[var][group][reg][orig]) for group in groups for orig in origins) avgEff = weightedAverage(histosEff, histosComp, hname, htitle, verbose) avgEfficiencies[reg][var] = avgEff if is1D: fakeu.plot1dEfficiencies({reg : avgEff}, 'eff1d_'+lepton+'_fake_'+reg+'_anygroup', outputDir, htitle, zoomIn=True) else: fakeu.plot2dEfficiencies({reg : avgEff}, 'eff2d_'+lepton+'_fake_'+reg+'_anygroup', outputDir, htitle, zoomIn=True) writeHistos(os.path.join(outputDir,'fake_matrices_'+lepton+'_anygroup.root'), avgEfficiencies, verbose)
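# weightedAverage is defined elsewhere; presumably it computes, bin by bin,
# eff_avg = sum_k f_k * eff_k, where the f_k are the composition fractions and the
# two dictionaries share the same 'group_origin' keys. A sketch for 1D histograms
# under that assumption (uncertainty propagation omitted):
def weightedAverage_sketch(histosEff={}, histosComp={}, hname='', htitle=''):
    template = histosEff[sorted(histosEff.keys())[0]]
    avg = template.Clone(hname)
    avg.SetTitle(htitle)
    avg.Reset()
    avg.SetDirectory(0)
    for b in range(1, 1+avg.GetNbinsX()):
        value = sum(histosComp[k].GetBinContent(b)*histosEff[k].GetBinContent(b)
                    for k in histosEff.keys())
        avg.SetBinContent(b, value)
    return avg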
def runFill(opts): lepton = opts.lepton batchMode = opts.batch inputDir = opts.input_dir outputDir = opts.output_dir verbose = opts.verbose debug = opts.debug dataset.Dataset.verbose_parsing = True if debug else False groups = dataset.DatasetGroup.build_groups_from_files_in_dir( opts.samples_dir) if opts.group: groups = [g for g in groups if g.name == opts.group] if verbose: print '\n'.join( "group {0} : {1} samples".format(g.name, len(g.datasets)) for g in groups) if debug: print '\n'.join("group {0} : {1} samples: {2}".format( g.name, len(g.datasets), '\n\t' + '\n\t'.join(d.name for d in g.datasets)) for g in groups) if verbose: print "filling histos" outputDir = outputDir + '/' + lepton + '/histos' mkdirIfNeeded(outputDir) if batchMode: for group in groups: submit_batch_fill_job_per_group(group, opts) else: for group in groups: tree_name = 'ss3l_tuple' chain = IndexedChain(tree_name) for ds in group.datasets: chain.Add(os.path.join(inputDir, ds.name + '.root')) if opts.verbose: print "{0} : {1} entries from {2} samples".format( group.name, chain.GetEntries(), len(group.datasets)) chain.cache_directory = os.path.abspath('./selection_cache/' + group.name + '/') tcuts = [ r.TCut(reg, selection_formulas()[reg]) for reg in regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions) ] chain.retrieve_entrylists(tcuts) counters_pre, histos_pre = dict(), dict() counters_npre, histos_npre = dict(), dict() cached_tcuts = [] if opts.disable_cache else chain.tcuts_with_existing_list( ) uncached_tcuts = tcuts if opts.disable_cache else chain.tcuts_without_existing_list( ) print 'todo: skip cuts for which the histo files are there' if verbose: print 'filling cached cuts: ', ' '.join( [c.GetName() for c in cached_tcuts]) for cut in cached_tcuts: chain.preselect(cut) c_pre, h_pre = count_and_fill(chain=chain, opts=opts, group=group, cached_cut=cut) counters_pre = dictSum(counters_pre, c_pre) histos_pre = dictSum(histos_pre, h_pre) if verbose: print 'filling uncached cuts: ', ' '.join( [c.GetName() for c in uncached_tcuts]) if uncached_tcuts: counters_npre, histos_npre = count_and_fill( chain=chain, opts=opts, group=group, noncached_cuts=uncached_tcuts) chain.save_lists() all_histos = dictSum(histos_pre, histos_npre) for sel, histos in all_histos.iteritems(): # write histos for each sel to a separate file (finer granularity, better caching) out_filename = os.path.join(outputDir, group.name + '_' + sel + '.root') if verbose: print 'saving to ', out_filename writeObjectsToFile(out_filename, histos, verbose)
def main(): parser = optparse.OptionParser(usage=usage) parser.add_option('-i', '--input-dir', default='./out/fakerate') parser.add_option('-o', '--output-dir', default='./out/fake_scale_factor', help='dir for plots') parser.add_option('-l', '--lepton', default='el', help='either el or mu') parser.add_option('-r', '--region', help='one of the regions for which we saved the fake ntuples') parser.add_option('-t', '--tag', help='tag used to select the input files (e.g. Apr_04)') parser.add_option('-T', '--tight-def', help='on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.') parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)') parser.add_option('-v', '--verbose', action='store_true', default=False) (options, args) = parser.parse_args() inputDir = options.input_dir outputDir = options.output_dir lepton = options.lepton region = options.region tag = options.tag verbose = options.verbose if not tag : parser.error('tag is a required option') if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton) filestems, treenames = utils.verticalSlice(fakeu.tupleStemsAndNames) regions = filestems assert region in regions,"invalid region '%s', must be one of %s"%(region, str(regions)) templateInputFilename = "*_%(region)s_tuple_%(tag)s.root" % {'tag':tag, 'region':region} templateOutputFilename = "%(region)s_%(l)s_scale_histos.root" % {'region':region, 'l':lepton} treeName = treenames[regions.index(region)] outputDir = outputDir+'/'+region+'/'+lepton # split the output in subdirectories, so we don't overwrite things mkdirIfNeeded(outputDir) outputFileName = os.path.join(outputDir, templateOutputFilename) cacheFileName = outputFileName.replace('.root', '_'+region+'_cache.root') doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName) onthefly_tight_def = eval(options.tight_def) if options.tight_def else None # eval will take care of aborting on typos optionsToPrint = ['inputDir', 'outputDir', 'region', 'tag', 'doFillHistograms', 'onthefly_tight_def'] if verbose : print "working from %s"%os.getcwd() print "being called as : %s"%' '.join(os.sys.argv) print "options:\n"+'\n'.join(["%s : %s"%(o, eval(o)) for o in optionsToPrint]) # collect inputs if verbose : print 'input files ',os.path.join(inputDir, templateInputFilename) tupleFilenames = glob.glob(os.path.join(inputDir, templateInputFilename)) samples = setSameGroupForAllData(fastSamplesFromFilenames(tupleFilenames, verbose)) samplesPerGroup = collections.defaultdict(list) filenamesPerGroup = collections.defaultdict(list) mkdirIfNeeded(outputDir) for s, f in zip(samples, tupleFilenames) : samplesPerGroup[s.group].append(s) filenamesPerGroup[s.group].append(f) vars = ['mt0', 'mt1', 'pt0', 'pt1', 'eta1'] groups = samplesPerGroup.keys() #fill histos if doFillHistograms : start_time = time.clock() num_processed_entries = 0 histosPerGroup = bookHistos(vars, groups, region=region) histosPerSource = bookHistosPerSource(vars, leptonSources, region=region) histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, groups, leptonSources, region=region) for group in groups: isData = isDataSample(group) filenames = filenamesPerGroup[group] if verbose: print " --- group : %s ---".format(group) print '\n\t'.join(filenames) histosThisGroup = histosPerGroup[group] histosThisGroupPerSource = dict((v, histosPerGroupPerSource[v][group]) for v in histosPerGroupPerSource.keys()) chain = r.TChain(treeName) [chain.Add(fn) for fn in filenames] if 
verbose: print "%s : %d entries"%(group, chain.GetEntries()) num_processed_entries += fillHistos(chain, histosThisGroup, histosPerSource, histosThisGroupPerSource, lepton, group, region, onthefly_tight_def=onthefly_tight_def, verbose=verbose) writeHistos(cacheFileName, histosPerGroup, histosPerSource, histosPerGroupPerSource, verbose) end_time = time.clock() delta_time = end_time - start_time if verbose: print ("processed {0:d} entries ".format(num_processed_entries) +"in "+("{0:d} min ".format(int(delta_time/60)) if delta_time>60 else "{0:.1f} s ".format(delta_time)) +"({0:.1f} kHz)".format(num_processed_entries/delta_time)) # compute scale factors histosPerGroup = fetchHistos(cacheFileName, histoNames(vars, groups, region), verbose) histosPerSource = fetchHistos(cacheFileName, histoNamesPerSource(vars, leptonSources, region), verbose) histosPerSamplePerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(vars, groups, leptonSources, region), verbose) plotStackedHistos(histosPerGroup, outputDir+'/by_group', region, verbose) plotStackedHistosSources(histosPerSource, outputDir+'/by_source', region, verbose) plotPerSourceEff(histosPerVar=histosPerSource, outputDir=outputDir+'/by_source', lepton=lepton, region=region, verbose=verbose) for g in groups: hps = dict((v, histosPerSamplePerSource[v][g])for v in vars) plotPerSourceEff(histosPerVar=hps, outputDir=outputDir, lepton=lepton, region=region, sample=g, verbose=verbose) hn_sf_eta = histoname_sf_vs_eta (lepton) hn_sf_pt = histoname_sf_vs_pt (lepton) hn_da_eta = histoname_data_fake_eff_vs_eta(lepton) hn_da_pt = histoname_data_fake_eff_vs_pt (lepton) objs_eta = subtractRealAndComputeScaleFactor(histosPerGroup, 'eta1', hn_sf_eta, hn_da_eta, outputDir, region, verbose) objs_pt = subtractRealAndComputeScaleFactor(histosPerGroup, 'pt1', hn_sf_pt, hn_da_pt, outputDir, region, verbose) rootUtils.writeObjectsToFile(outputFileName, dictSum(objs_eta, objs_pt), verbose) if verbose : print "saved scale factors to %s" % outputFileName
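# dictSum comes from the utils module; presumably it just merges two dictionaries
# into a new one (keys from the second argument winning on collisions). A one-line
# sketch of that assumption:
def dictSum_sketch(d1, d2):
    return dict(list(d1.items()) + list(d2.items()))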
def main(): parser = optparse.OptionParser(usage=usage) parser.add_option('-i', '--input-dir', default='./out/fakerate') parser.add_option('-o', '--output-dir', default='./out/fake_el_scale_factor', help='dir for plots') parser.add_option('-l', '--lepton', default='el', help='either el or mu') parser.add_option('-r', '--region', help='where we want the compositions,' ' i.e. one of the regions for which we saved the fake nutples' ' (eg. ssinc1j_tuple*, emu_tuple*') parser.add_option('-s', '--syst-fudge', help='scale down main group (el:wjets, mu:bb/cc) to evaluate fraction syst unc') parser.add_option('-t', '--tag', help='tag used to select the input files (e.g. Apr_04)') parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)') parser.add_option('-v', '--verbose', action='store_true', default=False) (options, args) = parser.parse_args() inputDir = options.input_dir outputDir = options.output_dir lepton = options.lepton systfudge = options.syst_fudge region = options.region tag = options.tag verbose = options.verbose if not tag : parser.error('tag is a required option') if not region : parser.error('region is a required option') if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton) outputDir = outputDir+'/'+lepton # split the output in subdirectories, so we don't overwrite things templateInputFilename = "*_%(region)s_tuple_%(tag)s.root" % {'tag':tag, 'region':region} templateOutputFilename = "%(l)s_composition_histos.root" % {'l':lepton} treeName = dict(fakeu.tupleStemsAndNames)[region] outputFileName = os.path.join(outputDir, templateOutputFilename) cacheFileName = outputFileName.replace('.root', '_cache.root') doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName) optionsToPrint = ['inputDir', 'outputDir', 'tag', 'doFillHistograms', 'systfudge'] if verbose : print "working from %s"%os.getcwd() print "being called as : %s"%' '.join(os.sys.argv) print "options parsed:\n"+'\n'.join(["%s : %s"%(o, eval(o)) for o in optionsToPrint]) # collect inputs print '----> input files ',os.path.join(inputDir, templateInputFilename) tupleFilenames = glob.glob(os.path.join(inputDir, templateInputFilename)) samples = setSameGroupForAllData(fastSamplesFromFilenames(tupleFilenames, verbose)) samplesPerGroup = collections.defaultdict(list) filenamesPerGroup = collections.defaultdict(list) mkdirIfNeeded(outputDir) for s, f in zip(samples, tupleFilenames) : samplesPerGroup[s.group].append(s) filenamesPerGroup[s.group].append(f) vars = ['pt', 'eta', 'pt_eta', 'mt', 'mdeltar'] groups = samplesPerGroup.keys() if lepton=='el' : groups = [g for g in groups if g!='heavyflavor'] selections = [region] #fill histos if doFillHistograms : start_time = time.clock() num_processed_entries = 0 histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, groups, leptonSources, selections) for group in groups: isData = isDataSample(group) filenames = filenamesPerGroup[group] histosThisGroupPerSource = histosPerGroupPerSource[group] chain = r.TChain(treeName) [chain.Add(fn) for fn in filenames] print "%s : %d entries (%d files)"%(group, chain.GetEntries(), chain.GetListOfFiles().GetEntries()) num_processed_entries += fillHistos(chain, histosThisGroupPerSource, isData, lepton, group, region, verbose) writeHistos(cacheFileName, histosPerGroupPerSource, verbose) end_time = time.clock() delta_time = end_time - start_time if verbose: print ("processed {0:d} entries ".format(num_processed_entries) +"in "+("{0:d} min 
".format(int(delta_time/60)) if delta_time>60 else "{0:.1f} s ".format(delta_time)) +"({0:.1f} kHz)".format(num_processed_entries/delta_time)) # compute and plot fractions histosPerGroupPerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(vars, groups, leptonSources, selections)) histosCompositions = dict() for sel in selections: histosCompositions[sel] = dict() for var in vars: hs, groups = histosPerGroupPerSource, histosPerGroupPerSource.keys() groups = [g for g in groups if g!='data' and g!='higgs'] histosHeavy = dict((g, hs[g][sel]['heavy'][var]['loose']) for g in groups) histosLight = dict((g, hs[g][sel]['light'][var]['loose']) for g in groups) histosConv = dict((g, hs[g][sel]['conv' ][var]['loose']) for g in groups) normalizeHistos = plotParametrizedFractions.normalizeHistos plotStackedHistos = plotParametrizedFractions.plotStackedHistos frameTitle = 'hf '+lepton+': '+sel+' loose;'+var canvasName = lepton+'_hf'+sel+'_'+var+'_den' plotStackedHistos(histosHeavy, canvasName, outputDir, frameTitle) frameTitle = 'lf '+lepton+': '+sel+' loose;'+var canvasName = lepton+'_lf'+sel+'_'+var+'_den' plotStackedHistos(histosHeavy, canvasName, outputDir, frameTitle) frameTitle = 'conv '+lepton+': '+sel+' loose;'+var canvasName = lepton+'_conv'+sel+'_'+var+'_den' plotStackedHistos(histosConv, canvasName, outputDir, frameTitle) # normalize and draw fractions (den only) histos = dict([(k+'_heavy', h) for k,h in histosHeavy.iteritems()] + [(k+'_light', h) for k,h in histosLight.iteritems()] + [(k+'_conv', h) for k,h in histosConv.iteritems()]) if systfudge: fudgeCompositions(histosHeavy, histosLight, histosConv if lepton=='el' else None) normalizeHistos(histos) anygroupCompositions = buildCompositionsAddingGroups({'heavy':histosHeavy, 'light':histosLight, 'conv':histosConv}) histosCompositions[sel][var] = {'bygroup':histos, 'anygroup': anygroupCompositions} is1Dhisto = var!='pt_eta' # can only stack 1D plots if is1Dhisto: histosBySource = {'heavy':histosHeavy, 'light':histosLight, 'conv':histosConv} frameTitle = lepton+': '+sel+';'+var canvasBaseName = lepton+'_fake'+sel+'_'+var+'_frac' plotFractionsStacked(histosBySource, canvasBaseName+'_stack', outputDir, frameTitle) writeHistos(outputFileName, histosCompositions, verbose)
def plotStackedHistos(histosPerGroup={}, outputDir='', region='', verbose=False): groups = histosPerGroup.keys() variables = first(histosPerGroup).keys() leptonTypes = first(first(histosPerGroup)).keys() colors = getGroupColor() mkdirIfNeeded(outputDir) histosPerName = dict([ ( region + '_' + var + '_' + lt, # one canvas for each histo, so key with histoname w/out group dict([(g, histosPerGroup[g][var][lt]) for g in groups])) for var in variables for lt in leptonTypes ]) for histoname, histosPerGroup in histosPerName.iteritems(): missingGroups = [g for g, h in histosPerGroup.iteritems() if not h] if missingGroups: if verbose: print "skip %s, missing histos for %s" % (histoname, str(missingGroups)) continue bkgHistos = dict([(g, h) for g, h in histosPerGroup.iteritems() if g not in ['data', 'signal']]) totBkg = summedHisto(bkgHistos.values()) err_band = None # buildErrBandGraph(totBkg, computeStatErr2(totBkg)) emptyBkg = totBkg.Integral() == 0 if emptyBkg: if verbose: print "empty backgrounds, skip %s" % histoname continue can = r.TCanvas('c_' + histoname, histoname, 800, 600) can.cd() pm = totBkg # pad master pm.SetStats(False) pm.Draw('axis') can.Update() # necessary to fool root's dumb object ownership stack = r.THStack('stack_' + histoname, '') can.Update() r.SetOwnership(stack, False) for s, h in bkgHistos.iteritems(): h.SetFillColor(colors[s] if s in colors else r.kOrange) h.SetDrawOption('bar') h.SetDirectory(0) stack.Add(h) stack.Draw('hist same') # err_band.Draw('E2 same') data = histosPerGroup['data'] if data and data.GetEntries(): data.SetMarkerStyle(r.kFullDotLarge) data.Draw('p same') # yMin, yMax = getMinMax([h for h in [totBkg, data, err_band] if h]) # fixme with err_band yMin, yMax = 0.0, data.GetMaximum() pm.SetMinimum(0.0) pm.SetMaximum(1.1 * yMax) can.Update() topRightLabel(can, histoname, xpos=0.125, align=13) # drawLegendWithDictKeys(can, dictSum(bkgHistos, {'stat err':err_band}), opt='f') drawLegendWithDictKeys(can, bkgHistos, opt='f') can.RedrawAxis() can._stack = stack can._histos = [h for h in stack.GetHists()] + [data] can.Update() outFname = os.path.join(outputDir, histoname + '.png') utils.rmIfExists(outFname) can.SaveAs(outFname)
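# buildErrBandGraph and computeStatErr2 are referenced above but currently disabled
# ('tmp disable'); the sketch below shows one possible implementation, assumed here:
# a TGraphAsymmErrors that follows the total background and uses the per-bin
# statistical uncertainties already stored in the histogram as symmetric y errors.
import ROOT as r
def buildStatErrBand_sketch(totBkg):
    nbins = totBkg.GetNbinsX()
    graph = r.TGraphAsymmErrors(nbins)
    for i in range(1, 1+nbins):
        x, y = totBkg.GetBinCenter(i), totBkg.GetBinContent(i)
        ex = 0.5*totBkg.GetBinWidth(i)
        ey = totBkg.GetBinError(i)
        graph.SetPoint(i-1, x, y)
        graph.SetPointError(i-1, ex, ex, ey, ey)
    graph.SetFillStyle(3004)  # hatched band, drawable with 'E2 same'
    graph.SetFillColor(r.kGray+2)
    return graph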
def main(): parser = optparse.OptionParser(usage=usage) parser.add_option('-g', '--group', help='group to be processed (used only in fill mode)') parser.add_option('-i', '--input-dir', default='./out/fakerate') parser.add_option('-o', '--output-dir', default='./out/fake_scale_factor') parser.add_option('-l', '--lepton', default='el', help='either el or mu') parser.add_option('-r', '--region', help='one of the regions for which we saved the fake ntuples') parser.add_option('--samples-dir', default='samples/', help='directory with the list of samples; default ./samples/') parser.add_option('-T', '--tight-def', help='on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.') parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)') parser.add_option('--keep-real', action='store_true', default=False, help='do not subtract real (to get real lep efficiency)') parser.add_option('--debug', action='store_true') parser.add_option('--verbose', action='store_true') parser.add_option('--disable-cache', action='store_true', help='disable the entry cache') (options, args) = parser.parse_args() inputDir = options.input_dir outputDir = options.output_dir lepton = options.lepton region = options.region keepreal = options.keep_real debug = options.debug verbose = options.verbose if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton) regions = kin.selection_formulas().keys() assert region in regions,"invalid region '%s', must be one of %s"%(region, str(sorted(regions))) regions = [region] dataset.Dataset.verbose_parsing = True if debug else False groups = dataset.DatasetGroup.build_groups_from_files_in_dir(options.samples_dir) if options.group : groups = [g for g in groups if g.name==options.group] group_names = [g.name for g in groups] outputDir = outputDir+'/'+region+'/'+lepton # split the output in subdirectories, so we don't overwrite things mkdirIfNeeded(outputDir) templateOutputFilename = "scale_factor_{0}.root".format(lepton) outputFileName = os.path.join(outputDir, templateOutputFilename) cacheFileName = outputFileName.replace('.root', '_cache.root') doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName) onthefly_tight_def = eval(options.tight_def) if options.tight_def else None # eval will take care of aborting on typos if verbose : utils.print_running_conditions(parser, options) vars = ['mt0', 'mt1', 'pt0', 'pt1', 'eta1', 'pt1_eta1'] #fill histos if doFillHistograms : start_time = time.clock() num_processed_entries = 0 histosPerGroup = bookHistos(vars, group_names, region=region) histosPerSource = bookHistosPerSource(vars, leptonSources, region=region) histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, group_names, leptonSources, region=region) for group in groups: tree_name = 'hlfv_tuple' chain = IndexedChain(tree_name) for ds in group.datasets: fname = os.path.join(inputDir, ds.name+'.root') if os.path.exists(fname): chain.Add(fname) if verbose: print "{0} : {1} entries from {2} samples".format(group.name, chain.GetEntries(), len(group.datasets)) chain.cache_directory = os.path.abspath('./selection_cache/'+group.name+'/') tcuts = [r.TCut(reg, selection_formulas()[reg]) for reg in regions] print 'tcuts ',[c.GetName() for c in tcuts] chain.retrieve_entrylists(tcuts) counters_pre, histos_pre = dict(), dict() counters_npre, histos_npre = dict(), dict() print 'tcuts_with_existing_list ',str([c.GetName() for c in chain.tcuts_with_existing_list()]) print 
'tcuts_without_existing_list ',str([c.GetName() for c in chain.tcuts_without_existing_list()]) cached_tcuts = [] if options.disable_cache else chain.tcuts_with_existing_list() print 'cached_tcuts ',[c.GetName() for c in cached_tcuts] uncached_tcuts = tcuts if options.disable_cache else chain.tcuts_without_existing_list() print 'todo: skip cuts for which the histo files are there' if verbose: print " --- group : {0} ---".format(group.name) print '\n\t'.join(chain.filenames) if verbose : print 'filling cached cuts: ',' '.join([c.GetName() for c in cached_tcuts]) if verbose: print "%s : %d entries"%(group.name, chain.GetEntries()) histosThisGroup = histosPerGroup[group.name] histosThisGroupPerSource = dict((v, histosPerGroupPerSource[v][group.name]) for v in histosPerGroupPerSource.keys()) for cut in cached_tcuts: print 'cached_tcut ',cut chain.preselect(cut) num_processed_entries += fillHistos(chain, histosThisGroup, histosPerSource, histosThisGroupPerSource, lepton, group, cut, cut_is_cached=True, onthefly_tight_def=onthefly_tight_def, verbose=verbose) if verbose : print 'filling uncached cuts: ',' '.join([c.GetName() for c in uncached_tcuts]) if uncached_tcuts: assert len(uncached_tcuts)==1, "expecting only one cut, got {}".format(len(uncached_tcuts)) cut = uncached_tcuts[0] chain.preselect(None) num_processed_entries += fillHistos(chain, histosThisGroup, histosPerSource, histosThisGroupPerSource, lepton, group, cut, cut_is_cached=False, onthefly_tight_def=onthefly_tight_def, verbose=verbose) chain.save_lists() writeHistos(cacheFileName, histosPerGroup, histosPerSource, histosPerGroupPerSource, verbose) end_time = time.clock() delta_time = end_time - start_time if verbose: print ("processed {0:d} entries ".format(num_processed_entries) +"in "+("{0:d} min ".format(int(delta_time/60)) if delta_time>60 else "{0:.1f} s ".format(delta_time)) +"({0:.1f} kHz)".format(num_processed_entries/delta_time)) # return # compute scale factors histosPerGroup = fetchHistos(cacheFileName, histoNames(vars, group_names, region), verbose) histosPerSource = fetchHistos(cacheFileName, histoNamesPerSource(vars, leptonSources, region), verbose) histosPerSamplePerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(vars, group_names, leptonSources, region), verbose) plotStackedHistos(histosPerGroup, outputDir+'/by_group', region, verbose) plotStackedHistosSources(histosPerSource, outputDir+'/by_source', region, verbose) plotPerSourceEff(histosPerVar=histosPerSource, outputDir=outputDir+'/by_source', lepton=lepton, region=region, verbose=verbose) for g in group_names: hps = dict((v, histosPerSamplePerSource[v][g])for v in vars) plotPerSourceEff(histosPerVar=hps, outputDir=outputDir, lepton=lepton, region=region, sample=g, verbose=verbose) hn_sf_eta = histoname_sf_vs_eta (lepton) hn_sf_pt = histoname_sf_vs_pt (lepton) hn_da_eta = histoname_data_fake_eff_vs_eta(lepton) hn_da_pt = histoname_data_fake_eff_vs_pt (lepton) subtractReal = not keepreal objs_eta = subtractRealAndComputeScaleFactor(histosPerGroup, 'eta1', hn_sf_eta, hn_da_eta, outputDir, region, subtractReal, verbose) objs_pt = subtractRealAndComputeScaleFactor(histosPerGroup, 'pt1', hn_sf_pt, hn_da_pt, outputDir, region, subtractReal, verbose) objs_pt_eta = subtractRealAndComputeScaleFactor(histosPerGroup, 'pt1_eta1', histoname_sf_vs_pt_eta(lepton), histoname_data_fake_eff_vs_pt_eta(lepton), outputDir, region, subtractReal, verbose) rootUtils.writeObjectsToFile(outputFileName, dictSum(dictSum(objs_eta, objs_pt), objs_pt_eta), verbose) if verbose : 
print "saved scale factors to %s" % outputFileName
def runFill(opts) : batchMode = opts.batch inputFakeDir = opts.input_fake inputGenDir = opts.input_other outputDir = opts.output_dir verbose = opts.verbose debug = opts.debug blinded = not opts.unblind tightight = opts.require_tight_tight if debug : dataset.Dataset.verbose_parsing = True groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir) if not skip_charge_flip : groups.append(dataset.DatasetGroup.build_qflip_from_simulated_samples(groups)) groups.append(first([g for g in groups if g.is_data]).clone_data_as_fake()) groups = parse_group_option(opts, groups) if verbose : print '\n'.join("group {0} : {1} samples".format(g.name, len(g.datasets)) for g in groups) if debug : print '\n'.join("group {0} : {1} samples: {2}".format(g.name, len(g.datasets), '\n\t'+'\n\t'.join(d.name for d in g.datasets)) for g in groups) if verbose : print "filling histos" # eval will take care of aborting on typos onthefly_tight_def = eval(opts.tight_def) if opts.tight_def else None mkdirIfNeeded(outputDir) systematics = get_list_of_syst_to_fill(opts) regions = regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions) if verbose : print "about to loop over these systematics:\n %s"%str(systematics) if verbose : print "about to loop over these regions:\n %s"%str(regions) if batchMode: for group in groups: for systematic in systematics: if systUtils.Group(group.name).isNeededForSys(systematic): opts.syst = systematic for selection in regions: submit_batch_fill_job_per_group_per_selection(group=group, selection=selection, opts=opts) else: for group in groups: systematics = [s for s in systematics if systUtils.Group(group.name).isNeededForSys(s)] if not systematics : print "warning, empty syst list. You should have at least the nominal" for systematic in systematics: # note to self: here you will want to use a modified Sample.setHftInputDir # for now we just have the fake syst that are in the nominal tree tree_name = 'hlfv_tuple' chain = IndexedChain(tree_name) input_dir = opts.input_fake if group.name=='fake' else opts.input_other for ds in group.datasets: chain.Add(os.path.join(input_dir, systUtils.Sample(ds.name, group.name).setSyst(systematic).filename)) if opts.verbose: print "{0} : {1} entries from {2} samples".format(group.name, chain.GetEntries(), len(group.datasets)) chain.cache_directory = os.path.abspath('./selection_cache/'+group.name+'/') tcuts = [r.TCut(reg, selection_formulas()[reg]) for reg in regions] chain.retrieve_entrylists(tcuts) counters_pre, histos_pre = dict(), dict() counters_npre, histos_npre = dict(), dict() cached_tcuts = [] if opts.disable_cache else chain.tcuts_with_existing_list() uncached_tcuts = tcuts if opts.disable_cache else chain.tcuts_without_existing_list() if verbose : print 'filling cached cuts: ',' '.join([c.GetName() for c in cached_tcuts]) for cut in cached_tcuts: chain.preselect(cut) c_pre, h_pre = count_and_fill(chain=chain, sample=group.name, syst=systematic, verbose=verbose, debug=debug, blinded=blinded, onthefly_tight_def=onthefly_tight_def, tightight=tightight, quicktest=opts.quick_test, cached_cut=cut) out_filename = (systUtils.Group(group.name) .setSyst(systematic) .setHistosDir(outputDir) .setCurrentSelection(cut.GetName())).filenameHisto writeObjectsToFile(out_filename, h_pre, verbose) counters_pre = dictSum(counters_pre, c_pre) histos_pre = dictSum(histos_pre, h_pre) if uncached_tcuts: if verbose : print 'filling uncached cuts: ',' '.join([c.GetName() for c in uncached_tcuts]) counters_npre, histos_npre = 
count_and_fill(chain=chain, sample=group.name, syst=systematic, verbose=verbose,
               debug=debug, blinded=blinded, onthefly_tight_def=onthefly_tight_def,
               tightight=tightight, quicktest=opts.quick_test, noncached_cuts=uncached_tcuts)
for sel, histos in histos_npre.iteritems():
    out_filename = (systUtils.Group(group.name)
                    .setSyst(systematic)
                    .setHistosDir(outputDir)
                    .setCurrentSelection(sel)).filenameHisto
    writeObjectsToFile(out_filename, histos, verbose)
chain.save_lists()
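# IndexedChain (retrieve_entrylists / preselect / save_lists) is a custom wrapper;
# the underlying caching idea can be sketched with plain ROOT entry lists: run each
# selection once, keep the resulting TEntryList, and re-attach it on later passes so
# that only the preselected entries are read. Names below are illustrative only.
import ROOT as r
def make_entry_list_sketch(chain, tcut, list_name):
    chain.Draw('>>'+list_name, tcut, 'entrylist')  # fills a TEntryList in gDirectory
    return r.gDirectory.Get(list_name)

def preselect_sketch(chain, entry_list):
    chain.SetEntryList(entry_list)  # later event loops only see the listed entries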
def subtractRealAndComputeScaleFactor(histosPerGroup={}, variable='', outRatiohistoname='',outDataeffhistoname='', outputDir='./', region='', subtractReal=True, verbose=False): "efficiency scale factor" groups = histosPerGroup.keys() mkdirIfNeeded(outputDir) histosPerType = dict([(lt, dict([(g, histosPerGroup[g][variable][lt]) for g in groups])) for lt in leptonTypes]) for lt in leptonTypes : histosPerType[lt]['totSimBkg'] = summedHisto([histo for group,histo in histosPerType[lt].iteritems() if group not in ['data', 'signal']]) simuTight = histosPerType['fake_tight']['totSimBkg'] simuLoose = histosPerType['fake_loose']['totSimBkg'] dataTight = histosPerType['tight' ]['data' ] dataLoose = histosPerType['loose' ]['data' ] # subtract real contribution from data # _Note to self_: currently estimating the real contr from MC; in # the past also used iterative corr, which might be more # appropriate in cases like here, where the normalization is # so-so. Todo: investigate the normalization. dataSubTight = dataTight.Clone(dataTight.GetName().replace('data_tight','data_minus_prompt_tight')) dataSubLoose = dataLoose.Clone(dataLoose.GetName().replace('data_loose','data_minus_prompt_loose')) dataSubTight.SetDirectory(0) dataSubLoose.SetDirectory(0) dataSubTight.Add(histosPerType['real_tight']['totSimBkg'], -1.0 if subtractReal else 0.0) dataSubLoose.Add(histosPerType['real_loose']['totSimBkg'], -1.0 if subtractReal else 0.0) effData = dataSubTight.Clone(outDataeffhistoname) effData.SetDirectory(0) effData.Divide(dataSubLoose) effSimu = simuTight.Clone(simuTight.GetName().replace('fake_tight','fake_eff')) effSimu.SetDirectory(0) effSimu.Divide(simuLoose) print "eff(T|L) vs. ",variable def formatFloat(floats): return ["%.4f"%f for f in floats] print "efficiency data : ",formatFloat(getBinContents(effData)) print "efficiency simu : ",formatFloat(getBinContents(effSimu)) ratio = effData.Clone(outRatiohistoname) ratio.SetDirectory(0) ratio.Divide(effSimu) print "sf data/simu : ",formatFloat(getBinContents(ratio)) print " +/- : ",formatFloat(getBinErrors(ratio)) can = r.TCanvas('c_'+outRatiohistoname, outRatiohistoname, 800, 600) botPad, topPad = rootUtils.buildBotTopPads(can) can.cd() topPad.Draw() topPad.cd() pm = effData pm.SetStats(0) pm.Draw('axis') xAx, yAx = pm.GetXaxis(), pm.GetYaxis() xAx.SetTitle('') xAx.SetLabelSize(0) yAx.SetRangeUser(0.0, 0.25) textScaleUp = 1.0/topPad.GetHNDC() yAx.SetLabelSize(textScaleUp*0.04) yAx.SetTitleSize(textScaleUp*0.04) yAx.SetTitle('#epsilon(T|L)') yAx.SetTitleOffset(yAx.GetTitleOffset()/textScaleUp) effSimu.SetLineColor(r.kRed) effSimu.SetMarkerStyle(r.kOpenCross) effSimu.SetMarkerColor(effSimu.GetLineColor()) effData.Draw('same') effSimu.Draw('same') leg = drawLegendWithDictKeys(topPad, {'data':effData, 'simulation':simuTight}, legWidth=0.4) leg.SetHeader('scale factor '+region+' '+('electron' if '_el_'in outRatiohistoname else 'muon' if '_mu_' in outRatiohistoname else '')) can.cd() botPad.Draw() botPad.cd() ratio.SetStats(0) ratio.Draw() textScaleUp = 1.0/botPad.GetHNDC() xAx, yAx = ratio.GetXaxis(), ratio.GetYaxis() yAx.SetRangeUser(0.0, 2.0) xAx.SetTitle({'pt1':'p_{T}', 'eta1':'|#eta|', 'pt1_eta1':'p_{T}'}[variable]) yAx.SetNdivisions(-202) yAx.SetTitle('Data/Sim') yAx.CenterTitle() xAx.SetLabelSize(textScaleUp*0.04) xAx.SetTitleSize(textScaleUp*0.04) yAx.SetLabelSize(textScaleUp*0.04) yAx.SetTitleSize(textScaleUp*0.04) refLine = rootUtils.referenceLine(xAx.GetXmin(), xAx.GetXmax()) refLine.Draw() can.Update() outFname = os.path.join(outputDir, 
region+'_'+outRatiohistoname)
for ext in ['.eps', '.png']:
    utils.rmIfExists(outFname+ext)
    can.SaveAs(outFname+ext)
return {outRatiohistoname : ratio,
        outDataeffhistoname : effData,
        outDataeffhistoname.replace('_fake_rate_data_', '_tight_data_minus_prompt') : dataSubTight,
        outDataeffhistoname.replace('_fake_rate_data_', '_loose_data_minus_prompt') : dataSubLoose}
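# A small numeric illustration (made-up numbers) of the scale factor computed above:
# subtract the simulated prompt contribution from the data tight/loose counts, form
# eff = N_tight / N_loose for data and for simulated fakes, then take the ratio.
data_tight, data_loose = 120.0, 1000.0
prompt_tight, prompt_loose = 20.0, 100.0     # simulated real-lepton contamination
simfake_tight, simfake_loose = 90.0, 950.0   # simulated fake leptons
eff_data = (data_tight - prompt_tight)/(data_loose - prompt_loose)  # = 100/900 ~ 0.111
eff_simu = simfake_tight/simfake_loose                              # ~ 0.095
scale_factor = eff_data/eff_simu                                    # ~ 1.17
print("eff data %.3f  eff simu %.3f  sf %.2f" % (eff_data, eff_simu, scale_factor))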
def main():
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-g', '--group', help='group to be processed (used only in fill mode)')
    parser.add_option('-i', '--input-dir', default='./out/fakerate')
    parser.add_option('-o', '--output-dir', default='./out/fake_scale_factor')
    parser.add_option('-l', '--lepton', default='el', help='either el or mu')
    parser.add_option('-r', '--region', help='one of the regions for which we saved the fake ntuples')
    parser.add_option('--samples-dir', default='samples/', help='directory with the list of samples; default ./samples/')
    parser.add_option('-T', '--tight-def', help='on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.')
    parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)')
    parser.add_option('--keep-real', action='store_true', default=False, help='do not subtract real (to get real lep efficiency)')
    parser.add_option('--debug', action='store_true')
    parser.add_option('--verbose', action='store_true')
    parser.add_option('--disable-cache', action='store_true', help='disable the entry cache')
    (options, args) = parser.parse_args()
    inputDir  = options.input_dir
    outputDir = options.output_dir
    lepton    = options.lepton
    region    = options.region
    keepreal  = options.keep_real
    debug     = options.debug
    verbose   = options.verbose
    if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton)
    regions = kin.selection_formulas().keys()
    assert region in regions, "invalid region '%s', must be one of %s"%(region, str(sorted(regions)))
    regions = [region]
    dataset.Dataset.verbose_parsing = True if debug else False
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(options.samples_dir)
    if options.group : groups = [g for g in groups if g.name==options.group]
    group_names = [g.name for g in groups]
    outputDir = outputDir+'/'+region+'/'+lepton # split the output in subdirectories, so we don't overwrite things
    mkdirIfNeeded(outputDir)
    templateOutputFilename = "scale_factor_{0}.root".format(lepton)
    outputFileName = os.path.join(outputDir, templateOutputFilename)
    cacheFileName = outputFileName.replace('.root', '_cache.root')
    doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName)
    onthefly_tight_def = eval(options.tight_def) if options.tight_def else None # eval will take care of aborting on typos
    if verbose : utils.print_running_conditions(parser, options)
    vars = ['mt0', 'mt1', 'pt0', 'pt1', 'eta1', 'pt1_eta1']
    #fill histos
    if doFillHistograms :
        start_time = time.clock()
        num_processed_entries = 0
        histosPerGroup = bookHistos(vars, group_names, region=region)
        histosPerSource = bookHistosPerSource(vars, leptonSources, region=region)
        histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, group_names, leptonSources, region=region)
        for group in groups:
            tree_name = 'hlfv_tuple'
            chain = IndexedChain(tree_name)
            for ds in group.datasets:
                fname = os.path.join(inputDir, ds.name+'.root')
                if os.path.exists(fname) : chain.Add(fname)
            if verbose : print "{0} : {1} entries from {2} samples".format(group.name, chain.GetEntries(), len(group.datasets))
            chain.cache_directory = os.path.abspath('./selection_cache/'+group.name+'/')
            tcuts = [r.TCut(reg, selection_formulas()[reg]) for reg in regions]
            print 'tcuts ', [c.GetName() for c in tcuts]
            chain.retrieve_entrylists(tcuts)
            counters_pre,  histos_pre  = dict(), dict()
            counters_npre, histos_npre = dict(), dict()
            print 'tcuts_with_existing_list ', str([c.GetName() for c in chain.tcuts_with_existing_list()])
            print 'tcuts_without_existing_list ', str([c.GetName() for c in chain.tcuts_without_existing_list()])
            cached_tcuts = [] if options.disable_cache else chain.tcuts_with_existing_list()
            print 'cached_tcuts ', [c.GetName() for c in cached_tcuts]
            uncached_tcuts = tcuts if options.disable_cache else chain.tcuts_without_existing_list()
            print 'todo: skip cuts for which the histo files are there'
            if verbose:
                print " --- group : {0} ---".format(group.name)
                print '\n\t'.join(chain.filenames)
            if verbose : print 'filling cached cuts: ', ' '.join([c.GetName() for c in cached_tcuts])
            if verbose : print "%s : %d entries"%(group.name, chain.GetEntries())
            histosThisGroup = histosPerGroup[group.name]
            histosThisGroupPerSource = dict((v, histosPerGroupPerSource[v][group.name]) for v in histosPerGroupPerSource.keys())
            for cut in cached_tcuts:
                print 'cached_tcut ', cut
                chain.preselect(cut)
                num_processed_entries += fillHistos(chain, histosThisGroup, histosPerSource,
                                                    histosThisGroupPerSource,
                                                    lepton, group, cut, cut_is_cached=True,
                                                    onthefly_tight_def=onthefly_tight_def, verbose=verbose)
            if verbose : print 'filling uncached cuts: ', ' '.join([c.GetName() for c in uncached_tcuts])
            if uncached_tcuts:
                assert len(uncached_tcuts)==1, "expecting only one cut, got {}".format(len(uncached_tcuts))
                cut = uncached_tcuts[0]
                chain.preselect(None)
                num_processed_entries += fillHistos(chain, histosThisGroup, histosPerSource,
                                                    histosThisGroupPerSource,
                                                    lepton, group, cut, cut_is_cached=False,
                                                    onthefly_tight_def=onthefly_tight_def, verbose=verbose)
                chain.save_lists()
        writeHistos(cacheFileName, histosPerGroup, histosPerSource, histosPerGroupPerSource, verbose)
        end_time = time.clock()
        delta_time = end_time - start_time
        if verbose:
            print ("processed {0:d} entries ".format(num_processed_entries)
                   +"in "+("{0:d} min ".format(int(delta_time/60)) if delta_time>60
                           else "{0:.1f} s ".format(delta_time))
                   +"({0:.1f} kHz)".format(num_processed_entries/delta_time))
    # return
    # compute scale factors
    histosPerGroup = fetchHistos(cacheFileName, histoNames(vars, group_names, region), verbose)
    histosPerSource = fetchHistos(cacheFileName, histoNamesPerSource(vars, leptonSources, region), verbose)
    histosPerSamplePerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(vars, group_names, leptonSources, region), verbose)
    plotStackedHistos(histosPerGroup, outputDir+'/by_group', region, verbose)
    plotStackedHistosSources(histosPerSource, outputDir+'/by_source', region, verbose)
    plotPerSourceEff(histosPerVar=histosPerSource, outputDir=outputDir+'/by_source', lepton=lepton, region=region, verbose=verbose)
    for g in group_names:
        hps = dict((v, histosPerSamplePerSource[v][g]) for v in vars)
        plotPerSourceEff(histosPerVar=hps, outputDir=outputDir, lepton=lepton, region=region, sample=g, verbose=verbose)
    hn_sf_eta = histoname_sf_vs_eta(lepton)
    hn_sf_pt  = histoname_sf_vs_pt(lepton)
    hn_da_eta = histoname_data_fake_eff_vs_eta(lepton)
    hn_da_pt  = histoname_data_fake_eff_vs_pt(lepton)
    subtractReal = not keepreal
    objs_eta = subtractRealAndComputeScaleFactor(histosPerGroup, 'eta1', hn_sf_eta, hn_da_eta,
                                                 outputDir, region, subtractReal, verbose)
    objs_pt  = subtractRealAndComputeScaleFactor(histosPerGroup, 'pt1', hn_sf_pt, hn_da_pt,
                                                 outputDir, region, subtractReal, verbose)
    objs_pt_eta = subtractRealAndComputeScaleFactor(histosPerGroup, 'pt1_eta1',
                                                    histoname_sf_vs_pt_eta(lepton),
                                                    histoname_data_fake_eff_vs_pt_eta(lepton),
                                                    outputDir, region, subtractReal, verbose)
    rootUtils.writeObjectsToFile(outputFileName, dictSum(dictSum(objs_eta, objs_pt), objs_pt_eta), verbose)
    if verbose : print "saved scale factors to %s"%outputFileName
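# Example invocation (a sketch; the script name below is an assumption, and <region>
# must be one of the selections defined in kin.selection_formulas()):
#
#   python compute_fake_scale_factor.py -l el -r <region> \
#       -i ./out/fakerate -o ./out/fake_scale_factor --verbose
#
# With -f/--fill-histos the per-group histograms are always (re)filled; otherwise they
# are filled only when the _cache.root file is missing, and the cached histograms are
# reused to recompute the scale factors.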
def subtractRealAndComputeScaleFactor(histosPerGroup={}, variable='', outRatiohistoname='', outDataeffhistoname='',
                                      outputDir='./', region='', subtractReal=True, verbose=False):
    "efficiency scale factor"
    groups = histosPerGroup.keys()
    mkdirIfNeeded(outputDir)
    histosPerType = dict([(lt,
                           dict([(g, histosPerGroup[g][variable][lt]) for g in groups]))
                          for lt in leptonTypes])
    for lt in leptonTypes:
        histosPerType[lt]['totSimBkg'] = summedHisto([histo for group, histo in histosPerType[lt].iteritems()
                                                      if group not in ['data', 'signal']])
    simuTight = histosPerType['fake_tight']['totSimBkg']
    simuLoose = histosPerType['fake_loose']['totSimBkg']
    dataTight = histosPerType['tight']['data']
    dataLoose = histosPerType['loose']['data']
    # subtract real contribution from data
    # _Note to self_: currently estimating the real contr from MC; in
    # the past also used iterative corr, which might be more
    # appropriate in cases like here, where the normalization is
    # so-so. Todo: investigate the normalization.
    dataSubTight = dataTight.Clone(dataTight.GetName().replace('data_tight', 'data_minus_prompt_tight'))
    dataSubLoose = dataLoose.Clone(dataLoose.GetName().replace('data_loose', 'data_minus_prompt_loose'))
    dataSubTight.SetDirectory(0)
    dataSubLoose.SetDirectory(0)
    dataSubTight.Add(histosPerType['real_tight']['totSimBkg'], -1.0 if subtractReal else 0.0)
    dataSubLoose.Add(histosPerType['real_loose']['totSimBkg'], -1.0 if subtractReal else 0.0)
    effData = dataSubTight.Clone(outDataeffhistoname)
    effData.SetDirectory(0)
    effData.Divide(dataSubLoose)
    effSimu = simuTight.Clone(simuTight.GetName().replace('fake_tight', 'fake_eff'))
    effSimu.SetDirectory(0)
    effSimu.Divide(simuLoose)
    print "eff(T|L) vs. ", variable
    def formatFloat(floats): return ["%.4f"%f for f in floats]
    print "efficiency data : ", formatFloat(getBinContents(effData))
    print "efficiency simu : ", formatFloat(getBinContents(effSimu))
    ratio = effData.Clone(outRatiohistoname)
    ratio.SetDirectory(0)
    ratio.Divide(effSimu)
    print "sf data/simu : ", formatFloat(getBinContents(ratio))
    print "         +/- : ", formatFloat(getBinErrors(ratio))
    can = r.TCanvas('c_'+outRatiohistoname, outRatiohistoname, 800, 600)
    botPad, topPad = rootUtils.buildBotTopPads(can)
    can.cd()
    topPad.Draw()
    topPad.cd()
    pm = effData
    pm.SetStats(0)
    pm.Draw('axis')
    xAx, yAx = pm.GetXaxis(), pm.GetYaxis()
    xAx.SetTitle('')
    xAx.SetLabelSize(0)
    yAx.SetRangeUser(0.0, 0.25)
    textScaleUp = 1.0/topPad.GetHNDC()
    yAx.SetLabelSize(textScaleUp*0.04)
    yAx.SetTitleSize(textScaleUp*0.04)
    yAx.SetTitle('#epsilon(T|L)')
    yAx.SetTitleOffset(yAx.GetTitleOffset()/textScaleUp)
    effSimu.SetLineColor(r.kRed)
    effSimu.SetMarkerStyle(r.kOpenCross)
    effSimu.SetMarkerColor(effSimu.GetLineColor())
    effData.Draw('same')
    effSimu.Draw('same')
    leg = drawLegendWithDictKeys(topPad, {'data' : effData, 'simulation' : simuTight}, legWidth=0.4)
    leg.SetHeader('scale factor '+region+' '+('electron' if '_el_' in outRatiohistoname else
                                              'muon' if '_mu_' in outRatiohistoname else ''))
    can.cd()
    botPad.Draw()
    botPad.cd()
    ratio.SetStats(0)
    ratio.Draw()
    textScaleUp = 1.0/botPad.GetHNDC()
    xAx, yAx = ratio.GetXaxis(), ratio.GetYaxis()
    yAx.SetRangeUser(0.0, 2.0)
    xAx.SetTitle({'pt1':'p_{T}', 'eta1':'|#eta|', 'pt1_eta1':'p_{T}'}[variable])
    yAx.SetNdivisions(-202)
    yAx.SetTitle('Data/Sim')
    yAx.CenterTitle()
    xAx.SetLabelSize(textScaleUp*0.04)
    xAx.SetTitleSize(textScaleUp*0.04)
    yAx.SetLabelSize(textScaleUp*0.04)
    yAx.SetTitleSize(textScaleUp*0.04)
    refLine = rootUtils.referenceLine(xAx.GetXmin(), xAx.GetXmax())
    refLine.Draw()
    can.Update()
    outFname = os.path.join(outputDir, region+'_'+outRatiohistoname)
    for ext in ['.eps', '.png']:
        utils.rmIfExists(outFname+ext)
        can.SaveAs(outFname+ext)
    return {outRatiohistoname : ratio,
            outDataeffhistoname : effData,
            outDataeffhistoname.replace('_fake_rate_data_', '_tight_data_minus_prompt') : dataSubTight,
            outDataeffhistoname.replace('_fake_rate_data_', '_loose_data_minus_prompt') : dataSubLoose}
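# Minimal sketch of the per-bin arithmetic performed by subtractRealAndComputeScaleFactor,
# using plain numbers instead of TH1 objects (the function and argument names below are
# illustrative, not part of this module):
def fake_scale_factor_bin(data_tight, data_loose,
                          prompt_tight_mc, prompt_loose_mc,
                          fake_tight_mc, fake_loose_mc,
                          subtract_real=True):
    "data/simulation scale factor for the tight-given-loose fake efficiency in one bin"
    sub = 1.0 if subtract_real else 0.0
    # data efficiency after removing the prompt (real-lepton) contamination estimated from MC
    eff_data = (data_tight - sub*prompt_tight_mc) / (data_loose - sub*prompt_loose_mc)
    # fake efficiency predicted by the summed simulated backgrounds
    eff_simu = float(fake_tight_mc) / fake_loose_mc
    return eff_data / eff_simu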
def main():
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-i', '--input-dir', default='./out/fakerate')
    parser.add_option('-o', '--output-dir', default='./out/fakerate/efficiencies')
    parser.add_option('-l', '--lepton', default='el', help='either el or mu')
    parser.add_option('-m', '--mode', help='emu')
    parser.add_option('-t', '--tag', help='tag used to select the input files (e.g. Apr_04)')
    parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)')
    parser.add_option('-T', '--tight-def', help='on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.')
    parser.add_option('-v', '--verbose', action='store_true', default=False)
    (options, args) = parser.parse_args()
    inputDir  = options.input_dir
    outputDir = options.output_dir
    lepton    = options.lepton
    mode      = options.mode
    tag       = options.tag
    verbose   = options.verbose
    if not tag : parser.error('tag is a required option')
    if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton)
    validModes = ['emu']
    if mode not in validModes : parser.error("invalid mode %s"%mode)
    tupleStem, treeName = filter(lambda _: _[0]==mode, fakeu.tupleStemsAndNames)[0]
    templateInputFilename = "*_%(stem)s_tuple_%(tag)s.root" % {'tag':tag, 'stem':tupleStem}
    templateOutputFilename = "%(stem)s_%(l)s_eff.root" % {'stem':tupleStem.replace('tuple','histos'), 'l':lepton}
    outputFileName = os.path.join(outputDir, templateOutputFilename)
    cacheFileName = outputFileName.replace('.root', '_'+mode+'_cache.root')
    doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName)
    onthefly_tight_def = eval(options.tight_def) if options.tight_def else None # eval will take care of aborting on typos
    optionsToPrint = ['inputDir', 'outputDir', 'mode', 'tag', 'doFillHistograms', 'cacheFileName', 'onthefly_tight_def']
    if verbose :
        print "working from %s"%os.getcwd()
        print "being called as : %s"%' '.join(os.sys.argv)
        print "options parsed:\n"+'\n'.join(["%s : %s"%(o, eval(o)) for o in optionsToPrint])
        print 'input filenames: ', os.path.join(inputDir, templateInputFilename)
    # collect inputs
    tupleFilenames = glob.glob(os.path.join(inputDir, templateInputFilename))
    samples = setSameGroupForAllData(fastSamplesFromFilenames(tupleFilenames, verbose))
    samplesPerGroup = collections.defaultdict(list)
    filenamesPerGroup = collections.defaultdict(list)
    mkdirIfNeeded(outputDir)
    for s, f in zip(samples, tupleFilenames) :
        samplesPerGroup[s.group].append(s)
        filenamesPerGroup[s.group].append(f)
    vars = ['pt', 'pt_eta']
    groups = [g for g in samplesPerGroup.keys() if g != 'higgs']
    if lepton=='el' : groups = [g for g in groups if g != 'heavyflavor']
    sourcesThisMode = (['real', 'conv', 'heavy', 'light', 'unknown'] if lepton=='el' else
                       ['real', 'heavy', 'light', 'unknown'])
    #fill histos
    if doFillHistograms :
        start_time = time.clock()
        num_processed_entries = 0
        histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, groups, sourcesThisMode, mode=mode)
        for group in groups:
            filenames = filenamesPerGroup[group]
            sources = histosPerGroupPerSource.keys()
            histosThisGroupPerSource = dict((s, histosPerGroupPerSource[s][group]) for s in sources)
            histosAnyGroupPerSource = (dict((s, histosPerGroupPerSource[s]['anygroup']) for s in sources)
                                       if group!='data' else {})
            chain = r.TChain(treeName)
            [chain.Add(fn) for fn in filenames]
            if verbose: print "%s : %d entries"%(group, chain.GetEntries())
            is_data = group in ['data']
            print 'is_data ', is_data
            num_processed_entries += fillHistos(chain=chain,
                                                histosPerSource=histosThisGroupPerSource,
                                                histosPerSourceAnygroup=histosAnyGroupPerSource,
                                                lepton=lepton,
                                                onthefly_tight_def=onthefly_tight_def,
                                                verbose=verbose)
        writeHistos(cacheFileName, histosPerGroupPerSource, verbose)
        end_time = time.clock()
        delta_time = end_time - start_time
        one_minute = 60
        if verbose:
            print ("processed {0:d} entries ".format(num_processed_entries)
                   +"in "+("{0:d} min ".format(int(delta_time/60)) if delta_time>one_minute
                           else "{0:.1f} s ".format(delta_time))
                   +"({0:.1f} kHz)".format(num_processed_entries/delta_time))
    # plot histos
    histosPerGroupPerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(vars, groups, sourcesThisMode, mode), verbose)
    # effs = computeEfficiencies(histosPerGroupPerSource) # still [var][gr][source][l/t]
    for v in vars:
        varIs1D, varIs2D = v=='pt', v=='pt_eta'
        densThisSourceThisVar = dictSum(dict((s, histosPerGroupPerSource[v]['anygroup'][s]['loose']) for s in sourcesThisMode),
                                        {'data' : histosPerGroupPerSource[v]['data']['unknown']['loose']})
        numsThisSourceThisVar = dictSum(dict((s, histosPerGroupPerSource[v]['anygroup'][s]['tight']) for s in sourcesThisMode),
                                        {'data' : histosPerGroupPerSource[v]['data']['unknown']['tight']})
        if varIs1D:
            lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#varepsilon(T|L)'
            cname = 'stack_loose_'+lepton
            lT, lY = 'loose '+lepton+', denominator to #varepsilon(T|L)', '#varepsilon(T|L)'
            title = lT+' '+'anysource'+' '+lepton+';'+lX+';'+lY
            plotStackedHistosWithData(densThisSourceThisVar, outputDir, cname, title, colors=fakeu.colorsFillSources(), verbose=verbose)
            cname = 'stack_tight_'+lepton
            lT, lY = 'tight '+lepton+', numerator to #varepsilon(T|L)', '#varepsilon(T|L)'
            title = lT+' '+'anysource'+' '+lepton+';'+lX+';'+lY
            plotStackedHistosWithData(numsThisSourceThisVar, outputDir, cname, title, colors=fakeu.colorsFillSources(), verbose=verbose)
    for s in sourcesThisMode:
        for v in vars:
            groups = first(histosPerGroupPerSource).keys()
            varIs1D, varIs2D = v=='pt', v=='pt_eta'
            # effsThisSourceThisVar = dict((g, effs[v][g][s]) for g in groups)
            densThisSourceThisVar = dictSum(dict((g, histosPerGroupPerSource[v][g][s]['loose'])
                                                 for g in groups if g not in ['anygroup', 'data']),
                                            {'data' : histosPerGroupPerSource[v]['data']['unknown']['loose']})
            numsThisSourceThisVar = dictSum(dict((g, histosPerGroupPerSource[v][g]['unknown']['tight'])
                                                 for g in groups if g not in ['anygroup', 'data']),
                                            {'data' : histosPerGroupPerSource[v]['data']['unknown']['tight']})
            if varIs1D:
                # cname = 'eff_'+lepton+'_'+s
                lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#varepsilon(T|L)'
                # title = lT+' '+s+' '+lepton+';'+lX+';'+lY
                # zoomIn = True
                # fakeu.plot1dEfficiencies(effsThisSourceThisVar, cname, outputDir, title, zoomIn)
                cname = 'stack_loose_'+lepton+'_'+s
                lT, lY = 'loose '+lepton+', denominator to #varepsilon(T|L)', '#varepsilon(T|L)'
                title = lT+' '+s+' '+lepton+';'+lX+';'+lY
                plotStackedHistosWithData(densThisSourceThisVar, outputDir, cname, title, colors=SampleUtils.colors, verbose=verbose)
                cname = 'stack_tight_'+lepton+'_'+s
                lT, lY = 'tight '+lepton+', numerator to #varepsilon(T|L)', '#varepsilon(T|L)'
                title = lT+' '+s+' '+lepton+';'+lX+';'+lY
                plotStackedHistosWithData(numsThisSourceThisVar, outputDir, cname, title, colors=SampleUtils.colors, verbose=verbose)
            # elif varIs2D:
            #     cname = 'eff_'+lepton+'_'+s
            #     lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#eta'
            #     title = lT+' '+s+' '+lepton+';'+lX+';'+lY
            #     fakeu.plot2dEfficiencies(effsThisSourceThisVar, cname, outputDir, title, zoomIn=zoomIn)
    # writeHistos(outputFileName, effs, verbose)
    if verbose : print "saved scale factors to %s"%outputFileName
def runPlot(opts) :
    inputDir     = opts.input_dir
    outputDir    = opts.output_dir
    sysOption    = opts.syst
    excludedSyst = opts.exclude
    verbose      = opts.verbose
    mkdirIfNeeded(outputDir)
    buildTotBkg = systUtils.buildTotBackgroundHisto
    buildStat   = systUtils.buildStatisticalErrorBand
    buildSyst   = systUtils.buildSystematicErrorBand
    groups = allGroups()
    selections = allRegions()
    variables = variablesToPlot()
    for group in groups :
        group.setHistosDir(inputDir)
        group.exploreAvailableSystematics(verbose)
        group.filterAndDropSystematics(sysOption, excludedSyst, verbose)
    mkdirIfNeeded(outputDir)
    systematics = ['NOM']
    anySys = sysOption is None
    if sysOption=='fake'   or anySys : systematics += systUtils.fakeSystVariations()
    if sysOption=='object' or anySys : systematics += systUtils.mcObjectVariations()
    if sysOption=='weight' or anySys : systematics += systUtils.mcWeightVariations()
    if sysOption and sysOption.count(',') :
        systematics = [s for s in systUtils.getAllVariations() if s in sysOption.split(',')]
    elif sysOption in systUtils.getAllVariations() :
        systematics = [sysOption]
    if not anySys and len(systematics)==1 and sysOption!='NOM' :
        raise ValueError("Invalid syst %s"%str(sysOption))
    if excludedSyst :
        systematics = [s for s in systematics if s not in filterWithRegexp(systematics, excludedSyst)]
    if verbose : print "using the following systematics : %s"%str(systematics)
    fakeSystematics = [s for s in systematics if s in systUtils.fakeSystVariations()]
    mcSystematics = [s for s in systematics if s in systUtils.mcObjectVariations() + systUtils.mcWeightVariations()]
    simBkgs = [g for g in groups if g.isMcBkg]
    data, fake, signal = findByName(groups, 'data'), findByName(groups, 'fake'), findByName(groups, 'signal')
    for sel in selections :
        if verbose : print '-- plotting ', sel
        for var in variables :
            if verbose : print '---- plotting ', var
            for g in groups : g.setSystNominal()
            nominalHistoData    = data.getHistogram(variable=var, selection=sel, cacheIt=True)
            nominalHistoSign    = signal.getHistogram(variable=var, selection=sel, cacheIt=True)
            nominalHistoFakeBkg = fake.getHistogram(variable=var, selection=sel, cacheIt=True)
            nominalHistosSimBkg = dict([(g.name, g.getHistogram(variable=var, selection=sel, cacheIt=True)) for g in simBkgs])
            nominalHistosBkg    = dict([('fake', nominalHistoFakeBkg)] + [(g, h) for g, h in nominalHistosSimBkg.iteritems()])
            nominalHistoTotBkg  = buildTotBkg(histoFakeBkg=nominalHistoFakeBkg, histosSimBkgs=nominalHistosSimBkg)
            statErrBand = buildStat(nominalHistoTotBkg)
            systErrBand = buildSyst(fake=fake, simBkgs=simBkgs, variable=var, selection=sel,
                                    fakeVariations=fakeSystematics, mcVariations=mcSystematics,
                                    verbose=verbose)
            plotHistos(histoData=nominalHistoData, histoSignal=nominalHistoSign,
                       histoTotBkg=nominalHistoTotBkg, histosBkg=nominalHistosBkg,
                       statErrBand=statErrBand, systErrBand=systErrBand,
                       canvasName=(sel+'_'+var), outdir=outputDir, verbose=verbose)
    for group in groups :
        summary = group.variationsSummary()
        for selection, summarySel in summary.iteritems() :
            colW = str(12)
            header = ' '.join([('%'+colW+'s')%colName for colName in ['variation', 'yield', 'delta[%]']])
            lineTemplate = '%(sys)'+colW+'s'+'%(counts)'+colW+'s'+'%(delta)'+colW+'s'
            print "---- summary of variations for %s ----" % group.name
            print "--- %s ---" % selection
            print header
            print '\n'.join(lineTemplate%{'sys'    : s,
                                          'counts' : (("%.3f"%c) if type(c) is float else (str(c)+str(type(c)))),
                                          'delta'  : (("%.3f"%d) if type(d) is float else
                                                      '--' if d is None else (str(d)+str(type(d))))}
                            for s, c, d in summarySel)
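# The 'delta[%]' column in the summary above is presumably the relative shift of each
# variation's yield with respect to the nominal one; a minimal sketch of that quantity
# (the helper name is illustrative, not taken from systUtils):
def relative_delta_percent(nominal_yield, varied_yield):
    "variation in percent of the nominal yield, or None when the nominal yield is zero"
    if not nominal_yield:
        return None
    return 100.0*(varied_yield - nominal_yield)/nominal_yield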