def main():
    """
    Plot the fake 'emu' and 'mue' histograms, and their ratio, for each region.

    Usage: <script> inputdir outputdir

    For every region returned by regions_to_plot() (a mapping from a region
    label to a (sel_emu, sel_mue) pair of selection names) and every variable,
    two plots are produced with plot_emu_mue_with_ratio:
      - '<var>_<region>_emu_over_mue_wout_sys_err': histograms as read from
        the 'fake' group;
      - '<var>_<region>_emu_over_mue_with_sys_err': histograms whose errors
        are set from the stat+syst error band (added in quadrature), so that
        the ratio carries the total error.

    Returns None; prints a message and returns early on bad arguments or if
    the input directory is missing.
    """
    if len(sys.argv) != 3:
        print("Usage: {0} inputdir outputdir".format(sys.argv[0]))
        return
    inputdir = sys.argv[1]
    outputdir = sys.argv[2]
    verbose = True
    if not os.path.exists(inputdir):
        print("missing input dir {0}".format(inputdir))
        return
    utils.mkdirIfNeeded(outputdir)
    fake = systUtils.Group('fake')
    fake.setHistosDir(inputdir)
    fake.setSyst()  # reset to nominal (state is undetermined after 'explore')
    c = r.TCanvas('c', '')
    variables = ['mcollcoarse']
    for jetnojet, (sel_emu, sel_mue) in regions_to_plot().items():
        for var in variables:
            # --- first plot: errors as stored in the input histograms
            h_emu = fake.getHistogram(variable=var, selection=sel_emu, cacheIt=True)
            h_mue = fake.getHistogram(variable=var, selection=sel_mue, cacheIt=True)
            h_ratio = h_emu.Clone(h_emu.GetName().replace('emu', 'emu_over_mue'))
            h_ratio.Divide(h_mue)
            plot_emu_mue_with_ratio(
                canvas=c, h_mue=h_mue, h_emu=h_emu, h_ratio=h_ratio,
                filename=outputdir + '/' + var + '_' + jetnojet + '_emu_over_mue_wout_sys_err')

            # --- second plot: histos with stat+syst err
            # (to get the correct error in the ratio)
            h_with_totErrBand = {}
            for sel in [sel_emu, sel_mue]:
                print("{0} {1}".format(">>>plotting ", sel))
                fake.setSystNominal()
                fake.setCurrentSelection(sel)
                fake.exploreAvailableSystematics(verbose)
                fakeSystematics = [s for s in fake.systematics if s != 'NOM']
                nominalHistoFakeBkg = fake.getHistogram(variable=var, selection=sel, cacheIt=True)
                # the fake estimate is the only background considered here
                nominalHistoTotBkg = buildTotBackgroundHisto(histoFakeBkg=nominalHistoFakeBkg,
                                                             histosSimBkgs={})
                statErrBand = buildStatisticalErrorBand(nominalHistoTotBkg)
                systErrBand = buildFakeSystematicErrorBand(fake=fake,
                                                           nominalHistosSimBkg={},
                                                           variable=var,
                                                           selection=sel,
                                                           variations=fakeSystematics,
                                                           verbose=verbose)
                totErrBand = systUtils.addErrorBandsInQuadrature(statErrBand, systErrBand)
                h_with_totErrBand[sel] = systUtils.setHistErrFromErrBand(nominalHistoFakeBkg,
                                                                         totErrBand)
            pprint.pprint(h_with_totErrBand)
            # look the histograms up by their selection rather than by
            # substring-matching the keys
            h_emu = h_with_totErrBand[sel_emu]
            h_mue = h_with_totErrBand[sel_mue]
            # name the ratio after the numerator (emu), as for the first plot;
            # deriving it from the mue name would leave the name unchanged and
            # the clone would share its name with h_mue
            h_ratio = h_emu.Clone(h_emu.GetName().replace('emu', 'emu_over_mue'))
            h_ratio.Divide(h_mue)
            plot_emu_mue_with_ratio(
                canvas=c, h_mue=h_mue, h_emu=h_emu, h_ratio=h_ratio,
                filename=outputdir + '/' + var + '_' + jetnojet + '_emu_over_mue_with_sys_err')
    return
def count_and_fill(chain, sample='', syst='', verbose=False, debug=False,
                   blinded=True, onthefly_tight_def=None, tightight=False,
                   quicktest=False, cached_cut=None, noncached_cuts=None):
    """
    Count and fill for one sample (or group), one syst.

    Loops over the entries of `chain`; for each event it computes a set of
    per-event quantities, evaluates every cut expression (cut.GetTitle())
    with eval(), and for each passing selection fills the booked histograms
    and adds the fill weight to the corresponding counter.

    NOTE: the cut expressions, the weight expression, and the debug fields
    are eval'd in this function's local scope. The per-event local names
    (l0_pt, is_emu, n_jets, ...) are therefore part of the contract with the
    selection formulas: do not rename or drop them, even when they look
    unused in this function.

    Parameters:
      chain              -- iterable over events; must provide GetEntries(),
                            preselect() and add_entry_to_list()
      sample, syst       -- sample (or group) name and systematic name
      verbose, debug     -- print a summary / print per-event field values
      blinded            -- for data, do not fill the mcoll histograms when
                            100 < m_coll < 150
      onthefly_tight_def -- optional callable(lepton) overriding lepton.isTight
      tightight          -- if true, skip events where either lepton is not tight
      quicktest          -- stop once 100*iEntry exceeds the number of entries
      cached_cut         -- one cut whose entry list already exists
                            (presumably preselected on the chain by the
                            caller -- confirm against caller)
      noncached_cuts     -- list of cuts without entry list; passing entries
                            are registered with chain.add_entry_to_list

    Exactly one of cached_cut / noncached_cuts must be provided.

    Returns (counters, histos), where counters[sel] is the sum of fill
    weights and histos[sel][variable] is the filled histogram.
    """
    # None rather than a mutable [] as default argument
    noncached_cuts = [] if noncached_cuts is None else noncached_cuts
    sysGroup = systUtils.Group(sample).setSyst(syst)
    is_mc = systUtils.Group(sample).isMc
    is_data = systUtils.Group(sample).isData
    is_qflip_sample = dataset.DatasetGroup(sample).is_qflip
    assert bool(cached_cut) != bool(noncached_cuts), (
        "must choose either cached selection or non-cached selections: {}, {}".format(
            cached_cut, noncached_cuts))
    cuts = [cached_cut] if cached_cut else noncached_cuts
    if noncached_cuts:
        chain.preselect(None)
    selections = [c.GetName() for c in cuts]
    counters = book_counters(selections)
    histos = book_histograms(sample_name=sample,
                             variables=variables_to_fill(),
                             systematics=[syst],
                             selections=selections)[syst]
    if is_qflip_sample:  # for qflip, only fill ss histos
        selections = [s for s in selections if s.endswith('_ss')]
    weight_expr = sysGroup.weightLeafname
    print("{0} {1}".format('weight_expr: ', weight_expr))
    print("{0} {1}".format(
        'selections: ',
        '\n'.join(["%d) %s : %s" % (i, cut.GetName(), cut.GetTitle())
                   for i, cut in enumerate(cuts)])))
    start_time = time.clock()
    num_total_entries = chain.GetEntries()
    num_processed_entries = 0
    fields_to_print = ['l0_pt', 'l1_pt', 'l0_eta', 'l1_eta',
                       'met_pt', 'm_ll', 'pt_ll', 'dpt_l0_l1',
                       'dphi_l0_met', 'dphi_l1_met', 'dphi_l0_l1',
                       'mt0', 'mt1', 'n_soft_jets',
                       'eta_csj0', 'phi_csj0', 'eta_csj1', 'phi_csj1']
    if debug:
        print(",".join(fields_to_print))

    # defined once, outside of the event loop
    def jet_pt2(j):
        return j.px * j.px + j.py * j.py

    for iEntry, event in enumerate(chain):
        if quicktest and 100 * iEntry > num_total_entries:
            break
        # --- per-event quantities (visible to the eval'd cut expressions)
        run_num = event.pars.runNumber
        evt_num = event.pars.eventNumber
        l0 = addTlv(event.l0)
        l1 = addTlv(event.l1)
        met = addTlv(event.met)
        l0_is_el, l0_is_mu = l0.isEl, l0.isMu
        l1_is_el, l1_is_mu = l1.isEl, l1.isMu
        l0_is_t = onthefly_tight_def(l0) if onthefly_tight_def else l0.isTight
        l1_is_t = onthefly_tight_def(l1) if onthefly_tight_def else l1.isTight
        is_emu = int(l0_is_el and l1_is_mu)
        is_mue = int(l0_is_mu and l1_is_el)
        is_mumu = int(l0_is_mu and l1_is_mu)
        is_ee = int(l0_is_el and l1_is_el)
        is_same_sign = int((l0.charge * l1.charge) > 0)
        is_opp_sign = not is_same_sign
        is_qflippable = is_opp_sign and (l0_is_el or l1_is_el) and is_mc
        weight = eval(weight_expr)  # expression provided by the syst group
        qflip_prob = event.pars.qflipWeight
        l0_pt, l1_pt = l0.p4.Pt(), l1.p4.Pt()
        d_pt0_pt1 = l0_pt - l1_pt
        l0_eta, l1_eta = l0.p4.Eta(), l1.p4.Eta()
        l0_phi, l1_phi = l0.p4.Phi(), l1.p4.Phi()
        met_pt = met.p4.Pt()
        m_ll = (l0.p4 + l1.p4).M()
        pt_ll = (l0.p4 + l1.p4).Pt()
        dphi_l0_met = abs(l0.p4.DeltaPhi(met.p4))
        dphi_l1_met = abs(l1.p4.DeltaPhi(met.p4))
        dphi_l0_l1 = abs(l0.p4.DeltaPhi(l1.p4))
        dpt_l0_l1 = l0.p4.Pt() - l1.p4.Pt()
        m_coll = computeCollinearMassLepTau(l0.p4, l1.p4, met.p4)
        mt0, mt1 = computeMt(l0.p4, met.p4), computeMt(l1.p4, met.p4)
        dphillbeta, mdr = computeRazor(l0.p4, l1.p4, met.p4)
        cl_jets = [addTlv(j) for j in event.jets if jet_pt2(j) > 30. * 30.]
        n_cl_jets = len(cl_jets)
        n_b_jets = event.pars.numBjets
        n_f_jets = event.pars.numFjets
        n_bf_jets = n_b_jets + n_f_jets
        n_jets = n_cl_jets + n_b_jets + n_f_jets
        # todo: merge with cl_jets loop
        soft_jets = [addTlv(j) for j in event.jets if jet_pt2(j) < 30.**2]
        n_soft_jets = len(soft_jets)
        # soft jets closest (in DeltaR) to each of the two leptons
        csj0 = first(sorted(soft_jets, key=lambda j: j.p4.DeltaR(l0.p4)))
        csj1 = first(sorted(soft_jets, key=lambda j: j.p4.DeltaR(l1.p4)))
        eta_csj0 = csj0.p4.Eta() if csj0 else -5.0
        phi_csj0 = csj0.p4.Phi() if csj0 else -5.0
        eta_csj1 = csj1.p4.Eta() if csj1 else -5.0
        phi_csj1 = csj1.p4.Phi() if csj1 else -5.0
        drl0csj = csj0.p4.DeltaR(l0.p4) if csj0 else None
        drl1csj = csj1.p4.DeltaR(l1.p4) if csj1 else None
        m_jj = (cl_jets[0].p4 + cl_jets[1].p4).M() if n_cl_jets > 1 else None
        deta_jj = abs(cl_jets[0].p4.Eta() - cl_jets[1].p4.Eta()) if n_cl_jets > 1 else None
        pass_sels = {}
        if tightight and not (l0_is_t and l1_is_t):
            continue
        # --- selections
        for cut in cuts:
            sel = cut.GetName()
            sel_expr = cut.GetTitle()
            pass_sel = eval(sel_expr)
            pass_sels[sel] = pass_sel
            is_ss_sel = sel.endswith('_ss')
            # an opposite-sign event entering a same-sign selection
            # is treated as a charge-flip contribution
            as_qflip = is_qflippable and (is_opp_sign and is_ss_sel)
            if is_qflip_sample and not as_qflip:
                pass_sel = False
            if not is_qflip_sample and as_qflip:
                pass_sel = False
            if not pass_sel:
                continue
            if not cached_cut:
                chain.add_entry_to_list(cut, iEntry)
            if debug:
                # explicit loop (not a comprehension) so that eval always
                # runs in this function's scope and sees the event locals
                field_values = []
                for field in fields_to_print:
                    field_values.append(str(eval(field)))
                print(','.join(field_values))
            fill_weight = (weight * qflip_prob) if as_qflip else weight
            h = histos[sel]
            h['onebin'].Fill(1.0, fill_weight)
            h['njets'].Fill(n_jets, fill_weight)
            h['pt0'].Fill(l0_pt, fill_weight)
            h['pt1'].Fill(l1_pt, fill_weight)
            h['d_pt0_pt1'].Fill(d_pt0_pt1, fill_weight)
            h['eta0'].Fill(l0_eta, fill_weight)
            h['eta1'].Fill(l1_eta, fill_weight)
            h['phi0'].Fill(l0_phi, fill_weight)
            h['phi1'].Fill(l1_phi, fill_weight)
            h['mll'].Fill(m_ll, fill_weight)
            h['ptll'].Fill(pt_ll, fill_weight)
            h['met'].Fill(met_pt, fill_weight)
            h['dphil0met'].Fill(dphi_l0_met, fill_weight)
            h['dphil1met'].Fill(dphi_l1_met, fill_weight)
            h['nsj'].Fill(n_soft_jets, fill_weight)
            h['pt0_vs_pt1'].Fill(l1_pt, l0_pt, fill_weight)
            h['met_vs_pt1'].Fill(l1_pt, met.p4.Pt(), fill_weight)
            h['dphil0l1_vs_pt1'].Fill(l1_pt, dphi_l0_l1, fill_weight)
            h['dphil0met_vs_pt1'].Fill(l1_pt, dphi_l0_met, fill_weight)
            h['dphil1met_vs_pt1'].Fill(l1_pt, dphi_l1_met, fill_weight)
            if n_soft_jets:
                h['drl0csj'].Fill(drl0csj, fill_weight)
                h['drl1csj'].Fill(drl1csj, fill_weight)
            if n_jets == 2 and n_cl_jets == 2:
                # fixme: f jets are not saved, but we need them for vbf
                h['m_jj'].Fill(m_jj, fill_weight)
                h['deta_jj'].Fill(deta_jj, fill_weight)
            in_blinded_window = is_data and (blinded and 100.0 < m_coll and m_coll < 150.0)
            if not in_blinded_window:
                h['mcoll'].Fill(m_coll, fill_weight)
                h['mcollcoarse'].Fill(m_coll, fill_weight)
                h['mcoll_vs_pt1'].Fill(l1_pt, m_coll, fill_weight)
            counters[sel] += (fill_weight)
        num_processed_entries += 1
    end_time = time.clock()
    delta_time = end_time - start_time
    if verbose:
        if delta_time > 60:
            elapsed = "{0:d} min ".format(int(delta_time / 60))
        else:
            elapsed = "{0:.1f} s ".format(delta_time)
        # entries per second divided by 1000, to match the 'kHz' label
        rate_khz = (num_processed_entries / delta_time / 1000.0) if delta_time else 1.0e9
        print("processed {0:d} entries ".format(num_processed_entries)
              + "in " + elapsed
              + "({0:.1f} kHz)".format(rate_khz))
    if verbose:
        for v in ['onebin']:
            for sel in selections:
                h = histos[sel][v]
                print("{0}: integral {1}, entries {2}".format(
                    h.GetName(), h.Integral(), h.GetEntries()))
    return counters, histos
def runPlot(opts):
    """
    Make the data/background/signal plots for all selections and variables.

    Reads the options input_dir, output_dir, verbose, samples_dir, exclude,
    and the region options (include_regions, exclude_regions, regions) from
    `opts`. Builds the dataset groups (adding the 'fake' group cloned from
    data and, unless skip_charge_flip is set, the charge-flip group), lets
    each group discover its available systematics, and then for each
    (selection, variable) builds the nominal histograms, the statistical and
    systematic error bands, and calls plotHistos.

    Finally prints the summary of the variations for each group.
    Returns None.
    """
    inputDir = opts.input_dir
    outputDir = opts.output_dir
    verbose = opts.verbose
    mkdirIfNeeded(outputDir)
    buildTotBkg = systUtils.buildTotBackgroundHisto
    buildStat = systUtils.buildStatisticalErrorBand
    buildSyst = systUtils.buildSystematicErrorBand
    findByName = systUtils.findByName

    selections = regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions)
    variables = variables_to_plot()

    # --- groups
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir)
    groups.append(first([g for g in groups if g.is_data]).clone_data_as_fake())
    if not skip_charge_flip:
        groups.append(dataset.DatasetGroup.build_qflip_from_simulated_samples(groups))
    plot_groups = [systUtils.Group(g.name) for g in groups]

    # when the region options did not restrict the default list, use the
    # selections for which histogram files are actually available
    sel_not_specified = len(regions_to_plot()) == len(selections)
    if sel_not_specified:
        selections = guess_available_selections_from_histofiles(
            inputDir, first(plot_groups), verbose)

    # --- systematics
    systematics_to_use = get_list_of_syst_to_fill(opts)
    for group in plot_groups:
        # NOTE(review): the first call looks redundant with the chained one
        # below; kept because setter side effects are not visible from here
        group.setCurrentSelection(first(selections))
        group.setHistosDir(inputDir).setCurrentSelection(first(selections))
        group.exploreAvailableSystematics(verbose)
        group.filterAndDropSystematics(systematics_to_use, opts.exclude, verbose)
    available_systematics = sorted(set(s for g in plot_groups for s in g.systematics))
    systematics = [s for s in systematics_to_use if s in available_systematics]
    if verbose:
        print("using the following systematics : {0}".format(systematics))
        print("missing the following systematics : {0}".format(
            [s for s in systematics_to_use if s not in available_systematics]))
    fake_variations = systUtils.fakeSystVariations()
    mc_variations = systUtils.mcObjectVariations() + systUtils.mcWeightVariations()
    fakeSystematics = [s for s in systematics if s in fake_variations]
    mcSystematics = [s for s in systematics if s in mc_variations]

    simBkgs = [g for g in plot_groups if g.isMcBkg]
    data = findByName(plot_groups, 'data')
    fake = findByName(plot_groups, 'fake')
    signal = findByName(plot_groups, 'signaltaumu')
    print('names_stacked_groups to be improved')
    names_stacked_groups = [g.name for g in simBkgs + [fake]]

    # --- plots
    for sel in selections:
        if verbose:
            print("{0} {1}".format('-- plotting ', sel))
        for var in variables:
            if verbose:
                print("{0} {1}".format('---- plotting ', var))
            # compare by value: 'is' tests object identity, which for strings
            # only works when both happen to be the same interned object
            print_summary_yield = var == 'onebin'
            for g in plot_groups:
                g.setSystNominal()
                g.setCurrentSelection(sel)
            nominalHistoData = data.getHistogram(variable=var, selection=sel, cacheIt=True)
            nominalHistoSign = signal.getHistogram(variable=var, selection=sel, cacheIt=True)
            nominalHistoFakeBkg = fake.getHistogram(variable=var, selection=sel, cacheIt=True)
            nominalHistosSimBkg = {
                g.name: g.getHistogram(variable=var, selection=sel, cacheIt=True)
                for g in simBkgs
            }
            nominalHistosBkg = {'fake': nominalHistoFakeBkg}
            nominalHistosBkg.update(nominalHistosSimBkg)
            nominalHistoTotBkg = buildTotBkg(histoFakeBkg=nominalHistoFakeBkg,
                                             histosSimBkgs=nominalHistosSimBkg)
            statErrBand = buildStat(nominalHistoTotBkg)
            systErrBand = buildSyst(fake=fake,
                                    simBkgs=simBkgs,
                                    variable=var,
                                    selection=sel,
                                    fakeVariations=fakeSystematics,
                                    mcVariations=mcSystematics,
                                    verbose=verbose,
                                    printYield=print_summary_yield)
            plotHistos(histoData=nominalHistoData,
                       histoSignal=nominalHistoSign,
                       histoTotBkg=nominalHistoTotBkg,
                       histosBkg=nominalHistosBkg,
                       statErrBand=statErrBand,
                       systErrBand=systErrBand,
                       stack_order=names_stacked_groups,
                       topLabel=sel,
                       canvasName=(sel + '_' + var),
                       outdir=outputDir,
                       options=opts,
                       printYieldSummary=print_summary_yield)
    for group in plot_groups:
        group.printVariationsSummary()
def runFill(opts):
    """
    Fill the histograms for all groups, systematics, and regions.

    Reads from `opts`: batch, input_fake, input_other, output_dir, verbose,
    debug, unblind, require_tight_tight, samples_dir, tight_def,
    disable_cache, quick_test, and the region options.

    In batch mode, one job is submitted per (group, systematic, selection)
    for the systematics needed by each group (opts.syst is set to the current
    systematic before submitting).

    Otherwise, for each group and each systematic needed by that group, a
    chain is built from the group's datasets; the cuts that already have a
    cached entry list are filled one at a time after preselecting the chain,
    the remaining ones are filled together in a single pass. The histograms
    of each selection are written to the file named by the group's
    filenameHisto.

    Returns None.
    """
    batchMode = opts.batch
    inputFakeDir = opts.input_fake
    inputGenDir = opts.input_other
    outputDir = opts.output_dir
    verbose = opts.verbose
    debug = opts.debug
    blinded = not opts.unblind
    tightight = opts.require_tight_tight

    if debug:
        dataset.Dataset.verbose_parsing = True
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir)
    if not skip_charge_flip:
        groups.append(dataset.DatasetGroup.build_qflip_from_simulated_samples(groups))
    groups.append(first([g for g in groups if g.is_data]).clone_data_as_fake())
    groups = parse_group_option(opts, groups)
    if verbose:
        print('\n'.join("group {0} : {1} samples".format(g.name, len(g.datasets))
                        for g in groups))
    if debug:
        print('\n'.join("group {0} : {1} samples: {2}".format(
            g.name, len(g.datasets),
            '\n\t' + '\n\t'.join(d.name for d in g.datasets))
            for g in groups))
    if verbose:
        print("filling histos")
    # eval will take care of aborting on typos
    # NOTE: this evaluates a command-line option as python code; only use
    # with trusted input
    onthefly_tight_def = eval(opts.tight_def) if opts.tight_def else None
    mkdirIfNeeded(outputDir)
    systematics = get_list_of_syst_to_fill(opts)
    regions = regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions)
    if verbose:
        print("about to loop over these systematics:\n %s" % str(systematics))
    if verbose:
        print("about to loop over these regions:\n %s" % str(regions))

    if batchMode:
        for group in groups:
            for systematic in systematics:
                if systUtils.Group(group.name).isNeededForSys(systematic):
                    opts.syst = systematic
                    for selection in regions:
                        submit_batch_fill_job_per_group_per_selection(
                            group=group, selection=selection, opts=opts)
        return

    formulas = selection_formulas()
    for group in groups:
        # per-group list: filtering `systematics` in place would make each
        # group's filter cumulatively shrink the list seen by later groups
        group_systematics = [s for s in systematics
                             if systUtils.Group(group.name).isNeededForSys(s)]
        if not group_systematics:
            print("warning, empty syst list. You should have at least the nominal")
        for systematic in group_systematics:
            # note to self: here you will want to use a modified Sample.setHftInputDir
            # for now we just have the fake syst that are in the nominal tree
            tree_name = 'hlfv_tuple'
            chain = IndexedChain(tree_name)
            input_dir = inputFakeDir if group.name == 'fake' else inputGenDir
            for ds in group.datasets:
                sample_file = systUtils.Sample(ds.name, group.name).setSyst(systematic).filename
                chain.Add(os.path.join(input_dir, sample_file))
            if verbose:
                print("{0} : {1} entries from {2} samples".format(
                    group.name, chain.GetEntries(), len(group.datasets)))
            chain.cache_directory = os.path.abspath('./selection_cache/' + group.name + '/')
            tcuts = [r.TCut(reg, formulas[reg]) for reg in regions]
            chain.retrieve_entrylists(tcuts)
            counters_pre, histos_pre = dict(), dict()
            counters_npre, histos_npre = dict(), dict()
            if opts.disable_cache:
                cached_tcuts, uncached_tcuts = [], tcuts
            else:
                cached_tcuts = chain.tcuts_with_existing_list()
                uncached_tcuts = chain.tcuts_without_existing_list()

            # --- cuts with an existing entry list: one pass per cut
            if verbose:
                print("{0} {1}".format('filling cached cuts: ',
                                       ' '.join([c.GetName() for c in cached_tcuts])))
            for cut in cached_tcuts:
                chain.preselect(cut)
                c_pre, h_pre = count_and_fill(chain=chain,
                                              sample=group.name,
                                              syst=systematic,
                                              verbose=verbose,
                                              debug=debug,
                                              blinded=blinded,
                                              onthefly_tight_def=onthefly_tight_def,
                                              tightight=tightight,
                                              quicktest=opts.quick_test,
                                              cached_cut=cut)
                out_filename = (systUtils.Group(group.name)
                                .setSyst(systematic)
                                .setHistosDir(outputDir)
                                .setCurrentSelection(cut.GetName())).filenameHisto
                writeObjectsToFile(out_filename, h_pre, verbose)
                counters_pre = dictSum(counters_pre, c_pre)
                histos_pre = dictSum(histos_pre, h_pre)

            # --- cuts without an entry list: all of them in a single pass
            if uncached_tcuts:
                if verbose:
                    print("{0} {1}".format('filling uncached cuts: ',
                                           ' '.join([c.GetName() for c in uncached_tcuts])))
                counters_npre, histos_npre = count_and_fill(chain=chain,
                                                            sample=group.name,
                                                            syst=systematic,
                                                            verbose=verbose,
                                                            debug=debug,
                                                            blinded=blinded,
                                                            onthefly_tight_def=onthefly_tight_def,
                                                            tightight=tightight,
                                                            quicktest=opts.quick_test,
                                                            noncached_cuts=uncached_tcuts)
                for sel, histos in histos_npre.items():
                    out_filename = (systUtils.Group(group.name)
                                    .setSyst(systematic)
                                    .setHistosDir(outputDir)
                                    .setCurrentSelection(sel)).filenameHisto
                    writeObjectsToFile(out_filename, histos, verbose)
                # presumably persists the entry lists registered during the
                # uncached pass -- confirm against IndexedChain
                chain.save_lists()