def combine_templates(templates, patterns, conf):
    """Combine matched histogram templates into systematic scenarios and save them.

    The function
      1. collects the data and MC templates matched by ``patterns``,
      2. builds the data-driven QCD template (nominal, yield up/down and
         isolation up/down) from the anti-isolated data templates,
      3. sorts every histogram into ``sample -> systematic -> direction``,
      4. fills in systematic variations that are missing,
      5. writes the unmerged histograms to ``conf.get_outfile_unmerged()``,
      6. merges the samples into physics processes and writes them to
         ``conf.get_outfile_merged()``,
      7. re-reads the merged file and checks that the expected processes exist.

    Args:
        templates: container indexed by a pattern; ``templates[pattern]`` must
            support ``len()`` and yield ``(keys, hist)`` pairs, where ``keys``
            is a ``(sample_var, sample, weight_var)`` triple.
        patterns: mapping with the keys ``"data"``, ``"mc_nom"``,
            ``"mc_varsamp"``, ``"mc_varproc"`` and ``"data_antiiso"``.
        conf: configuration object providing ``get_nominal_weight()``,
            ``get_outfile_unmerged()``, ``get_outfile_merged()``, ``varname``
            and ``channel``.

    Returns:
        None. The results are the two ROOT files written to disk.

    Raises:
        MatchingException: if one of the patterns matched nothing.
        ValueError: if no histograms matched at all, or an anti-iso template
            has an unknown isolation variation direction.
        Exception: if a QCD template key cannot be parsed, if a sample has
            neither a nominal nor an unweighted template, or if the merged
            file does not contain exactly the expected set of processes.
    """
    # `reduce` is a builtin on Python 2 only; functools.reduce works on both.
    from functools import reduce

    hsources = []
    for k in ["data", "mc_nom", "mc_varsamp", "mc_varproc"]:
        items = templates[patterns[k]]
        if len(items) == 0:
            raise MatchingException("Nothing matched to %s:%s" % (k, patterns[k]))
        hsources += items

    if len(hsources) == 0:
        raise ValueError("No histograms matched")

    # Lists of anti-iso data histograms per systematic and direction.
    hqcd = NestedDict()
    hqcd["nominal"][None] = []
    for syst in ["yield", "iso"]:
        for sdir in ["up", "down"]:
            hqcd[syst][sdir] = []

    templates_qcd = templates[patterns["data_antiiso"]]
    if len(templates_qcd) == 0:
        raise MatchingException(
            "Nothing matched to %s:%s" % ("data_antiiso", patterns["data_antiiso"])
        )

    def add_nominal_qcd(hist_nom):
        """Register a nominal QCD histogram together with its yield variations."""
        hqcd["nominal"][None].append(hist_nom)
        hup = hist_nom.Clone()
        hdown = hist_nom.Clone()
        hup.Scale(qcd_yield_variations[0])
        hdown.Scale(qcd_yield_variations[1])
        hqcd["yield"]["up"].append(hup)
        hqcd["yield"]["down"].append(hdown)

    for keys, hist in templates_qcd:
        if keys[1].startswith("antiiso"):
            # We have isolation variations
            isodir = keys[1].split("_")[1]
            if isodir == "nominal":
                add_nominal_qcd(hist)
            elif isodir in ["up", "down"]:
                hqcd["iso"][isodir].append(hist)
            else:
                raise ValueError("Undefined isolation variation direction: %s" % isodir)
        elif keys[1] == "weight__unweighted":
            # We only have the nominal QCD shape
            add_nominal_qcd(hist)
            # Placeholders for the isolation variation
            for isodir in ["up", "down"]:
                hqcd["iso"][isodir].append(hist.Clone())
        else:
            raise Exception("Couldn't parse the QCD pattern: %s" % str(keys))

    def map_leaves(di, f, equate=True):
        """Apply ``f`` to every non-dict leaf of the nested dict ``di``.

        With ``equate=True`` the leaf is replaced by ``f(leaf)``; otherwise
        ``f`` is only called for its side effects. Returns ``di``.
        """
        for k, v in di.items():
            if isinstance(v, dict):
                # Propagate `equate`, otherwise nested leaves would always be
                # overwritten regardless of what the caller asked for.
                map_leaves(v, f, equate)
            elif equate:
                di[k] = f(v)
            else:
                f(v)
        return di

    # Sum the anti-iso data subsamples
    map_leaves(hqcd, lambda li: reduce(lambda x, y: x + y, li))

    def normalize_to_nominal(hi):
        """Scale ``hi`` in place to the integral of the nominal QCD template."""
        if hi.Integral() > 0:
            hi.Scale(hqcd["nominal"][None].Integral() / hi.Integral())

    # Normalize the isolation variations to the nominal
    map_leaves(hqcd["iso"], normalize_to_nominal, equate=False)

    # Add the variated data-driven QCD templates
    hsources += [
        (("data", "qcd", "weight__unweighted"), hqcd["nominal"][None]),
        (("data", "qcd", "weight__qcd_yield_up"), hqcd["yield"]["up"]),
        (("data", "qcd", "weight__qcd_yield_down"), hqcd["yield"]["down"]),
        (("data", "qcd", "weight__qcd_iso_up"), hqcd["iso"]["up"]),
        (("data", "qcd", "weight__qcd_iso_down"), hqcd["iso"]["down"]),
    ]

    syst_scenarios = NestedDict()
    for (sample_var, sample, weight_var), hist in hsources:
        make_hist(hist)

        if ".root" in sample:
            sample = sample[:sample.index(".root")]

        # The weight name is the part after the first "__", if there is one.
        if "__" in weight_var:
            wn = weight_var.split("__")[1]
        else:
            wn = weight_var

        sample_var = sample_var.lower()

        if wn == "nominal":
            # Nominal weight, look for variated samples
            syst = sample_var
        elif wn == "unweighted":
            syst = "unweighted"
        else:
            # Variated weight, use only nominal sample or data in case of
            # data-driven shapes
            if not (sample_var == "nominal" or sample_var == "data"):
                continue
            syst = wn

        if wn == conf.get_nominal_weight() and sample_var == "nominal":
            logger.info("Using %s:%s as nominal sample for %s" % (wn, sample_var, sample))
            syst_scenarios[sample]["nominal"][None] = hist
        elif sample_var == "syst":
            # A systematic scenario which has a separate systematic sample
            try:
                r = get_syst_from_sample_name(sample)
            except Exception as e:
                # Best effort: unknown systematic samples are skipped, not fatal.
                logger.warning("Unhandled systematic: %s" % str(e))
                r = None
            if not r:
                continue
            sample, systname, d = r
            syst_scenarios[sample][systname][d] = hist
        else:
            logger.debug("Systematically variated weight: %s:%s %s" % (wn, sample_var, sample))
            systname, d = get_updown(syst)
            syst_scenarios[sample][systname][d] = hist

    logger.info("histogram W3Jets_exclusive nominal: " + "%f %d" % (
        syst_scenarios["W3Jets_exclusive"]["nominal"][None].Integral(),
        syst_scenarios["W3Jets_exclusive"]["nominal"][None].GetEntries())
    )

    ######################################
    ### Save systematics, fill missing ###
    ######################################

    #########
    # tchan #
    #########
    # T_t_ToLeptons mass_up is missing, take the mass down and flip the
    # difference with the nominal: up = nominal + (nominal - down)
    mnomt = syst_scenarios["T_t_ToLeptons"]["nominal"][None].Clone()
    mdownt = syst_scenarios["T_t_ToLeptons"]["mass"]["down"].Clone()
    mupt = (mnomt + mnomt - mdownt)
    syst_scenarios["T_t_ToLeptons"]["mass"]["up"] = mupt

    #########
    # TTBar #
    #########
    # TTbar variations are provided for the inclusive only, fill them for the
    # exclusive samples by scaling each with (inclusive variation / nominal sum)
    nom_ttbar = (
        syst_scenarios["TTJets_FullLept"]["nominal"][None]
        + syst_scenarios["TTJets_SemiLept"]["nominal"][None]
    )
    for syst in ["mass", "ttbar_scale", "ttbar_matching"]:
        for sample in ["TTJets_FullLept", "TTJets_SemiLept"]:
            for sd in ["up", "down"]:
                syst_scenarios[sample][syst][sd] = (
                    syst_scenarios[sample]["nominal"][None]
                    * syst_scenarios["TTJets"][syst][sd] / nom_ttbar
                )
    syst_scenarios.pop("TTJets")

    syst_scenarios = syst_scenarios.as_dict()

    # Create the output file. dirname() is "" for a bare file name, and
    # os.makedirs("") raises, so only create the directory when there is one.
    p = os.path.dirname(conf.get_outfile_unmerged())
    if p and not os.path.exists(p):
        os.makedirs(p)
    of = ROOT.TFile(conf.get_outfile_unmerged(), "RECREATE")
    of.cd()

    # Get the list of all possible systematic scenarios that we have available
    allsysts = get_all_systs(syst_scenarios)
    for sampn, h1 in syst_scenarios.items():

        # Consider all the possible systematic scenarios
        for systname in allsysts:

            if systname in h1:
                # If we have it available, fine, use it
                h2 = h1[systname]
            elif not sampn.startswith("Single") and systname not in ["unweighted", "nominal"]:
                # If not, in case of MC and a non-trivial variation, try to
                # get the unvariated template as a placeholder
                h = h1.get("nominal", None) or h1.get("unweighted", None)
                if not h:
                    raise Exception(
                        "Could not get the nominal template for %s:%s" % (sampn, systname)
                    )

                # Our convention is that even the unvariated template is a
                # dict with a single key for the direction of variation,
                # which is 'None'
                h = h[None]

                # Add placeholder templates.
                # NOTE(review): as in the original code, the "down" placeholder
                # is cloned from the "up" placeholder after set_missing_hist()
                # was applied to it, not from the unvariated template - confirm
                # that this is intended.
                for systdir in ["up", "down"]:
                    h = h.Clone(hname_encode(conf.varname, sampn, systname, systdir))
                    set_missing_hist(h)

                    # Save to file
                    h.SetDirectory(of)
                    h.Write()
                continue
            else:
                continue

            for systdir, h in h2.items():
                if systdir is None and systname == "nominal" or not sample_types.is_mc(sampn):
                    h = h.Clone(hname_encode(conf.varname, sampn))
                elif systdir is None and systname == "unweighted":
                    h = h.Clone(hname_encode(conf.varname, sampn, "unweighted"))
                else:
                    h = h.Clone(hname_encode(conf.varname, sampn, systname, systdir))
                h.SetDirectory(of)
                h.Write()

    nkeys = len(of.GetListOfKeys())
    logger.info("Saved %d histograms to file %s" % (nkeys, of.GetPath()))
    # The unmerged file is deliberately left open here, as in the original
    # code: the histograms read back below come from it.
    # NOTE(review): presumably closing it would invalidate them - confirm.

    ########################
    ### Load systematics ###
    ########################
    hists = dict()
    for k in of.GetListOfKeys():
        hists[k.GetName()] = of.Get(k.GetName())
    logger.info("Loaded %d histograms from file %s" % (len(hists), of.GetPath()))

    ########################
    ###      Merge       ###
    ########################
    from plots.common.utils import merge_hists, PhysicsProcess
    merge_cmds = PhysicsProcess.get_merge_dict(
        PhysicsProcess.get_proc_dict(conf.channel)
    )

    hsysts = NestedDict()
    # Variable name used for the merged histogram names. It is taken from the
    # last histogram name that was split, which assumes that all histograms in
    # the file share the same variable.
    merged_var = None
    for k, v in hists.items():
        spl = split_name(k)
        hsysts[spl["type"]][spl["dir"]][spl["sample"]] = v
        merged_var = spl["var"]
    hsysts = hsysts.as_dict()

    p = os.path.dirname(conf.get_outfile_merged())
    if p and not os.path.exists(p):
        os.makedirs(p)
    of = ROOT.TFile(conf.get_outfile_merged(), "RECREATE")
    of.cd()

    for syst, h1 in hsysts.items():
        if syst in skipped_systs:
            continue
        for sdir, h2 in h1.items():
            hmc = merge_hists(h2, merge_cmds)
            for hn, h in hmc.items():
                if syst == "nominal" or syst == "unweighted":
                    h.SetName("__".join([merged_var, hn]))
                else:
                    h.SetName("__".join([merged_var, hn, syst, sdir]))
                h.SetDirectory(of)
                h.Write()

    nkeys = len(of.GetListOfKeys())
    logger.info("Saved %d histograms to file %s" % (nkeys, of.GetPath()))
    of.Close()

    # Read the merged file back and verify its contents.
    hists = load_theta_format(conf.get_outfile_merged())
    processes = set()
    systs = set()
    for (variable, sample, syst, systdir), v in hists.items_flat():
        processes.add(sample)
        systs.add(syst)

    logger.info("Processes: %s" % processes)
    expected_processes = set([
        'diboson', 'schan', 'tWchan', 'TTJets', 'tchan', 'WJets', 'qcd', 'DYJets', 'data'
    ])
    if not processes == expected_processes:
        raise Exception(
            "Combined file did not contain the necessary processes: %s" % str(processes)
        )
    logger.info("Systematic scenarios: %s" % systs)
def data_mc_plot(samples, plot_def, name, lepton_channel, lumi, weight, physics_processes, use_antiiso=False):
    """Draw a data/MC comparison plot for one variable and return its canvas.

    MC samples are drawn with ``weight`` and scaled to ``lumi``; anti-isolated
    data samples are turned into a data-driven QCD template (unless
    ``use_antiiso`` is set); the remaining samples are drawn as data. The MC
    histograms are merged into physics processes and drawn stacked (or
    overlaid when normalized) together with the data.

    Args:
        samples: mapping from sample key to sample object. The objects must
            provide ``isMC``, ``name``, ``drawHistogram(...)`` and, for MC,
            ``lumiScaleFactor(lumi)``.
        plot_def: plot definition mapping. Keys read here: ``'var'``,
            ``'elecut'``/``'mucut'``, ``'range'``, ``'estQcd'``, ``'log'``,
            ``'labloc'``, ``'xlab'``, ``'gev'`` and the optional
            ``'normalize'``, ``'fitpars'``, ``'cutname'``. ``'var'`` and
            ``'xlab'`` may be a string or an (electron, muon) pair.
        name: name of the plot, used in log messages.
        lepton_channel: ``'ele'`` or ``'mu'``.
        lumi: luminosity passed to ``lumiScaleFactor`` and the lumi text box.
        weight: MC weight; converted with ``str()`` before drawing.
        physics_processes: passed to ``PhysicsProcess.get_merge_dict`` and
            ``PhysicsProcess.name_histograms``.
        use_antiiso: if true, anti-isolated samples are treated as ordinary
            data instead of being used for the QCD template.

    Returns:
        Tuple ``(canv, merged_hists, htot, hist_data)``: the canvas, the
        merged MC histograms, their sum, and the summed data histogram.

    Raises:
        Exception: if no data histogram was drawn or the data histogram has
            a non-positive integral.
    """
    logger.info('Plot in progress %s' % name)

    merge_cmds = PhysicsProcess.get_merge_dict(physics_processes)  # The actual merge dictionary

    var = plot_def['var']

    # If var is a list/tuple, assume it is (electron variable, muon variable)
    if not isinstance(var, basestring):
        try:
            if lepton_channel == 'ele':
                var = var[0]
            elif lepton_channel == 'mu':
                var = var[1]
        except Exception:
            logger.error("Plot variable 'var' specification incorrect for multi-variable plot: %s" % str(var))
            # Bare raise keeps the original traceback.
            raise

    cut = None
    if lepton_channel == 'ele':
        cut = plot_def['elecut']
    elif lepton_channel == 'mu':
        cut = plot_def['mucut']
    cut_str = str(cut)

    plot_range = plot_def['range']

    do_norm = bool('normalize' in plot_def and plot_def['normalize'])
    has_fitpars = "fitpars" in plot_def

    hists_mc = dict()
    hists_data = dict()
    # The loop variable must not be called `name`: that would overwrite the
    # plot name that is reported in the chi2 message further down.
    for sample_key, sample in samples.items():
        logger.debug("Starting to plot %s" % sample_key)
        if sample.isMC:
            hist = sample.drawHistogram(var, cut_str, weight=str(weight), plot_range=plot_range)
            hist.Scale(sample.lumiScaleFactor(lumi))
            hists_mc[sample.name] = hist
            if do_norm:
                Styling.mc_style_nostack(hists_mc[sample.name], sample.name)
            else:
                Styling.mc_style(hists_mc[sample.name], sample.name)

            if has_fitpars:
                rescale_to_fit(sample.name, hist, plot_def["fitpars"][lepton_channel])

        elif "antiiso" in sample_key and plot_def['estQcd'] and not use_antiiso:
            # Make loose template. The last matching region wins, as before.
            region = '2j1t'
            for candidate in ('2j0t', '3j0t', '3j1t', '3j2t'):
                if candidate in plot_def['estQcd']:
                    region = candidate

            qcd_extra_cut = Cuts.deltaR(0.3)*Cuts.antiiso(lepton_channel)

            # Take the loose template with a good shape from the N-jet, M-tag,
            # post lepton selection region with high statistics
            qcd_loose_cut = cutlist[region]*cutlist['presel_'+lepton_channel]*qcd_extra_cut

            # Take the template which can be correctly normalized from the
            # actual region with inverted isolation cuts
            qcd_cut = cut*qcd_extra_cut

            hist_qcd_loose = sample.drawHistogram(var, str(qcd_loose_cut), weight="1.0", plot_range=plot_range)
            hist_qcd = sample.drawHistogram(var, str(qcd_cut), weight="1.0", plot_range=plot_range)

            qcd_sf = qcdScale[lepton_channel][plot_def['estQcd']]
            logger.debug("Using the QCD scale factor %s: %.2f" % (plot_def['estQcd'], qcd_sf))
            hist_qcd.Scale(qcd_sf)
            # Give the loose shape the normalization of the scaled template.
            hist_qcd_loose.Scale(hist_qcd.Integral()/hist_qcd_loose.Integral())
            if var == 'cos_theta':
                hist_qcd = hist_qcd_loose

            sampn = "QCD"+sample.name

            # Rescale the QCD histogram to the eta_lj fit
            if has_fitpars:
                rescale_to_fit(sampn, hist_qcd, plot_def["fitpars"][lepton_channel])

            hists_mc[sampn] = hist_qcd
            hists_mc[sampn].SetTitle('QCD')
            if do_norm:
                Styling.mc_style_nostack(hists_mc[sampn], 'QCD')
            else:
                Styling.mc_style(hists_mc[sampn], 'QCD')

        elif "antiiso" not in sample_key or use_antiiso:
            # Real ordinary data in the isolated region
            hist_data = sample.drawHistogram(var, cut_str, weight="1.0", plot_range=plot_range)
            hist_data.SetTitle('Data')
            Styling.data_style(hist_data)
            hists_data[sample_key] = hist_data

    if len(hists_data.values()) == 0:
        raise Exception("Couldn't draw the data histogram")

    # Combine the subsamples to physical processes
    hist_data = sum(hists_data.values())
    merge_cmds['QCD'] = ["QCD"+merge_cmds['data'][0]]
    order = ['QCD']+PhysicsProcess.desired_plot_order
    if plot_def['log']:
        order = PhysicsProcess.desired_plot_order_log+['QCD']
    merged_hists = merge_hists(hists_mc, merge_cmds, order=order)

    if hist_data.Integral() <= 0:
        logger.error(hists_data)
        logger.error("hist_data.entries = %d" % hist_data.GetEntries())
        logger.error("hist_data.integral = %d" % hist_data.Integral())
        raise Exception("Histogram for data was empty. Something went wrong, please check.")

    if do_norm:
        # Normalize every process and the data to unit area.
        for v in merged_hists.values():
            v.Scale(1./v.Integral())
        hist_data.Scale(1./hist_data.Integral())

    htot = sum(merged_hists.values())

    chi2 = hist_data.Chi2Test(htot, "UW CHI2/NDF")
    if chi2 > 20:  # FIXME: uglyness
        logger.error("The chi2 between data and MC is large (%s, chi2=%.2f). You may have errors with your samples!" %
            (name, chi2)
        )
        mc_vals = list(htot.y())
        data_vals = list(hist_data.y())
        logger.info("MC : %s" % " ".join(map(lambda x: "%.1f" % x, mc_vals)))
        logger.info("DATA: %s" % " ".join(map(lambda x: "%.1f" % x, data_vals)))
        logger.info("diff: %s" % str(
            " ".join(map(lambda x: "%.1f" % x, numpy.abs(numpy.array(mc_vals) - numpy.array(data_vals))))
        ))

    # Materialize as a list so that it can be reversed and reused below.
    merged_hists_l = list(merged_hists.values())

    PhysicsProcess.name_histograms(physics_processes, merged_hists)

    leg_style = ['p', 'f']
    if do_norm:
        leg_style = ['p', 'l']
    leg = legend([hist_data] + list(reversed(merged_hists_l)), legend_pos=plot_def['labloc'], styles=leg_style)

    canv = ROOT.TCanvas()

    # Make the stacks
    stacks_d = OrderedDict()
    stacks_d["mc"] = merged_hists_l
    stacks_d["data"] = [hist_data]

    # label
    xlab = plot_def['xlab']
    if not isinstance(xlab, basestring):
        if lepton_channel == 'ele':
            xlab = xlab[0]
        else:
            xlab = xlab[1]
    # NOTE(review): this reads plot_range as (nbins, min, max), i.e. the label
    # is the bin width - confirm against drawHistogram.
    ylab = 'N / '+str((1.*(plot_range[2]-plot_range[1])/plot_range[0]))
    if plot_def['gev']:
        ylab += ' GeV'

    fact = 1.5
    if plot_def['log']:
        fact = 10

    plow = 0.3
    if do_norm:
        plow = 0

    # Make a separate pad for the stack plot
    p1 = ROOT.TPad("p1", "p1", 0, plow, 1, 1)
    p1.Draw()
    p1.SetTicks(1, 1)
    p1.SetGrid()
    p1.SetFillStyle(0)
    p1.cd()

    stacks = plot_hists_stacked(p1, stacks_d, x_label=xlab, y_label=ylab, max_bin_mult=fact, do_log_y=plot_def['log'], stack=(not do_norm))

    # Put the lumi box where the legend is not
    boxloc = 'top-right'
    if plot_def['labloc'] == 'top-right':
        boxloc = 'top-left'

    chan = 'Electron'
    if lepton_channel == "mu":
        chan = 'Muon'

    additional_comments = ""
    if 'cutname' in plot_def:
        additional_comments += ", " + plot_def['cutname'][lepton_channel]
    lbox = lumi_textbox(lumi,
        boxloc,
        'preliminary',
        chan + ' channel' + additional_comments
    )

    # Draw everything
    lbox.Draw()
    leg.Draw()
    canv.Draw()

    # Keep the handles just in case. LEGEND must reference the created legend
    # object `leg`, not the `legend` factory function.
    canv.PAD1 = p1
    canv.STACKS = stacks
    canv.LEGEND = leg
    canv.LUMIBOX = lbox

    return canv, merged_hists, htot, hist_data