def get_yield_histogram(list_of_file_names, regions, labels=None, hsuffix="_cutflow", sfs=None):
    """Sum per-region cutflow yields across several ROOT files into one TH1F.

    Each entry of `regions` is either "name" (read the last bin of histogram
    "<name><hsuffix>") or "name(b)" (read bin b; negative b counts back from
    the overflow bin).  Optional scale factors `sfs` are applied through
    apply_sf before the bin is read.  Files/regions whose histogram is
    missing simply contribute zero.

    labels : optional list of x-axis bin labels, one per region.
    Returns a TH1F named "yields" with one bin per region.
    """
    # Avoid the mutable-default-argument trap (shared [] / {} across calls).
    if labels is None:
        labels = []
    if sfs is None:
        sfs = {}
    final_h = r.TH1F("yields", "", len(regions), 0, len(regions))
    final_h.Sumw2()
    yields = [E(0, 0) for _ in xrange(len(regions))]
    for file_name in list_of_file_names:
        f = r.TFile(file_name)
        for index, region in enumerate(regions):
            try:
                prefix = region.split("(")[0]
                h = f.Get(prefix + hsuffix)
                apply_sf(h, sfs, file_name, prefix)
                # "region(b)" selects bin b; a plain "region" selects the last bin
                binoffset = int(region.split("(")[1].split(")")[0]) if len(region.split("(")) > 1 else h.GetNbinsX()
                if binoffset < 0:
                    binoffset = h.GetNbinsX() + 1 + binoffset
                yields[index] += E(h.GetBinContent(binoffset), h.GetBinError(binoffset))
            except Exception:
                # Histogram missing in this file: treat as zero contribution.
                # (Was a bare `except:` which also swallowed KeyboardInterrupt
                # and SystemExit; narrowed to Exception.)
                pass
        f.Close()
    for i in xrange(len(regions)):
        final_h.SetBinContent(i+1, yields[i].val)
        final_h.SetBinError(i+1, yields[i].err)
        if len(labels):
            final_h.GetXaxis().SetBinLabel(i+1, labels[i])
    final_h.SetCanExtend(False)
    return final_h
def print_table(d_yields): nbins = len(d_yields["ttz"]["central"]) # colnames = ["","$\\ttW$","$\\ttZ$","$\\ttH$","$\\ttVV$","X+$\\gamma$","Rares","Flips","Fakes","Total","Data","$\\tttt$"] # procs = ["ttw","ttz","tth","ttvv","xg","rares","flips","fakes","total_background","data","tttt"] colnames = ["","$\\ttW$","$\\ttZ$","$\\ttH$","$\\ttVV$","X+$\\gamma$","Rares","Flips","Fakes","Total","Data","$\\tttt$", "tot s+b"] allprocs = [["ttw"],["ttz"],["tth"],["ttvv"],["xg"],["rares"],["flips"],["fakes"],["total_background"],["data"],["tttt"],["total"]] # colnames = ["","$\\ttW$","$\\ttZ$","$\\ttH$","Others","Total","Data","$\\tttt$"] # allprocs = [["ttw"],["ttz"],["tth"],["ttvv","xg","rares","flips","fakes"],["total_background"],["data"],["tttt"]] srnames = ["CRZ","CRW","SR1","SR2","SR3","SR4","SR5","SR6","SR7","SR8"] for ibin in range(nbins): # print ibin if ibin == 0: print "&".join(map(lambda x: "{0:12s}".format(x),colnames)), print r"\\" print r"\hline\hline" tojoin = [srnames[ibin]] for procs in allprocs: tot_ve = E(0.,0.) for subproc in procs: ve = E(max(d_yields[subproc]["central"][ibin],0.), d_yields[subproc]["error"][ibin]) tot_ve += ve cent,err = tot_ve if "data" in procs: tojoin.append("{0:.0f}".format(cent)) else: tojoin.append("{0:5.2f}$\\pm${1:5.2f}".format(cent,err)) print " & ".join(tojoin), print r"\\"
def apply_sf(h, sfs, file_name, hist_name):
    """Scale histogram bins in place by a (value, error) scale factor.

    sfs maps a file-name substring -> {hist-name substring: (sf, sf_err)}.
    For every matching (file, hist) pair, all bins — including underflow and
    overflow — are multiplied by E(sf, sf_err) with error propagation.
    """
    labels = h.GetXaxis().GetLabels()
    if labels:
        # Freeze a labeled axis so it cannot auto-extend while we touch bins.
        h.GetXaxis().SetRange(1, h.GetXaxis().GetNbins())
        h.GetXaxis().SetCanExtend(False)
    for key in sfs:
        if key not in file_name:
            continue
        for rptn in sfs[key]:
            if rptn not in hist_name:
                continue
            sfval, sferr = sfs[key][rptn][0], sfs[key][rptn][1]
            # Bins 0 .. N+1 cover underflow and overflow.
            for i in xrange(0, h.GetNbinsX()+2):
                nb = E(h.GetBinContent(i), h.GetBinError(i)) * E(sfval, sferr)
                h.SetBinContent(i, nb.val)
                # BUGFIX: the propagated error was computed but never stored,
                # so callers (e.g. get_yield_histogram) read stale, unscaled
                # bin errors.  Store it alongside the scaled content.
                h.SetBinError(i, nb.err)
def rebin36(h):
    """Rebin an emu-channel histogram from 180 bins down to 5.

    After the factor-36 rebin, bin 6 is folded into bin 5 (contents and
    errors combined via E) and bin 6 is zeroed.  The histogram is modified
    in place and also returned for convenience.
    """
    h.Rebin(36)
    merged = E(h.GetBinContent(5), h.GetBinError(5)) + E(h.GetBinContent(6), h.GetBinError(6))
    h.SetBinContent(5, merged.val)
    h.SetBinError(5, merged.err)
    h.SetBinContent(6, 0)
    h.SetBinError(6, 0)
    return h
def print_yield_table(procs, rates, output_name):
    """Write per-process yield tables (txt and tex) for one bin.

    procs       : process names of the form "<prefix>_<procname>".
    rates       : matching list of E(value, error) yields.
    output_name : basename for "<output_name>.txt" / "<output_name>.tex".

    Builds one single-bin TH1F per process, a "Total" background histogram
    (all processes except signal-like ones), and an empty "obs" histogram,
    then delegates formatting to the plottery helpers.
    """
    hists = []
    bkgh = r.TH1F("Total", "Total", 1, 0, 1)
    total_rate = E(0, 0)
    for proc, rate in zip(procs, rates):
        procname = proc.split("_")[1]
        h = r.TH1F(procname, procname, 1, 0, 1)
        # Exclude signal-like processes from the background total.
        # NOTE(review): the original tested `procname != "wzz"` twice; the
        # duplicate may have been intended as another process (e.g. "wwz")
        # -- confirm against the sample list.  Behavior here is unchanged.
        if procname not in ("sig", "wzz", "zzz"):
            total_rate += rate
        h.SetBinContent(1, rate.val)
        h.SetBinError(1, rate.err)
        hists.append(h)
    bkgh.SetBinContent(1, total_rate.val)
    bkgh.SetBinError(1, total_rate.err)
    hists.insert(0, bkgh)
    # Empty "observed" placeholder histogram in the first slot.
    obsh = bkgh.Clone("obs")
    obsh.Reset()
    hists.insert(0, obsh)
    p.print_yield_table_from_list(hists, output_name + ".txt", prec=2, binrange=[1])
    p.print_yield_tex_table_from_list(hists, output_name + ".tex", prec=2)
def get_alpha(process, numerator_region, denominator_region, valopt="eff"):
    # Build one single-bin TH1F per systematic holding the transfer factor
    # ("alpha") between two regions, with the up/down variations folded into
    # the bin error.  Relies on module-level `syst_list_all` and `run_alpha`.
    systs = syst_list_all[1:]  # first entry is the nominal; skip it here
    nominal = run_alpha(process, numerator_region, denominator_region, "", valopt)
    rtn_val = {}
    rtn_val["Nominal"] = nominal
    for syst in systs:
        var = E(nominal.val, 0)
        varup = run_alpha(process, numerator_region, denominator_region, syst + "Up", valopt)
        vardn = run_alpha(process, numerator_region, denominator_region, syst + "Down", valopt)
        # Symmetrized error: geometric mean of the up and down shifts
        # relative to the nominal value.
        err = math.sqrt(abs(((varup - var) * (vardn - var)).val))
        var.err = err
        rtn_val[syst] = var
    # Not entirely a correct treatment... but a work-around: merge the
    # METPileup fractional error into Pileup in quadrature, then drop it.
    pufracerr = rtn_val["Pileup"].err / rtn_val["Pileup"].val
    metpufracerr = rtn_val["METPileup"].err / rtn_val["METPileup"].val
    rtn_val["Pileup"] = E( rtn_val["Pileup"].val, rtn_val["Pileup"].val * math.sqrt(pufracerr**2 + metpufracerr**2))
    del rtn_val["METPileup"]
    # Pack each remaining systematic into a one-bin histogram named after it.
    hists = []
    for index, key in enumerate(syst_list_all):
        if key == "METPileup": continue
        h = r.TH1F("{}".format(key), "", 1, 0, 1)
        h.SetBinContent(1, rtn_val[key].val)
        h.SetBinError(1, rtn_val[key].err)
        hists.append(h)
    return hists
def read_table(fname):
    """Parse a printed yields table into {category: [E(val, err), ...]}.

    Rows containing "Bin#" define the category (column) names; rows
    containing "Bin" carry the per-bin values, with the +/- glyph turned
    into a comma so each cell splits into (value, error).
    """
    pm = u"\u00B1".encode("utf-8")
    fh = open(fname)
    rows = [raw.strip() for raw in fh.readlines()]
    categories = []
    yields = {}
    for row in rows:
        if "Bin#" in row:
            # Header row: drop the leading tokens, then turn the column
            # separators into whitespace and split out the category names.
            row = "".join([" "] + row.split()[3:])
            row = row.replace("|", " ")
            categories = row.split()
            for cat in categories:
                yields[cat] = []
        # Note: intentionally not `elif` — the header row is re-checked
        # against the reassigned string, matching the two-step parse.
        if "Bin" in row:
            row = "".join([" "] + row.split()[3:])
            row = row.replace("|", " ")
            row = row.replace(pm, ",")
            for cat, cell in zip(categories, row.split()):
                val, err = cell.split(",")
                yields[cat].append(E(float(val), float(err)))
    return yields
def print_yields(self, detail=False):
    # Print yields with systematic uncertainties folded in.
    #
    # detail=False: multiplies every systematic into each process rate as
    #   E(1, syst) factors and prints one line per process.
    # detail=True : prints a table of per-systematic relative errors (in %)
    #   per process instead.
    #
    # NOTE(review): the returned `rates_errs` is a list of E objects when
    # detail=False but a nested dict when detail=True — callers must be
    # aware of the divergent return type; confirm which form they expect.
    systs_lines = []
    for syst in self.systs:
        systname = ""
        systvals = []
        if self.check_gmN(syst):
            systs_lines.append(self.get_syst_str(syst))
    # Append the statistical-uncertainty lines (last split element is empty).
    systs_lines += self.get_stats_str().split('\n')[:-1]
    systs_data = {}
    for syst_line in systs_lines:
        systname = ""
        systvals = []
        syst_data = syst_line.split()
        systname = syst_data[0]
        if syst_data[1] == 'gmN':
            # gmN: convert the sideband count N into a relative error 1+1/sqrt(N).
            dataN = int(syst_data[2])
            systvals = syst_data[3:]
            systvals_new = []
            for systval in systvals:
                if systval != "-":
                    systvals_new.append( "{:.4f}".format(1. + 1. / math.sqrt(float(dataN))))
                else:
                    systvals_new.append(systval)
            systvals = systvals_new
        else:
            systvals = syst_data[2:]
        # Convert each column entry to a fractional uncertainty:
        #   "a/b"  -> sqrt(|a-1| * |b-1|)   (asymmetric lnN)
        #   "-"    -> 0                      (not applicable)
        #   "x"    -> |x-1|
        systvals_in_float = []
        for systval in systvals:
            systval_in_float = 0
            if '/' in systval:
                up = abs(float(systval.split('/')[0]) - 1)
                down = abs(float(systval.split('/')[1]) - 1)
                systval_in_float = math.sqrt(up * down)
            elif '-' in systval:
                systval_in_float = 0
            else:
                systval_in_float = abs(float(systval) - 1)
            systvals_in_float.append(systval_in_float)
        systs_data[systname] = systvals_in_float
    if detail:
        # Tabulate per-systematic relative errors (%) per process.
        rates_errs = {}
        print_str = "{:<40s}".format("systematics")
        for procname in self.proc_names:
            print_str += "& " + "{:<20s}".format(procname)
        print print_str
        print_str = ""
        for systname in sorted(systs_data.keys()):
            print_str = "{:<40s}".format(systname)
            rates_errs[systname] = {}
            for index, (rate, procname) in enumerate( zip(self.rates, self.proc_names)):
                rates_errs[systname][ procname] = systs_data[systname][index] * 100.
                if rates_errs[systname][procname] == 0:
                    print_str += "& " + "{:<20s}".format("-")
                else:
                    print_str += "& " + "{:<20.1f}".format( rates_errs[systname][procname])
            print print_str
            print_str = ""
    else:
        # Fold every systematic into each rate via E(1, frac) products.
        rates_errs = []
        for index, _ in enumerate(self.rates):
            rate_err = E(self.rates[index], 0)
            for systname in systs_data:
                rate_err *= E(1, systs_data[systname][index])
            rates_errs.append(rate_err)
        for proc, rate_err in zip(self.proc_names, rates_errs):
            if rate_err.val != 0:
                print proc, rate_err, rate_err.err / rate_err.val
            else:
                print proc, rate_err, 0
    return self.proc_names, rates_errs
# ---- script-level post-processing of a fit command ----
# NOTE(review): `cmd` and `h_data` are defined earlier in the file (outside
# this chunk) — presumably the combine command string and the data histogram.
print cmd
output = commands.getoutput(cmd)
print output
# The command's output embeds a python dict literal tagged with "GREP";
# strip the tags and evaluate it safely.
d_sfs = ast.literal_eval(output.replace("GREP", "").strip())
# Per-process fitted scale factors (value, error).
sf_wz, sf_err_wz = d_sfs["totals"]["wz"]
sf_ttz, sf_err_ttz = d_sfs["totals"]["ttz"]
sf_fakes, sf_err_fakes = d_sfs["totals"]["fakes"]
sf_rares, sf_err_rares = d_sfs["totals"]["rares"]
# calculate chi2 from the post-fit per-bin totals
counts_wz, errs_wz = d_sfs["postfit_totals"]["wz"]
counts_ttz, errs_ttz = d_sfs["postfit_totals"]["ttz"]
counts_fakes, errs_fakes = d_sfs["postfit_totals"]["fakes"]
counts_rares, errs_rares = d_sfs["postfit_totals"]["rares"]
bins_wz = map(lambda x: E(*x), zip(counts_wz, errs_wz))
bins_ttz = map(lambda x: E(*x), zip(counts_ttz, errs_ttz))
bins_fakes = map(lambda x: E(*x), zip(counts_fakes, errs_fakes))
bins_rares = map(lambda x: E(*x), zip(counts_rares, errs_rares))
# If we are missing a wz bin at the end, pad with zeros to 4 bins.
if len(bins_ttz) == 4 and len(bins_wz) < 4:
    while len(bins_wz) < 4:
        bins_wz.append(E(0.0, 0.0))
# Bin-by-bin total prediction = wz + ttz + fakes + rares.
bins_pred = map(sum, zip(*[bins_wz, bins_ttz, bins_fakes, bins_rares]))
bins_data = list(h_data)[1:-1]  # drop underflow and overflow bins
chi2sum = 0.
for data, pred_ce in zip(bins_data, bins_pred):
    # pred_ce unpacks as (central, error); chi2 term = ((d - c)/e)^2.
    chi2sum += ((data - pred_ce[0]) / pred_ce[1])**2.0
    print data, pred_ce[0], pred_ce[1]
def main_fake_rate_measurement(prefix, output_name, etaregion="", procname="ttbar6"):
    # Measure lepton fake rates from data (data minus prompt background) and
    # from ttbar MC, plot them against each other, and return both as TH1Fs.
    # Parse the input arguments
    try:
        ntuple_version = sys.argv[1]
        tag = sys.argv[2]
    except:
        usage()
    # Integrated luminosity per data-taking year (fb^-1).
    if "2016" in ntuple_version: lumi = 35.9
    if "2017" in ntuple_version: lumi = 41.3
    if "2018" in ntuple_version: lumi = 59.74
    basedir = "plots/{}/{}/lin/".format(ntuple_version, tag)
    # Denominator : fake from data (i.e. data - prompt)
    yields_ddfake = rt.read_table(basedir + prefix + "Prompt__lepFakeCand2PtFineVarBin"+etaregion+".txt")
    yields_ddfake["ddfake"] = []
    for datacount, bkgcount in zip(yields_ddfake["data"], yields_ddfake["Total"]):
        yields_ddfake["ddfake"].append(datacount - bkgcount)
    # Numerator : fake from data (i.e. data - prompt)
    yields_ddfake_tight = rt.read_table(basedir + prefix + "TightPrompt__lepFakeCand2PtFineVarBin"+etaregion+".txt")
    yields_ddfake_tight["ddfake"] = []
    for datacount, bkgcount in zip(yields_ddfake_tight["data"], yields_ddfake_tight["Total"]):
        yields_ddfake_tight["ddfake"].append(datacount - bkgcount)
    # Data-driven fake rate = tight / loose, guarding against empty bins.
    fr_data = []
    for den, num in zip(yields_ddfake["ddfake"], yields_ddfake_tight["ddfake"]):
        if den.val != 0:
            fr = num / den
            fr_data.append(fr)
        else:
            fr_data.append(E(0, 0))
    fr_data.pop(0)   # first one is underflow bin
    fr_data.pop(0)   # NOTE(review): comment said "underflow" again — presumably drops the first pt bin; confirm
    fr_data.pop(-1)  # last one is overflow bin
    print(fr_data)
    # Denominator: Fake directly from ttbar MC
    yields_ttbar = rt.read_table(basedir + prefix + "Fake__lepFakeCand2PtFineVarBin"+etaregion+".txt")
    # Numerator: fake from ttbar MC
    yields_ttbar_tight = rt.read_table(basedir + prefix + "TightFake__lepFakeCand2PtFineVarBin"+etaregion+".txt")
    fr_mc = []
    for den, num in zip(yields_ttbar[procname], yields_ttbar_tight[procname]):
        if den.val != 0:
            fr = num / den
            fr_mc.append(fr)
        else:
            fr_mc.append(E(0, 0))
    print(fr_mc)
    fr_mc.pop(0)   # first one is underflow bin
    fr_mc.pop(0)   # see note above for fr_data
    fr_mc.pop(-1)  # last one is overflow bin
    # bin boundaries (pt); fake rates are filled starting at bin 2
    bounds = [0., 10., 20., 30., 50., 70.]
    h_fr_data = r.TH1F("FR","",len(bounds)-1,array('d',bounds))
    h_fr_mc = r.TH1F("FR","",len(bounds)-1,array('d',bounds))
    for idx, fr in enumerate(fr_data):
        h_fr_data.SetBinContent(idx+2, fr.val)
        h_fr_data.SetBinError(idx+2, fr.err)
    for idx, fr in enumerate(fr_mc):
        h_fr_mc.SetBinContent(idx+2, fr.val)
        h_fr_mc.SetBinError(idx+2, fr.err)
    # Options for the comparison plot (data FR as "data", MC FR as "bkg").
    alloptions= {
        "ratio_range":[0.0,2.0],
        "nbins": 180,
        "autobin": False,
        "legend_scalex": 0.8,
        "legend_scaley": 0.8,
        "output_name": basedir + "/"+output_name+".pdf",
        "bkg_sort_method": "unsorted",
        "no_ratio": False,
        "print_yield": True,
        "yield_prec": 3,
        "draw_points": True,
        "hist_line_none": True,
        "show_bkg_errors": True,
        "lumi_value" : lumi,
        }
    p.plot_hist( sigs = [], bgs = [h_fr_mc.Clone()], data = h_fr_data.Clone(), syst = None, colors=[2001], legend_labels=["MC t#bar{t}"], options=alloptions)
    return h_fr_mc.Clone(), h_fr_data.Clone()
def write_table(data, bgs, outname=None, signal=None, extra_hists=[],precision=2,sep = u"\u00B1".encode("utf-8"), binedge_fmt="{}-{}", fix_negative=True, binlabels=[], show_errors=True, cell_callback=None):
    # Build a per-bin yields table (backgrounds, total, data, data/bkg,
    # optional signal and extras) plus a "total" row; optionally write it
    # to `outname` and return a dict of its header/bin/total strings,
    # otherwise return the Table object itself.
    #
    # NOTE(review): extra_hists/binlabels are mutable default arguments —
    # only iterated here, so harmless, but fragile if ever mutated.
    tab = Table()
    sumbgs = sum(bgs)
    databg = data/sumbgs
    if signal is not None:
        procs = bgs+[sumbgs,data,databg,signal]
        cnames = [bg.get_attr("label") for bg in bgs] + ["Total bkg","Data", "Data/bkg","tttt"]
    else:
        procs = bgs+[sumbgs,data,databg]
        cnames = [bg.get_attr("label") for bg in bgs] + ["Total bkg","Data", "Data/bkg"]
    for eh in extra_hists:
        procs.append(eh)
        cnames.append(eh.get_attr("label"))
    tab.set_column_names(["bin"]+cnames)
    # Plain-ASCII separator when writing to a file.
    if outname: sep = "+-"
    binpairs = zip(data.edges[:-1],data.edges[1:])
    tab.set_theme_basic()
    for ibin,binrow in enumerate(binpairs):
        # Row label: explicit bin label if provided, else "[lo-hi]" edges.
        row = [("[%s]"%binedge_fmt).format(binrow[0],binrow[1])]
        if ibin < len(binlabels):
            row = [binlabels[ibin]]
        for iproc,proc in enumerate(procs):
            if fix_negative:
                cent = max(proc.counts[ibin],0.)
            else:
                cent = proc.counts[ibin]
            err = proc.errors[ibin]
            if show_errors:
                tmp = ("{0:5.%if} {1}{2:%i.%if}" % (precision,precision+3,precision)).format(cent,sep,err)
            else:
                tmp = ("{0:5.%if}" % (precision)).format(cent)
            if cell_callback:
                tmp = cell_callback(tmp)
            row.append(tmp)
        tab.add_row(row)
    tab.add_line()
    row = ["total"]
    for iproc,proc in enumerate(procs):
        # This index picks out the data/bkg ratio column (counting back past
        # the optional signal and extra histograms).
        if iproc == len(procs)-(1+(signal is not None)+len(extra_hists)):
            totbg = E(sum(sumbgs.counts), np.sum(sumbgs.errors**2.)**0.5)
            totdata = E(sum(data.counts))
            ratio = totdata/totbg
            cent, err = ratio[0], ratio[1]
            # Force at least 2 decimals for the ratio (unless precision is 0).
            precision = max(precision, 2) if precision != 0 else 0
        else:
            cent = sum(proc.counts)
            err = np.sum(proc.errors**2.)**0.5
        if show_errors:
            tmp = ("{0:5.%if} {1}{2:%i.%if}" % (precision,precision+3,precision)).format(cent,sep,err)
        else:
            tmp = ("{0:5.%if}" % (precision)).format(cent)
        if cell_callback:
            tmp = cell_callback(tmp)
        row.append(tmp)
    tab.add_row(row)
    if outname:
        with open(outname,"w") as fhout:
            towrite = "".join(tab.get_table_strings(show_row_separators=False,show_alternating=False))
            fhout.write(towrite)
            # Slice the rendered text into header / per-bin rows / total row.
            parts = towrite.split("\n")
            header = parts[:3]
            binparts = parts[3:-4]
            total = parts[-4:-1]
            table_info = { "header":"<br>".join(header), "bins":binparts, "total":"<br>".join(total) }
            return table_info
    return tab
def plot(histnames, ps=0, sf=None, sfqcd=None, output_suffix="", dd_qcd=None):
    # Make a data/MC comparison plot for `histnames` and return the
    # histograms needed for the data-driven QCD estimate (data - EWK bkg).
    #
    # ps     : prescale correction applied to data (if > 0)
    # sf     : EWK scale factor — scalar, or per-bin list
    # sfqcd  : QCD scale factor — scalar, or per-bin list
    # dd_qcd : optional data-driven QCD histogram to plot instead of MC QCD
    # Glob the file lists
    bkg_list_wjets = [output_dirpath + "/wj_incl.root"]
    bkg_list_dy = [output_dirpath + "/dy.root"]
    bkg_list_ttbar = [output_dirpath + "/tt_incl.root"]
    bkg_list_vv = [output_dirpath + "/ww.root", output_dirpath + "/wz.root"]
    bkg_list_qcd_mu = [output_dirpath + "/qcd_mu.root"]
    bkg_list_qcd_el = [output_dirpath + "/qcd_em.root"]
    bkg_list_qcd_bc = [output_dirpath + "/qcd_bc.root"]
    bkg_list_all = bkg_list_wjets + bkg_list_dy + bkg_list_ttbar + bkg_list_vv
    # Glob the data file list depending on the region
    if "Mu" in histnames:
        data_list = [output_dirpath + "/data_mu.root"]
    elif "El" in histnames:
        data_list = [output_dirpath + "/data_el.root"]
    else:
        data_list = [ output_dirpath + "/data_mu.root", output_dirpath + "/data_el.root" ]
    # Get all the histogram objects
    h_wjets = ru.get_summed_histogram(bkg_list_wjets, histnames)
    h_dy = ru.get_summed_histogram(bkg_list_dy, histnames)
    h_ttbar = ru.get_summed_histogram(bkg_list_ttbar, histnames)
    h_vv = ru.get_summed_histogram(bkg_list_vv, histnames)
    h_qcd_mu = ru.get_summed_histogram(bkg_list_qcd_mu, histnames)
    h_qcd_el = ru.get_summed_histogram(bkg_list_qcd_el, histnames)
    h_qcd_bc = ru.get_summed_histogram(bkg_list_qcd_bc, histnames)
    h_data = ru.get_summed_histogram(data_list, histnames)
    # Set the names of the histograms
    h_wjets.SetName("W")
    h_dy.SetName("Z")
    h_ttbar.SetName("Top")
    h_vv.SetName("VV")
    h_qcd_mu.SetName("QCD(#mu)")
    h_qcd_el.SetName("QCD(e)")
    h_qcd_bc.SetName("QCD(bc)")
    h_data.SetName("Data")
    # Scale the histograms appropriately from SF from the EWKCR
    # (list = per-bin scale factors, scalar = overall scale).
    if sf:
        if isinstance(sf, list):
            hists = [h_wjets, h_dy, h_ttbar, h_vv]
            for h in hists:
                for ii, s in enumerate(sf):
                    bc = h.GetBinContent(ii + 1)
                    be = h.GetBinError(ii + 1)
                    h.SetBinContent(ii + 1, bc * s)
                    h.SetBinError(ii + 1, be * s)
        else:
            if sf > 0:
                h_wjets.Scale(sf)
                h_dy.Scale(sf)
                h_ttbar.Scale(sf)
                h_vv.Scale(sf)
    if sfqcd:
        if isinstance(sfqcd, list):
            hists = [h_qcd_mu, h_qcd_el, h_qcd_bc]
            for h in hists:
                for ii, s in enumerate(sfqcd):
                    bc = h.GetBinContent(ii + 1)
                    be = h.GetBinError(ii + 1)
                    h.SetBinContent(ii + 1, bc * s)
                    h.SetBinError(ii + 1, be * s)
        else:
            if sfqcd > 0:
                h_qcd_mu.Scale(sfqcd)
                h_qcd_el.Scale(sfqcd)
                h_qcd_bc.Scale(sfqcd)
    # If the data needs some additional correction for the prescale
    if ps > 0:
        h_data.Scale(ps)
    # Color settings
    colors = [2007, 2005, 2003, 2001, 920, 921]
    # Options
    alloptions = {
        "ratio_range": [0.0, 2.0],
        "nbins": 30,
        "autobin": False,
        "legend_scalex": 1.8,
        "legend_scaley": 1.1,
        "output_name": "plots/{}/{}/{}/plot/{}{}.pdf".format(input_ntup_tag, analysis_tag, "ss" if isSS else "3l", histnames, output_suffix),
        "bkg_sort_method": "unsorted",
        "no_ratio": False,
        "print_yield": True,
        # NOTE(review): both branches are False — the conditional is a no-op
        "yaxis_log": False if "ptcorr" in histnames else False,
        "divide_by_bin_width": True,
        "legend_smart": False if "ptcorr" in histnames else True,
        "lumi_value": lumi,
        }
    # The bkg histogram list: choose the QCD flavor by channel, or use the
    # externally supplied data-driven QCD if given.
    h_qcd = h_qcd_mu if "Mu" in histnames else h_qcd_el
    if dd_qcd:
        h_qcd = dd_qcd
    bgs_list = [h_vv, h_ttbar, h_dy, h_wjets, h_qcd]
    legend_labels = ["VV", "t#bar{t}", "DY", "W", "QCD(#mu)" ] if "Mu" in histnames else [ "VV", "t#bar{t}", "DY", "W", "QCD(e)", "QCD(HF)" ]
    if "Mu" not in histnames:
        bgs_list.append(h_qcd_bc)
    # (A commented-out "merge bins 4 and 5 for 2018" helper lived here.)
    # Plot them
    p.plot_hist(bgs=bgs_list, data=h_data.Clone("Data"), colors=colors, syst=None, legend_labels=legend_labels, options=alloptions)
    # Obtain the histograms again to return the objects for further
    # calculations: data-driven QCD = data - bkg
    h_ddqcd = ru.get_summed_histogram(data_list, histnames)
    h_bkg = ru.get_summed_histogram(bkg_list_all, histnames)
    h_wjets = ru.get_summed_histogram(bkg_list_wjets, histnames)
    h_dy = ru.get_summed_histogram(bkg_list_dy, histnames)
    h_ttbar = ru.get_summed_histogram(bkg_list_ttbar, histnames)
    h_vv = ru.get_summed_histogram(bkg_list_vv, histnames)
    if ps > 0:
        h_ddqcd.Scale(ps)
    # Scale the histograms appropriately from SF from the EWKCR (again,
    # since these are freshly re-read copies).
    if sf:
        if isinstance(sf, list):
            hists = [h_bkg, h_wjets, h_dy, h_ttbar, h_vv]
            for h in hists:
                for ii, s in enumerate(sf):
                    bc = h.GetBinContent(ii + 1)
                    be = h.GetBinError(ii + 1)
                    h.SetBinContent(ii + 1, bc * s)
                    h.SetBinError(ii + 1, be * s)
        else:
            if sf > 0:
                h_bkg.Scale(sf)
                h_wjets.Scale(sf)
                h_dy.Scale(sf)
                h_ttbar.Scale(sf)
                h_vv.Scale(sf)
    if "ptcorretarolled" in histnames:
        # Per-bin subtraction with error propagation; in the SS channel,
        # floor the result at the data statistical error.
        for ii in xrange(1, h_ddqcd.GetNbinsX() + 1):
            data_bc = h_ddqcd.GetBinContent(ii)
            data_be = h_ddqcd.GetBinError(ii)
            bkg_bc = h_bkg.GetBinContent(ii)
            bkg_be = h_bkg.GetBinError(ii)
            d = E(data_bc, data_be)
            b = E(bkg_bc, bkg_be)
            n = d - b
            if isSS:
                if d.err > n.val:
                    n.val = d.err
            h_ddqcd.SetBinContent(ii, n.val)
            h_ddqcd.SetBinError(ii, n.err)
    else:
        h_ddqcd.Add(h_bkg, -1)
    # MC QCD
    h_qcd_mu = ru.get_summed_histogram(bkg_list_qcd_mu, histnames).Clone("QCD(#mu)")
    h_qcd_el = ru.get_summed_histogram(bkg_list_qcd_el, histnames).Clone("QCD(EM)")
    h_qcd_bc = ru.get_summed_histogram(bkg_list_qcd_bc, histnames).Clone("QCD(HF)")
    return h_ddqcd, h_data, h_bkg, h_qcd_mu, h_qcd_el, h_qcd_bc, h_wjets, h_dy, h_ttbar, h_vv
def yield_str(hist, i, prec=3):
    """Return bin *i* of *hist* as a value-with-error rounded to *prec* digits."""
    content, error = hist.GetBinContent(i), hist.GetBinError(i)
    return E(content, error).round(prec)
def get_sfs(infile, lnNsig=2.0, lnNbg=1.5, shapeUnc=0.1):
    # Build a combine datacard + shape files from `infile` (and its
    # btag/jes up/down variation files), run a background-only
    # MaxLikelihoodFit, and return per-process post-fit/pre-fit scale
    # factors and per-bin totals as a nested dict.
    outfile = "forCard.root"
    card_filename = "card.txt"
    variations = ["btag","jes"]
    procs = ["data", "wz", "ttz", "fakes", "rares"]
    # Flattened list like ["btag_up","btag_dn","jes_up","jes_dn"].
    variationsud = sum([[v+"_up",v+"_dn"] for v in variations],[])
    files = {}
    files["central"] = r.TFile(infile)
    for var in variationsud:
        # Variation files share the central file's basename + "_<var>.root".
        files[var] = r.TFile("%s_%s.root" % (infile.split(".root")[0], var))
    print ">>> Reading input histograms from %s" % infile
    procs_nodata = procs[1:]
    procs_signodata = ["sig"] + procs_nodata
    # Classify every TH1F in each file by substring of its name; "rares"
    # is everything not matched by the other categories.
    hnames = {}
    for var in ["central"] + variationsud:
        keys = files[var].GetListOfKeys()
        histnames = [key.GetName() for key in keys if key.ReadObj().InheritsFrom(r.TH1F.Class())]
        hnames[var] = {}
        hnames[var]["data"] = [hn for hn in histnames if "Data" in hn]
        hnames[var]["wz"] = [hn for hn in histnames if "WZ" in hn]
        hnames[var]["ttz"] = [hn for hn in histnames if "ttZ" in hn]
        hnames[var]["fakes"] = [hn for hn in histnames if "Fakes" in hn]
        hnames[var]["rares"] = [hn for hn in histnames if ("Fakes" not in hn) and ("WZ" not in hn) and ("Data" not in hn) and ("ttZ" not in hn)]
    # Sum each category's histograms; clone names encode the variation
    # ("<var>Up"/"<var>Down") or "<proc>shape" for the central copy.
    hists = {}
    for proc in procs:
        for var in ["central"] + variationsud:
            if var not in hists:
                hists[var] = {}
            tmp = [files[var].Get(hn) for hn in hnames[var][proc]]
            name = ""
            if "cent" not in var:
                name += var.replace("_up","Up").replace("_dn","Down")
            if name == "":
                name = proc+"shape"
            hists[var][proc] = tmp[0].Clone(name)
            for h in tmp[1:]:
                hists[var][proc].Add(h)
    d_prefit = {}
    for proc in procs:
        d_prefit[proc] = get_bin_yields_and_errors(hists["central"][proc])
    print ">>> Writing output histograms for combine into %s" % outfile
    d_fouts = {}
    # Fake signal with 1 event in each bin (arbitrary since we only care
    # about the background-only fit).
    hists["central"]["sig"] = hists["central"]["data"].Clone("sigshape")
    for ix in range(1,hists["central"]["sig"].GetNbinsX()+1):
        hists["central"]["sig"].SetBinContent(ix,1.0)
    # One shape file per process, with per-bin systematic variations.
    for proc in procs + ["sig"]:
        d_fouts[proc] = r.TFile(outfile.replace(".root","_%s.root" % proc), "RECREATE")
        for var in ["central"] + variationsud:
            if proc == "sig":
                if "cent" not in var:
                    hists["central"]["sig"].SetName(var.replace("_up","Up").replace("_dn","Down"))
                map(lambda x: x.Write(), get_variations(hists["central"]["sig"], syst=0.0))
            else:
                map(lambda x: x.Write(), get_variations(hists[var][proc], syst=(shapeUnc if "data" not in proc.lower() else 0.0), do_var=not("cent" in var)))
    # ---- assemble the datacard text ----
    Nbins = hists["central"]["sig"].GetNbinsX()
    Nproc = len(procs)
    counts = {}
    for proc in procs_signodata + ["data"]:
        counts[proc] = [hists["central"][proc].GetBinContent(ix) for ix in range(1,Nbins+1)]
    # One channel ("ch<i>") per histogram bin; each channel lists all procs.
    bin_str = " ".join([("ch%i "%i) * Nproc for i in range(1,Nbins+1)])
    proc1_str = (" ".join(procs_signodata) + " ") * Nbins
    proc2_str = " ".join([" ".join(map(str,range(0,Nproc))) for _ in range(0,Nbins)])
    rate_list = sum(map(list,zip(*[counts[proc] for proc in procs_signodata])),[])
    all_correlated = True
    buff = ""
    buff += "imax %i\n" % (Nbins)
    buff += "jmax %i\n" % (Nproc-1)
    if all_correlated:
        buff += "kmax %i\n" % ((Nbins+1+len(variations))*(Nproc-1)-((Nproc-2)*len(variations)))
    else:
        buff += "kmax %i\n" % ((Nbins+1+len(variations))*(Nproc-1))
    buff += "------------\n"
    for i in range(1,Nbins+1):
        for proc in ["data_obs"] + procs_signodata:
            proc2 = proc.replace("_obs","")
            ofile = outfile.replace(".root","_%s.root" % proc2)
            buff += "shapes {0} ch{1} {2} bin{1}_{4}shape{1} bin{1}_{3}\n".format( proc, i, ofile, (("datashape"+str(i)) if "data" in proc else "$SYSTEMATIC"), proc2, )
    buff += "------------\n"
    buff += "bin " + " ".join(["ch%i" % i for i in range(1,Nbins+1)]) + "\n"
    buff += "observation " + " ".join(["%.2f" % e for e in counts["data"]]) + "\n"
    buff += "------------\n"
    buff += "bin " + bin_str + "\n"
    buff += "process " + proc1_str + "\n"
    buff += "process " + proc2_str + "\n"
    buff += "rate " + " ".join(["%.2f" % e for e in rate_list]) + "\n"
    buff += "------------\n"
    # Nuisances: per-bin shape stat terms, one lnN per process, and the
    # btag/jes shape systematics (correlated across processes when
    # all_correlated is set).
    for ithing,thing in enumerate(procs_nodata):
        norm = lnNbg
        if thing in ["wz","ttz"]: norm = lnNsig
        for ibin in range(Nbins):
            buff += get_nuisance_line(thing+"shape"+str(ibin+1), "shape", len(rate_list), 1.0, [ibin*(Nproc)+ithing+1])
        buff += get_nuisance_line(thing, "lnN", len(rate_list), norm, range(ithing+1,(Nbins+1)*Nproc,Nproc))
        for var in variations:
            if all_correlated:
                if ithing == 0:
                    buff += get_nuisance_line(var, "shape", len(rate_list), 1.0, range(1,(Nbins+1)*Nproc))
            else:
                buff += get_nuisance_line(thing+var, "shape", len(rate_list), 1.0, range(ithing+1,(Nbins+1)*Nproc,Nproc))
    card_txt = buff
    for fout in d_fouts.values():
        fout.Close()
    print ">>> Writing card %s" % card_filename
    # write card
    with open(card_filename, "w") as fhout:
        fhout.write(card_txt)
    # run combine
    print ">>> Running combine with card %s" % card_filename
    cmd = "combine -M MaxLikelihoodFit %s --saveNorm --saveWithUncertainties" % card_filename
    output = commands.getoutput(cmd)
    print output
    if "Done in" in output:
        print ">>> Combine finished successfully"
    else:
        print ">>> [!] ERROR with combine. Output below:"
        print "-"*40
        print output
        print "-"*40
        sys.exit()
    print ">>> Examining fit results"
    # examine output: walk the background-only fit normalizations
    fin = r.TFile.Open("mlfit.root");
    prefit = fin.Get("norm_prefit")
    fit_s = fin.Get("norm_fit_s")
    fit_b = fin.Get("norm_fit_b")
    iter = fit_b.createIterator()
    d_sfs = {}
    d_cerrs = {}
    while True:
        norm_s = iter.Next()
        if norm_s == None: break;
        norm_b = fit_b.find(norm_s.GetName())
        norm_p = prefit.find(norm_s.GetName()) if prefit else None
        # Entry names look like "ch<i>/<proc>"; binzi is the 0-indexed bin.
        title = norm_s.GetName()
        binzi = int(title.split("/")[0].split("ch")[-1])-1
        name = title.split("/")[-1]
        if "sig" in title: continue
        p_val, p_err = d_prefit[name][binzi][0],d_prefit[name][binzi][1]
        try:
            Epostfit = E(norm_b.getVal(), norm_b.getError())
            Eprefit = E(p_val, p_err)
            if name not in d_cerrs:
                d_cerrs[name] = {"prefit": [], "postfit": []}
            d_cerrs[name]["prefit"].append(Eprefit)
            d_cerrs[name]["postfit"].append(Epostfit)
            sf, sferr = Epostfit/Eprefit
        except:
            # e.g. zero prefit yield — flag with sf = -1
            sf, sferr = -1.0, 0.0
        d_sfs[title] = (sf, sferr)
    print "total SFs:"
    d_sfs["totals"] = {}
    d_sfs["postfit_totals"] = {}
    for proc in d_cerrs:
        sf = sum(d_cerrs[proc]["postfit"])/sum(d_cerrs[proc]["prefit"])
        d_sfs["postfit_totals"][proc.split("/")[-1]] = ( map(lambda x:x[0], d_cerrs[proc]["postfit"]), map(lambda x:x[1], d_cerrs[proc]["postfit"]) )
        d_sfs["totals"][proc.split("/")[-1]] = (sf[0],sf[1])
        print "{:>10} {:>15,.2f} +-{:>6,.2f}".format(proc, sf[0], sf[1])
    return d_sfs
def get_alpha_hists(proc, num, den):
    # Assemble summary histograms for the transfer factor: each returned
    # 3-bin histogram covers (1) the ratio, (2) the numerator yield,
    # (3) the denominator yield.  Output order: Ratio, Yield, Total,
    # Stat, then one per systematic (relative errors in %).
    hists_num = get_alpha(proc, num, den, "num")
    hists_den = get_alpha(proc, num, den, "den")
    hists_eff = get_alpha(proc, num, den, "eff")
    hists = []
    # Running products of E(1, frac_err) per column; .err of the product
    # accumulates the individual fractional errors in quadrature.
    totalerrors = [E(1, 0), E(1, 0), E(1, 0)]
    for hist_num, hist_den, hist_eff in zip(hists_num, hists_den, hists_eff):
        syst = hist_num.GetName()
        if syst == "Nominal":
            # Nominal: store absolute values, then split into a "Ratio"
            # histogram (bin 1 only) and a "Yield" histogram (bins 2-3).
            h = r.TH1F("{}".format(hist_num.GetName()), "", 3, 0, 3)
            h.SetBinContent(1, hist_eff.GetBinContent(1))
            h.SetBinError(1, hist_eff.GetBinError(1))
            h.SetBinContent(2, hist_num.GetBinContent(1))
            h.SetBinError(2, hist_num.GetBinError(1))
            h.SetBinContent(3, hist_den.GetBinContent(1))
            h.SetBinError(3, hist_den.GetBinError(1))
            h_ratio = h.Clone("Ratio")
            h_ratio.SetBinContent(2, 0)
            h_ratio.SetBinContent(3, 0)
            h_ratio.SetBinError(2, 0)
            h_ratio.SetBinError(3, 0)
            h_yield = h.Clone("Yield")
            h_yield.SetBinContent(1, 0)
            h_yield.SetBinError(1, 0)
            hists.append(h_ratio)
            hists.append(h_yield)
            # Statistical relative errors (%) from the nominal histograms.
            h = r.TH1F("Stat", "", 3, 0, 3)
            h.SetBinContent( 1, hist_eff.GetBinError(1) / hist_eff.GetBinContent(1) * 100.)
            h.SetBinContent( 2, hist_num.GetBinError(1) / hist_num.GetBinContent(1) * 100.)
            h.SetBinContent( 3, hist_den.GetBinError(1) / hist_den.GetBinContent(1) * 100.)
            hists.append(h)
            totalerrors[0] *= E( 1, hist_eff.GetBinError(1) / hist_eff.GetBinContent(1))
            totalerrors[1] *= E( 1, hist_num.GetBinError(1) / hist_num.GetBinContent(1))
            totalerrors[2] *= E( 1, hist_den.GetBinError(1) / hist_den.GetBinContent(1))
        else:
            # Systematic: store relative errors (%) per column.
            h = r.TH1F("{}".format(hist_num.GetName()), "", 3, 0, 3)
            h.SetBinContent( 1, hist_eff.GetBinError(1) / hist_eff.GetBinContent(1) * 100.)
            h.SetBinContent( 2, hist_num.GetBinError(1) / hist_num.GetBinContent(1) * 100.)
            h.SetBinContent( 3, hist_den.GetBinError(1) / hist_den.GetBinContent(1) * 100.)
            hists.append(h)
            totalerrors[0] *= E( 1, hist_eff.GetBinError(1) / hist_eff.GetBinContent(1))
            totalerrors[1] *= E( 1, hist_num.GetBinError(1) / hist_num.GetBinContent(1))
            totalerrors[2] *= E( 1, hist_den.GetBinError(1) / hist_den.GetBinContent(1))
    # Total uncertainty (%) = quadrature sum over stat + all systematics,
    # inserted after Ratio and Yield.
    h = r.TH1F("Total", "", 3, 0, 3)
    h.SetBinContent(1, totalerrors[0].err * 100.)
    h.SetBinContent(2, totalerrors[1].err * 100.)
    h.SetBinContent(3, totalerrors[2].err * 100.)
    hists.insert(2, h)
    return hists