def test_1d(self):
    """Compare a ROOT-filled ``Hist1D`` with a numpy-filled one on weighted data.

    Checks equality of the two histograms, their integrals, edges and
    counts (against ``np.histogram``), and that scaling/addition done on the
    ROOT object matches the same arithmetic done on the ``Hist1D``.
    """
    edges = np.array([0, 3, 6, 9, 12, 15], dtype=float)
    samples = np.array([1, 2, 3, 4, 5, 10, 13], dtype=float)
    sample_weights = np.array([1, 1, 1, 2, 2, 1, 1], dtype=float)

    # ROOT side: book a TH1F on the same edges and fill it with the weights.
    root_th1 = r.TH1F("hr", "hr", len(edges) - 1, edges)
    fill_fast(root_th1, samples, weights=sample_weights)
    from_root = Hist1D(root_th1)

    # numpy side: histogram the raw values directly.
    from_numpy = Hist1D(samples, bins=edges, weights=sample_weights)

    self.assertEqual(from_numpy, from_root)

    for hist in (from_numpy, from_root):
        self.assertEqual(hist.get_integral(), np.sum(sample_weights))
    for hist in (from_numpy, from_root):
        self.assertEqual(np.all(hist.edges == edges), True)

    expected_counts = np.histogram(
        samples, bins=edges, weights=sample_weights)[0]
    for hist in (from_numpy, from_root):
        self.assertEqual(np.all(hist.counts == expected_counts), True)

    # Arithmetic on the ROOT histogram must agree with Hist1D arithmetic.
    self.assertEqual(Hist1D(root_th1 * 2), from_numpy * 2)
    self.assertEqual(Hist1D(root_th1 + root_th1), from_numpy + from_numpy)
    self.assertEqual(
        Hist1D(root_th1 + 0.5 * root_th1), from_numpy + 0.5 * from_numpy)
def test_1d_summing(self):
    """Check that the builtin ``sum`` over ``Hist1D`` objects equals chained ``+``."""
    np.random.seed(42)

    # The first sample is drawn before the edges are built, then two more
    # samples follow; all three share the same edges.
    first_sample = np.random.normal(0, 1, 1000)
    edges = np.linspace(-3, 3, 10)
    hists = [Hist1D(first_sample, bins=edges)]
    for _ in range(2):
        hists.append(Hist1D(np.random.normal(0, 1, 1000), bins=edges))

    h1, h2, h3 = hists
    self.assertEqual(h1 + h2 + h3, sum([h1, h2, h3]))
def fill_hists(run):
    """Fill ``HISTS`` with pT and eta histograms for every task of ``run``.

    For each task, the px/py/pz of the particles selected by the SQL query
    below are fetched via ``run.query_events``; pt, p and eta columns are
    derived from them and histogrammed into ``HISTS[(task, 'top_pt')]`` and
    ``HISTS[(task, 'top_eta')]``. Afterwards every entry of ``HISTS`` is
    divided by its own ``integral``.

    Args:
        run: Object exposing ``tasks`` (a mapping) and ``query_events``.
    """
    sql = "SELECT px, py, pz FROM particle WHERE abs(pdgid)==6 AND abs(status)==1"

    for task in run.tasks.values():
        events = run.query_events(task, sql)

        # Derived kinematic columns are stored on the frame itself.
        events['pt'] = np.sqrt(events['px'] ** 2 + events['py'] ** 2)
        events['p'] = np.sqrt(
            events['px'] ** 2 + events['py'] ** 2 + events['pz'] ** 2)
        events['eta'] = np.arctanh(events['pz'] / events['p'])

        HISTS[(task, 'top_pt')] = Hist1D(np.array(events['pt']), bins=PT_BINS)
        HISTS[(task, 'top_eta')] = Hist1D(
            np.array(events['eta']), bins=ETA_BINS)

    # Normalise each stored histogram by its integral.
    for key, hist in HISTS.items():
        HISTS[key] = hist / hist.integral
def test_1d_summing_weights(self):
    """Sum two weighted histograms built via ROOT and via numpy and compare.

    The samples include negative weights and values outside the binned
    range; the ROOT-backed histograms are built with ``no_overflow=True``.
    """
    edges = np.array([0, 3, 6, 9, 12, 15], dtype=float)

    values_a = np.array([4, 1, 2, 3, 4, 5, 10, 13], dtype=float)
    weights_a = np.array([-1, 1, 1, 1, 2, 2, 1, 1], dtype=float)
    values_b = np.array([4, 0, 2, 3, 4, -5, 100, 13], dtype=float)
    weights_b = np.array([-1, 2, -1, 1, 2, 2, -1, 1], dtype=float)

    nbins = len(edges) - 1
    th1_a = r.TH1F("hr1", "hr1", nbins, edges)
    th1_b = r.TH1F("hr2", "hr2", nbins, edges)
    fill_fast(th1_a, values_a, weights=weights_a)
    fill_fast(th1_b, values_b, weights=weights_b)

    root_a = Hist1D(th1_a, no_overflow=True)
    root_b = Hist1D(th1_b, no_overflow=True)
    numpy_a = Hist1D(values_a, bins=edges, weights=weights_a)
    numpy_b = Hist1D(values_b, bins=edges, weights=weights_b)

    self.assertEqual(root_a + root_b, numpy_a + numpy_b)
def test_1d_rebinning(self):
    """Rebinning must keep the integral and divide the bin count by the factor."""
    np.random.seed(42)
    factor = 5
    hist = Hist1D(np.random.normal(0, 5, 1000), bins=np.linspace(-10, 10, 21))

    def snapshot(h):
        # (number of bins, integral) of the histogram in its current state
        return len(h.edges) - 1, h.get_integral()

    bins_before, integral_before = snapshot(hist)
    hist.rebin(factor)
    bins_after, integral_after = snapshot(hist)

    self.assertEqual(integral_before, integral_after)
    self.assertEqual(bins_after, bins_before // factor)
def test_2d_projections_and_profiles(self):
    """Compare ``Hist2D`` projections and profiles with ROOT's TH2F results."""
    np.random.seed(42)
    x_edges = np.array([10., 15., 25., 35., 50.0, 70, 90])
    y_edges = np.array([0., 0.8, 1.479, 2.5])
    root_th2 = r.TH2F("2d_projprof", "",
                      len(x_edges) - 1, x_edges,
                      len(y_edges) - 1, y_edges)

    n_points = 10000
    # x values are drawn first, then y values (keeps the random sequence fixed).
    x_values = 78. * np.random.random(n_points) + 11
    y_values = 2.499 * np.random.random(n_points)
    for point in np.c_[x_values, y_values]:
        # points with y above 1.2 get double weight
        weight = 2. if point[1] > 1.2 else 1.
        root_th2.Fill(point[0], point[1], weight)

    hist2d = Hist2D(root_th2)

    self.assertEqual(Hist1D(root_th2.ProjectionX()), hist2d.get_x_projection())
    self.assertEqual(Hist1D(root_th2.ProjectionY()), hist2d.get_y_projection())

    root_profile_x = root_th2.ProfileX()
    root_profile_y = root_th2.ProfileY()
    # note, TProfile does weird thing to overflow, even though they are empty
    # so explicitly exclude them
    expected_x = Hist1D(root_profile_x, no_overflow=True)
    expected_y = Hist1D(root_profile_y, no_overflow=True)
    actual_x = hist2d.get_x_profile()
    actual_y = hist2d.get_y_profile()

    # XXX profile errors don't match ROOT's, so zero them all out before comparing
    for profile in (expected_x, expected_y, actual_x, actual_y):
        profile._errors *= 0.

    self.assertEqual(expected_x, actual_x)
    self.assertEqual(expected_y, actual_y)
2016: 35.9, 2017: 41.5, 2018: 58.8 }[year]) ], selection= "hyp_class==3 && br && fired_trigger && passes_met_filters && sr>2" ) arr.dtype.names = ( "sr-2", "weight", ) label = transform_label(stag) h = Hist1D(arr["sr-2"], weights=arr["weight"], bins=bins, label=label, color=color) tmp.append(h) except: print "ERROR with {} for {}".format(stag, year) continue h = sum(tmp) hists.append(h) fname = "plots_rares/h_{}.pdf".format(which) plot_stack( bgs=hists[::-1], filename=fname, do_log=False, # xlabel="SR",
def worker(info):
    """Draw the background/signal stack plots and yield table for one variable.

    For every region, the per-process histograms are summed over all keys of
    the module-level ``files`` mapping, a flat systematic is added to each
    background's errors in quadrature, and ``plot_stack`` is called for a
    linear and a log plot (plus a "_stacked" plot with signals added to the
    stack for the ``br``/``brpostfit``/``ttwcr``/``ttzcr`` regions). A yield
    table is then written next to the plot via ``write_table``.

    Observed data is not loaded in this function; the smallest background
    (``bgs[0]`` after sorting by integral) is passed wherever ``plot_stack``
    and ``write_table`` take a data histogram.

    NOTE(review): this block was reconstructed from source whose indentation
    was lost; the nesting choices marked below should be confirmed against
    the original file.

    Args:
        info: Tuple ``(analysis, outputdir, year, lumi, (var, (regions,
            xlabel)))``. ``year`` is unpacked but not otherwise used.

    Returns:
        str: The base plot filenames of the regions that were drawn, joined
        with ``", "``. Regions whose summed background integral is below
        1e-6 in magnitude are skipped and do not appear.
    """
    global files, signames_
    analysis, outputdir, year, lumi, (var, (regions, xlabel)) = info

    sigstrs = [r"fcnc"]
    sigcolors = [[0.75, 0.15, 0.22], "#5863F8", "#FCCA46", "#04A777",
                 "#944BBB", "#233D4D"]
    lumi_ = str(lumi)
    years = list(files.keys())
    fnames = []

    def get_sf(proc):
        # Post-fit scale factor of a process; 1.0 when none is listed.
        return d_crpostfitsf.get(proc, 1.0)

    def summed_hist(proc, hist_region, scale=None, **hist_kwargs):
        # Sum h_<region>_<var>_<proc> over all years, optionally scaling
        # each year's histogram by `scale` first.
        hname = "h_{}_{}_{}".format(hist_region, var, proc)
        hists = (Hist1D(files[y][proc][hname], **hist_kwargs) for y in years)
        if scale is not None:
            hists = (scale * h for h in hists)
        return sum(hists)

    for region in regions:
        title = region.upper()
        # The original compared the upper-cased title with "br", which could
        # never match; compare the region name so the title is applied.
        if analysis == "fcnc" and region in ["br"]:
            title = "Baseline"

        # "brpostfit" reuses the "br" histograms, scaled by post-fit SFs.
        is_postfit = (region == "brpostfit")
        region_for_hist = "br" if is_postfit else region

        if is_postfit:
            bgs = [
                summed_hist(
                    proc, region_for_hist, scale=get_sf(proc),
                    label="{} ($\\times${:.2f})".format(label, get_sf(proc)),
                    color=color, dummy=proc)
                for proc, (label, color) in d_label_colors[analysis].items()
            ]
            sigs = [
                summed_hist(
                    signame, region_for_hist, scale=get_sf(signame),
                    label="{} ($\\times${:.2f})".format(
                        sigstr, get_sf(signame)),
                    color=sigcolors[isig], dummy=signame)
                for isig, (signame, sigstr)
                in enumerate(zip(signames_, sigstrs))
            ]
        else:
            bgs = [
                summed_hist(proc, region_for_hist,
                            label=label, color=color, dummy=proc)
                for proc, (label, color) in d_label_colors[analysis].items()
            ]
            sigs = [
                summed_hist(signame, region_for_hist, color=sigcolors[isig])
                for isig, signame in enumerate(signames_)
            ]
            # NOTE(review): labelling and the x10 scaling are assumed to
            # apply only to this non-postfit branch (the postfit branch
            # already sets its own labels) -- confirm against the original.
            for isig, sigstr in enumerate(sigstrs):
                sigs[isig].set_attr("label", sigstr)
                if signames_[isig] != "tttt":
                    sigs[isig] *= 10.

        bgs = sorted(bgs, key=lambda bg: bg.get_integral())
        for bg in bgs:
            # Single-argument form prints the same text on Python 2 and 3.
            print("{} {}".format(type(bg), bg))
            # add flat systematic to stat unc in quadrature
            bg._errors = np.hypot(
                bg._counts * d_flat_systematics[analysis].get(
                    bg.get_attr("dummy"), 0.),
                bg._errors)

        if abs(sum(bgs).get_integral()) < 1e-6:
            continue

        ax_main_callback = None
        ax_ratio_callback = None
        mpl_legend_params = {}
        ratio_range = [0., 3.]
        xticks = []
        mpl_xtick_params = {}

        if region in ["ml", "ss"] and (var == "TOTAL"):
            ratio_range = [0., 3.]
            mpl_legend_params["fontsize"] = 8
            mpl_legend_params["framealpha"] = 0.4
            mpl_legend_params["ncol"] = 2
            mpl_legend_params["labelspacing"] = 0.12

            def ax_main_callback(ax):
                ax.set_ylim([0.05, ax.get_ylim()[1] * 2.0])
                ax.set_yscale("log", nonposy='clip')

            xticks = list(range(1, 70))
        elif region.lower() in ["br"]:
            if var.lower() in ["sr"]:
                xticks = list(range(1, 20))

        # NOTE(review): the two region checks below are placed at loop level;
        # nested under the "br" branch above they could never be true.
        if region.lower() in ["srdisc"]:
            xticks = ["CRZ"] + list(range(1, 25))
        if (region.lower() in ["srcr"]) and (var.lower() in ["total"]):

            def ax_main_callback(ax):
                ax.set_ylim([0.1, ax.get_ylim()[1] * 1.5])
                ax.set_yscale("log", nonposy='clip')

            xticks = ["CRZ", "CRW"] + list(range(1, 20))

        if var.lower() in ["charge", "el_charge", "mu_charge", "q3"]:
            xticks = ["$(-)$", "$(+)$", ""]
            mpl_xtick_params = dict(rotation=0, fontsize=14)

        # specific plots tuned by hand
        if (analysis == "ss" and region.lower() == "br"
                and var.lower() == "charge"):

            def ax_main_callback(ax):
                ax.set_xlim([-1, 2.6])

            def ax_ratio_callback(ax):
                ax.set_xlim([-1, 2.6])

        if len(years) > 1:
            fname = "{}/run2_{}_{}.pdf".format(outputdir, region, var)
        else:
            fname = "{}/y{}_{}_{}.pdf".format(outputdir, years[0], region, var)
        fnames.append(fname)

        ylabel = "Events"
        # Bin width is hard-coded because no data histogram is loaded here.
        binwidth = 1
        if var in ["ht", "met", "njets", "nbtags"]:
            if var in ["ht", "met"]:
                ylabel = "Events / {} GeV".format(int(binwidth))
            # NOTE(review): font size assumed to apply to all four variables.
            mpl_legend_params["fontsize"] = 12
            if var in ["njets", "nbtags"] and region in ["ttwcr", "ttzcr"]:
                ratio_range = [0., 2.]
            if var in ["njets", "nbtags"] and region in ["sr"]:
                ratio_range = [0., 2.]

        if analysis == "ss" and region in ["br"]:
            # The original also called data.poissonify() in the njets/nbtags
            # and charge cases, but `data` is never defined in this function
            # (NameError), so those calls are dropped.
            if var in ["ht", "met", "mtmin"]:
                mpl_legend_params["fontsize"] = 12
                ratio_range = [0., 2.]
                ylabel = "Events / {} GeV".format(int(binwidth))
            if var in ["njets", "nbtags"]:
                mpl_legend_params["fontsize"] = 12
                ratio_range = [0., 3.]
            if var in ["charge"]:
                mpl_legend_params["fontsize"] = 12
                ratio_range = [0., 2.]

        # Stand-in for the (not loaded) observed data histogram.
        data_stand_in = bgs[0]

        def draw_stack(filename, **extra):
            # One plot_stack call with the settings shared by every plot of
            # this region; `extra` carries bgs / sigs / do_log.
            plot_stack(
                data=data_stand_in,
                title=title,
                xlabel=xlabel,
                ylabel=ylabel,
                filename=filename,
                cms_type="",
                lumi=lumi_,
                ratio_range=ratio_range,
                mpl_xtick_params=mpl_xtick_params,
                mpl_ratio_params={"label": "Data/Pred."},
                xticks=xticks,
                mpl_sig_params={},
                ax_main_callback=ax_main_callback,
                mpl_legend_params=mpl_legend_params,
                ax_ratio_callback=ax_ratio_callback,
                do_bkg_syst=True,
                **extra)

        for do_log in [False, True]:
            fname_tmp = str(fname)
            if do_log:
                fname_tmp = fname.replace(".pdf", "_log.pdf").replace(
                    ".png", "_log.png")
            draw_stack(fname_tmp, bgs=bgs, sigs=sigs, do_log=do_log)

        if region in ["br", "brpostfit", "ttwcr", "ttzcr"]:
            # Signals are stacked on top of the backgrounds here. The original
            # passed the undefined name `data`; the same stand-in as above is
            # used instead.
            fname_tmp = fname.replace(".pdf", "_stacked.pdf").replace(
                ".png", "_stacked.png")
            draw_stack(fname_tmp, bgs=bgs + sigs, do_log=False)

        write_table(data_stand_in, bgs,
                    signal=(None if not sigs else sigs[0]),
                    outname=fname.replace(".pdf", ".txt"))

    return ", ".join(fnames)
def worker(info):
    """Plot one (region, variable, flavour) histogram stack, linear and log.

    Histograms named ``<region>_<var>_<flav>`` are read from the module-level
    ``files`` mapping and, when ``other_files`` is non-empty, summed with the
    same histogram from every entry of ``other_files``. A flat systematic is
    added in quadrature to each background's errors. Depending on the
    module-level ``plotdata`` flag the plots are drawn either with data or
    with a hut/background ratio panel.

    Args:
        info: Tuple ``(outputdir, year, lumi, region, flav, var)``.

    Returns:
        The linear-scale plot filename, or ``None`` when the data integral is
        below 1e-6 or the summed background integral is below 1e-6 in
        magnitude (nothing is plotted in that case).
    """
    global files, other_files
    outputdir, year, lumi, region, flav, var = info

    title = region.upper()
    xlabel = labels[var]
    hname = "{}_{}_{}".format(region, var, flav)

    def load(proc, **attrs):
        # Histogram of `proc` from `files`, summed with the matching
        # histograms of every entry in `other_files` when there are any.
        primary = Hist1D(files[proc][hname], **attrs)
        if not other_files:
            return primary
        extras = [
            Hist1D(other_files[y][proc][hname], **attrs)
            for y in other_files.keys()
        ]
        return sum([primary] + extras)

    bgs = [
        load(proc, label=label, color=color)
        for proc, (label, color) in sorted(bginfo[region].items())
    ]
    data = load("data")
    sigs = [
        load("fcnc_hut", label="hut", color="#9D7ABF"),
        load("fcnc_hct", label="hct", color="#8154AD"),
    ]

    # Legend labels carry the yields.
    data.set_attr("label", "Data [{}]".format(int(data.get_integral())))
    for sig, tag in zip(sigs, ("hut", "hct")):
        sig.set_attr("label", "{} [{:.1f}]".format(tag, sig.get_integral()))

    # Nothing to draw for empty data or an empty background sum.
    if data.get_integral() < 1e-6:
        return
    if abs(sum(bgs).get_integral()) < 1e-6:
        return

    do_bkg_syst = True
    bgs = sorted(bgs, key=lambda bg: bg.get_integral())
    sf = data.get_integral() / sum(bgs).get_integral()

    # add flat systematic to stat unc in quadrature
    for bg in bgs:
        flat = d_flat_systematics.get(bg.get_attr("label"), 0.)
        bg._errors = np.hypot(bg._counts * flat, bg._errors)

    if plotdata:
        title += " data/MC={:.2f}".format(sf)

    if other_files:
        fname = "{}/run2_{}_{}_{}.pdf".format(outputdir, region, var, flav)
    else:
        fname = "{}/year{}_{}_{}_{}.pdf".format(outputdir, year, region, var,
                                                flav)
    fname_log = fname.replace(".pdf", "_log.pdf").replace(".png", "_log.png")

    for log_scale, target in ((False, fname), (True, fname_log)):
        # Keyword arguments are rebuilt for every call so no dict is shared.
        kwargs = dict(
            bgs=bgs,
            sigs=sigs,
            title=title,
            xlabel=xlabel,
            filename=target,
            cms_type="Preliminary",
            do_bkg_syst=do_bkg_syst,
            lumi=lumi,
            ratio_range=[0.0, 2.0],
            mpl_title_params=dict(fontsize=(8 if len(str(lumi)) >= 5 else 9)),
        )
        if plotdata:
            kwargs["data"] = data
        else:
            # Without data the ratio panel shows the first signal over the
            # summed background.
            kwargs["ratio"] = sigs[0].divide(sum(bgs))
            kwargs["mpl_ratio_params"] = {"label": "hut/Bkgd"}
        if log_scale:
            # do_log is only passed for the log plot, as in the original calls.
            kwargs["do_log"] = True
        plot_stack(**kwargs)

    return fname
[region + "_mossf_in", "mossf"], # #[region+"__in",""], [region + "_ptj1_in", "1st jet pt"], [region + "_ptj2_in", "2nd jet pt"], [region + "_ptj3_in", "3rd jet pt"], #[region+"_ptj4_in","4th jet pt"], [region + "_ptbt1_in", "1st bjet pt"], [region + "_ptbt2_in", "2nd bjet pt"], # [region+"_ptbt3_in","3rd bjet pt"], # [region+"_ptbt4_in","4th bjet pt"], [region + "_fwd_jetpt_in", "fwd jet pt"], # ] for plot in range(len(plotname)): data = sum([ Hist1D(files[y]["data"][plotname[plot][0]], label="data") for y in files.keys() ]) sigs = [ sum([ Hist1D(files[y][signalname][plotname[plot][0]], label=signalname, color=d_label_colors.get(signalname)) for y in files.keys() ]) ] bgs = [ sum([ Hist1D(files[y][proc][plotname[plot][0]], label=proc, color=d_label_colors.get(proc)) for y in files.keys()