def smooth_bkg_templates(fnames_to_run):
    """
    Smooth the systematic background templates and save them to coffea files.

    Every input file is assigned a jet multiplicity from its basename ("3Jets"
    when that substring is present, "4PJets" otherwise) and loaded with
    ``load``.  For each lepton/template pair the nominal ("nosys") template is
    copied unchanged, while every other template is passed through
    ``Plotter.smoothing_mttbins`` together with the nominal template of the
    same process.  One output file per requested jet multiplicity is then
    saved into ``input_dir``.

    Parameters
    ----------
    fnames_to_run : iterable of str
        Paths of the files holding the templates to smooth.  Files whose jet
        multiplicity is not in ``njets_to_run`` are skipped (with a message)
        instead of failing on an accumulator that was never created.
    """
    # one accumulator per requested jet multiplicity, kept in the order the files are written
    smoothed_templates = {
        jmult: processor.dict_accumulator({"Muon": {}, "Electron": {}})
        for jmult in ("3Jets", "4PJets")
        if jmult in njets_to_run
    }

    for bkg_file in fnames_to_run:
        jmult = "3Jets" if "3Jets" in os.path.basename(bkg_file) else "4PJets"
        if jmult not in smoothed_templates:
            # nothing would be written for this multiplicity, so don't load or smooth it
            print(f"{bkg_file} contains {jmult} templates but {jmult} is not in njets_to_run, skipping")
            continue
        out_dict = smoothed_templates[jmult]

        hdict = load(bkg_file)
        for lep in hdict.keys():
            templates = hdict[lep]
            for tname, orig_template in templates.items():
                # template names look like "<proc>_<sys>"; "data_obs" itself contains an underscore
                proc = "data_obs" if "data_obs" in tname else tname.split("_")[0]
                syst = sorted(filter(None, tname.split(f"{proc}_")))[0]
                print(lep, jmult, syst, proc)

                nominal = templates[f"{proc}_nosys"]
                if syst == "nosys":
                    # the nominal template is never smoothed
                    smoothed_histo = nominal.copy()
                else:
                    smoothed_histo = Plotter.smoothing_mttbins(
                        nosys=nominal,
                        systematic=orig_template,
                        mtt_centers=mtt_centers,
                        nbinsx=len(linearize_binning[0]) - 1,
                        nbinsy=len(linearize_binning[1]) - 1,
                    )

                ## save template histos to coffea dict
                out_dict[lep][tname] = smoothed_histo.copy()

    for jmult, histo_dict in smoothed_templates.items():
        coffea_out = os.path.join(
            input_dir,
            f"test_smoothed_templates_lj_{jmult}_bkg_{args.year}_{jobid}.coffea",
        )
        save(histo_dict, coffea_out)
        print(f"{coffea_out} written")
ctstar_binlabels = ["%s $\leq$ cos($\\theta^{*}_{t_{h}}$) $\leq$ %s" % (ctstar_binning[bin], ctstar_binning[bin+1]) for bin in range(len(ctstar_binning)-1)]*len(mtt_binlabels) ## get values from NNLO root file nnlo_fname = "MATRIX_ttmVStheta.root" # "xsec_central" dist has only statistical uncs #nnlo_fname = "matrixhists_NNPDF.root" # "cen" dist has only statistical uncs #nnlo_fname = "MATRIX_17_abs.root" # has scale and statistical uncs nnlo_file = convert_histo_root_file(os.path.join(proj_dir, "NNLO_files", nnlo_fname)) if nnlo_fname == "MATRIX_ttmVStheta.root": variables = [ ("mtt_vs_thad_ctstar", "xsec_central", "m($t\\bar{t}$) $\otimes$ cos($\\theta^{*}_{t_{h}}$)", "$\dfrac{d^{2} \\sigma}{d m(t\\bar{t}) d cos(\\theta^{*}_{t_{h}})}$", True), #True, [6*ybin for ybin in range(1, 6)]), # last element is hardcoded from binning ] nnlo_dict = Plotter.root_converters_dict_to_hist(nnlo_file, vars=[val[1] for val in variables], sparse_axes_list=[{"name": "dataset", "label" : "Event Process", "fill" : "nnlo"}], dense_axes_list=[{"name": "mtt", "idx" : 1}, {"name" : "ctstar", "idx" : 0}], transpose_da=True, #dense_axes_list=[{"name" : "ctstar", "idx" : 0}, {"name": "mtt", "idx" : 1}], ) mtt_binning = nnlo_dict["xsec_central"].axis("mtt").edges() #mtt_binlabels = ["%s $\leq$ m($t\\bar{t}$) $\leq$ %s" % (mtt_binning[bin], mtt_binning[bin+1]) for bin in range(len(mtt_binning)-1)] ctstar_binning = nnlo_dict["xsec_central"].axis("ctstar").edges() vlines = [(len(mtt_binning)-1)*ybin for ybin in range(1, len(ctstar_binning)-1)] #set_trace() # linearize nnlo_hist nnlo_dict = {name: Plotter.linearize_hist(nnlo_dict[name].integrate("dataset")) for name in nnlo_dict.keys()} png_ext = "StatUncs" nnlo_leg = "(Stat.)" else: variables = [
] * len(mtt_binlabels) mtt_bin_locs = np.linspace( (len(ctstar_binning) - 1) / 2, (len(ctstar_binning) - 1) * (len(mtt_binning) - 1) - (len(ctstar_binning) - 1) / 2, len(mtt_binning) - 1) vlines = [ len(mtt_binlabels) * ybin for ybin in range(1, len(mtt_binlabels)) ] # last element is hardcoded from binning # easier to rename sparse axis than change linearize() tmp_histo = histo.copy() tmp_histo = tmp_histo.group( histo.sparse_axes()[0].name, process_axis, {key[0]: key[0] for key in histo.values().keys()}) hline = Plotter.linearize_hist(tmp_histo, no_transpose=True) # revert sparse axis name to original hline = hline.group(hline.sparse_axes()[0].name, reweighting_axis, {key[0]: key[0] for key in histo.values().keys()}) histo = hline xtitle, rebinning, x_lims = variables[hname] if rebinning != 1: histo = histo.rebin(*axes_to_sum, rebinning) # orig fig, (ax, rax) = plt.subplots(2, 1, gridspec_kw={"height_ratios": (3, 1)}, sharex=True)
} # use SL+DL+Had events fig, (ax, rax) = plt.subplots( 2, 1, gridspec_kw={"height_ratios": (3, 1)}, sharex=True) fig.subplots_adjust(hspace=.07) for rewt, hslice in allTT_hslices.items(): vals, bins = hslice.values()[( )], hslice.axis(xaxis_name).edges() ax = Plotter.plot_1D( vals, bins, xlimits=x_lims, ax=ax, histtype='step', label=rewt_style_dict[rewt][0], color=rewt_style_dict[rewt][1]) if (rewt != nominal_jobid) and ( nominal_jobid in rewt_style_dict.keys()): ratio_vals, ratio_bins = Plotter.get_ratio_arrays( num_vals=vals, denom_vals=allTT_hslices[nominal_jobid]. values()[()], input_bins=bins) rax.step(ratio_bins, ratio_vals, where='post', **{
x_lims = (0., 0.1) if lepcat == 'Tight' else (0., 0.5) #if hname == 'mass_disc': # x_lims = (3., 13.) if jmult == '3Jets' else (5., 15.) #if hname == 'full_disc': # x_lims = (6., 20.) if jmult == '3Jets' else (10., 22.) mc_opts = { # 'mcorder' : ['QCD', 'EWK', 'singlet', 'ttJets'] if not ttJets_cats else ['QCD', 'EWK', 'singlet', 'ttJets_other', 'ttJets_unmatchable', 'ttJets_matchable', 'ttJets_right'] } if withData: ax, rax = Plotter.plot_stack1d(ax, rax, hslice, xlabel=xtitle, xlimits=x_lims, **mc_opts) else: ax = Plotter.plot_mc1d(ax, hslice, xlabel=xtitle, xlimits=x_lims, **mc_opts) if hname == 'Jets_njets': print(jmult) yields_txt, yields_json = Plotter.get_samples_yield_and_frac( hslice, data_lumi_year['%ss' % args.lepton] / 1000., promptmc=True)
**smooth_styles) # smoothed template if not np.array_equal(flat_sys.values()[()], orig_sys.values()[()]): hep.plot.histplot(flat_sys.values()[()], nominal.dense_axes()[0].edges(), ax=ax, histtype="step", **flat_styles) # flattened template ax.legend(loc="upper right", title=f"{sys}, {proc}") ax.set_ylabel("Events") ax.autoscale() ax.set_ylim(ax.get_ylim()[0], ax.get_ylim()[1] * 1.15) # plot relative deviation orig_masked_vals, orig_masked_bins = Plotter.get_ratio_arrays( num_vals=orig_sys.values()[()] - nominal.values()[()], denom_vals=nominal.values()[()], input_bins=nominal.dense_axes()[0].edges()) rax.step(orig_masked_bins, orig_masked_vals, where='post', **orig_styles) if not np.array_equal(smooth_sys.values()[()], orig_sys.values()[()]): smooth_masked_vals, smooth_masked_bins = Plotter.get_ratio_arrays( num_vals=smooth_sys.values()[()] - nominal.values()[()], denom_vals=nominal.values()[()], input_bins=nominal.dense_axes()[0].edges()) rax.step(smooth_masked_bins, smooth_masked_vals, where='post',
plot.plot1d( hslice, overlay=hslice.axes()[0].name, ax=ax, clear=False, line_opts={'linestyle': '-'}, ) # norm #set_trace() for corr in sorted(hslice.values().keys()): Plotter.plot_1D(values=hslice.values()[corr] / np.sum(hslice.values()[corr]), bins=hslice.dense_axes()[0].edges(), ax=ax_norm, xlimits=x_lims, xlabel=xtitle, ylabel='Probability Density', label=corr[0], histtype='step') ## set legend and corresponding colors handles, labels = ax.get_legend_handles_labels() for idx, label in enumerate(labels): if label == '4Jets': labels[idx] = jet_mults[label] handles[idx].set_color('b') handles[idx].set_linewidth(2) elif label == '5PJets': labels[idx] = jet_mults[label] handles[idx].set_color('g')
if not os.path.isdir(pltdir): os.makedirs(pltdir) opts = { "legend_title" : sys, "maskData" : maskData, } sys_histo = hdict[(args.lepton, jmult, sys, hname)] sys_histo = sys_histo.group(process_cat, process, process_groups) fig, (ax, rax) = plt.subplots(2, 1, gridspec_kw={"height_ratios": (3, 1)}, sharex=True) fig.subplots_adjust(hspace=.07) if hname == "Jets_njets": yields_txt, yields_json = Plotter.get_samples_yield_and_frac(sys_histo, data_lumi_year["%ss" % args.lepton]/1000., sys=sys) frac_name = "%s_%s_yields_and_fracs_QCD_Est" % (sys, "_".join([jmult, args.lepton])) plt_tools.print_table(yields_txt, filename="%s/%s.txt" % (pltdir, frac_name), print_output=True) print("%s/%s.txt written" % (pltdir, frac_name)) with open("%s/%s.json" % (pltdir, frac_name), "w") as out: out.write(prettyjson.dumps(yields_json)) ax, rax = Plotter.plot_stack1d(ax, rax, sys_histo, xlabel=xtitle, xlimits=x_lims, **opts) # add lepton/jet multiplicity label ax.text( 0.02, 0.92, "%s, %s" % (leptypes[args.lepton], jet_mults[jmult]), fontsize=rcParams["font.size"]*0.9, horizontalalignment="left", verticalalignment="bottom", transform=ax.transAxes ) ## draw vertical lines for distinguishing different ctstar bins if vlines is not None:
) + hslice['DL mu tau->l'].copy() + hslice[ 'DL mu tau->h'].copy() + hslice['DL tau tau->ll'].copy( ) + hslice['DL tau tau->lh'].copy( ) + hslice['DL tau tau->hh'].copy() #set_trace() mc_opts = { #'mcorder' : ['QCD', 'EWK', 'singlet', 'ttJets'] if not ttJets_cats else ['QCD', 'EWK', 'singlet', 'ttJets_other', 'ttJets_unmatchable', 'ttJets_matchable', 'ttJets_right'] #'maskData' : not withData } fig, ax = plt.subplots() fig.subplots_adjust(hspace=.07) ax = Plotter.plot_mc1d(ax, plt_histo, xlabel=xtitle, xlimits=x_lims, **mc_opts) # add lepton/jet multiplicity label #set_trace() ax.text(0.02, 0.88, "%s, %s\n%s" % (lep_cats[lepcat], jet_mults[jmult], btag_cats[btagregion]), fontsize=rcParams['font.size'] * 0.75, horizontalalignment='left', verticalalignment='bottom', transform=ax.transAxes) hep.cms.cmslabel( ax=ax,
os.makedirs(pltdir) print(', '.join([jmult, lepcat, btagregion, hname])) hslice = histo[:, jmult, btagregion, lepcat].integrate('jmult').integrate('lepcat').integrate('btag') if hname == 'Lep_iso': if args.lepton == 'Muon': x_lims = (0., 0.15) if lepcat == 'Tight' else (0.15, 1.) if args.lepton == 'Electron': x_lims = (0., 0.1) if lepcat == 'Tight' else (0., 0.5) # plot original yields fig, ax = plt.subplots() fig.subplots_adjust(hspace=.07) Plotter.plot_1D(hslice.values()[('Before',)], hslice.axis(xaxis_name).edges(), xlimits=x_lims, ax=ax, label='Runs $<$ 319077') Plotter.plot_1D(hslice.values()[('After',)], hslice.axis(xaxis_name).edges(), xlimits=x_lims, xlabel=xtitle, color='r', ax=ax, label='Runs $\\geq$ 319077') ax.legend(loc='upper right') if hname == 'Jets_njets': print(jmult) #set_trace() rows = [("Lumi: %s fb^-1" % format(data_lumi_year['%ss' % args.lepton]/1000., '.1f'), "Yield", "Error", "Frac")] rows += [("Runs < 319077", format(sum(hslice.values(overflow='all')[('Before',)]), '.1f'), format(np.sqrt(sum(hslice.values(overflow='all', sumw2=True)[('Before',)][1])), '.1f'), format(sum(hslice.values(overflow='all')[('Before',)])/sum(hslice.sum('hem').values(overflow='all')[()]), '.3f'))] rows += [("Runs >= 319077", format(sum(hslice.values(overflow='all')[('After',)]), '.1f'), format(np.sqrt(sum(hslice.values(overflow='all', sumw2=True)[('After',)][1])), '.1f'), format(sum(hslice.values(overflow='all')[('After',)])/sum(hslice.sum('hem').values(overflow='all')[()]), '.3f'))] rows += [("Total", format(sum(hslice.sum('hem').values(overflow='all')[()]), '.1f'), format(np.sqrt(sum(hslice.sum('hem').values(overflow='all', sumw2=True)[()][1])), '.1f'), "")] frac_name = '%s_yields_and_fracs.txt' % '_'.join([jmult, args.lepton, lepcat, btagregion]) plt_tools.print_table(rows, filename=os.path.join(pltdir, frac_name), print_output=True)
def get_bkg_templates(tmp_rname):
    """
    Build the linearized background templates and write them to file.

    The "mtt_vs_tlep_ctstar_abs" histogram is read from ``bkg_fnames``,
    restricted to ``Plotter.nonsignal_samples``, rebinned according to
    ``linearize_binning``, scaled by the per-lepton lumi corrections and
    grouped into processes.  For every lepton flavour, jet multiplicity in
    ``njets_to_run``, systematic and process the resulting 1D template is
    written to the ROOT file ``tmp_rname`` and, for "3Jets"/"4PJets", also
    stored in a coffea dict that is saved into ``outdir``.

    Parameters
    ----------
    tmp_rname : str
        Path of the ROOT file to write.  It is recreated when it already
        exists and created otherwise.

    Raises
    ------
    ValueError
        If the expected histogram is not present in the input.
    TypeError
        If an entry of ``linearize_binning`` is neither a numpy array of bin
        edges nor a plain number.
    """

    def _rebin(histo_in, axis_name, rebinning):
        # an array of edges defines a new axis, a plain number is handed to rebin() unchanged
        if isinstance(rebinning, np.ndarray):
            new_bins = hist.Bin(axis_name, axis_name, rebinning)
        elif isinstance(rebinning, (float, int)):
            new_bins = rebinning
        else:
            raise TypeError(
                f"Unsupported rebinning of type {type(rebinning).__name__} for axis {axis_name}: "
                "expected a numpy array of bin edges or a number"
            )
        return histo_in.rebin(axis_name, new_bins)

    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(bkg_fnames) if len(bkg_fnames) > 1 else load(bkg_fnames[0])

    # get correct hist and rebin
    hname_to_use = "mtt_vs_tlep_ctstar_abs"
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use][Plotter.nonsignal_samples]  # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
    histo = _rebin(histo, xaxis_name, xrebinning)
    rebin_histo = _rebin(histo, yaxis_name, yrebinning)

    ## scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
    ttJets_permcats = ["*right", "*matchable", "*unmatchable", "*sl_tau", "*other"]
    names = sorted({key[0] for key in histo.values().keys()})  # get dataset names in hists
    ttJets_cats = [
        name for name in names
        if any(fnmatch.fnmatch(name, cat) for cat in ttJets_permcats)
    ]  # gets ttJets(_PS)_other, ...

    ## make groups based on process
    process = hist.Cat("process", "Process", sorting="placement")
    process_cat = "dataset"

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot3.recreate(tmp_rname, compression=uproot3.ZLIB(4)) if os.path.isfile(tmp_rname) else uproot3.create(tmp_rname)

    # everything after opening the ROOT file runs under try/finally so the handle is always closed
    try:
        # one coffea dict per requested jet multiplicity, kept in the order the files are written
        coffea_dicts = {
            jmult: processor.dict_accumulator({"Muon": {}, "Electron": {}})
            for jmult in ("3Jets", "4PJets")
            if jmult in njets_to_run
        }

        for lep in ["Muon", "Electron"]:
            orig_lepdir = "muNJETS" if lep == "Muon" else "eNJETS"

            ## make groups based on process
            process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict="templates")

            lumi_correction = lumi_corr_dict[args.year]["%ss" % lep]
            # scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
            # NOTE: update() adds the per-category entries to the shared lumi_corr_dict entry in place
            for tt_cat in ttJets_cats:
                name_parts = tt_cat.split("_")
                # "sl_tau" contributes two trailing tokens, every other category only one
                n_suffix = 2 if "sl_tau" in tt_cat else 1
                ttJets_lumi_topo = "_".join(name_parts[:-n_suffix])  # gets ttJets[SL, Had, DiLep] or ttJets_PS
                lumi_correction.update({tt_cat: lumi_correction[ttJets_lumi_topo]})

            histo = rebin_histo.copy()
            histo.scale(lumi_correction, axis="dataset")
            histo = histo.group(process_cat, process, process_groups)[:, :, :, lep, :, :].integrate("leptype")

            systs = sorted({key[1] for key in histo.values().keys()})
            systs.insert(0, systs.pop(systs.index("nosys")))  # move "nosys" to the front

            # loop over each jet multiplicity
            for jmult in njets_to_run:
                lepdir = orig_lepdir.replace("NJETS", jmult.lower())
                sys_to_name = systematics.template_sys_to_name[args.year]

                # get sideband and signal region hists
                cen_sb_histo = Plotter.linearize_hist(
                    histo[:, "nosys", jmult, btag_reg_names_dict["Central"]["reg"]]
                    .integrate("jmult").integrate("btag").integrate("sys")
                )
                sig_histo = Plotter.linearize_hist(
                    histo[:, :, jmult, btag_reg_names_dict["Signal"]["reg"]]
                    .integrate("jmult").integrate("btag")
                )

                # loop over each systematic
                for syst in systs:
                    if syst not in sys_to_name.keys():
                        continue

                    is_ttJets_sys = syst in systematics.ttJets_sys.values()
                    sig_sys_histo = sig_histo[:, syst].integrate("sys")
                    if is_ttJets_sys:
                        # dedicated ttJets systematics are used directly from the signal region
                        sys_histo = sig_sys_histo
                    else:
                        # everything else goes through Plotter.BKG_Est with the central sideband
                        sys_histo = Plotter.BKG_Est(
                            sig_reg=sig_sys_histo,
                            sb_reg=cen_sb_histo,
                            norm_type="SigMC",
                            sys=syst,
                            ignore_uncs=True,
                        )

                    ## write nominal and systematic variations for each topology to file
                    for proc in sorted({key[0] for key in sys_histo.values().keys()}):
                        if ("tt" not in proc) and is_ttJets_sys:
                            continue
                        if (proc == "data_obs") and not (syst == "nosys"):
                            continue
                        proc_histo = sys_histo[proc]
                        if not proc_histo.values().keys():
                            print(f"Systematic {syst} for {lep} {jmult} {proc} not found, skipping")
                            continue

                        print(args.year, lep, jmult, syst, proc)
                        # empty name pieces are dropped before joining
                        outhname = "_".join(filter(None, [proc, sys_to_name[syst][0], lepdir, (args.year)[-2:]]))
                        if "LEP" in outhname:
                            outhname = outhname.replace("LEP", "muon" if lep == "Muon" else "electron")

                        template_histo = proc_histo.integrate("process")

                        ## save template histos to coffea dict
                        if jmult in coffea_dicts:
                            coffea_dicts[jmult][lep][f"{proc}_{syst}"] = template_histo.copy()

                        ## save template histo to root file
                        upfout[outhname] = hist.export1d(template_histo)

        for jmult, histo_dict in coffea_dicts.items():
            coffea_out = os.path.join(outdir, f"test_raw_templates_lj_{jmult}_bkg_{args.year}_{jobid}.coffea")
            save(histo_dict, coffea_out)
            print(f"{coffea_out} written")
    finally:
        upfout.close()

    print(f"{tmp_rname} written")
def get_sig_templates(tmp_rname):
    """
    Build the linearized signal templates and write them to file.

    The "mtt_vs_tlep_ctstar_abs" histogram is read from ``sig_fnames``,
    rebinned according to ``linearize_binning`` and restricted to
    ``Plotter.signal_samples`` in the "btagPass" category.  For every lepton
    flavour, jet multiplicity in ``njets_to_run``, signal sample whose name
    contains "TTJetsSL" and systematic, the 1D template is written to the ROOT
    file ``tmp_rname`` and, for "3Jets"/"4PJets", also stored in a coffea dict
    that is saved into ``outdir``.

    Parameters
    ----------
    tmp_rname : str
        Path of the ROOT file to write.  It is recreated when it already
        exists and created otherwise.

    Raises
    ------
    ValueError
        If the expected histogram is not present in the input.
    TypeError
        If an entry of ``linearize_binning`` is neither a numpy array of bin
        edges nor a plain number.
    """

    def _rebin(histo_in, axis_name, rebinning):
        # an array of edges defines a new axis, a plain number is handed to rebin() unchanged
        if isinstance(rebinning, np.ndarray):
            new_bins = hist.Bin(axis_name, axis_name, rebinning)
        elif isinstance(rebinning, (float, int)):
            new_bins = rebinning
        else:
            raise TypeError(
                f"Unsupported rebinning of type {type(rebinning).__name__} for axis {axis_name}: "
                "expected a numpy array of bin edges or a number"
            )
        return histo_in.rebin(axis_name, new_bins)

    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(sig_fnames) if len(sig_fnames) > 1 else load(sig_fnames[0])

    # get correct hist and rebin
    hname_to_use = "mtt_vs_tlep_ctstar_abs"
    if hname_to_use not in hdict.keys():
        raise ValueError(f"{hname_to_use} not found in file")
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use]  # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
    histo = _rebin(histo, xaxis_name, xrebinning)
    histo = _rebin(histo, yaxis_name, yrebinning)

    rebin_histo = histo[Plotter.signal_samples, :, :, :, "btagPass"].integrate("btag")

    # read the sparse keys once; first entry is the dataset, second the systematic
    sparse_keys = list(rebin_histo.values().keys())
    names = sorted({key[0] for key in sparse_keys})  # get dataset names in hists
    signals = [sig for sig in names if "TTJetsSL" in sig]  # only use SL decays

    systs = sorted({key[1] for key in sparse_keys})
    systs.insert(0, systs.pop(systs.index("nosys")))  # move "nosys" to the front

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot3.recreate(tmp_rname, compression=uproot3.ZLIB(4)) if os.path.isfile(tmp_rname) else uproot3.create(tmp_rname)

    # everything after opening the ROOT file runs under try/finally so the handle is always closed
    try:
        # one coffea dict per requested jet multiplicity, kept in the order the files are written
        coffea_dicts = {
            jmult: processor.dict_accumulator({"Muon": {}, "Electron": {}})
            for jmult in ("3Jets", "4PJets")
            if jmult in njets_to_run
        }

        # write signal dists to temp file
        for lep in ["Muon", "Electron"]:
            orig_lepdir = "muNJETS" if lep == "Muon" else "eNJETS"

            # scale by lumi
            lumi_correction = lumi_corr_dict[args.year]["%ss" % lep]
            histo = rebin_histo.copy()
            histo.scale(lumi_correction, axis="dataset")
            process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict="templates")
            histo = histo.group(
                "dataset",
                hist.Cat("process", "Process", sorting="placement"),
                process_groups,
            )

            for jmult in njets_to_run:
                lepdir = orig_lepdir.replace("NJETS", jmult.lower())
                sys_to_name = systematics.template_sys_to_name[args.year]

                lin_histo = Plotter.linearize_hist(
                    histo[:, :, jmult, lep].integrate("jmult").integrate("leptype")
                )
                for signal in signals:
                    # names containing "Int" carry a fifth token after the "Int" label
                    if "Int" in signal:
                        boson, mass, width, pI, wt = signal.split("_")
                    else:
                        boson, mass, width, pI = signal.split("_")

                    # "." -> "p" in the width, then keep only what follows the last "W"
                    width_tag = str(width).replace(".", "p").split("W")[-1]
                    name_parts = [
                        "%s%s" % (boson[0], mass[1:]),
                        "relw%s" % width_tag,
                        pI.lower(),
                    ]
                    if pI == "Int":
                        name_parts.append(wt)
                    sub_name = "_".join(name_parts)

                    for syst in systs:
                        if syst not in sys_to_name.keys():
                            continue
                        sig_sys_histo = lin_histo[signal, syst]
                        if not sig_sys_histo.values().keys():
                            print(f"Systematic {syst} for {lep} {jmult} {signal} not found, skipping")
                            continue

                        print(args.year, lep, jmult, sub_name, syst)
                        # empty name pieces are dropped before joining
                        outhname = "_".join(filter(None, [sub_name, sys_to_name[syst][0], lepdir, (args.year)[-2:]]))
                        if "LEP" in outhname:
                            outhname = outhname.replace("LEP", "muon" if lep == "Muon" else "electron")

                        template_histo = sig_sys_histo.integrate("process").integrate("sys")

                        ## save template histos to coffea dict
                        if jmult in coffea_dicts:
                            coffea_dicts[jmult][lep][f"{signal}_{syst}"] = template_histo.copy()

                        ## save template histo to root file
                        upfout[outhname] = hist.export1d(template_histo)

        for jmult, histo_dict in coffea_dicts.items():
            coffea_out = os.path.join(outdir, f"test_raw_templates_lj_{jmult}_sig_{args.year}_{jobid}.coffea")
            save(histo_dict, coffea_out)
            print(f"{coffea_out} written")
    finally:
        upfout.close()

    print(f"{tmp_rname} written")
fig, ax = plt.subplots() fig.subplots_adjust(hspace=.07) ax.axhline(1, **{"linestyle": "--", "color": (0, 0, 0, 0.5), "linewidth": 1}) ax.text( 0.02, 0.90, "$t\\bart \\rightarrow e/\mu + jets$\nparton level", fontsize=rcParams["font.size"], horizontalalignment="left", verticalalignment="bottom", transform=ax.transAxes ) #set_trace() for year in years_to_run: histo = ratios[year][var] # get histogram if histo._dimension == 1: orig_bins, orig_vals = histo._axes, histo._values # plot original distribution Plotter.plot_1D(orig_vals, orig_bins, xlabel=var_opts[var]["xtitle"], ylabel=var_opts[var]["ytitle"], color=year_opts[year]["col"], ax=ax, label="%s Original" % year_opts[year]["leg_title"]) # get interpolated values from ROOT Interpolate orig_ratio_hist = Hist(orig_bins, name="", title="") for xbin in range(1, orig_bins.size): orig_ratio_hist[xbin] = orig_vals[xbin-1] output_bins = np.arange(min(orig_bins), max(orig_bins)+10, 10) interped_array = np.zeros(output_bins.size-1) for xbin in range(output_bins.size-1): interped_array[xbin] = orig_ratio_hist.Interpolate(output_bins[xbin]) Plotter.plot_1D(interped_array, output_bins, xlabel=var_opts[var]["xtitle"], ylabel=var_opts[var]["ytitle"], color=year_opts[year]["col"], ax=ax, label="%s Interp" % (year_opts[year]["leg_title"]), linestyle="--") if args.save_ratios: lookup = dense_lookup(*(interped_array, output_bins))
#alpha_medians, alpha_errors = np.array(alpha_median), np.array(alpha_median_errs) #binned_mtt_medians[idx] = alpha_medians #binned_mtt_errors[idx] = alpha_errors alpha_median = get_median_from_2d(hslice, 'norm_mthad', xmin=mthad_fit_range[0], xmax=mthad_fit_range[1]) alpha_medians = np.array(alpha_median) binned_mtt_medians[idx] = alpha_medians Plotter.plot_2d_norm(hslice, xaxis_name='norm_mthad', yaxis_name=alpha_axis_name, values=np.ma.masked_where( hslice.values()[()] <= 0., hslice.values()[()]), xlimits=mthad_lims, ylimits=alpha_lims, xlabel=mthad_title, ylabel=alpha_title, ax=ax, **opts) mtt_label = '$m_{t\\bar{t}}^{Reco}$ $\geq$ %s' % bin_min if idx == len( mtt_bin_ranges ) - 1 else '%s $\leq$ $m_{t\\bar{t}}^{Reco}$ $<$ %s' % ( bin_min, bin_max) # add lepton/jet multiplicity label ax.text( 0.02, 0.84, "%s\n%s" % (blurb, mtt_label),
for key in histo.values().keys()]))): objlabel, mass_range = objects[obj] new_xtitle = xtitle.replace('obj', objlabel) if ('mass' in hname) and (hname != 'Reso_mass'): x_lims = mass_range fig, ax = plt.subplots() fig.subplots_adjust(hspace=.07) hslice = histo[:, jmult, btagregion, lepcat, obj].integrate('jmult').integrate( 'lepcat').integrate( 'btag').integrate('objtype') Plotter.plot_mc1d(ax, hslice, xlabel=new_xtitle, xlimits=x_lims, ylabel='Events') # add lepton/jet multiplicity label ax.text(0.02, 0.90, "%s, %s\n%s" % (lep_cats[lepcat], jet_mults[jmult], btag_cats[btagregion]), horizontalalignment='left', verticalalignment='bottom', transform=ax.transAxes) hep.cms.label(ax=ax, data=False, paper=False,
## get values from NNLO root file nnlo_fname = 'MATRIX_ttmVStheta.root' # 'xsec_central' dist has only statistical uncs #nnlo_fname = 'matrixhists_NNPDF.root' # 'cen' dist has only statistical uncs #nnlo_fname = 'MATRIX_17_abs.root' # has scale and statistical uncs nnlo_file = convert_histo_root_file( os.path.join(proj_dir, 'NNLO_files', nnlo_fname)) nnlo_var = 'xsec_central' nnlo_dict = Plotter.root_converters_dict_to_hist( nnlo_file, vars=[nnlo_var], sparse_axes_list=[{ 'name': 'dataset', 'label': "Event Process", 'fill': 'nnlo' }], #dense_axes_list=[{'name': 'mtt', 'idx' : 1}, {'name' : 'ctstar', 'idx' : 0}], #transpose_da=True, dense_axes_list=[{ 'name': 'ctstar', 'idx': 0 }, { 'name': 'mtt', 'idx': 1 }], ) mtt_binning = nnlo_dict[nnlo_var].axis('mtt').edges() ctstar_binning = nnlo_dict[nnlo_var].axis('ctstar').edges() vlines = [(len(mtt_binning) - 1) * ybin for ybin in range(1, len(ctstar_binning) - 1)] #set_trace()
def plot_bkg_templates(fnames_to_run):
    """
    Plot toy-based 68% and 95% bands for the smoothed systematic templates.

    For every systematic template of ``args.lepton`` (the "BKG" process and
    the nominal "nosys" templates are skipped), ``ntoys`` toy templates are
    drawn bin-by-bin from a Gaussian with mean = bin content and sigma = bin
    error, each toy is smoothed with ``Plotter.smoothing_mttbins`` (described
    as LOWESS smoothing by the original author), and the per-bin spread of the
    smoothed relative deviations from nominal is drawn together with the
    original and originally-smoothed relative deviations.  One figure per
    template is saved under ``outdir/<lepton>/<jmult>/<sys>/``.

    Parameters
    ----------
    fnames_to_run : iterable of str
        Paths of the files holding the templates.  The jet multiplicity is
        "3Jets" when the basename contains that substring, "4PJets" otherwise.
    """

    def _step_extend(vals):
        # repeat the last value so the array lines up with the bin edges for step="post"
        return np.r_[vals, vals[-1]]

    for bkg_file in fnames_to_run:
        hdict = load(bkg_file)
        jmult = "3Jets" if "3Jets" in os.path.basename(bkg_file) else "4PJets"
        for tname, orig_template in hdict[args.lepton].items():
            # template names look like "<proc>_<sys>"; "data_obs" itself contains an underscore
            proc = "data_obs" if "data_obs" in tname else tname.split("_")[0]
            syst = sorted(filter(None, tname.split(f"{proc}_")))[0]

            if proc == "BKG":
                continue
            if syst == "nosys":
                continue
            print(args.lepton, jmult, syst, proc)

            nosys_hist = hdict[args.lepton][f"{proc}_nosys"].copy()
            orig_smooth_hist = Plotter.smoothing_mttbins(
                nosys=nosys_hist,
                systematic=orig_template,
                mtt_centers=mtt_centers,
                nbinsx=nbinsx,
                nbinsy=nbinsy,
            )

            x_lims = (0, nosys_hist.dense_axes()[0].centers().size)
            bin_edges = nosys_hist.dense_axes()[0].edges()
            nosys_vals = nosys_hist.values()[()]
            orig_smooth_vals = orig_smooth_hist.values()[()]

            # get vals and errors of systematic variation
            sys_histo_vals, sys_histo_sumw2 = orig_template.values(sumw2=True)[()]
            sys_histo_errs = np.sqrt(sys_histo_sumw2)

            # make toys based on Gaussian distribution of mu=bin_val, sigma=bin_error
            toy_arrays = np.zeros((nbins, ntoys))
            for idx in range(nbins):
                toy_arrays[idx] = np.random.normal(sys_histo_vals[idx], sys_histo_errs[idx], size=ntoys)

            # get smoothed relative deviation distributions from toys
            smoothed_rel_dev_arrays = np.zeros((ntoys, nbins))
            # chi2 statistic and p-value of each toy; filled but not used further in this function
            chi2_pvals = np.zeros((ntoys, 2))
            for idx in range(ntoys):
                smoothed_array = Plotter.smoothing_mttbins(
                    nosys=nosys_hist,
                    systematic=(toy_arrays.T)[idx],
                    mtt_centers=mtt_centers,
                    nbinsx=nbinsx,
                    nbinsy=nbinsy,
                )
                chi2_pval = chisquare(
                    f_obs=smoothed_array, f_exp=orig_smooth_vals
                )  # convert to expected yields so inputs are greater than 5
                chi2_pvals[idx] = np.array([chi2_pval.statistic, chi2_pval.pvalue])
                smoothed_rel_dev_arrays[idx] = (smoothed_array - nosys_vals) / nosys_vals

            ## find 68% and 95% intervals
            # sort the toys once per bin (column) and read the band edges off the sorted rows
            sorted_rel_devs = np.sort(smoothed_rel_dev_arrays, axis=0)
            plus_one_sigma_smooth_vals = sorted_rel_devs[plus_one_sigma_ind]
            minus_one_sigma_smooth_vals = sorted_rel_devs[minus_one_sigma_ind]
            plus_two_sigma_smooth_vals = sorted_rel_devs[plus_two_sigma_ind]
            minus_two_sigma_smooth_vals = sorted_rel_devs[minus_two_sigma_ind]

            # plot relative deviation
            fig, ax = plt.subplots()
            fig.subplots_adjust(hspace=.07)

            # original relative deviations
            orig_masked_vals, orig_masked_bins = Plotter.get_ratio_arrays(
                num_vals=orig_template.values()[()] - nosys_vals,
                denom_vals=nosys_vals,
                input_bins=bin_edges,
            )
            ax.step(orig_masked_bins, orig_masked_vals, where="post", color="k", linestyle="-", label="Original")

            # original smoothing relative deviations
            orig_smoothed_masked_vals, orig_smoothed_masked_bins = Plotter.get_ratio_arrays(
                num_vals=orig_smooth_vals - nosys_vals,
                denom_vals=nosys_vals,
                input_bins=bin_edges,
            )
            ax.step(orig_smoothed_masked_bins, orig_smoothed_masked_vals, where="post", color="r", linestyle="-", label="Original Smoothing")

            # plot 68 and 95% intervals for yields
            bands = (
                (minus_one_sigma_smooth_vals, plus_one_sigma_smooth_vals, "68%", "#00cc00"),
                (minus_two_sigma_smooth_vals, plus_two_sigma_smooth_vals, "95%", "#ffcc00"),
            )
            for lower_vals, upper_vals, band_label, band_color in bands:
                lower_edge = _step_extend(lower_vals)
                upper_edge = _step_extend(upper_vals)
                ax.fill_between(
                    bin_edges,
                    lower_edge,
                    upper_edge,
                    where=upper_edge > lower_edge,
                    step="post",
                    label=band_label,
                    facecolor=band_color,
                    alpha=0.5,
                )

            ax.legend(loc="upper right", title=f"{syst}, {proc}")
            ax.axhline(0, linestyle="--", color=(0, 0, 0, 0.5), linewidth=1)
            ax.autoscale()
            ax.set_ylim(ax.get_ylim()[0], ax.get_ylim()[1] * 1.15)
            ax.set_xlim(x_lims)
            ax.set_xlabel("$m_{t\\bar{t}}$ $\\otimes$ |cos($\\theta^{*}_{t_{l}}$)|")
            ax.set_ylabel("Rel. Deviation from Nominal")

            # add lepton/jet multiplicity label
            ax.text(
                0.02, 0.94, f"{leptypes[args.lepton]}, {jet_mults[jmult]}",
                fontsize=rcParams["font.size"] * 0.9,
                horizontalalignment="left",
                verticalalignment="bottom",
                transform=ax.transAxes,
            )

            ## draw vertical lines for distinguishing different ctstar bins
            # NOTE(review): the 5 is hard-coded; presumably the number of ctstar bins - confirm against the binning
            vlines = [x_lims[1] * ybin / 5 for ybin in range(1, 5)]
            for vline in vlines:
                ax.axvline(vline, color="k", linestyle="--")
            hep.cms.label(
                ax=ax,
                data=False,
                paper=False,
                year=args.year,
                lumi=round(data_lumi_year[f"{args.lepton}s"] / 1000., 1),
            )

            pltdir = os.path.join(outdir, args.lepton, jmult, syst)
            os.makedirs(pltdir, exist_ok=True)

            figname = os.path.join(
                pltdir,
                "_".join([jmult, args.lepton, syst, proc, "SmoothingConfidenceIntervals"]),
            )
            fig.savefig(figname)
            print(f"{figname} written")
            # close this figure explicitly rather than whichever one happens to be current
            plt.close(fig)
if not os.path.isdir(pltdir): os.makedirs(pltdir) print(hname, jmult) hslice = h_tot[jmult, :].integrate('jmult') if rebinning != 1: xaxis_name = hslice.dense_axes()[0].name hslice = hslice.rebin(xaxis_name, rebinning) # make histo with event all individual categories for 'Lost' and 'Merged' indiv_cat_histo = hslice.group(orig_axis, all_evt_cat, indiv_evt_groups) # hists with yields fig, ax = plt.subplots() fig.subplots_adjust(hspace=.07) Plotter.plot_mc1d(ax=ax, hdict=indiv_cat_histo, xlabel=xtitle, ylabel='Events', xlimits=x_lims, hist_styles=indiv_cat_styles, **{'error_opts':None, 'leg_ncols':2}) ax.set_ylim(0, ax.get_ylim()[1]*1.1) # add lep category ax.text( 0.02, 0.90, "$e/\mu$, %s\nParton Level" % jet_mults[jmult], fontsize=rcParams['font.size'], horizontalalignment='left', verticalalignment='bottom', transform=ax.transAxes ) ## set axes labels and titles hep.cms.label(ax=ax, data=False, paper=False, year=args.year, lumi=round(lumi_to_use, 1)) #set_trace() figname = os.path.join(pltdir, '_'.join([args.year, jobid, jmult, 'Indiv_Cats', hname])) fig.savefig(figname) print('%s written' % figname) #set_trace()
def _bkg_rebin_spec(axis_name, rebinning):
    '''
    Translate a rebinning request into the argument handed to ``rebin``.

    A numpy array of bin edges is wrapped in a ``hist.Bin`` named and labelled
    after the axis; a plain int/float is passed through unchanged.

    Raises
    ------
    TypeError
        If ``rebinning`` is neither a numpy array nor an int/float.
    '''
    if isinstance(rebinning, np.ndarray):
        return hist.Bin(axis_name, axis_name, rebinning)
    if isinstance(rebinning, (float, int)):
        return rebinning
    raise TypeError("Unsupported rebinning %r for axis %s" % (rebinning, axis_name))


def _bkg_dataset_names(histo):
    ''' Return the sorted, unique first entries of the sparse keys of ``histo``. '''
    return sorted(set(key[0] for key in histo.values().keys()))


def _bkg_add_ttJets_lumi(lumi_correction, tt_cats):
    '''
    Give every name in ``tt_cats`` the weight of its parent sample, in place.

    The parent name is the category name with its last '_'-separated token removed,
    e.g. ttJets_PS_right -> ttJets_PS.
    '''
    for tt_cat in tt_cats:
        parent_sample = '_'.join(tt_cat.split('_')[:-1])  # gets ttJets[SL, Had, DiLep] or ttJets_PS
        lumi_correction.update({tt_cat: lumi_correction[parent_sample]})


def get_bkg_templates(tmp_rname):
    '''
    Function that writes linearized mtt vs costheta distributions to root file.

    The 'mtt_vs_tlep_ctstar_abs' histogram from ``bkg_fnames`` is rebinned with
    ``linearize_binning``, scaled by the MC lumi weights and grouped by process.
    For each lepton flavour, jet multiplicity in ``njets_to_run`` and systematic
    in ``sys_to_use`` the output of ``Plotter.QCD_Est`` is split by process and
    each template is
      * optionally modified by ``substitute_ttJets``, ``scale_mtop3gev`` and
        ``smoothing`` (the latter only when ``args.smooth`` is set),
      * exported to the ROOT file ``tmp_rname``,
      * stored in a per-jet-multiplicity coffea dict that is saved in ``outdir``.

    Parameters
    ----------
    tmp_rname : str
        Path of the ROOT file to write (recreated if it already exists).

    Raises
    ------
    ValueError
        If the histogram is missing from the input file.
    TypeError
        If an entry of ``linearize_binning`` is neither a numpy array nor a number.
    '''
    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(bkg_fnames) if len(bkg_fnames) > 1 else load(bkg_fnames[0])

    # get correct hist and rebin
    hname_to_use = 'mtt_vs_tlep_ctstar_abs'
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = linearize_binning
    orig_histo = hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = orig_histo.dense_axes()[0].name
    yaxis_name = orig_histo.dense_axes()[1].name
    new_xbins = _bkg_rebin_spec(xaxis_name, xrebinning)
    new_ybins = _bkg_rebin_spec(yaxis_name, yrebinning)
    rebin_histo = orig_histo.rebin(xaxis_name, new_xbins).rebin(yaxis_name, new_ybins)

    ## scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
    ttJets_permcats = ['*right', '*matchable', '*unmatchable', '*other']
    names = _bkg_dataset_names(orig_histo) # get dataset names in hists
    ttJets_cats = [name for name in names if any(fnmatch.fnmatch(name, cat) for cat in ttJets_permcats)] # gets ttJets(_PS)_other, ...

    # use ttJets events that don't have PS weights for dedicated sys samples in 2016
    use_dedicated_ttJets = bkg_ttJets_fname is not None
    if use_dedicated_ttJets:
        ttJets_orig_histo = load(bkg_ttJets_fname)[hname_to_use] # process, sys, jmult, leptype, btag, lepcat
        ttJets_histo = ttJets_orig_histo.rebin(xaxis_name, new_xbins).rebin(yaxis_name, new_ybins)
        only_ttJets_cats = [
            name for name in _bkg_dataset_names(ttJets_orig_histo)
            if any(fnmatch.fnmatch(name, cat) for cat in ttJets_permcats)
        ] # gets ttJets(_PS)_other, ...

    ## make groups based on process
    process = hist.Cat("process", "Process", sorting='placement')
    process_cat = "dataset"

    # one template container per requested jet multiplicity, filled as {lepton: {template name: histo}}
    template_dicts = {
        jmult: processor.dict_accumulator({'Muon' : {}, 'Electron' : {}})
        for jmult in ('3Jets', '4PJets') if jmult in njets_to_run
    }

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot.recreate(tmp_rname, compression=uproot.ZLIB(4)) if os.path.isfile(tmp_rname) else uproot.create(tmp_rname)
    try:
        # the weights file is the same for both leptons, so it is read only once
        lumi_weights = load('%s/Corrections/%s/MC_LumiWeights_IgnoreSigEvts.coffea' % (proj_dir, jobid))[args.year]

        for lep in ['Muon', 'Electron']:
            lepdir = 'mujets' if lep == 'Muon' else 'ejets'

            ## make groups based on process
            process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict='templates')

            lumi_correction = lumi_weights['%ss' % lep]
            # scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
            _bkg_add_ttJets_lumi(lumi_correction, ttJets_cats)

            histo = rebin_histo.copy()
            histo.scale(lumi_correction, axis='dataset')
            histo = histo.group(process_cat, process, process_groups)[:, :, :, lep, :, :].integrate('leptype')
            available_sys = histo.axis('sys')._sorted

            # use ttJets events that don't have PS weights for dedicated sys samples in 2016
            if use_dedicated_ttJets:
                _bkg_add_ttJets_lumi(lumi_correction, only_ttJets_cats)
                tt_histo = ttJets_histo.copy()
                tt_histo.scale(lumi_correction, axis='dataset')
                tt_histo = tt_histo.group(
                    process_cat, process,
                    {'TT' : ['ttJets_right', 'ttJets_matchable', 'ttJets_unmatchable', 'ttJets_other']}
                )[:, :, :, lep, :, :].integrate('leptype')

            for jmult in njets_to_run:
                jmult_templates = template_dicts[jmult][lep]

                # nominal ('nosys') regions handed to QCD_Est as iso/btag/double sidebands
                iso_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagPass', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
                btag_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Tight'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
                double_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
                # btagPass/Tight region, kept differential in the systematic axis
                sig_histo = Plotter.linearize_hist(histo[:, :, jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))

                for sys in sys_to_use.keys():
                    if sys not in available_sys:
                        print('\n\n Systematic %s not available, skipping\n\n' % sys)
                        continue

                    sysname, onlyTT = sys_to_use[sys]
                    if 'LEP' in sysname:
                        sysname = sysname.replace('LEP', lepdir[0])

                    qcd_est_histo = Plotter.QCD_Est(sig_reg=sig_histo, iso_sb=iso_sb, btag_sb=btag_sb, double_sb=double_sb, norm_type='Sideband', shape_region='BTAG', norm_region='BTAG', sys=sys)

                    ## write nominal and systematic variations for each topology to file
                    for proc in sorted(set(key[0] for key in qcd_est_histo.values().keys())):
                        if (proc != 'TT') and onlyTT:
                            continue
                        if (proc == 'data_obs') and (sys != 'nosys'):
                            continue

                        name = proc+lepdir if proc == 'QCD' else proc
                        print(lep, jmult, sys, name)
                        outhname = '_'.join([jmult, lepdir, name]) if sys == 'nosys' else '_'.join([jmult, lepdir, name, sysname])

                        template_histo = qcd_est_histo[proc].integrate('process')

                        if any(tag in sys for tag in ('ue', 'hdamp', 'mtop')) and use_dedicated_ttJets:
                            tt_lin_histo = Plotter.linearize_hist(tt_histo['TT', 'nosys', jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))
                            tt_lin_histo = tt_lin_histo['TT', 'nosys'].integrate('process').integrate('sys')
                            template_histo = substitute_ttJets(sys_histo=template_histo, ttJets_histo=tt_lin_histo, ttJets_PS_histo=sig_histo['TT', 'nosys'].integrate('process').integrate('sys'))

                        # both steps below look up the nominal template stored under the bare process name,
                        # so 'nosys' has to be processed before the systematic that needs it
                        if (sys in ('mtop1695', 'mtop1755')) and templates_to_smooth[proc]:
                            template_histo = scale_mtop3gev(nominal=jmult_templates[proc], template=template_histo)

                        if (sys != 'nosys') and args.smooth and templates_to_smooth[proc]:
                            template_histo = smoothing(nominal=jmult_templates[proc], template=template_histo, nbinsx=len(xrebinning)-1, nbinsy=len(yrebinning)-1)

                        ## save template histos to coffea dict
                        jmult_templates[proc if sys == 'nosys' else '%s_%s' % (proc, sys)] = template_histo

                        ## save template histo to root file
                        upfout[outhname] = hist.export1d(template_histo)

        smooth_tag = '_smoothed' if args.smooth else ''
        for jmult, template_dict in template_dicts.items():
            coffea_out = '%s/templates_lj_%s_bkg%s_%s_QCD_Est_%s.coffea' % (outdir, jmult, smooth_tag, jobid, args.year)
            save(template_dict, coffea_out)
            print("%s written" % coffea_out)
    finally:
        # always release the ROOT file, even if building a template failed
        upfout.close()

    print('%s written' % tmp_rname)
fig.subplots_adjust(hspace=.07) hslice = histo[:, jmult, btagregion, lepcat].integrate('jmult').integrate('lepcat').integrate('btag') if hname == 'Lep_iso': if args.lepton == 'Muon': x_lims = (0., 0.15) if lepcat == 'Tight' else (0.15, 1.) if args.lepton == 'Electron': x_lims = (0., 0.1) if lepcat == 'Tight' else (0., 0.5) mc_opts = { #'mcorder' : ['QCD', 'EWK', 'singlet', 'ttJets'] if not ttJets_cats else ['QCD', 'EWK', 'singlet', 'ttJets_other', 'ttJets_unmatchable', 'ttJets_matchable', 'ttJets_right'] #'maskData' : not withData } Plotter.plot_stack1d(ax, rax, hslice, xlabel=xtitle, xlimits=x_lims, **mc_opts) #set_trace() if hname == 'Jets_njets': print(jmult) yields_txt, yields_json = Plotter.get_samples_yield_and_frac(hslice, data_lumi_year['%ss' % args.lepton]/1000., promptmc=True) frac_name = '%s_yields_and_fracs' % '_'.join([jmult, args.lepton, lepcat, btagregion]) plt_tools.print_table(yields_txt, filename='%s/%s.txt' % (pltdir, frac_name), print_output=True) print('%s/%s.txt written' % (pltdir, frac_name)) with open('%s/%s.json' % (pltdir, frac_name), 'w') as out: out.write(prettyjson.dumps(yields_json)) # add lepton/jet multiplicity label #set_trace() ax.text( 0.02, 0.88, "%s, %s\n%s" % (lep_cats[lepcat], jet_mults[jmult], btag_cats[btagregion]),
def get_bkg_templates(fnames_to_run):
    """
    Plot how smoothing and flattening change each systematic template.

    For every file in ``fnames_to_run``, lepton and non-nominal template, the
    relative deviation from the nominal ("<proc>_nosys") template is drawn for
      * the original systematic template,
      * the output of ``Plotter.smoothing_mttbins`` for frac = 0.2, 0.4, 0.6,
      * the output of ``Plotter.flatten``,
    and the figure is saved under ``outdir``/<lep>/<jmult>/<sys>.

    Parameters
    ----------
    fnames_to_run : iterable of str
        Template files to load. A file whose basename contains "3Jets" is
        labelled "3Jets"; any other file is labelled "4PJets".
    """
    nbinsx = len(linearize_binning[0]) - 1
    nbinsy = len(linearize_binning[1]) - 1
    smoothing_fracs = [frac_val / 10. for frac_val in np.arange(2, 7, 2)]

    for bkg_file in fnames_to_run:
        hdict = load(bkg_file)
        jmult = "3Jets" if "3Jets" in os.path.basename(bkg_file) else "4PJets"
        for lep in hdict.keys():
            for tname, orig_template in hdict[lep].items():
                proc = "data_obs" if "data_obs" in tname else tname.split("_")[0]
                sys = sorted(filter(None, tname.split(f"{proc}_")))[0]

                # the nominal template is the reference, not something to compare
                if sys == "nosys":
                    continue
                print(lep, jmult, sys, proc)

                nominal_hist = hdict[lep][f"{proc}_nosys"].copy()
                x_lims = (0, nominal_hist.dense_axes()[0].centers().size)

                def rel_deviation(variation):
                    """Return (values, bins) of (variation - nominal) / nominal."""
                    return Plotter.get_ratio_arrays(
                        num_vals=variation.values()[()] - nominal_hist.values()[()],
                        denom_vals=nominal_hist.values()[()],
                        input_bins=nominal_hist.dense_axes()[0].edges())

                # perform smoothing
                smoothed_histos_list = [
                    (Plotter.smoothing_mttbins(nosys=nominal_hist,
                                               systematic=orig_template,
                                               mtt_centers=mtt_centers,
                                               nbinsx=nbinsx,
                                               nbinsy=nbinsy,
                                               frac=frac), frac)
                    for frac in smoothing_fracs
                ]

                # perform flattening
                flattened_histo = Plotter.flatten(nosys=nominal_hist,
                                                  systematic=orig_template)

                # plot relative deviation
                fig, ax = plt.subplots()
                fig.subplots_adjust(hspace=.07)

                # plot original dist
                orig_masked_vals, orig_masked_bins = rel_deviation(orig_template)
                ax.fill_between(orig_masked_bins,
                                orig_masked_vals,
                                facecolor="k",
                                step="post",
                                alpha=0.5,
                                label="Unsmoothed")

                # plot smoothed versions
                for smooth_histo, frac_val in smoothed_histos_list:
                    smooth_masked_vals, smooth_masked_bins = rel_deviation(smooth_histo)
                    ax.step(smooth_masked_bins,
                            smooth_masked_vals,
                            where="post",
                            linestyle="-",
                            label=f"Frac={frac_val}",
                            linewidth=2)

                # plot flattened val
                flat_masked_vals, flat_masked_bins = rel_deviation(flattened_histo)
                ax.step(flat_masked_bins,
                        flat_masked_vals,
                        where="post",
                        linestyle="-",
                        label="Flat",
                        linewidth=2)

                ax.legend(loc="upper right", title=f"{sys}, {proc}")
                ax.axhline(0, linestyle="--", color=(0, 0, 0, 0.5), linewidth=1)
                ax.autoscale()
                ax.set_xlim(x_lims)
                # raw string: '\o' and friends are not valid escape sequences
                ax.set_xlabel(
                    r"$m_{t\bar{t}}$ $\otimes$ |cos($\theta^{*}_{t_{l}}$)|")
                ax.set_ylabel("Rel. Deviation from Nominal")

                # add lepton/jet multiplicity label
                ax.text(0.02,
                        0.94,
                        f"{leptypes[lep]}, {jet_mults[jmult]}",
                        fontsize=rcParams["font.size"] * 0.9,
                        horizontalalignment="left",
                        verticalalignment="bottom",
                        transform=ax.transAxes)

                ## draw vertical lines for distinguishing different ctstar bins
                # the linearized axis is split into nbinsy equal blocks, one per ctstar bin
                vlines = [x_lims[1] * ybin / nbinsy for ybin in range(1, nbinsy)]
                for vline in vlines:
                    ax.axvline(vline, color="k", linestyle="--")
                hep.cms.label(ax=ax,
                              data=False,
                              paper=False,
                              year=args.year,
                              lumi=round(data_lumi_year[f"{lep}s"] / 1000., 1))

                pltdir = os.path.join(outdir, lep, jmult, sys)
                os.makedirs(pltdir, exist_ok=True)

                figname = os.path.join(
                    pltdir,
                    "_".join([jmult, lep, sys, proc, "SmoothedFlatVals_Comp"]))
                fig.savefig(figname)
                print(f"{figname} written")
                # close this figure explicitly so figures do not pile up over the loops
                plt.close(fig)
## normalized plots fig, ax = plt.subplots() fig.subplots_adjust(hspace=.07) norm_values = hcat.values()[()] / np.sum( hcat.values()[ ()]) ## normalized array of hist values opts = {'cmap_label': '$P_{M}$'} Plotter.plot_2d_norm( hcat, xaxis_name=hcat.axes()[0].name, yaxis_name=hcat.axes()[1].name, values=np.ma.masked_where( norm_values <= 0.0, norm_values ), # mask nonzero probabilities for plotting xlimits=x_lims, ylimits=y_lims, xlabel=xtitle, ylabel=ytitle, ax=ax, **opts) # add lep category ax.text(0.02, 0.91, "%s\n%s" % (lep_cats[lepcat], jet_mults['3Jets']), fontsize=rcParams['font.size'], horizontalalignment='left', verticalalignment='bottom',
for sig in samples: boson, mass, width, shape = sig.split( '_') opts = Plotter.styles.styles[width] pos_histo = histo[ '%s_pos' % sig, jmult, btagregion, lepcat].integrate( 'jmult' ).integrate('lepcat').integrate( 'btag').integrate('dataset') Plotter.plot_1D( pos_histo.values()[()], pos_histo.dense_axes()[0].edges(), ax=pos_ax, xlimits=x_lims, xlabel=xtitle, color=opts['color'], label=opts['name'], histtype='step') neg_histo = histo[ '%s_neg' % sig, jmult, btagregion, lepcat].integrate( 'jmult' ).integrate('lepcat').integrate( 'btag').integrate('dataset') Plotter.plot_1D( neg_histo.values()[()], neg_histo.dense_axes()[0].edges(), ax=neg_ax, xlimits=x_lims,
1, gridspec_kw={"height_ratios": (3, 1)}, sharex=True) else: fig, ax = plt.subplots() fig.subplots_adjust(hspace=.07) hslice = histo[:, btag_applied, jmult, lepcat, btagregion].integrate( 'jmult' ).integrate('lepcat').integrate( 'btag').integrate('btagging') if hname == 'Jets_njets': print(jmult) yields_txt, yields_json = Plotter.get_samples_yield_and_frac( hslice, data_lumi_year['%ss' % args.lepton] / 1000.) plt_tools.print_table( yields_txt, filename='%s/%s_%s_yields_and_fracs.txt' % (pltdir, jmult, args.lepton), print_output=True) with open( '%s/%s_%s_yields_and_fracs.json' % (pltdir, jmult, args.lepton), 'w') as out: out.write(prettyjson.dumps(yields_json)) if rebinning != 1: xaxis_name = hslice.dense_axes()[0].name hslice = hslice.rebin(xaxis_name, rebinning)
print(', '.join([jmult, lepcat, btagregion, hname, sample])) fig, ax = plt.subplots() fig.subplots_adjust(hspace=.07) hslice = histo[sample, jmult, btagregion, lepcat].integrate('jmult').integrate( 'lepcat').integrate('btag').integrate( 'dataset') if hslice.values(): Plotter.plot_1D( *hslice.values().values(), histo.axis(xaxis_name).edges(), xlimits=(-5., 5.) if sample.startswith('QCD') else x_lims, xlabel=xtitle, ylabel='Events (Unweighted)', ax=ax, label='%s\n%s' % (sample, format(lumi_correction[sample], '.3f'))) ax.legend(loc='upper right') # add lepton/jet multiplicity label ax.text(0.02, 0.88, "%s, %s\n%s" % (lep_cats[lepcat], jet_mults[jmult], btag_cats[btagregion]), fontsize=rcParams['font.size'] * 0.75, horizontalalignment='left', verticalalignment='bottom',
sig_hslice = hslice[Plotter.signal_samples] SM_hslice = hslice[Plotter.nonsignal_samples] # plot signal if sig_hslice.values(): for signal in sig_hslice.values().keys(): fig, ax = plt.subplots() fig.subplots_adjust(hspace=.07) sig_hist = sig_hslice[signal].integrate( 'process') Plotter.plot_1D( *sig_hist.values().values(), sig_hist.axis(xaxis_name).edges(), xlabel=new_xtitle, xlimits=x_lims, ax=ax, label='%s, %s' % (plt_tools.get_label( signal[0], styles.styles), signal[0].split('_')[-1])) ax.legend(loc='upper right') # add lepton/jet multiplicity label ax.text(0.02, 0.85, "%s, %s\n%s" % (lep_cats[lepcat], jet_mults[jmult], btag_cats[btagregion]), horizontalalignment='left', verticalalignment='bottom', transform=ax.transAxes) hep.cms.label(
(pltdir, jmult, lep), print_output=True) with open( '%s/%s_%s_yields_and_fracs.json' % (pltdir, jmult, lep), 'w') as out: out.write(prettyjson.dumps(yields_json)) if rebinning != 1: xaxis_name = hslice.dense_axes()[0].name hslice = hslice.rebin(xaxis_name, rebinning) ## plot mc+data if withData: ax, rax = Plotter.plot_stack1d(ax, rax, hslice, xlabel=xtitle, xlimits=x_lims) else: ax = Plotter.plot_mc1d(ax, hslice, xlabel=xtitle, xlimits=x_lims) ## set axes labels and titles # add lepton/jet multiplicity label ax.text(0.02, 0.92, "%s, %s" % (objtypes['Lep'][lep], jet_mults[jmult]), fontsize=hep.styles_cms.CMS['font.size'] * 0.75, horizontalalignment='left',
for ttbar_type in sorted(set([key[1] for key in histo.values().keys()])): decay_label = "%s %s" % (plt_tools.get_label(dataset, styles), ttdecay_types[ttbar_type]) #decay_label = ttdecay_types[ttbar_type] pltdir = os.path.join(outdir, dataset, ttbar_type) if isSignal(dataset) else os.path.join(outdir, "ttJets", ttbar_type) if not os.path.isdir(pltdir): os.makedirs(pltdir) for genobj, (objlabel, mass_range) in objects[ttbar_type].items(): new_xtitle = xtitle.replace("obj", objlabel) if hname == "mass": x_lims = mass_range tt_histo = histo[genobj, ttbar_type].integrate("objtype").integrate("ttdecay") fig, ax = plt.subplots() fig.subplots_adjust(hspace=.07) Plotter.plot_1D(tt_histo.values()[()], tt_histo.axis(xaxis_name).edges(), xlabel=new_xtitle, xlimits=x_lims, ax=ax, histtype="step") hep.cms.label(ax=ax, data=False, paper=False, year=args.year, lumi=round(lumi_to_use, 1)) #set_trace() # add lepton/jet multiplicity label if isSignal(dataset): if "Int" in dataset: sig_type = "Int, w $<$ 0" if "Int_neg" in dataset else "Int, w $>$ 0" else: sig_type = "Res" sig_label = "%s\n%s" % (decay_label, sig_type) ax.text( 0.95, 0.85, sig_label, horizontalalignment="right", verticalalignment="bottom", transform=ax.transAxes ) else:
]) mc_hdict = plt_tools.add_coffea_files( mc_fnames) if len(mc_fnames) > 1 else load(mc_fnames[0]) # get hists mc_nTrueInt_histo = mc_hdict['PU_nTrueInt'] data_input_dir = os.path.join(proj_dir, 'inputs', 'data', base_jobid, 'Pileup') # central data_pu_central = convert_histo_root_file( os.path.join(data_input_dir, '%s_data.meta.pu.root' % args.year)) data_pu_dict = Plotter.root_converters_dict_to_hist(data_pu_central, vars=['pileup'], sparse_axes_list=[{ 'name': 'dataset', 'label': "Event Process", 'fill': 'data' }]) # up data_pu_up = convert_histo_root_file( os.path.join(data_input_dir, '%s_data.meta.pu_up.root' % args.year)) data_pu_up_dict = Plotter.root_converters_dict_to_hist(data_pu_up, vars=['pileup'], sparse_axes_list=[{ 'name': 'dataset', 'label': "Event Process", 'fill':