def region_comparison_plot(tag):
    """Compare the efficiency measured in different regions, year by year.

    For 2017 and 2018 separately, the efficiency tables found under
    ``output/{tag}/`` are read with ``get_xy`` and overlaid on the upper pad.
    The lower pad shows every region except '1m' divided by the '1m' values;
    the error bar there is the region's own uncertainty divided by the same
    '1m' values. Each figure is saved as ``region_comparison_data_{year}.pdf``
    in ``./output/{tag}``.
    """
    region_names = ['1m', '2m', '1e', '1m_hlt', '2m_hlt']
    reference = '1m'
    outdir = f"./output/{tag}"

    for year in [2017, 2018]:
        style = markers('data')
        style['markersize'] = 5.
        style['fillstyle'] = 'none'
        # Remove the 'emarker' entry (if present) before the dict is
        # unpacked into the errorbar calls below.
        style.pop('emarker', '')

        fig, ax, rax = fig_ratio()

        centers, effs, errs = {}, {}, {}
        for region in region_names:
            # Region names ending in 'e' are read from the EGamma table,
            # all others from the SingleMuon table.
            if region.endswith('e'):
                table = f'output/{tag}/table_{region}_EGamma_{year}.txt'
            else:
                table = f'output/{tag}/table_{region}_SingleMuon_{year}.txt'

            centers[region], effs[region], errs[region] = get_xy(table)
            style['color'] = colors[region]
            ax.errorbar(
                centers[region],
                effs[region],
                yerr=errs[region],
                label=f'{region} region',
                **style,
            )

            # The reference region is listed first, so its values are
            # available for every later region; it is not divided by itself.
            if region != reference:
                rax.errorbar(
                    centers[reference],
                    effs[region] / effs[reference],
                    errs[region] / effs[reference],
                    **style,
                )

        # Upper pad cosmetics
        ax.legend()
        ax.set_ylabel("Efficiency")
        ax.xaxis.set_major_locator(MultipleLocator(200))
        ax.xaxis.set_minor_locator(MultipleLocator(50))
        ax.yaxis.set_major_locator(MultipleLocator(0.05))
        ax.yaxis.set_minor_locator(MultipleLocator(0.01))
        ax.set_ylim(0.9, 1.02)
        ax.grid(1)

        # Lower pad cosmetics
        rax.set_ylim(0.9, 1.1)
        rax.grid(1)
        rax.set_xlabel("Recoil or $p_{T}^{miss}$ (GeV)")
        rax.set_ylabel(r"Ratio to single-$\mu$")

        # Luminosity (top right) and year (top left), placed just above the
        # upper pad in its axes coordinates.
        corner = dict(
            fontsize=16,
            verticalalignment='bottom',
            transform=ax.transAxes,
        )
        plt.text(
            1., 1.,
            r"$\approx$ %.1f fb$^{-1}$ (13 TeV)" % lumi(year),
            horizontalalignment='right',
            **corner,
        )
        plt.text(0., 1., f'{year}', horizontalalignment='left', **corner)

        fig.savefig(pjoin(outdir, f'region_comparison_data_{year}.pdf'))
        fig.clear()
        plt.close(fig)
def pdf_plot(acc):
    """Plot the 'gen_vpt' distribution for every PDF entry of two datasets.

    For each of the two hard-coded 2017 datasets, the 'gen_vpt' histogram
    from ``acc`` is rebinned to 10 bins between 0 and 2000, passed through
    ``merge_extensions`` / ``scale_xs_lumi`` / ``merge_datasets`` and reduced
    to that dataset. Every entry on the 'pdf' axis other than 'none' is drawn
    on the upper pad, and its ratio to the 'none' entry on the lower pad.
    One file ``{dataset}.pdf`` is written to ``./output/pdfstudy/``.

    Parameters
    ----------
    acc
        Accumulator-like mapping providing the 'gen_vpt' histogram; it is
        also handed to ``merge_extensions``.
    """
    outdir = './output/pdfstudy/'
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(outdir, exist_ok=True)

    datasets = [
        'WJetsToLNu_HT_MLM_2017',
        'DYJetsToLL_M-50_HT_MLM_2017',
    ]
    for ds in datasets:
        fig, ax, rax = fig_ratio()

        h = acc['gen_vpt']
        h = h.rebin(h.axis('vpt'), hist.Bin("vpt", r"$p_{T}^{V}$ (GeV)", 10, 0, 2000))
        h = merge_extensions(h, acc, reweight_pu=False)
        scale_xs_lumi(h)
        h = merge_datasets(h)
        h = h.project(h.axis('dataset'), ds)

        # Only these entries are drawn, so only these get a legend entry.
        variations = [
            pdf for pdf in h.axis('pdf').identifiers() if str(pdf) != 'none'
        ]
        nominal = h.project('pdf', 'none')

        for pdf in variations:
            # Work on a copy: the shared module-level data_err_opts must not
            # keep the colour of whichever PDF was plotted last.
            err_opts = dict(data_err_opts, color=colors[str(pdf)])
            variation = h.project('pdf', pdf)

            hist.plot1d(
                variation,
                error_opts=err_opts,
                ax=ax,
                overflow='all',
                clear=False)
            hist.plotratio(
                variation,
                nominal,
                ax=rax,
                denom_fill_opts={},
                guide_opts={},
                unc='num',
                overflow='all',
                error_opts=err_opts,
                clear=False,
            )

        ax.set_ylim(1e-3, 1e8)
        rax.set_ylim(0.9, 1.6)
        ax.set_yscale('log')

        # Legend entries appear in plotting order. Pair them with the plotted
        # PDFs directly instead of indexing with a counter that also counts
        # the skipped 'none' entry (which shifted labels / overran the list
        # whenever 'none' was not the last identifier).
        leg = ax.legend()
        for text, pdf in zip(leg.get_texts(), variations):
            text.set_text(str(pdf))

        fig.savefig(pjoin(outdir, f'{ds}.pdf'))
        plt.close(fig)
def main():
    """
    A script to easily compare template files between different runs.

    Usage: ./compare_templates.py /path/to/first/template_file.root /path/to/second/template_file.root

    The script will loop over all templates in the files and create a comparison plot for each of them.
    All plots are dumped into a folder for inspection.

    Templates that are missing from the second file are reported and skipped.
    Only templates whose name matches the filter regular expression are
    plotted. A table with the summed contents of both templates and their
    ratio is printed at the end.
    """
    args = parse_commandline()

    # Based on input locations, derive tag names to identify the files
    tag1 = os.path.basename(os.path.dirname(args.fname1))
    tag2 = os.path.basename(os.path.dirname(args.fname2))

    # Filter on template names
    regex = re.compile(args.filter)

    # The ratio-pad limits do not depend on the template: parse them once,
    # which also surfaces a malformed value before any plotting is done.
    rlim = [float(value) for value in args.rlim.split(",")]

    # Convert to dictionary
    h1 = make_dict(args.fname1)
    h2 = make_dict(args.fname2)

    # Create plot folder (race-free, no separate existence check)
    os.makedirs(args.outdir, exist_ok=True)

    # Do the actual plotting
    table = []
    for key in tqdm.tqdm(h1.keys()):
        if key not in h2:
            # Nothing to compare against: report and move on without
            # building a figure first.
            print("Found missing key ", key)
            continue
        if not regex.match(str(key)):
            continue

        first, second = h1[key], h2[key]

        # Summary table entry, based on the contents including overflow
        v1 = np.sum(first.allvalues)
        v2 = np.sum(second.allvalues)
        if v2 == v1:
            ratio = 1
        elif v1 == 0:
            ratio = "-"
        else:
            ratio = v2 / v1
        table.append([key, v1, v2, ratio])

        fig, ax, rax = fig_ratio()
        centers = 0.5 * np.sum(first.bins, axis=1)
        edges = np.unique(first.bins)

        # Top panel: both templates, overflow folded into the last bin
        hep.histplot(
            sum_overflow_into_last_bin(first.allvalues),
            edges,
            ax=ax,
            label=f"{tag1}, Integral={np.sum(first.values):.1f}",
            color='navy',
        )
        hep.histplot(
            sum_overflow_into_last_bin(second.allvalues),
            edges,
            yerr=np.sqrt(second.variances),
            ax=ax,
            label=f"{tag2}, Integral={np.sum(second.values):.1f}",
            color='crimson',
            marker='o',
            markersize=5,
            histtype='errorbar',
        )
        ax.legend()

        # Bottom panel: ratio plot, only where the first template is non-zero
        valid = first.values != 0
        rax.errorbar(
            centers[valid],
            second.values[valid] / first.values[valid],
            np.sqrt(second.variances[valid]) / first.values[valid],
            linestyle='none',
            marker='o',
            color="crimson",
        )

        # Add indicators for bins where we could not calculate the ratio
        if np.any(~valid):
            rax.plot(
                centers[~valid],
                np.ones(np.sum(~valid)),
                'x',
                color="k",
                fillstyle="none",
            )

        # Aesthetics
        ax.set_title(key)
        rax.set_ylim(*rlim)
        rax.set_xlabel("Recoil (GeV)")
        rax.set_ylabel("Ratio")
        ax.set_ylabel("Events / bin")
        ax.set_yscale("log")

        try:
            ax.set_ylim(
                0.5 * min(second.values[second.values > 0]),
                1.5 * max(second.values),
            )
        except ValueError:
            # No usable y-range (e.g. no positive bin in the second
            # template): drop this plot.
            plt.close(fig)
            continue

        rax.grid(linestyle='--')
        fig.savefig(pjoin(args.outdir, f"{key}.png"))
        plt.close(fig)

    print(tabulate(table))
def sf_comparison_plot(tag):
    """Compare data/MC scale factors between regions, year by year.

    For 2017 and 2018 separately, each region's scale factor is the ratio of
    the values read (via ``get_xy``) from a data table and an MC table under
    ``output/{tag}/``, with the uncertainty given by ``ratio_unc``. All
    regions are overlaid on the upper pad; the lower pad shows every region
    except '1m' divided by the '1m' scale factor. Each figure is saved as
    ``sf_comparison_{year}.pdf`` in ``./output/{tag}``.
    """
    region_names = ['1m', '2m', '1m_hlt', '2m_hlt']
    reference = '1m'
    outdir = f"./output/{tag}"

    for year in [2017, 2018]:
        style = markers('data')
        style['markersize'] = 5
        style['fillstyle'] = 'none'
        # Remove the 'emarker' entry (if present) before the dict is
        # unpacked into the errorbar calls below.
        style.pop('emarker', '')

        fig, ax, rax = fig_ratio()

        centers, sfs, sf_errs = {}, {}, {}
        for region in region_names:
            # Numerator (data) and denominator (MC) tables for this region
            if '1e' in region:
                fnum = f'output/{tag}/table_{region}_EGamma_{year}.txt'
                fden = f'output/{tag}/table_{region}_WJetsToLNu-MLM_{year}.txt'
            elif '1m' in region:
                fnum = f'output/{tag}/table_{region}_SingleMuon_{year}.txt'
                fden = f'output/{tag}/table_{region}_WJetsToLNu-MLM_{year}.txt'
            elif '2m' in region:
                fnum = f'output/{tag}/table_{region}_SingleMuon_{year}.txt'
                fden = f'output/{tag}/table_{region}_DYNJetsToLL_M-50-MLM_{year}.txt'

            xnum, ynum, yerrnum = get_xy(fnum)
            _, yden, yerrden = get_xy(fden)

            centers[region] = xnum
            sfs[region] = ynum / yden
            sf_errs[region] = ratio_unc(ynum, yden, yerrnum, yerrden)

            style['color'] = colors[region]
            style['marker'] = region_marker[region]
            ax.errorbar(
                centers[region],
                sfs[region],
                yerr=sf_errs[region],
                label=f'{region} region',
                **style,
            )

            # The reference region is listed first, so its scale factor is
            # available for every later region; it is not divided by itself.
            if region != reference:
                rax.errorbar(
                    centers[reference],
                    sfs[region] / sfs[reference],
                    ratio_unc(
                        sfs[region],
                        sfs[reference],
                        sf_errs[region],
                        sf_errs[reference],
                    ),
                    **style,
                )

        # Upper pad cosmetics
        ax.legend()
        ax.set_ylabel("Data / MC SF")
        ax.xaxis.set_major_locator(MultipleLocator(200))
        ax.xaxis.set_minor_locator(MultipleLocator(50))
        ax.yaxis.set_major_locator(MultipleLocator(0.05))
        ax.yaxis.set_minor_locator(MultipleLocator(0.01))
        ax.set_ylim(0.9, 1.1)
        ax.grid(1)

        # Lower pad cosmetics
        rax.set_ylim(0.95, 1.05)
        rax.yaxis.set_major_locator(MultipleLocator(0.05))
        rax.yaxis.set_minor_locator(MultipleLocator(0.01))
        rax.grid(1)
        rax.set_xlabel("Recoil or $p_{T}^{miss}$ (GeV)")
        rax.set_ylabel(r"Ratio to single-$\mu$")

        # Luminosity (top right) and year (top left), placed just above the
        # upper pad in its axes coordinates.
        corner = dict(
            fontsize=16,
            verticalalignment='bottom',
            transform=ax.transAxes,
        )
        plt.text(
            1., 1.,
            r"$\approx$ %.1f fb$^{-1}$ (13 TeV)" % lumi(year),
            horizontalalignment='right',
            **corner,
        )
        plt.text(0., 1., f'{year}', horizontalalignment='left', **corner)

        fig.savefig(pjoin(outdir, f'sf_comparison_{year}.pdf'))
        fig.clear()
        plt.close(fig)
# fit(tag, year): fit a `sigmoid` curve to efficiency points from data and MC
# and save a comparison plot.
#
# Steps performed by the code below:
#   * Creates './output/gamma/fit/' (an existing directory is tolerated).
#   * Calls load(tag, <dataset>, year) for 'JetHT' (stored under 'data') and
#     'GJets_HT_MLM' (stored under 'mc'); each call yields x, eff, eff_up and
#     eff_down.
#   * For each of the two, runs curve_fit(sigmoid, ...) with
#     sigma = 0.5 * (eff_up - eff_down) and p0 = [1e-3, 200, 0.1, 1], then
#     uses minimize(), started at x0=230, on |sigmoid(x, *pars) - 0.95| to
#     locate the point quoted as "> 95% @ ... GeV" on the plot.
#   * Upper pad: the points with error bars, both fitted curves evaluated on
#     1000 points spanning the data x-range, the fitted parameters a, b, c, d
#     as text, and the luminosity from lumi(year).
#   * Lower pad: eff['data'] / eff['mc'] with the data error divided by
#     eff['mc'], plus the ratio of the two fitted curves from the data 95%
#     point upwards, bracketed by the same ratio scaled by 0.99 and 1.01.
#   * Saves the figure as 'fit_{tag}_{year}.pdf' in the output directory and
#     clears it.
# Nothing is returned. `handles` is assigned but not used afterwards.
#
# NOTE(review): the line breaks of this definition have been lost, so it does
# not parse as written. In particular, the ax.plot(...) call that starts the
# second physical line follows a dangling '#' at the end of the first one; it
# cannot be told from here whether that call is live code or one more
# commented-out line. Confirm against the file history before re-indenting.
def fit(tag, year): outdir = './output/gamma/fit/' try: os.makedirs(outdir) except FileExistsError: pass x, eff, eff_up, eff_down = {}, {}, {}, {} x['data'], eff['data'], eff_up['data'], eff_down['data'] = load(tag, 'JetHT', year) x['mc'], eff['mc'], eff_up['mc'], eff_down['mc'] = load(tag, 'GJets_HT_MLM', year) pars = {} cross = {} for key in ['data','mc']: pars[key], _ = curve_fit(sigmoid, x[key], eff[key], sigma=0.5*(eff_up[key]-eff_down[key]), p0=[1e-3,200,0.1,1]) cross[key] = minimize( lambda x: np.abs(sigmoid(x, *pars[key]) - 0.95),x0=230) fig, ax, rax = fig_ratio() xinterp = np.linspace(min(x['data']), max(x['data']), 1000) handles = [] ax.errorbar(x['data'], eff['data'], 0.5*(eff_up['data']-eff_down['data']),fmt='o',label='Data',color=colors["data"]) ax.errorbar(x['mc'], eff['mc'], 0.5*(eff_up['mc']-eff_down['mc']),fmt='s',label='MC',fillstyle='none',color=colors["mc"]) ax.plot(xinterp, sigmoid(xinterp, *pars['data']), label='Data fit',color=colors["data"],zorder=-1) ax.plot(xinterp, sigmoid(xinterp, *pars['mc']), label='MC fit',color=colors["mc"],zorder=-1,linestyle='--') ax.set_ylim(0.,1.1) ax.set_xlim(100,1100) ax.legend() ax.text(350,.4,'f(x) = c + (d-c) / (1 + exp(-a * (x-b)))') ax.text( 300, 0.1, '\n'.join([ f"a = {pars['data'][0]:.3f} / GeV", f"b = {pars['data'][1]:.2f} GeV", f"c = {pars['data'][2]:.3f}", f"d = {pars['data'][3]:.3f}" ]), color=colors['data'] ) ax.text( 600, 0.1, '\n'.join([ f"a = {pars['mc'][0]:.3f} / GeV", f"b = {pars['mc'][1]:.2f} GeV", f"c = {pars['mc'][2]:.3f}", f"d = {pars['mc'][3]:.3f}" ]), color=colors['mc'] ) ax.text(700,0.8, "\n".join([ f"Data > 95% @ {cross['data'].x[0]:.0f} GeV", f"MC > 95% @ {cross['mc'].x[0]:.0f} GeV", ]) ) ax.text(1., 1., r"%.1f fb$^{-1}$ (13 TeV)" % lumi(year), fontsize=16, horizontalalignment='right', verticalalignment='bottom', transform=ax.transAxes ) rax.set_ylim(0.95,1.1) rax.grid() # rax.plot([cross['data'].x[0],cross['data'].x[0]], [0.8,1.05],color='k',) # 
ax.plot([cross['data'].x[0],cross['data'].x[0]], [0.,1.05],color='k',linestyle='--') # ax.plot([cross['mc'].x[0],cross['mc'].x[0]], [0.9,1.05],color='r') # ax.plot([215,215],[0.9,1.05]) rax.errorbar(x["data"], eff["data"] / eff["mc"], 0.5*(eff_up["data"] - eff_down["data"]) / eff["mc"],fmt='o',label='Data / MC',color=colors['data']) rxinterp = np.linspace(cross['data'].x[0], max(xinterp),1000) rax.plot(rxinterp, sigmoid(rxinterp, *pars['data']) / sigmoid(rxinterp, *pars['mc']),label=f"Data / MC fit ratio, plateau at {100*(sigmoid(rxinterp, *pars['data']) / sigmoid(rxinterp, *pars['mc']))[-1]:.1f} %",color='k') rax.plot(rxinterp, 0.99*sigmoid(rxinterp, *pars['data']) / sigmoid(rxinterp, *pars['mc']), label='1% uncertainty on fit',linestyle='--',color='gray') rax.plot(rxinterp, 1.01*sigmoid(rxinterp, *pars['data']) / sigmoid(rxinterp, *pars['mc']),linestyle='--',color='gray') # rxinterp2 = np.linspace(min(xinterp),cross['data'].x[0], 1000) # rax.plot(rxinterp2, sigmoid(rxinterp2, *pars['data']) / sigmoid(rxinterp2, *pars['mc']),color='k',linestyle=':') rax.legend() rax.set_ylabel("Ratio") ax.set_ylabel("Trigger efficiency") ax.set_xlabel("Photon $p_{T}$ (GeV)") rax.set_xlabel("Photon $p_{T}$ (GeV)") ax.figure.savefig(pjoin(outdir, f'fit_{tag}_{year}.pdf')) ax.figure.clf()
def data_mc_comparison_plot(tag, ymin=0, ymax=1.1, distribution='recoil', jeteta_config=None, output_format='pdf'):
    """Plot data and MC efficiencies per region and year and store their ratio.

    For every region (chosen from ``tag``) and for 2017 and 2018, the data
    and MC tables under ``output/{tag}/`` are read with ``get_xy``. Both are
    drawn on the upper pad, their ratio (with ``ratio_unc`` uncertainty) on
    the lower pad, and the figure is saved to ``./output/{tag}``. The ratio
    is also written, together with the bin edges, to
    ``./output/{tag}/trig_sf.root`` under the key ``{tag}_{region}_{year}``;
    NaN and infinite ratios are replaced by 1 in the stored values only.
    Region/year combinations with a missing input table are reported and
    skipped.

    Parameters
    ----------
    tag : str
        Name of the output sub-directory. A tag containing 'gamma' selects
        the photon regions, one containing 'recoil' selects '1m' and '2m',
        anything else selects '1m', '2m', '1e' and '2m_hlt'.
    ymin, ymax : float
        y-axis limits of the upper pad.
    distribution : str
        Controls the extent of the vertical guide line and the y tick
        spacing ('recoil' or 'mjj'). It does not change which tables are
        read.
    jeteta_config : str or None
        If given, appended as ``_{jeteta_config}`` to the recoil table names
        and to the figure name, and printed on the plot.
    output_format : str
        File extension of the saved figures.
    """
    if 'gamma' in tag:
        regions = [
            'g_HLT_PFHT1050',
            'g_HLT_PFHT590',
            'g_HLT_PFHT680',
            'g_HLT_PFHT780',
            'g_HLT_PFHT890'
        ]
    elif 'recoil' in tag:
        regions = ['1m', '2m']
    else:
        regions = ['1m', '2m', '1e', '2m_hlt']

    opts = markers('data')
    # Remove the 'emarker' entry (if present) before the dict is unpacked
    # into the errorbar calls below.
    opts.pop('emarker', '')

    # Optional suffix shared by the recoil table names and the figure name
    suffix = f"_{jeteta_config}" if jeteta_config else ""

    outdir = f"./output/{tag}"
    outpath = pjoin(outdir, 'trig_sf.root')
    try:
        outfile = uproot.recreate(outpath)
    except OSError:
        outfile = uproot.update(outpath)

    # Use the file as a context manager so it is closed on every exit path.
    with outfile:
        for year in [2017, 2018]:
            for region in regions:
                if '1e' in region:
                    fnum = f'output/{tag}/table_{region}_met_EGamma_{year}.txt'
                    fden = f'output/{tag}/table_{region}_met_WJetsToLNu_HT_MLM_{year}.txt'
                    xlabel = "$p_{T}^{miss}$ (GeV)"
                elif '1m' in region:
                    fnum = f'output/{tag}/table_{region}_recoil_SingleMuon_{year}{suffix}.txt'
                    fden = f'output/{tag}/table_{region}_recoil_WJetsToLNu_HT_MLM_{year}{suffix}.txt'
                    xlabel = "Recoil (GeV)"
                elif '2m' in region:
                    fnum = f'output/{tag}/table_{region}_recoil_SingleMuon_{year}{suffix}.txt'
                    fden = f'output/{tag}/table_{region}_recoil_VDYJetsToLL_M-50_HT_MLM_{year}{suffix}.txt'
                    xlabel = "Recoil (GeV)"
                elif 'g_' in region:
                    fnum = f'output/{tag}/table_{region}_photon_pt0_JetHT_{year}.txt'
                    fden = f'output/{tag}/table_{region}_photon_pt0_GJets_HT_MLM_{year}.txt'
                    xlabel = "Photon $p_{T}$ (GeV)"

                # Check the inputs before a figure exists, so that skipping
                # a region does not leave an unclosed figure behind.
                if not os.path.exists(fnum):
                    print(f"File not found {fnum}")
                    continue
                if not os.path.exists(fden):
                    print(f"File not found {fden}")
                    continue

                xnum, xedgnum, ynum, yerrnum = get_xy(fnum)
                xden, _, yden, yerrden = get_xy(fden)

                xsf = xnum
                ysf = ynum / yden
                ysferr = ratio_unc(ynum, yden, yerrnum, yerrden)

                fig, ax, rax = fig_ratio()

                opts['color'] = 'k'
                ax.errorbar(xnum, ynum, yerr=yerrnum, label=f'Data, {region} region', **opts)
                opts['color'] = 'r'
                ax.errorbar(xden, yden, yerr=yerrden, label=f'MC, {region} region', **opts)

                # Vertical guide line on both pads
                if 'g_' in region:
                    ax.plot([215, 215], [0.9, 1.1], color='blue')
                    rax.plot([215, 215], [0.95, 1.05], color='blue')
                elif distribution == 'recoil':
                    ax.plot([250, 250], [0.0, 1.1], color='blue')
                    rax.plot([250, 250], [0.95, 1.05], color='blue')
                else:
                    ax.plot([250, 250], [0.9, 1.1], color='blue')
                    rax.plot([250, 250], [0.95, 1.05], color='blue')

                opts['color'] = 'k'
                rax.errorbar(xsf, ysf, ysferr, **opts)

                ax.legend()
                ax.set_ylabel("Efficiency")
                ax.xaxis.set_major_locator(MultipleLocator(200))
                ax.xaxis.set_minor_locator(MultipleLocator(50))
                ax.set_ylim(ymin, ymax)
                ax.grid(1)

                if distribution == 'mjj':
                    ax.yaxis.set_major_locator(MultipleLocator(0.05))
                    ax.yaxis.set_minor_locator(MultipleLocator(0.01))
                elif distribution == 'recoil':
                    ax.yaxis.set_major_locator(MultipleLocator(0.1))
                    ax.yaxis.set_minor_locator(MultipleLocator(0.05))

                rax.set_xlabel(xlabel)
                rax.set_ylabel("Data / MC SF")
                rax.set_ylim(0.95, 1.05)
                rax.yaxis.set_major_locator(MultipleLocator(0.05))
                rax.yaxis.set_minor_locator(MultipleLocator(0.01))
                rax.grid(1)

                plt.text(1., 1., r"$\approx$ %.1f fb$^{-1}$ (13 TeV)" % lumi_by_region(region, year),
                         fontsize=16,
                         horizontalalignment='right',
                         verticalalignment='bottom',
                         transform=ax.transAxes)
                plt.text(1., 0.95, f'{jeteta_config if jeteta_config else ""}',
                         fontsize=12,
                         horizontalalignment='right',
                         verticalalignment='bottom',
                         transform=ax.transAxes)
                plt.text(0., 1., f'{year}',
                         fontsize=16,
                         horizontalalignment='left',
                         verticalalignment='bottom',
                         transform=ax.transAxes)

                fig.savefig(
                    pjoin(
                        outdir,
                        f'data_mc_comparison_{region}_{year}{suffix}.{output_format}'
                    ))
                fig.clear()
                plt.close(fig)

                # Sorted, de-duplicated bin edges
                vals = np.unique(xedgnum)
                # Replace undefined ratios (NaN or +-inf) by 1 for storage;
                # done after plotting so the figure is unaffected.
                ysf[~np.isfinite(ysf)] = 1
                outfile[f'{tag}_{region}_{year}'] = (ysf, vals)
def extract_yields_in_cr(acc, distribution, region='cr_vbf_qcd_rs', year=2017):
    '''Calculate the data - (nonQCD MC) in the QCD CR.

    The histogram for ``distribution`` is loaded from ``acc``, merged and
    scaled, rebinned if ``BINNINGS`` has an entry for it, and reduced to
    ``region``. Data and stacked MC are drawn on the upper pad and their
    difference on the lower pad; the figure is saved as
    ``./output/qcd_cr/qcd_cr_{distribution}.pdf``.

    Returns the data histogram with the MC subtracted, integrated over the
    dataset axis.
    '''
    acc.load(distribution)
    histo = merge_extensions(acc[distribution], acc)
    scale_xs_lumi(histo)
    histo = merge_datasets(histo)

    if distribution in BINNINGS:
        rebin_axis = BINNINGS[distribution]
        histo = histo.rebin(rebin_axis.name, rebin_axis)

    histo = histo.integrate('region', region)

    # Dataset selectors: one data sample, all MC samples matching the regex
    data = f'MET_{year}'
    mc = re.compile(
        f'(ZJetsToNuNu.*|EW.*|Top_FXFX.*|Diboson.*|DYJetsToLL_M-50_HT_MLM.*|WJetsToLNu.*HT.*).*{year}'
    )

    fig, ax, rax = fig_ratio()

    marker_style = {
        'linestyle': 'none',
        'marker': '.',
        'markersize': 10.,
        'color': 'k',
        'elinewidth': 1,
    }

    # Upper pad: data points on top of the stacked MC
    hist.plot1d(
        histo[data],
        ax=ax,
        overlay='dataset',
        binwnorm=1,
        error_opts=marker_style,
    )
    hist.plot1d(
        histo[mc],
        ax=ax,
        overlay='dataset',
        binwnorm=1,
        stack=True,
        clear=False,
    )

    ax.set_yscale('log')
    ax.set_ylim(1e-4, 1e6)
    ax.set_ylabel('Events / GeV')
    ax.yaxis.set_ticks_position('both')

    # Relabel legend entries; when several patterns match a label, the one
    # that comes last in PRETTY_LEGEND_LABELS is the one that sticks.
    handles, labels = ax.get_legend_handles_labels()
    for handle, label in zip(handles, labels):
        for pattern, pretty in PRETTY_LEGEND_LABELS.items():
            if not re.match(pattern, label):
                continue
            handle.set_label(pretty)

    ax.legend(title='VBF QCD CR', handles=handles, ncol=2)

    # Calculate data - MC: flip the sign of the MC sum and add it to data
    h_data = histo[data].integrate('dataset')
    h_mc = histo[mc].integrate('dataset')
    h_mc.scale(-1)
    h_data.add(h_mc)

    # Plot data - MC on the bottom pad
    hist.plot1d(h_data, ax=rax, binwnorm=1)
    rax.set_ylabel('(Data - MC) / GeV')
    rax.set_ylim(1e-3, 1e1)
    rax.set_yscale('log')
    rax.get_legend().remove()
    rax.yaxis.set_ticks_position('both')

    outdir = './output/qcd_cr'
    try:
        os.makedirs(outdir)
    except FileExistsError:
        # Directory is already there
        pass

    outpath = pjoin(outdir, f'qcd_cr_{distribution}.pdf')
    fig.savefig(outpath)
    plt.close(fig)
    print(f'File saved: {outpath}')

    # Return the QCD yield
    return h_data