def plot_ht_dist(acc, regex, tag):
    '''Given the accumulator and the dataset regex, plot the HT distribution.

    The 'lhe_ht' histogram is loaded from ``acc``, merged and scaled, restricted
    to the datasets matching ``regex``, rebinned to 50 bins in [0, 4000] and
    saved to ``./output/{tag}_lhe_ht.pdf``.

    :param acc: Accumulator holding the 'lhe_ht' histogram
    :param regex: Regular expression selecting the dataset(s) to plot
    :type regex: string
    :param tag: Label used in the output file name; if it contains 'gjets',
                a vertical line is drawn at 600
    :type tag: string
    '''
    acc.load('lhe_ht')
    h = acc['lhe_ht']

    h = merge_extensions(h, acc, reweight_pu=False)
    scale_xs_lumi(h)
    h = merge_datasets(h)

    # Choose the relevant dataset(s)
    h = h[re.compile(regex)]

    new_ht_bins = hist.Bin('ht', r'$H_T \ (GeV)$', 50, 0, 4000)
    h = h.rebin('ht', new_ht_bins)

    # Plot the HT distribution
    fig, ax = plt.subplots(1, 1)
    hist.plot1d(h, ax=ax, overflow='all', binwnorm=True, overlay='dataset')
    ax.set_yscale('log')
    ax.set_ylim(1e-3, 1e6)

    if 'gjets' in tag:
        # Vertical marker spanning the full y range
        ax.plot([600, 600], [1e-3, 1e6])

    # exist_ok avoids the check-then-create race of exists() + mkdir()
    os.makedirs('./output', exist_ok=True)
    fig.savefig(f'./output/{tag}_lhe_ht.pdf')

    # Release the figure; otherwise repeated calls accumulate open figures
    plt.close(fig)
def legacy_limit_input_monojet(acc, outdir='./output', unblind=False):
    """Writes ROOT TH1s to file as a limit input

    One file ``legacy_limit_monojet_{year}.root`` is written per year
    (2017, 2018). When ``unblind`` is False, the ``<sr_j>_data`` entry of
    each file is filled from the ``<sr_j>_zjets`` entry.

    :param acc: Accumulator (processor output)
    :type acc: coffea.processor.accumulator
    :param outdir: Output directory
    :type outdir: string
    :param unblind: If True, the 'sr_j' region is also exported
    :type unblind: bool
    """
    distribution = 'recoil'

    regions = [
        'cr_2m_j', 'cr_1m_j', 'cr_2e_j', 'cr_1e_j', 'cr_g_j',
        'sr_j_no_veto_all'
    ]
    if unblind:
        regions.append('sr_j')

    os.makedirs(outdir, exist_ok=True)

    # Histogram prep (rebin, merge, scale) does not depend on year or
    # region, so do it once instead of once per (year, region) pair.
    h = copy.deepcopy(acc[distribution])
    newax = hist.Bin('recoil', 'Recoil (GeV)', recoil_bins_2016())
    h = h.rebin(h.axis(newax.name), newax)
    h = merge_extensions(h, acc)
    scale_xs_lumi(h)
    h = merge_datasets(h)

    for year in [2017, 2018]:
        signal = re.compile(f'(GluGlu|WH|ZH|ggZH|VBF).*(I|i)inv.*{year}')
        f = uproot.recreate(pjoin(outdir, f'legacy_limit_monojet_{year}.root'))
        data, mc = datasets(year, unblind=unblind)

        for region in regions:
            print(f'Region {region}')
            ih = h.integrate(h.axis('region'), region)

            for dataset in map(str, ih.axis('dataset').identifiers()):
                if not (data[region].match(dataset)
                        or mc[region].match(dataset)
                        or signal.match(dataset)):
                    continue
                print(f" Dataset: {dataset}")

                th1 = export1d(ih.integrate('dataset', dataset))
                try:
                    histo_name = f'{legacy_region_name(region)}_{legacy_dataset_name(dataset)}'
                except Exception:
                    # Datasets without a legacy name are skipped on purpose.
                    # Catching Exception (not a bare except) lets
                    # KeyboardInterrupt / SystemExit propagate.
                    print(f"Skipping {dataset}")
                    continue

                f[histo_name] = th1

        if not unblind:
            f[f'{legacy_region_name("sr_j")}_data'] = f[
                f'{legacy_region_name("sr_j")}_zjets']

    merge_legacy_inputs(outdir)
def plot_lhe_v_pt(acc, tag, regex, outputrootfile, pttype):
    '''Plot the 'gen_vpt' distribution and store the NLO/LO ratio.

    The upper pad overlays all datasets matching ``regex``; the lower pad shows
    the ratio of datasets matching '.*LHE.*' to those matching '.*HT.*',
    together with the curves obtained from ``get_old_kfac(tag)``. The plot is
    saved to ``./output/{tag}_gen_vpt.pdf``.

    :param acc: Accumulator holding the 'gen_vpt' histogram
    :param tag: Label used for the output file name, the old k-factor lookup
                and the key written to ``outputrootfile``
    :param regex: Regular expression selecting the datasets to use
    :param outputrootfile: Mapping-like object; ``outputrootfile[tag]`` is set
                           to the tuple (ratio values, vpt bin edges)
    :param pttype: Value of the 'type' axis to integrate over
    '''
    outdir = './output/'
    os.makedirs(outdir, exist_ok=True)

    fig, (ax, rax) = plt.subplots(2, 1,
                                  figsize=(7, 7),
                                  gridspec_kw={"height_ratios": (3, 1)},
                                  sharex=True)
    new_ax = hist.Bin(
        'vpt', 'LHE V $p_{T}$ (GeV)',
        list(range(80, 800, 40)) + list(range(800, 2000, 100)))

    # Only one distribution is handled; its name is also used in the file name
    dist = 'gen_vpt'
    h = copy.deepcopy(acc[dist])
    h = h.integrate('type', pttype)
    h = h.rebin(h.axis('vpt'), new_ax)

    h = merge_extensions(h, acc, reweight_pu=False)
    scale_xs_lumi(h)
    h = merge_datasets(h)
    h = h[re.compile(regex)]
    h = h.integrate('weight_type', 'nominal')
    h = h.integrate('weight_index', slice(-0.5, 0.5))

    hist.plot1d(h,
                overlay='dataset',
                overflow='all',
                binwnorm=True,
                ax=ax)

    lo = h[re.compile('.*HT.*')].integrate('dataset')
    nlo = h[re.compile('.*LHE.*')].integrate('dataset')

    hist.plotratio(nlo,
                   lo,
                   ax=rax,
                   denom_fill_opts={},
                   guide_opts={},
                   unc='num',
                   overflow='all',
                   error_opts=data_err_opts,
                   label='2017 NLO/LO ratio')

    # Overlay the previous k-factor, evaluated at its bin centers
    old = get_old_kfac(tag)
    old_x = 0.5 * (old.bins[:, 0] + old.bins[:, 1])
    rax.plot(old_x, old.values, 'ob-', label='2016 QCD k fac')
    rax.plot(old_x,
             old.values * pdfwgt_sf(old_x),
             'or-',
             label='2016 x ad-hoc DY pdfwgt SF')

    ax.set_yscale('log')
    ax.set_ylim(1e-3, 1e6)
    rax.set_ylim(0, 2)
    rax.legend()

    fig.savefig(pjoin(outdir, f'{tag}_{dist}.pdf'))
    # Release the figure once it is on disk
    plt.close(fig)

    sf_x = lo.axis('vpt').edges()
    sf_y = nlo.values()[()] / lo.values()[()]

    outputrootfile[tag] = (sf_y, sf_x)
def legacy_limit_input(acc, outdir='./output'):
    """Writes ROOT TH1s to file as a limit input

    One file ``legacy_limit_monov_{wp}_{year}.root`` is written per working
    point for year 2017. In each file the ``<sr_v>_data`` entry is filled
    from the ``<sr_v>_zjets`` entry.

    :param acc: Accumulator (processor output)
    :type acc: coffea.processor.accumulator
    :param outdir: Output directory
    :type outdir: string
    """
    distribution = 'recoil'

    os.makedirs(outdir, exist_ok=True)

    # Histogram prep (rebin, merge, scale) does not depend on working point
    # or region, so do it once up front.
    h = copy.deepcopy(acc[distribution])
    newax = hist.Bin('recoil', 'Recoil (GeV)', recoil_bins_2016())
    h = h.rebin(h.axis(newax.name), newax)
    h = merge_extensions(h, acc)
    scale_xs_lumi(h)
    h = merge_datasets(h)

    year = 2017
    signal = re.compile(f'.*(Hinv|HToInvisible).*{year}')

    for wp in ['tau21', 'loosemd', 'tightmd', 'loose', 'tight']:
        f = uproot.recreate(
            pjoin(outdir, f'legacy_limit_monov_{wp}_{year}.root'))
        data, mc = datasets(year)

        for region in [
                'cr_2m_v', 'cr_1m_v', 'cr_2e_v', 'cr_1e_v', 'cr_g_v', 'sr_v'
        ]:
            # 'tau21' uses the plain region name; other working points are
            # spliced into the name in front of the trailing '_v'
            if wp == 'tau21':
                monov_region_name = region
            else:
                monov_region_name = region.replace('_v', f'_{wp}_v')
            print(f'Region {region}')

            ih = h.integrate(h.axis('region'), monov_region_name)

            for dataset in map(str, ih.axis('dataset').identifiers()):
                if not (data[region].match(dataset)
                        or mc[region].match(dataset)
                        or signal.match(dataset)):
                    print(f"Skip dataset: {dataset}")
                    continue
                print(f" Dataset: {dataset}")

                th1 = export1d(ih.integrate('dataset', dataset))
                try:
                    histo_name = f'{legacy_region_name(region)}_{legacy_dataset_name(dataset)}'
                except Exception:
                    # Datasets without a legacy name are skipped on purpose.
                    # Catching Exception (not a bare except) lets
                    # KeyboardInterrupt / SystemExit propagate.
                    print(f"Skipping {dataset}")
                    continue

                f[histo_name] = th1

        f[f'{legacy_region_name("sr_v")}_data'] = f[
            f'{legacy_region_name("sr_v")}_zjets']

    merge_legacy_inputs(outdir)
def legacy_limit_input_monojet(acc, args):
    """Export the recoil templates as ROOT TH1s for the limit setting

    A file ``legacy_limit_monojet_{year}.root`` is produced for 2017 and 2018.
    Unless ``args.unblind`` is set, the ``<sr_j>_data`` entry of each file is
    filled from the ``<sr_j>_zjets`` entry.

    :param acc: Accumulator (processor output)
    :type acc: coffea.processor.accumulator
    :param args.outdir: Output directory
    :type args.outdir: string
    :param args.unblind: Also export the 'sr_j' region
    :param args.nlo: Forwarded to ``datasets``
    """
    distribution = 'recoil'

    control_regions = [
        'cr_2m_j', 'cr_1m_j', 'cr_2e_j', 'cr_1e_j', 'cr_g_j',
        'sr_j_no_veto_all'
    ]
    regions = control_regions + (['sr_j'] if args.unblind else [])

    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)

    # Prepare the histogram once: rebin, merge extensions, scale, merge datasets
    recoil_axis = hist.Bin('recoil', 'Recoil (GeV)', recoil_bins_2016())
    prepared = copy.deepcopy(acc[distribution])
    prepared = prepared.rebin(prepared.axis(recoil_axis.name), recoil_axis)
    prepared = merge_extensions(prepared, acc)
    scale_xs_lumi(prepared)
    prepared = merge_datasets(prepared)

    for year in [2017, 2018]:
        outfile = uproot.recreate(
            pjoin(args.outdir, f'legacy_limit_monojet_{year}.root'))
        data, mc = datasets(year, unblind=args.unblind, nlo=args.nlo)

        for region in regions:
            print(f'Region {region}')
            region_hist = prepared.integrate(prepared.axis('region'), region)

            for identifier in prepared.axis('dataset').identifiers():
                dataset = str(identifier)

                # Keep only datasets selected as data or MC for this region
                is_data = data[region].match(dataset)
                if not is_data and not mc[region].match(dataset):
                    continue
                print(f" Dataset: {dataset}")

                exported = export1d(region_hist.integrate('dataset', dataset))
                try:
                    histo_name = f'{legacy_region_name(region)}_{legacy_dataset_name(dataset)}'
                except RuntimeError:
                    print(f"Skipping {dataset}")
                    continue

                outfile[histo_name] = exported

        if not args.unblind:
            outfile[f'{legacy_region_name("sr_j")}_data'] = outfile[f'{legacy_region_name("sr_j")}_zjets']

    merge_legacy_inputs(args.outdir)
def pdf_plot(acc):
    '''Plot the 'gen_vpt' distribution for each value of the 'pdf' axis.

    For each dataset in a fixed list, every 'pdf' category except 'none' is
    drawn on the upper pad, and its ratio to the 'none' category on the lower
    pad. One file ``./output/pdfstudy/{dataset}.pdf`` is written per dataset.

    :param acc: Accumulator holding the 'gen_vpt' histogram
    '''
    outdir = './output/pdfstudy/'
    os.makedirs(outdir, exist_ok=True)

    datasets = [
        'WJetsToLNu_HT_MLM_2017',
        'DYJetsToLL_M-50_HT_MLM_2017',
    ]

    # Rebinning, merging and scaling are identical for every dataset,
    # so they are done once outside the loop.
    h_all = acc['gen_vpt']
    h_all = h_all.rebin(h_all.axis('vpt'),
                        hist.Bin("vpt", r"$p_{T}^{V}$ (GeV)", 10, 0, 2000))
    h_all = merge_extensions(h_all, acc, reweight_pu=False)
    scale_xs_lumi(h_all)
    h_all = merge_datasets(h_all)

    for ds in datasets:
        fig, ax, rax = fig_ratio()
        h = h_all.project(h_all.axis('dataset'), ds)

        for pdf in h.axis('pdf').identifiers():
            if str(pdf) == 'none':
                continue

            # Work on a copy so the shared data_err_opts dict is not left
            # with the color of the last PDF set after this function returns.
            err_opts = dict(data_err_opts, color=colors[str(pdf)])

            hist.plot1d(
                h.project('pdf', pdf),
                error_opts=err_opts,
                ax=ax,
                overflow='all',
                clear=False)

            hist.plotratio(
                h.project('pdf', pdf),
                h.project('pdf', 'none'),
                ax=rax,
                denom_fill_opts={},
                guide_opts={},
                unc='num',
                overflow='all',
                error_opts=err_opts,
                clear=False,
            )

        ax.set_ylim(1e-3, 1e8)
        rax.set_ylim(0.9, 1.6)
        ax.set_yscale('log')

        # Relabel legend entries with the PDF names.
        # NOTE(review): the index counts the skipped 'none' entry as well;
        # confirm it lines up with the legend entries actually drawn.
        leg = ax.legend()
        for i, pdf in enumerate(h.axis('pdf').identifiers()):
            if str(pdf) == 'none':
                continue
            leg.get_texts()[i].set_text(str(pdf))

        fig.savefig(pjoin(outdir, f'{ds}.pdf'))
        plt.close(fig)
def legacy_limit_input_vbf(acc, outdir='./output'):
    """Writes ROOT TH1s to file as a limit input

    One file ``legacy_limit_vbf_{year}.root`` is written per year
    (2017, 2018), containing the 'mjj' templates of the VBF regions.

    :param acc: Accumulator (processor output)
    :type acc: coffea.processor.accumulator
    :param outdir: Output directory
    :type outdir: string
    """
    distribution = 'mjj'

    os.makedirs(outdir, exist_ok=True)

    # Histogram prep (rebin, merge, scale) does not depend on year or
    # region, so do it once up front.
    h = copy.deepcopy(acc[distribution])
    newax = hist.Bin('mjj', '$M_{jj}$ (GeV)', mjj_bins_2016())
    h = h.rebin(h.axis(newax.name), newax)
    h = merge_extensions(h, acc)
    scale_xs_lumi(h)
    h = merge_datasets(h)

    for year in [2017, 2018]:
        signal = re.compile(f'VBF_HToInvisible.*{year}')
        f = uproot.recreate(pjoin(outdir, f'legacy_limit_vbf_{year}.root'))
        data, mc = datasets(year)

        for region in [
                'cr_2m_vbf', 'cr_1m_vbf', 'cr_2e_vbf', 'cr_1e_vbf',
                'cr_g_vbf', 'sr_vbf'
        ]:
            print(f'Region {region}')
            ih = h.integrate(h.axis('region'), region)

            for dataset in map(str, ih.axis('dataset').identifiers()):
                if not (data[region].match(dataset)
                        or mc[region].match(dataset)
                        or signal.match(dataset)):
                    print(f"Skip dataset: {dataset}")
                    continue
                print(f" Dataset: {dataset}")

                th1 = export1d(ih.integrate('dataset', dataset))
                try:
                    histo_name = f'{legacy_region_name(region)}_{legacy_dataset_name_vbf(dataset)}'
                    print(histo_name)
                except Exception:
                    # Datasets without a legacy name are skipped on purpose.
                    # Catching Exception (not a bare except) lets
                    # KeyboardInterrupt / SystemExit propagate.
                    print(f"Skipping {dataset}")
                    continue

                f[histo_name] = th1

        # Filling '<sr_vbf>_data' from '<sr_vbf>_zjets' is disabled here:
        #f[f'{legacy_region_name("sr_vbf")}_data'] = f[f'{legacy_region_name("sr_vbf")}_zjets']

    merge_legacy_inputs(outdir)
def pdf_plot(acc):
    '''Plot the recoil spectrum of GJets events in three photon pt slices.

    For 2017 and 2018, the 'photon_pt0_recoil' histogram is rebinned in photon
    pt and recoil, restricted to ``GJets_HT_MLM_{year}`` in the region
    'tr_g_notrig_num', and saved to
    ``./output/photon_pt_cut/photon_pt_cut_{year}.pdf``.

    :param acc: Accumulator holding the 'photon_pt0_recoil' histogram
    '''
    outdir = './output/photon_pt_cut/'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    pt_edges = [0, 175, 215, 10000]
    recoil_edges = (list(range(200, 500, 50))
                    + list(range(500, 1000, 100))
                    + list(range(1000, 2000, 250)))

    for year in [2017, 2018]:
        # Reuse the current figure, wiped clean
        fig = plt.gcf()
        fig.clf()
        ax = plt.gca()

        histo = copy.deepcopy(acc['photon_pt0_recoil'])
        histo = histo.rebin(
            histo.axis('pt'),
            hist.Bin("pt", r"$p_{T}^{\gamma}$ (GeV)", pt_edges))
        histo = histo.rebin(
            histo.axis('recoil'),
            hist.Bin('recoil', 'recoil', recoil_edges))

        histo = merge_extensions(histo, acc, reweight_pu=False)
        scale_xs_lumi(histo)
        histo = merge_datasets(histo)

        pprint(histo.axis('dataset').identifiers())

        # Keep a single dataset and a single region
        histo = histo.integrate(histo.axis('dataset'), f'GJets_HT_MLM_{year}')
        histo = histo.integrate(histo.axis('region'), 'tr_g_notrig_num')
        pprint(histo)

        hist.plot1d(
            histo,
            overlay='pt',
            ax=ax,
            overflow='all',
            clear=False)

        ax.set_ylim(0, 2e5)
        ax.set_xlim(200, 500)
        ax.set_ylabel('Expected GJets events (a.u.)')

        # One legend entry per photon pt slice
        ax.legend(['< 175', '175 - 215', '> 215'], title='Photon $p_{T}$')

        ax.text(0.97, 0.65, 'Photon CR, no trigger applied',
                fontsize=10,
                horizontalalignment='right',
                verticalalignment='bottom',
                transform=ax.transAxes)

        # Dashed vertical marker at recoil = 250
        ax.plot([250, 250], [0, 1e8], '--', color='grey')

        fig.savefig(pjoin(outdir, f'photon_pt_cut_{year}.pdf'))
        plt.close(fig)
def make_templates(acc, fout):
    '''Convert the coffea 'sieie' histogram into per-pt-bin ROOT templates.

    For each year (2017, 2018) three templates are built: '{year}_good'
    (MC in 'medium_nosieie'), '{year}_bad' (data minus MC in
    'medium_nosieie_invertiso') and '{year}_data' (data in 'medium_nosieie').
    Each template is written to ``fout`` once per pt bin.

    :param acc: Accumulator providing 'sieie', 'nevents' and 'sumw'
    :param fout: Path of the ROOT file to (re)create
    '''
    # Load inputs
    for key in ('sieie', 'nevents', 'sumw'):
        acc.load(key)

    # Scaling
    histo = merge_extensions(acc['sieie'], acc, reweight_pu=False)
    scale_xs_lumi(histo)
    histo = merge_datasets(histo)

    # Same edges as range(200, 300, 100) + range(300, 700, 100) + [1000]
    pt_edges = [200, 300, 400, 500, 600, 1000]
    histo = histo.rebin('pt', hist.Bin('pt', '$p_{T}$ (GeV)', pt_edges))

    h_iso = histo.integrate('cat', 'medium_nosieie')
    h_noniso = histo.integrate('cat', 'medium_nosieie_invertiso')

    # Make templates
    templates = {}
    for year in [2017, 2018]:
        mc = re.compile(f'(GJet).*HT.*{year}')
        data = re.compile(f'(EGamma).*{year}.*')

        templates[f'{year}_good'] = h_iso[mc].integrate('dataset')

        # 'bad' template: data with the MC contribution subtracted in place
        bad = h_noniso[data].integrate('dataset')
        mc_to_subtract = h_noniso[mc].integrate('dataset')
        mc_to_subtract.scale(-1)
        bad.add(mc_to_subtract)
        templates[f'{year}_bad'] = bad

        templates[f'{year}_data'] = h_iso[data].integrate('dataset')

    print(templates)

    # Save output: one TH1 per template and pt bin
    outfile = uproot.recreate(fout)
    for name, template in templates.items():
        edges = template.axis('pt').edges()
        for low, high in zip(edges[:-1], edges[1:]):
            outfile[f'{name}_pt{low:.0f}-{high:.0f}'] = export1d(
                template.integrate('pt', slice(low, high)))
def main():
    '''Run the S/B scan of the recoil distribution in 'sr_tight_v' for 2017.

    ``S_over_B`` is called twice, with uncertainties 0.05 and 0.10.

    :return: -2 if a KeyError is raised while merging or scanning,
             otherwise None
    '''
    inpath = "../../input/merged"
    year = 2017
    distribution = "recoil"

    mc = re.compile(
        f'(VDY.*HT.*|QCD.*|W.*HT.*|ST_|TTJets-FXFX_|Diboson_|GJets.*HT.*|ZJetsToNuNu.*){year}'
    )
    signal = re.compile(f'WH.*{year}')

    acc = dir_archive(
        inpath,
        serialized=True,
        compression=0,
        memsize=1e3,
    )
    for key in (distribution, 'sumw', 'sumw_pileup', 'nevents'):
        acc.load(key)

    # (uncertainty, output file name) for each scan
    scans = (
        (0.05, "SB_unc005.png"),
        (0.10, "SB_unc010.png"),
    )

    try:
        acc[distribution] = merge_extensions(
            acc[distribution],
            acc,
            reweight_pu='nopu' not in distribution)
        scale_xs_lumi(acc[distribution])
        acc[distribution] = merge_datasets(acc[distribution])

        for unc, outname in scans:
            S_over_B(acc,
                     distribution,
                     'sr_tight_v',
                     mc=mc,
                     signal=signal,
                     unc=unc,
                     outname=outname,
                     cutlim=(250, 750))
    except KeyError:
        print("key error ")
        return -2
def plot_ht_stitching(acc, tag, regex):
    '''Plot the 'lhe_ht' distribution for the datasets matching a regex.

    The histogram is merged and scaled, the selected datasets are overlaid
    on a log-scale plot, and the result is saved to
    ``./output/ht/{tag}_lhe_ht.pdf``.

    :param acc: Accumulator holding the 'lhe_ht' histogram
    :param tag: Label used in the output file name
    :param regex: Regular expression selecting the datasets to overlay
    '''
    outdir = './output/ht/'
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(outdir, exist_ok=True)

    for dist in ['lhe_ht']:
        h = copy.deepcopy(acc[dist])
        h = merge_extensions(h, acc)
        scale_xs_lumi(h)
        h = merge_datasets(h)

        fig, ax, _ = hist.plot1d(
            h[re.compile(regex)],
            overlay='dataset',
            overflow='all',
            binwnorm=True)
        plt.yscale('log')
        plt.ylim(1e-3, 1e6)

        fig.savefig(pjoin(outdir, f'{tag}_{dist}.pdf'))
        # Release the figure; otherwise every call leaves one open
        plt.close(fig)
def extract_yields_in_cr(acc,
                         distribution,
                         region='cr_vbf_qcd_rs',
                         year=2017):
    '''Compute and plot data minus MC for the given region.

    The upper pad shows data and the stacked MC; the lower pad shows their
    difference. The plot is saved to
    ``./output/qcd_cr/qcd_cr_{distribution}.pdf``.

    :param acc: Accumulator holding the histogram of ``distribution``
    :param distribution: Name of the histogram to use
    :param region: Value of the 'region' axis to select
    :param year: Year used to pick the data set (``MET_{year}``) and the MC
    :return: Histogram holding data minus the selected MC
    '''
    acc.load(distribution)

    histo = merge_extensions(acc[distribution], acc)
    scale_xs_lumi(histo)
    histo = merge_datasets(histo)

    # Apply the dedicated binning, if one is configured
    if distribution in BINNINGS:
        rebinned_axis = BINNINGS[distribution]
        histo = histo.rebin(rebinned_axis.name, rebinned_axis)

    histo = histo.integrate('region', region)

    data = f'MET_{year}'
    mc = re.compile(
        f'(ZJetsToNuNu.*|EW.*|Top_FXFX.*|Diboson.*|DYJetsToLL_M-50_HT_MLM.*|WJetsToLNu.*HT.*).*{year}'
    )

    fig, ax, rax = fig_ratio()

    marker_style = dict(
        linestyle='none',
        marker='.',
        markersize=10.,
        color='k',
        elinewidth=1,
    )

    # Upper pad: data points on top of the stacked MC
    hist.plot1d(histo[data],
                ax=ax,
                overlay='dataset',
                binwnorm=1,
                error_opts=marker_style)
    hist.plot1d(histo[mc],
                ax=ax,
                overlay='dataset',
                binwnorm=1,
                stack=True,
                clear=False)

    ax.set_yscale('log')
    ax.set_ylim(1e-4, 1e6)
    ax.set_ylabel('Events / GeV')
    ax.yaxis.set_ticks_position('both')

    # Swap raw dataset names for their pretty labels
    handles, labels = ax.get_legend_handles_labels()
    for handle, label in zip(handles, labels):
        for pattern, pretty in PRETTY_LEGEND_LABELS.items():
            if re.match(pattern, label):
                handle.set_label(pretty)

    ax.legend(title='VBF QCD CR', handles=handles, ncol=2)

    # Calculate data - MC
    difference = histo[data].integrate('dataset')
    mc_total = histo[mc].integrate('dataset')
    mc_total.scale(-1)
    difference.add(mc_total)

    # Plot data - MC on the bottom pad
    hist.plot1d(difference, ax=rax, binwnorm=1)
    rax.set_ylabel('(Data - MC) / GeV')
    rax.set_ylim(1e-3, 1e1)
    rax.set_yscale('log')
    rax.get_legend().remove()
    rax.yaxis.set_ticks_position('both')

    outdir = './output/qcd_cr'
    try:
        os.makedirs(outdir)
    except FileExistsError:
        pass

    outpath = pjoin(outdir, f'qcd_cr_{distribution}.pdf')
    fig.savefig(outpath)
    plt.close(fig)
    print(f'File saved: {outpath}')

    # Return the QCD yield
    return difference
def plot_recoil(acc, region_tag="1m", dataset='SingleMuon', year=2018, tag="test", distribution="recoil"):
    '''Plot numerator/denominator spectra and their efficiency ratio.

    Uses the regions ``tr_{region_tag}_num`` and ``tr_{region_tag}_den`` of
    ``{dataset}_{year}``. Writes the spectra, a text table and the efficiency
    plot to ``./output/{tag}``. Returns early (after printing an error) if
    plotting the numerator raises a KeyError.

    :param acc: Accumulator holding the histogram of ``distribution``
    :param region_tag: Tag identifying the numerator/denominator regions
    :param dataset: Dataset name without the year suffix
    :param year: Data-taking year
    :param tag: Name of the output sub-directory, also passed to ``trgname``
    :param distribution: Name of the histogram and of its axis to rebin
    '''
    histo = acc[distribution]
    histo = merge_extensions(histo)
    histo = merge_datasets(histo)

    edges = np.array(list(range(0, 400, 20)) + list(range(400, 1100, 100)))
    rebinned_axis = hist.Bin(distribution, f"{distribution} (GeV)", edges)
    histo = histo.rebin(histo.axis(distribution), rebinned_axis)

    ds = f'{dataset}_{year}'
    histo = histo.project(histo.axis('dataset'), ds)

    hnum = histo.project(histo.axis('region'), f'tr_{region_tag}_num')
    hden = histo.project(histo.axis('region'), f'tr_{region_tag}_den')

    # Recoil plot
    try:
        fig, ax, _ = hist.plot1d(hnum, binwnorm=True)
    except KeyError:
        print(f'ERROR: {region_tag}, {dataset}, {year}')
        return
    hist.plot1d(hden, ax=ax, clear=False, binwnorm=True)
    plt.yscale('log')
    plt.gca().set_ylim(0.1, 1e6)

    outdir = f"./output/{tag}"
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    stem = f'{region_tag}_{dataset}_{year}'
    fig.savefig(pjoin(outdir, f'{distribution}_{stem}.pdf'))
    with open(pjoin(outdir, f'table_{stem}.txt'), "w") as table_file:
        table_file.write(content_table(hnum, hden) + "\n")
    plt.close(fig)

    # Efficiency
    fig, ax, _ = hist.plotratio(hnum, hden,
                                guide_opts={},
                                unc='clopper-pearson',
                                error_opts=markers('data'))
    ax.set_ylim(0, 1.1)
    # NOTE(review): xmax is not defined in this function; presumably a
    # module-level name - confirm it exists where this function lives.
    ax.set_xlim(0, xmax)
    ax.set_ylabel("Efficiency")

    # Annotations anchored to the axes corners
    anchored = dict(verticalalignment='bottom', transform=ax.transAxes)
    plt.text(1., 1., r"$\approx$ %.1f fb$^{-1}$ (13 TeV)" % lumi(year),
             fontsize=16,
             horizontalalignment='right',
             **anchored)
    plt.text(0., 1., f'{region_tag}, {year}',
             fontsize=16,
             horizontalalignment='left',
             **anchored)
    plt.text(1., 0., f'{trgname(year, tag)}',
             fontsize=10,
             horizontalalignment='right',
             **anchored)

    # Horizontal reference line at 0.95
    plt.plot([0, xmax], [0.95, 0.95], 'r-')

    fig.savefig(pjoin(outdir, f'eff_{stem}.pdf'))
    plt.close(fig)
def main():
    '''Compare the MET data spectra of 2016, 2017 and 2018.

    For every region and every distribution listed in ``axes``, the data
    yield per bin width and luminosity is drawn for the three years (upper
    pad) together with the ratio to 2016 (lower pad). Plots are written to
    ``output/bu/{region}_{distribution}.pdf``. Distributions for which the
    axis lookup raises a KeyError are skipped.
    '''
    import os  # local import: only needed to create the output directory

    indir = "../lo_vs_nlo/input/2019-09-26_checks/"
    acc = acc_from_dir(indir)

    # savefig does not create missing directories
    outdir = 'output/bu'
    os.makedirs(outdir, exist_ok=True)

    years = (2016, 2017, 2018)
    selections = {year: re.compile(f'MET_{year}') for year in years}

    for region in [
            'cr_2m_j', 'cr_1m_j', 'cr_2m_j_noveto_tau', 'cr_1m_j_noveto_tau',
            'cr_2m_j_noveto_photon', 'cr_1m_j_noveto_photon'
    ]:
        for distribution in axes.keys():
            h = acc[distribution]
            h = merge_extensions(h,
                                 acc,
                                 reweight_pu=('nopu' in distribution))
            h = merge_datasets(h)
            h = h.integrate(h.axis('region'), region)

            # Use the dedicated binning if one is defined
            s = Style()
            try:
                newax = s.get_binning(distribution, region)
                h = h.rebin(h.axis(newax.name), newax)
            except KeyError:
                pass

            per_year = {
                year: h[selections[year]].integrate('dataset')
                for year in years
            }

            x, y = {}, {}
            try:
                for year in years:
                    hy = per_year[year]
                    x[year] = hy.axis(axes[distribution]).centers()
                    y[year] = hy.values()[()] / (np.diff(
                        hy.axis(axes[distribution]).edges())) / lumi(year)
            except KeyError:
                continue

            # The figure is created only once the inputs are known to be
            # usable, so skipped distributions no longer leave a figure open.
            fig, (ax, rax) = plt.subplots(
                2,
                1,
                figsize=(7, 7),
                gridspec_kw={"height_ratios": (3, 1)},
                sharex=True)

            for year in x.keys():
                ax.plot(x[year],
                        y[year],
                        'o-',
                        label=f'{year} / {lumi(year)} fb-1')
                rax.plot(x[year], y[year] / y[2016], '-o')

            ax.legend(title=region)
            ax.set_yscale('log')
            rax.set_xlabel(distribution)
            ax.set_xlabel(distribution)
            rax.set_ylabel('Ratio to 2016')
            ax.set_ylabel('Data cross section / bin')

            loc1 = matplotlib.ticker.MultipleLocator(base=0.2)
            loc2 = matplotlib.ticker.MultipleLocator(base=0.1)
            rax.yaxis.set_major_locator(loc1)
            rax.yaxis.set_minor_locator(loc2)
            rax.grid(axis='y', which='minor', linestyle='--')
            rax.grid(axis='y', which='major', linestyle='--')
            rax.set_ylim(0.5, 1.5)

            outname = f'output/bu/{region}_{distribution}.pdf'
            fig.savefig(outname)
            print(f"Saved {outname}")
            plt.close(fig)
def main():
    '''Compare 2017/2018 data recoil spectra with the 2016 fit input.

    For each control region the 2016 'data' graph is read from
    'shapes_prefit' in ``fitDiagnostics.root`` and padded with three zero
    points at 1500, 1700 and 1900. The 2017 and 2018 spectra come from the
    accumulator (MET for muon regions, EGamma otherwise). Plots are written
    to ``output/{region}.pdf``.
    '''
    import os  # local import: only needed to create the output directory

    indir = "../lo_vs_nlo/input/2019-09-23_photon_overlap"
    acc = acc_from_dir(indir)

    # savefig does not create missing directories
    os.makedirs('output', exist_ok=True)

    # The fit diagnostics file is the same for every region: open it once
    f = uproot.open('fitDiagnostics.root')

    for region in ['cr_2m_j', 'cr_2e_j', 'cr_1m_j', 'cr_1e_j', 'cr_g_j']:
        if '_1m_' in region or '_2m_' in region:
            sel_2017 = re.compile(f'MET_2017')
            sel_2018 = re.compile(f'MET_2018')
        else:
            sel_2017 = re.compile(f'EGamma_2017')
            sel_2018 = re.compile(f'EGamma_2018')

        for distribution in ['recoil']:
            fig, (ax, rax) = plt.subplots(
                2,
                1,
                figsize=(7, 7),
                gridspec_kw={"height_ratios": (3, 1)},
                sharex=True)

            h = acc['recoil']
            h = merge_extensions(h,
                                 acc,
                                 reweight_pu=('nopu' in distribution))
            h = merge_datasets(h)
            h = h.integrate(h.axis('region'), region)

            # Use the dedicated binning if one is defined
            s = Style()
            try:
                newax = s.get_binning(distribution, region)
                h = h.rebin(h.axis(newax.name), newax)
            except KeyError:
                pass

            # NOTE(review): `regions` is not defined in this function;
            # presumably a module-level mapping from region name to the
            # channel name used in the fit file - confirm.
            h2016 = f['shapes_prefit'][regions[region]]['data']

            x, y = {}, {}
            pprint(dir(h2016))
            # Pad the 2016 graph with three extra points
            x[2016] = np.r_[h2016.xvalues, 1500., 1700., 1900.]
            y[2016] = np.r_[h2016.yvalues / lumi(2016), 0, 0, 0]

            h2017 = h[sel_2017].integrate('dataset')
            h2018 = h[sel_2018].integrate('dataset')

            x[2017] = h2017.axis('recoil').centers()
            y[2017] = h2017.values()[()] / (np.diff(
                h2017.axis('recoil').edges())) / lumi(2017)
            x[2018] = h2018.axis('recoil').centers()
            y[2018] = h2018.values()[()] / (np.diff(
                h2018.axis('recoil').edges())) / lumi(2018)

            for year in x.keys():
                ax.plot(x[year], y[year], 'o-', label=f'{year}')
                rax.plot(x[year], y[year] / y[2016], '-o')

            ax.legend(title=region)
            ax.set_yscale('log')
            rax.set_xlabel(distribution)
            ax.set_xlabel(distribution)
            rax.set_ylabel('Ratio to 2016')
            ax.set_ylabel('Data cross section / bin')

            loc1 = matplotlib.ticker.MultipleLocator(base=0.2)
            loc2 = matplotlib.ticker.MultipleLocator(base=0.1)
            rax.yaxis.set_major_locator(loc1)
            rax.yaxis.set_minor_locator(loc2)
            rax.grid(axis='y', which='minor', linestyle='--')
            rax.grid(axis='y', which='major', linestyle='--')
            rax.set_ylim(0.5, 1.5)

            fig.savefig(f'output/{region}.pdf')
            # Release the figure; one would otherwise stay open per region
            plt.close(fig)
def sf_1d(acc, tag, regex, outputrootfile):
    '''Derive 1D NLO/LO ratios as a function of V pt and plot them.

    For every pt type and every selection ('inclusive', 'monojet', 'vbf') the
    histogram ``gen_vpt_{selection}_{pt_type}`` is reduced to the 'vpt' axis.
    The ratio of datasets matching '.*(LHE|amc).*' to those matching
    '.*HT.*' is plotted, saved to ``./output/{tag}_{dist}.pdf`` and stored in
    ``outputrootfile``.

    :param acc: Accumulator providing the ``gen_vpt_*`` histograms
    :param tag: Process tag; 'dy' and 'wjet' additionally use the 'dress'
                pt type and a different binning
    :param regex: Regular expression selecting the datasets to use
    :param outputrootfile: Mapping-like object; the key
                           ``{tag}_{pt_type}_{selection}`` is set to the
                           tuple (ratio values, vpt bin edges)
    '''
    outdir = './output/'
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(outdir, exist_ok=True)

    # One figure is reused for all plots
    fig, (ax, rax) = plt.subplots(2, 1,
                                  figsize=(7, 7),
                                  gridspec_kw={"height_ratios": (3, 1)},
                                  sharex=True)

    pt_types = ['stat1']
    if tag in ['dy', 'wjet']:
        pt_types.append('dress')
        new_ax = hist.Bin('vpt', 'V $p_{T}$ (GeV)', list(range(100,800,100))+list(range(800,1200,200))+list(range(1200,2800,800)))
    else:
        new_ax = hist.Bin('vpt', 'V $p_{T}$ (GeV)', [200,250]+list(range(300,800,100))+list(range(800,1400,200)))

    overflow = 'none'

    for pt_type in pt_types:
        for selection in ['inclusive', 'monojet', 'vbf']:
            dist = f'gen_vpt_{selection}_{pt_type}'
            acc.load(dist)
            h = copy.deepcopy(acc[dist])
            h = h.rebin(h.axis('vpt'), new_ax)

            # Integrate out the selection-specific axes
            if selection in ('monojet', 'vbf'):
                h = h.integrate(h.axis("jpt"))
            if selection == 'vbf':
                h = h.integrate(h.axis("mjj"))

            h = merge_extensions(h, acc, reweight_pu=False)
            scale_xs_lumi(h)
            h = merge_datasets(h)
            h = h[re.compile(regex)]

            hist.plot1d(
                h,
                overlay='dataset',
                overflow=overflow,
                binwnorm=True,
                ax=ax)

            lo = h[re.compile('.*HT.*')].integrate('dataset')
            nlo = h[re.compile('.*(LHE|amc).*')].integrate('dataset')

            hist.plotratio(nlo, lo,
                           ax=rax,
                           denom_fill_opts={},
                           guide_opts={},
                           unc='num',
                           overflow=overflow,
                           error_opts=data_err_opts,
                           label='2017 NLO/LO ratio')

            # Overlay the previous k-factor, evaluated at its bin centers
            old = get_old_kfac(tag)
            old_x = 0.5*(old.bins[:,0]+old.bins[:,1])
            rax.plot(old_x, old.values, 'ob-', label='2016 QCD k fac')
            rax.plot(old_x, old.values * pdfwgt_sf(old_x), 'or-', label='2016 x ad-hoc DY pdfwgt SF')

            ax.set_yscale('log')
            ax.set_ylim(1e-3, 1e6)
            rax.set_ylim(0, 2)
            rax.legend()

            fig.savefig(pjoin(outdir, f'{tag}_{dist}.pdf'))

            sf_x = lo.axis('vpt').edges(overflow=overflow)
            sf_y = nlo.values(overflow=overflow)[()] / lo.values(overflow=overflow)[()]

            outputrootfile[f'{tag}_{pt_type}_{selection}'] = (sf_y, sf_x)

    # Release the shared figure once all plots are written
    plt.close(fig)
def plot(inpath):
    """Make control-region ratio plots and data/MC plots for the VBF regions.

    For each year (2017, 2018) this calls ``cr_ratio_plot`` for the 'mjj'
    distribution and ``make_plot`` for every region and every distribution
    listed in ``plot_settings()``, once with the LO MC selection (tag 'losf')
    and once with the NLO one (tag 'nlo'). Output goes below
    ``./output/<basename of inpath>/``.

    :param inpath: Directory holding the processor output (*coffea files)
    :type inpath: string
    """
    indir = os.path.abspath(inpath)

    # The processor output is stored in an
    # 'accumulator', which in our case is
    # just a dictionary holding all the histograms
    # Put all your *coffea files into 'indir' and
    # pass the directory as an argument here.
    # All input files in the directory will
    # automatically be found, merged and read.
    # The merging only happens the first time
    # you run over a specific set of inputs.
    acc = dir_archive(inpath, serialized=True, compression=0, memsize=1e3)

    # Get a settings dictionary that details
    # which plots to make for each region,
    # what the axis limits are, etc
    # Can add plots by extending the dictionary
    # Or modify axes ranges, etc
    settings = plot_settings()

    # Names of the distributions that were already merged and scaled
    # in place inside 'acc'; each one is processed only once.
    merged = set()

    # Separate plots per year
    for year in [2017, 2018]:
        # The data to be used for each region
        # Muon regions use MET,
        # electron+photon regions use EGamma
        # ( EGamma = SingleElectron+SinglePhoton for 2017)
        # No data set is assigned to the signal region.
        data = {
            'sr_vbf': None,
            'cr_1m_vbf': f'MET_{year}',
            'cr_2m_vbf': f'MET_{year}',
            'cr_1e_vbf': f'EGamma_{year}',
            'cr_2e_vbf': f'EGamma_{year}',
            'cr_g_vbf': f'EGamma_{year}',
        }

        # Same for MC selection
        # Match datasets by regular expressions
        # Here for LO V samples (HT binned)
        mc_lo = {
            'sr_vbf':
            re.compile(
                f'(ZJetsToNuNu.*|EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'
            ),
            'cr_1m_vbf':
            re.compile(
                f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'
            ),
            'cr_1e_vbf':
            re.compile(
                f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'
            ),
            'cr_2m_vbf':
            re.compile(
                f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*).*{year}'
            ),
            'cr_2e_vbf':
            re.compile(
                f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*).*{year}'
            ),
            'cr_g_vbf':
            re.compile(f'(GJets_(HT|SM).*|QCD_HT.*|WJetsToLNu.*HT.*).*{year}'),
        }

        # Want to compare LO and NLO,
        # so do same thing for NLO V samples
        # All non-V samples remain the same
        mc_nlo = {
            'sr_vbf':
            re.compile(
                f'(ZJetsToNuNu.*|EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*FXFX.*).*{year}'
            ),
            'cr_1m_vbf':
            re.compile(
                f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DY.*FXFX.*|.*WJetsToLNu.*FXFX.*).*{year}'
            ),
            'cr_1e_vbf':
            re.compile(
                f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DY.*FXFX.*|.*WJetsToLNu.*FXFX.*).*{year}'
            ),
            'cr_2m_vbf':
            re.compile(
                f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DY.*FXFX.*).*{year}'
            ),
            'cr_2e_vbf':
            re.compile(
                f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DY.*FXFX.*).*{year}'
            ),
            'cr_g_vbf':
            re.compile(f'(GJets_(HT|SM).*|QCD_HT.*|W.*FXFX.*).*{year}'),
        }

        regions = list(mc_lo.keys())
        # Remove signal region, no need in ratio plots
        regions.remove('sr_vbf')

        # Make control region ratio plots for both
        # LO and NLO. Can be skipped if you only
        # want data / MC agreement plots.
        outdir = f'./output/{os.path.basename(indir)}/ratios'

        # Load ingredients from cache
        acc.load('mjj')
        acc.load('sumw')
        acc.load('sumw_pileup')
        acc.load('nevents')

        cr_ratio_plot(acc,
                      year=year,
                      tag='losf',
                      outdir=outdir,
                      mc=mc_lo,
                      regions=regions,
                      distribution='mjj')
        cr_ratio_plot(acc,
                      year=year,
                      tag='nlo',
                      outdir=outdir,
                      mc=mc_nlo,
                      regions=regions,
                      distribution='mjj')

        # Data / MC plots are made here
        # Loop over all regions
        for region in mc_lo.keys():
            # The signal region has no data set assigned above,
            # so no ratio is requested for it.
            ratio = True if region != 'sr_vbf' else False
            # Make separate output directory for each region
            # (this variable is not read below: the make_plot calls
            # build the same path inline)
            outdir = f'./output/{os.path.basename(indir)}/{region}'

            # Settings for this region
            plotset = settings[region]

            # Loop over the distributions
            for distribution in plotset.keys():
                # Load from cache
                if not distribution in merged:
                    acc.load(distribution)

                    if not distribution in acc.keys():
                        print(
                            f"WARNING: Distribution {distribution} not found in input files."
                        )
                        continue

                    # Merge and scale once, storing the result back into
                    # the accumulator so later regions/years reuse it.
                    acc[distribution] = merge_extensions(
                        acc[distribution],
                        acc,
                        reweight_pu=not ('nopu' in distribution))
                    scale_xs_lumi(acc[distribution])
                    acc[distribution] = merge_datasets(acc[distribution])
                    acc[distribution].axis('dataset').sorting = 'integral'
                    merged.add(distribution)

                try:
                    # The heavy lifting of making a plot is hidden
                    # in make_plot. We call it once using the LO MC
                    make_plot(
                        acc,
                        region=region,
                        distribution=distribution,
                        year=year,
                        data=data[region],
                        mc=mc_lo[region],
                        ylim=plotset[distribution].get('ylim', None),
                        xlim=plotset[distribution].get('xlim', None),
                        tag='losf',
                        outdir=f'./output/{os.path.basename(indir)}/{region}',
                        output_format='pdf',
                        ratio=ratio)

                    # And then we also call it for the NLO MC
                    # The output files will be named according to the 'tag'
                    # argument, so we will be able to tell them apart.
                    make_plot(
                        acc,
                        region=region,
                        distribution=distribution,
                        year=year,
                        data=data[region],
                        mc=mc_nlo[region],
                        ylim=plotset[distribution].get('ylim', None),
                        xlim=plotset[distribution].get('xlim', None),
                        tag='nlo',
                        outdir=f'./output/{os.path.basename(indir)}/{region}',
                        output_format='pdf',
                        ratio=ratio)

                except KeyError:
                    # A KeyError from either make_plot call skips this
                    # distribution for this region.
                    continue
def main():
    '''Compare data recoil spectra per year with the 2016 fit input.

    Runs twice, for the modes 'monojet' and 'zpt', which differ in the region
    mapping, the fit diagnostics file and the recoil binning. For each region
    the 'data' graph from 'shapes_prefit' in the fit file ('2016orig') is
    drawn together with the 2016, 2017 and 2018 spectra from the accumulator;
    the lower pad shows the ratio to '2016orig'. Plots are written to
    ``output/{mode}_{region}.pdf``.
    '''
    import os  # local import: only needed to create the output directory

    # savefig does not create missing directories
    os.makedirs('output', exist_ok=True)

    years = (2016, 2017, 2018)

    for mode in ['monojet', 'zpt']:
        if mode == 'monojet':
            regions = {
                # 'ch1' : 'sr_j',
                'ch2': 'cr_2m_j',
                'ch3': 'cr_1m_j',
                # 'ch4' : 'cr_g_j',
                # 'ch5' : 'cr_2e_j',
                'ch6': 'cr_1e_j'
            }
            fitfile = 'fitDiagnostics.root'
            bins = [
                250, 280, 310, 340, 370, 400, 430, 470, 510, 550, 590, 640,
                690, 740, 790, 840, 900, 960, 1020, 1090, 1160, 1250, 1400
            ]
        elif mode == 'zpt':
            regions = {
                # 'ch1' : 'sr_j',
                'monojet_singlemu': 'cr_1m_j',
                # 'ch4' : 'cr_g_j',
                # 'ch5' : 'cr_2e_j',
                'monojet_singleel': 'cr_1e_j'
            }
            fitfile = 'fitDiagnostics_zpt.root'
            bins = [250, 275, 300, 350, 400, 450, 500, 650, 800, 1150, 1400]

        # Make the mapping two-way: channel -> region and region -> channel
        regions.update({v: k for k, v in regions.items()})

        inpath = "input/2020-01-07_fine_recoil_bins_v4"
        acc = dir_archive(
            inpath,
            serialized=True,
            compression=0,
            memsize=1e3,
        )
        acc.load('recoil')
        acc.load('sumw')
        acc.load('sumw_pileup')
        acc.load('nevents')

        # The fit file is the same for all regions of a mode: open it once
        f = uproot.open(fitfile)

        # Only the keys containing 'r_' (the region names) are looped over
        for region in [x for x in regions.keys() if 'r_' in x]:
            # Muon regions use MET data, the others EGamma
            if '_1m_' in region or '_2m_' in region:
                stream = 'MET'
            else:
                stream = 'EGamma'
            selections = {
                year: re.compile(f'{stream}_{year}')
                for year in years
            }

            for distribution in ['recoil']:
                fig, (ax, rax) = plt.subplots(
                    2,
                    1,
                    figsize=(7, 7),
                    gridspec_kw={"height_ratios": (3, 1)},
                    sharex=True)

                h = acc['recoil']
                h = merge_extensions(h,
                                     acc,
                                     reweight_pu=('nopu' in distribution))
                h = merge_datasets(h)
                h = h.integrate(h.axis('region'), region)

                # Rebin
                newax = hist.Bin('recoil', 'Recoil (GeV)', bins)
                h = h.rebin(h.axis(newax.name), newax)

                # Retrieve input from fit diagnostics
                tg2016 = f['shapes_prefit'][regions[region]]['data']

                # Prepare x and y values for easy plotting
                x, y = {}, {}
                x['2016orig'] = np.r_[tg2016.xvalues]
                y['2016orig'] = np.r_[tg2016.yvalues / lumi(2016)]

                for year in years:
                    hy = h[selections[year]].integrate('dataset')
                    x[year] = hy.axis('recoil').centers()
                    y[year] = hy.values()[()] / (np.diff(
                        hy.axis('recoil').edges())) / lumi(year)

                # Actual plotting
                for year in x.keys():
                    print(x[year])
                    ax.plot(
                        x[year],
                        y[year],
                        'o-',
                        label=f'{year} ({sum(y[year]*np.diff(bins)):.2e})')
                    rax.plot(x[year], y[year] / y['2016orig'], '-o')

                ax.legend(title=region)
                ax.set_yscale('log')
                rax.set_xlabel(distribution)
                ax.set_xlabel(distribution)
                rax.set_ylabel('Ratio to 2016')
                ax.set_ylabel('Data cross section / bin')

                loc1 = matplotlib.ticker.MultipleLocator(base=0.2)
                loc2 = matplotlib.ticker.MultipleLocator(base=0.1)
                rax.yaxis.set_major_locator(loc1)
                rax.yaxis.set_minor_locator(loc2)
                rax.grid(axis='y', which='minor', linestyle='--')
                rax.grid(axis='y', which='major', linestyle='--')
                rax.set_ylim(0.5, 1.5)

                fig.savefig(f'output/{mode}_{region}.pdf')
                # Release the figure; one would otherwise stay open per
                # region and mode
                plt.close(fig)
def get_pdf_uncertainty(acc, regex, tag, nominal='pdf_0'):
    '''Given the input accumulator, calculate the PDF uncertainty from all PDF variations.

    :param acc: Accumulator holding the ``gen_vpt_vbf_*`` histograms
    :param regex: Regular expression selecting the datasets to use
    :type regex: string
    :param tag: Process tag, one of 'dy', 'wjet' or 'gjets'
    :type tag: string
    :param nominal: Name of the ``var`` entry used as the nominal weight
    :type nominal: string
    :return: Tuple ``(nlo_nom, unc, vpt_edges, vpt_centers)``; ``unc`` is the
             first value returned by ``calculate_pdf_unc``
    :raises ValueError: If ``tag`` is not a supported process tag

    Side effects: prints the relative uncertainty, calls ``plot_variations``
    and saves ``./output/theory_variations/pdf/{tag}_pdf_unc.pdf``.
    '''
    tag_to_title = {
        'dy': r'$Z\rightarrow \ell \ell$',
        'wjet': r'$W\rightarrow \ell \nu$',
        'gjets': r'$\gamma$ + jets'
    }
    # Fail early with a clear message instead of an UnboundLocalError further
    # down.
    if tag not in tag_to_title:
        raise ValueError(
            f'Unknown tag {tag!r}, expected one of {sorted(tag_to_title)}')

    # Define rebinning (identical for all supported tags)
    vpt_ax_fine = list(range(0, 400, 40)) + list(range(400, 1200, 80))
    vpt_ax = hist.Bin('vpt', 'V $p_{T}$ (GeV)', vpt_ax_fine)

    # Set the correct pt type
    pt_tag = 'combined' if tag != 'gjets' else 'stat1'
    acc.load(f'gen_vpt_vbf_{pt_tag}')
    h = acc[f'gen_vpt_vbf_{pt_tag}']

    h = h.rebin('vpt', vpt_ax)
    h = merge_extensions(h, acc, reweight_pu=False)
    scale_xs_lumi(h)
    h = merge_datasets(h)
    h = h[re.compile(regex)]

    # Integrate out mjj to get 1D variations
    # as a function of V-pt
    mjj_slice = slice(200, 7500)
    h = h.integrate('mjj', mjj_slice, overflow='over')

    # Get NLO distribution
    nlo = h[re.compile('.*(LHE|amcat).*')].integrate('dataset')

    # Nominal NLO weights, as specified in arguments
    # By default, use first PDF variation as nominal
    nlo_nom = nlo.integrate('var', nominal).values(overflow='over')[()]

    # NLO with PDF variations
    # Use a dict to collect NLO contents with all PDF variations
    nlo_var = {}
    for var in nlo.identifiers('var'):
        var_name = var.name
        if 'pdf' not in var_name:
            continue
        nlo_var[var_name] = nlo.integrate(
            'var', var_name).values(overflow='over')[()]

    unc, percent_unc = calculate_pdf_unc(nlo_nom, nlo_var, tag)
    print(percent_unc)

    plot_variations(nlo_nom, nlo_var, tag)

    # Plot the % uncertainty as a function of V-pt
    fig, ax = plt.subplots(1, 1)
    vpt_edges = vpt_ax.edges(overflow='over')
    # Midpoints of consecutive edges
    vpt_centers = (vpt_edges[:-1] + vpt_edges[1:]) / 2
    ax.plot(vpt_centers, percent_unc, 'o')
    ax.set_xlabel(r'$p_T(V) \ (GeV)$')
    ax.set_ylabel(r'$\sigma_{pdf}$ / Nominal Counts')
    ax.set_title(tag_to_title[tag])
    ax.grid(True)

    # Vertical marker line at 200 GeV
    ax.plot([200, 200], [0, 0.07], 'r')
    ax.set_ylim(0, 0.07)

    # Save the figure
    outdir = './output/theory_variations/pdf'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    outpath = pjoin(outdir, f'{tag}_pdf_unc.pdf')
    fig.savefig(outpath)
    # The figure is not returned, so release it.
    plt.close(fig)

    # Return nominal weights and uncertainty
    return nlo_nom, unc, vpt_edges, vpt_centers
def legacy_limit_input_vbf(acc, outdir='./output', unblind=False):
    """Writes ROOT TH1s to file as a limit input

    One file ``legacy_limit_vbf_{year}.root`` is written per year (2017,
    2018), containing the ``mjj`` histogram of every dataset matched by the
    data or MC selection of each region.  ``merge_legacy_inputs`` is called
    on the output directory at the end.

    :param acc: Accumulator (processor output)
    :type acc: coffea.processor.accumulator
    :param outdir: Output directory
    :type outdir: string
    :param unblind: If True, also export the ``sr_vbf`` region; if False, the
                    signal-region ``data`` entry is copied from the
                    ``qcdzjets`` entry instead
    :type unblind: bool
    """
    distribution = 'mjj'

    regions = [
        'cr_2m_vbf', 'cr_1m_vbf', 'cr_2e_vbf', 'cr_1e_vbf', 'cr_g_vbf',
        'sr_vbf_no_veto_all'
    ]
    if unblind:
        regions.append("sr_vbf")

    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Rebin
    h = copy.deepcopy(acc[distribution])
    newax = hist.Bin('mjj', '$M_{jj}$ (GeV)', mjj_bins_2016())
    h = h.rebin(h.axis(newax.name), newax)

    h = merge_extensions(h, acc)
    scale_xs_lumi(h)
    h = merge_datasets(h)

    for year in [2017, 2018]:
        # NOTE(review): the original compiled a `VBF_HToInvisible.*{year}`
        # signal regex here but never matched against it, so signal samples
        # are only exported if `mc[region]` matches them -- confirm whether
        # that is intended.
        f = uproot.recreate(pjoin(outdir, f'legacy_limit_vbf_{year}.root'))
        data, mc = datasets(year, unblind=unblind)

        for region in regions:
            print('=' * 20)
            print(f'Region {region}')
            print('=' * 20)
            ih = h.integrate(h.axis('region'), region)

            for dataset in map(str, h.axis('dataset').identifiers()):
                is_selected = (data[region].match(dataset)
                               or mc[region].match(dataset))
                if not is_selected:
                    # Insert dummy data for the signal region
                    # NOTE(review): 'sr_vbf' is only in `regions` when
                    # unblind is True, so this condition looks unreachable.
                    if (region == 'sr_vbf'
                            and re.match('ZJetsToNuNu.*', dataset)
                            and not unblind):
                        f['signal_data'] = export1d(
                            ih.integrate('dataset', dataset))
                    continue

                print(f"Dataset: {dataset}")
                th1 = export1d(ih.integrate('dataset', dataset))
                try:
                    histo_name = f'{legacy_region_name(region)}_{legacy_dataset_name_vbf(dataset)}'
                    print(f'Saved under histogram: {histo_name}')
                except Exception:
                    # Best effort: datasets without a legacy name are skipped.
                    # `Exception` keeps KeyboardInterrupt/SystemExit alive.
                    print(f"Skipping {dataset}")
                    continue
                print('-' * 20)
                f[histo_name] = th1

        if not unblind:
            # Blinded: reuse the QCD Z+jets entry as the signal-region "data".
            f[f'{legacy_region_name("sr_vbf")}_data'] = f[
                f'{legacy_region_name("sr_vbf")}_qcdzjets']

    merge_legacy_inputs(outdir)
def eta_phi_plot_photon(inpath):
    '''Create 2D eta-phi plot for photons in VBF photon CR.

    :param inpath: Directory holding the processor output
    :type inpath: string

    For 2017 and 2018, the ``photon_eta_phi`` histogram of the
    ``EGamma_{year}`` dataset in region ``cr_g_vbf`` is drawn with ``plot2d``
    and saved as ``./output/<basename of inpath>/{region}_{distribution}_{year}.pdf``.
    '''
    indir = os.path.abspath(inpath)
    acc = dir_archive(indir, serialized=True, compression=0, memsize=1e3)
    for key in ('sumw', 'sumw_pileup', 'sumw2', 'nevents'):
        acc.load(key)

    outdir = pjoin('./output/', os.path.basename(indir))
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    for year in [2017, 2018]:
        # Region name -> dataset to plot
        data = {'cr_g_vbf': f'EGamma_{year}'}
        for region, datare in data.items():
            distributions = ['photon_eta_phi']
            for distribution in distributions:
                acc.load(distribution)
                h = copy.deepcopy(acc[distribution])
                # NOTE(review): reweight_pu is True only when the distribution
                # name contains 'nopu'; plot() and from_coffea() in this file
                # pass the negation -- confirm which is intended.
                h = merge_extensions(h,
                                     acc,
                                     reweight_pu=('nopu' in distribution))
                scale_xs_lumi(h)
                h = merge_datasets(h)
                h = h.integrate('dataset', datare)
                h = h.integrate(h.axis('region'), region)

                fig, ax, _ = plot2d(h, xaxis='eta')

                # Region label (top left) and distribution label (bottom
                # right) inside the axes
                ax.text(0.,
                        1.,
                        region,
                        fontsize=10,
                        horizontalalignment='left',
                        verticalalignment='top',
                        color='white',
                        transform=ax.transAxes)
                ax.text(1.,
                        0.,
                        distribution,
                        fontsize=10,
                        horizontalalignment='right',
                        verticalalignment='bottom',
                        transform=ax.transAxes)
                # Luminosity and CMS labels above the axes
                fig.text(1.,
                         1.,
                         f'{lumi(year)} fb$^{{-1}}$ ({year})',
                         fontsize=14,
                         horizontalalignment='right',
                         verticalalignment='bottom',
                         transform=ax.transAxes)
                fig.text(0.,
                         1.,
                         '$\\bf{CMS}$ internal',
                         fontsize=14,
                         horizontalalignment='left',
                         verticalalignment='bottom',
                         transform=ax.transAxes)

                outname = pjoin(outdir, f'{region}_{distribution}_{year}.pdf')
                fig.savefig(outname)
                # Release the figure so repeated iterations do not pile up
                # open figures.
                plt.close(fig)
                print(f'Created file {outname}')
def get_scale_variations(acc, regex, tag, scale_var, scale_var_type):
    '''Calculate the new k-factors with a scale weight variation.

    :param acc: Accumulator holding the ``gen_vpt_vbf_*`` histograms
    :param regex: Regular expression selecting the datasets to use
    :type regex: string
    :param tag: Process tag, one of 'wjet', 'dy' or 'gjets'
    :type tag: string
    :param scale_var: Name of the ``var`` entry holding the varied weights
    :type scale_var: string
    :param scale_var_type: Not used by this function; kept for interface
                           compatibility
    :return: ``((var_ratio, vpt_edges), (sumw_nlo_var_1d, sumw_nlo_nom_1d))``
             where ``var_ratio`` is the varied NLO/LO ratio divided by the
             nominal NLO/LO ratio, as a function of V-pt
    :raises ValueError: If ``tag`` is not a supported process tag
    '''
    print(f'Working on: {tag}, {scale_var}')

    # Tag-specific mjj binning and the mjj range integrated over below
    if tag in ['wjet', 'dy']:
        mjj_edges = [0, 200] + list(range(500, 2500, 500))
        mjj_slice = slice(200, 2000)
    elif tag in ['gjets']:
        mjj_edges = [0, 200, 500, 1000, 1500]
        mjj_slice = slice(200, 1500)
    else:
        # Fail early with a clear message instead of an UnboundLocalError
        # further down.
        raise ValueError(
            f"Unknown tag {tag!r}, expected 'wjet', 'dy' or 'gjets'")

    # Define rebinning (the V-pt binning is the same for all tags)
    vpt_ax_fine = list(range(0, 400, 40)) + list(range(400, 1200, 80))
    vpt_ax = hist.Bin('vpt', 'V $p_{T}$ (GeV)', vpt_ax_fine)
    mjj_ax = hist.Bin('mjj', 'M(jj) (GeV)', mjj_edges)

    # Set the correct pt type
    pt_tag = 'combined' if tag != 'gjets' else 'stat1'
    acc.load(f'gen_vpt_vbf_{pt_tag}')
    h = acc[f'gen_vpt_vbf_{pt_tag}']

    h = h.rebin('vpt', vpt_ax)
    h = h.rebin('mjj', mjj_ax)
    h = merge_extensions(h, acc, reweight_pu=False)
    scale_xs_lumi(h)
    h = merge_datasets(h)
    h = h[re.compile(regex)]

    lo = h[re.compile('.*HT.*')].integrate('dataset')
    nlo = h[re.compile('.*(LHE|amcat).*')].integrate('dataset')

    # Choose the relevant scale variation (relevant to NLO only)
    # For LO, choose the nominal (i.e. no variation)
    lo = lo.integrate('var', 'nominal')
    nlo_var = nlo.integrate('var', scale_var)
    nlo_nom = nlo.integrate('var', 'nominal')

    # Get 1D LO and NLO weights to calculate the variation
    lo_1d = lo.integrate('mjj', mjj_slice, overflow='over')
    nlo_var_1d = nlo_var.integrate('mjj', mjj_slice, overflow='over')
    nlo_nom_1d = nlo_nom.integrate('mjj', mjj_slice, overflow='over')

    sumw_lo_1d = lo_1d.values(overflow='over')[()]
    sumw_nlo_var_1d = nlo_var_1d.values(overflow='over')[()]
    sumw_nlo_nom_1d = nlo_nom_1d.values(overflow='over')[()]

    # Calculate 1D scale factors, nominal and varied
    # as a function of V-pt
    sf_nom_1d = sumw_nlo_nom_1d / sumw_lo_1d
    sf_var_1d = sumw_nlo_var_1d / sumw_lo_1d

    # Calculate 1D variation ratio, as a function of V-pt
    var_ratio = sf_var_1d / sf_nom_1d

    tup = (var_ratio, h.axis('vpt').edges(overflow='over'))

    # Return tuple containing the SF ratios and
    # NLO weights with and without variation
    return tup, (sumw_nlo_var_1d, sumw_nlo_nom_1d)
def sf_2d(acc, tag, regex, pt_type, outputrootfile):
    '''Compute and plot the 2D LO -> NLO scale factor in (V-pt, mjj).

    The scale factor and its uncertainty are pickled to ``{tag}_kfac.pkl``
    (followed by a second pickle record holding the axis edges), drawn as a
    colour mesh saved under ``./output/2d/`` and stored in ``outputrootfile``
    under the key ``2d_{tag}_vbf`` as ``(sf, x_edges, y_edges)``.
    '''
    outdir = './output/2d/'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Start from a clean pyplot state and draw on a fresh figure.
    plt.close('all')
    fig = plt.figure(figsize=(6,7.5))
    ax = plt.gca()

    # Tag-dependent binning and colour-scale limits
    if tag in ['dy', 'wjet']:
        vpt_edges = [0, 40, 80, 120, 160, 200, 240, 280, 320, 400, 520, 640, 760, 880,1200]
        vpt_ax = hist.Bin('vpt','V $p_{T}$ (GeV)', vpt_edges)
        mjj_ax = hist.Bin('mjj','M(jj) (GeV)', list(range(0,2500,500)))
        clims = 0.5, 1.5
    elif tag in ['gjets']:
        vpt_edges = [0, 40, 80, 120, 160, 200, 240, 280, 320, 400, 520, 640]
        vpt_ax = hist.Bin('vpt','V $p_{T}$ (GeV)', vpt_edges)
        mjj_ax = hist.Bin('mjj','M(jj) (GeV)', [0,200,500,1000,1500])
        clims = 1.0, 1.5

    # Only the VBF selection is processed.
    selection = 'vbf'
    dist = f'gen_vpt_{selection}_{pt_type}'
    acc.load(dist)
    histo = copy.deepcopy(acc[dist])
    print(histo)

    histo = histo.rebin(histo.axis('vpt'), vpt_ax)
    histo = histo.rebin(histo.axis('mjj'), mjj_ax)
    histo = histo.integrate(histo.axis("jpt"))

    histo = merge_extensions(histo, acc, reweight_pu=False)
    scale_xs_lumi(histo)
    histo = merge_datasets(histo)
    histo = histo[re.compile(regex)]

    # Datasets matching 'HT' are the LO input, 'LHE' / 'amcat' the NLO input.
    lo = histo[re.compile('.*HT.*')].integrate('dataset')
    nlo = histo[re.compile('.*(LHE|amcat).*')].integrate('dataset')

    sumw_lo, sumw2_lo = lo.values(overflow='over', sumw2=True)[()]
    sumw_nlo, sumw2_nlo = nlo.values(overflow='over', sumw2=True)[()]
    print(sumw_nlo)

    # Ratio and the quadrature sum of the numerator and denominator terms
    sf = sumw_nlo / sumw_lo
    err_from_nlo = np.sqrt(sumw2_nlo) / sumw_lo
    err_from_lo = sumw_nlo * np.sqrt(sumw2_lo) / (sumw_lo**2)
    dsf = np.hypot(err_from_nlo, err_from_lo)

    # First pickle record: scale factor and uncertainty
    pkl_filename = f'{tag}_kfac.pkl'
    with open(pkl_filename, 'wb') as pkl:
        pickle.dump((sf, dsf), pkl)

    xaxis, yaxis = lo.axes()[0], lo.axes()[1]
    x_edges = xaxis.edges(overflow='over')
    y_edges = yaxis.edges(overflow='over')

    im = ax.pcolormesh(x_edges, y_edges, sf.T)

    # Second pickle record, appended to the same file: the axis edges
    with open(pkl_filename, 'ab') as pkl:
        pickle.dump((xaxis.edges(overflow='over'), yaxis.edges(overflow='over')), pkl)

    # Annotate every cell with its value, switching the text colour at the
    # middle of the colour range.
    mid_clim = 0.5*(clims[0]+clims[1])
    x_centers = xaxis.centers(overflow='over')
    y_centers = yaxis.centers(overflow='over')
    for ix, x_center in enumerate(x_centers):
        for iy, y_center in enumerate(y_centers):
            sf_val = sf.T[iy, ix]
            dsf_val = dsf.T[iy, ix]
            if sf_val < mid_clim:
                textcol = 'white'
            else:
                textcol = 'black'
            ax.text(
                x_center,
                y_center,
                f'  {sf_val:.3f}  \n$\\pm$ {dsf_val:.2f}',
                ha='center',
                va='center',
                color=textcol,
                fontsize=6
            )

    ax.set_ylabel('$p_{T}(V)$ (GeV)')
    ax.set_xlabel('M(jj) (GeV)')
    cb = fig.colorbar(im)
    cb.set_label('LO $\\rightarrow$ NLO SF')
    im.set_clim(*clims)

    fig.savefig(pjoin(outdir,f'2d_{tag}_{dist}.pdf'))

    # Hand the scale factor and its edges to the caller-provided output.
    tup = (sf, xaxis.edges(overflow='over'), yaxis.edges(overflow='over'))
    for item in tup:
        print(item.shape)

    outputrootfile[f'2d_{tag}_{selection}'] = tup
def _region_ratio(num, den):
    '''Return the bin-wise ratio num/den and its propagated uncertainty.'''
    num_sumw, num_sumw2 = num.values(sumw2=True, overflow='over')[()]
    den_sumw, den_sumw2 = den.values(sumw2=True, overflow='over')[()]
    rsumw_err = np.hypot(
        np.sqrt(num_sumw2) / den_sumw,
        num_sumw * np.sqrt(den_sumw2) / den_sumw**2)
    rsumw = num_sumw / den_sumw
    return rsumw, rsumw_err


def cr_ratio_plot(acc,
                  distribution='recoil',
                  regions=None,
                  year=2017,
                  tag='',
                  outdir='./output',
                  mc=None,
                  data=None):
    '''Plot region-over-region ratios for data and MC.

    For every ordered pair of distinct regions a figure is written to
    ``outdir`` with the data and MC ratio in the upper panel and the
    data / MC double ratio in the lower panel.

    :param acc: Accumulator (processor output)
    :param distribution: Name of the histogram (and axis) to plot
    :type distribution: string
    :param regions: Regions to compare; defaults to the five monojet control
                    regions
    :type regions: list of strings
    :param year: Data-taking year used in dataset selections and labels
    :type year: int
    :param tag: Free-form tag inserted into the output file names
    :type tag: string
    :param outdir: Output directory
    :type outdir: string
    :param mc: Region name -> MC dataset selection; a default is built if
               empty
    :param data: Region name -> data dataset selection; a default is built if
                 empty
    '''
    # Avoid a shared mutable default argument.
    if regions is None:
        regions = ['cr_2m_j', 'cr_1m_j', 'cr_1e_j', 'cr_2e_j', 'cr_g_j']

    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Rebin
    s = Style()
    h = copy.deepcopy(acc[distribution])
    try:
        newax = s.get_binning(distribution)
        h = h.rebin(h.axis(newax.name), newax)
    except KeyError:
        # Keep the original binning (a KeyError from either call above lands
        # here).
        pass

    h = merge_extensions(h, acc)
    scale_xs_lumi(h)
    h = merge_datasets(h)

    histograms = {}
    for region in regions:
        histograms[region] = copy.deepcopy(h).integrate(
            h.axis('region'), region)

    if not mc:
        single_lepton = re.compile(
            f'(TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*|W.*HT.*).*{year}'
        )
        double_lepton = re.compile(
            f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*).*{year}'
        )
        mc = {
            'cr_1m_j': single_lepton,
            'cr_1e_j': single_lepton,
            'cr_2m_j': double_lepton,
            'cr_2e_j': double_lepton,
            'cr_g_j': re.compile(f'(GJets.*|QCD_HT.*|W.*HT.*).*{year}'),
        }
    if not data:
        data = {
            'cr_1m_j': f'MET_{year}',
            'cr_2m_j': f'MET_{year}',
            'cr_1e_j': f'EGamma_{year}',
            'cr_2e_j': f'EGamma_{year}',
            'cr_g_j': f'EGamma_{year}',
            'cr_1m_vbf': f'MET_{year}',
            'cr_2m_vbf': f'MET_{year}',
            'cr_1e_vbf': f'EGamma_{year}',
            'cr_2e_vbf': f'EGamma_{year}',
            'cr_g_vbf': f'EGamma_{year}'
        }

    # Raw strings: '\m' and '\g' are invalid escape sequences otherwise.
    name = {
        'cr_1m_j': r'1$\mu$',
        'cr_2m_j': r'2$\mu$',
        'cr_1e_j': '1e',
        'cr_2e_j': '2e',
        'cr_g_j': r'$\gamma$',
        'cr_1m_vbf': r'1$\mu$',
        'cr_2m_vbf': r'2$\mu$',
        'cr_1e_vbf': '1e',
        'cr_2e_vbf': '2e',
        'cr_g_vbf': r'$\gamma$'
    }

    data_err_opts = {
        'linestyle': 'none',
        'marker': '.',
        'markersize': 10.,
        'color': 'k',
        'elinewidth': 1,
    }

    for i, num_region in enumerate(regions):
        for j, den_region in enumerate(regions):
            if i == j:
                continue
            fig, (ax, rax) = plt.subplots(2,
                                          1,
                                          figsize=(7, 7),
                                          gridspec_kw={"height_ratios": (3, 1)},
                                          sharex=True)
            h1 = histograms[num_region]
            h2 = histograms[den_region]
            print(data[num_region])
            h1_data = h1[data[num_region]].integrate('dataset')
            h1_mc = h1[mc[num_region]].integrate('dataset')
            h2_data = h2[data[den_region]].integrate('dataset')
            h2_mc = h2[mc[den_region]].integrate('dataset')

            # Ratio plot
            centers = h1_data.axis(distribution).centers(overflow='over')
            rsumw_data, rsumw_err_data = _region_ratio(h1_data, h2_data)
            ax.errorbar(x=centers,
                        y=rsumw_data,
                        yerr=rsumw_err_data,
                        label='Data',
                        **data_err_opts)

            rsumw_mc, rsumw_err_mc = _region_ratio(h1_mc, h2_mc)
            edges = h1_mc.axis(distribution).edges(overflow='over')
            # Step plots need one value per edge: pad with the first / last
            # bin so the arrays match `edges` in length.
            ax.step(x=edges,
                    y=np.r_[rsumw_mc[0], rsumw_mc],
                    color=colors['mc'],
                    label='MC')

            y1 = np.r_[rsumw_mc - rsumw_err_mc,
                       rsumw_mc[-1] - rsumw_err_mc[-1]]
            y2 = np.r_[rsumw_mc + rsumw_err_mc,
                       rsumw_mc[-1] + rsumw_err_mc[-1]]
            ax.fill_between(edges,
                            y1=y1,
                            y2=y2,
                            zorder=-1,
                            color=colors['band'],
                            step='post',
                            label='MC stat. unc')
            ax.set_ylim(0, 5)

            # Double ratio; only the data uncertainty enters the error bars,
            # the MC uncertainty is drawn as a band.
            rrsumw = rsumw_data / rsumw_mc
            rrsumw_err = rsumw_err_data / rsumw_mc
            rax.errorbar(x=centers,
                         y=rrsumw,
                         yerr=rrsumw_err,
                         **data_err_opts)
            rax.set_ylim(0.75, 1.25)
            # Reference line at unity, drawn explicitly on the ratio pad
            # rather than on pyplot's current axes.
            rax.plot([min(edges), max(edges)], [1, 1], color=colors['mc'])

            y1 = np.r_[(rsumw_mc - rsumw_err_mc) / rsumw_mc,
                       (rsumw_mc[-1] - rsumw_err_mc[-1]) / rsumw_mc[-1]]
            y2 = np.r_[(rsumw_mc + rsumw_err_mc) / rsumw_mc,
                       (rsumw_mc[-1] + rsumw_err_mc[-1]) / rsumw_mc[-1]]
            rax.fill_between(edges,
                             y1=y1,
                             y2=y2,
                             zorder=-1,
                             color=colors['band'],
                             step='post')

            ax.legend(title=f'{name[num_region]} over {name[den_region]}')
            fig.text(1.,
                     1.,
                     f'{lumi(year)} fb$^{{-1}}$ ({year})',
                     fontsize=14,
                     horizontalalignment='right',
                     verticalalignment='bottom',
                     transform=ax.transAxes)
            fig.text(0.,
                     1.,
                     '$\\bf{CMS}$ internal',
                     fontsize=14,
                     horizontalalignment='left',
                     verticalalignment='bottom',
                     transform=ax.transAxes)
            rax.set_xlabel(f'{distribution} (GeV)', fontsize=14)
            rax.set_ylabel('Data / MC', fontsize=14)
            ax.set_xlabel(f'{distribution} (GeV)', fontsize=14)
            ax.set_ylabel(
                f'Region ratio: {name[num_region]} / {name[den_region]} (GeV)',
                fontsize=14)

            loc1 = matplotlib.ticker.MultipleLocator(base=0.2)
            loc2 = matplotlib.ticker.MultipleLocator(base=0.1)
            rax.yaxis.set_major_locator(loc1)
            rax.yaxis.set_minor_locator(loc2)
            rax.grid(axis='y', which='minor', linestyle='--')
            rax.grid(axis='y', which='major', linestyle='--')

            # Save and close
            fig.savefig(
                pjoin(
                    outdir,
                    f'ratio_{tag}_{distribution}_{num_region}_over_{den_region}_{year}.pdf'
                ))
            plt.close(fig)
def legacy_limit_input_monov(acc, outdir='./output', unblind=False):
    """Writes ROOT TH1s to file as a limit input

    One file ``legacy_limit_monov_{wp}_{year}.root`` is written per working
    point ('tau21', 'loose', 'tight') and year (2017, 2018), containing the
    ``recoil`` histogram of every dataset matched by the data or MC selection
    of each region.  ``merge_legacy_inputs`` is called on the output directory
    at the end.

    :param acc: Accumulator (processor output)
    :type acc: coffea.processor.accumulator
    :param outdir: Output directory
    :type outdir: string
    :param unblind: If True, also export the ``sr_v`` region; if False, the
                    signal-region ``data`` entry is copied from the ``zjets``
                    entry instead
    :type unblind: bool
    """
    distribution = 'recoil'

    regions = [
        'cr_2m_v', 'cr_1m_v', 'cr_2e_v', 'cr_1e_v', 'cr_g_v',
        'sr_v_no_veto_all'
    ]
    if unblind:
        regions.append("sr_v")

    if not os.path.exists(outdir):
        os.makedirs(outdir)

    newax = hist.Bin('recoil', 'Recoil (GeV)', recoil_bins_2016())

    # Histogram prep, rebin, etc
    h = copy.deepcopy(acc[distribution])
    h = h.rebin(h.axis(newax.name), newax)
    h = merge_extensions(h, acc)
    scale_xs_lumi(h)
    h = merge_datasets(h)

    for wp in ['tau21', 'loose', 'tight']:
        for year in [2017, 2018]:
            f = uproot.recreate(
                pjoin(outdir, f'legacy_limit_monov_{wp}_{year}.root'))
            data, mc = datasets(year, unblind)

            for region in regions:
                # The 'tau21' working point uses the plain region names; for
                # the others the working point is inserted in front of the
                # 'v' token of the region name.
                if wp == 'tau21':
                    monov_region_name = region
                elif region.endswith("_v"):
                    monov_region_name = region.replace('_v', f'_{wp}_v')
                else:
                    monov_region_name = region.replace('_v_', f'_{wp}_v_')

                print(f'Region {region}')
                ih = h.integrate(h.axis('region'), monov_region_name)

                for dataset in map(str, ih.axis('dataset').identifiers()):
                    is_selected = (data[region].match(dataset)
                                   or mc[region].match(dataset))
                    if not is_selected:
                        continue
                    print(f"   Dataset: {dataset}")

                    th1 = export1d(ih.integrate('dataset', dataset))
                    try:
                        histo_name = f'{legacy_region_name(region)}_{legacy_dataset_name(dataset)}'
                    except Exception:
                        # Best effort: datasets without a legacy name are
                        # skipped.  `Exception` keeps KeyboardInterrupt and
                        # SystemExit alive.
                        print(f"Skipping {dataset}")
                        continue
                    f[histo_name] = th1

            if not unblind:
                # Blinded: reuse the Z+jets entry as the signal-region "data".
                f[f'{legacy_region_name("sr_v")}_data'] = f[
                    f'{legacy_region_name("sr_v")}_zjets']

    merge_legacy_inputs(outdir)
# Script fragment: prepares the 'gen_vpt' histogram of the DY HT-binned
# datasets and starts a two-panel figure.
# NOTE(review): the fragment ends in the middle of the plt.subplots(...) call;
# the remainder of the statement is not visible here.
import copy
from bucoffea.plot.util import merge_extensions, merge_datasets, scale_xs_lumi
acc = acc_from_dir('input/das_lhevpt_v2')
# Error-bar styling options for data points.
data_err_opts = {
    'linestyle': 'none',
    'marker': '.',
    'markersize': 10.,
    'color': 'k',
    'elinewidth': 1,
    'emarker': '_'
}
# Work on a copy so the histogram stored in the accumulator is left alone.
h = copy.deepcopy(acc['gen_vpt'])
h = merge_extensions(h, acc, reweight_pu=False)
scale_xs_lumi(h)
h = merge_datasets(h)
# Keep only the nominal weight type.
h = h.integrate('weight_type', 'nominal')
# presumably selects weight index 0 only -- TODO confirm against the axis binning
h = h.integrate('weight_index', slice(-0.5, 0.5))
# Sum all datasets whose name matches DY ... HT.
h = h[re.compile('.*DY.*HT.*')].integrate('dataset')
# Rebin V-pt: 40 GeV steps from 80 to 800, then 100 GeV steps up to 1900.
new_ax = hist.Bin('vpt', 'Gen V $p_{T}$ (GeV)',
                  list(range(80, 800, 40)) + list(range(800, 2000, 100)))
h = h.rebin(h.axis('vpt'), new_ax)
print(h)
fig, (ax, rax) = plt.subplots(2, 1, figsize=(7, 7),
def eta_phi_plot(inpath):
    '''Create 2D eta-phi plots for jets and leptons in the lepton CRs.

    :param inpath: Directory holding the processor output
    :type inpath: string

    For 2017 and 2018 and each of the four lepton control regions, the
    ``ak4_eta_phi`` histogram plus the matching lepton histogram
    (``electron_eta_phi`` or ``muon_eta_phi``) of the region's data dataset is
    drawn with ``plot2d`` and saved as
    ``./output/<basename of inpath>/{region}_{distribution}_{year}.pdf``.
    '''
    indir = os.path.abspath(inpath)
    acc = acc_from_dir(indir)

    outdir = pjoin('./output/', os.path.basename(indir))
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    for year in [2017, 2018]:
        # Region name -> dataset to plot (the photon region is disabled)
        data = {
            'cr_1m_j': f'MET_{year}',
            'cr_2m_j': f'MET_{year}',
            'cr_1e_j': f'EGamma_{year}',
            'cr_2e_j': f'EGamma_{year}',
        }
        for region, datare in data.items():
            distributions = ['ak4_eta_phi']
            if 'e_' in region:
                distributions.append('electron_eta_phi')
            elif 'm_' in region:
                distributions.append('muon_eta_phi')

            for distribution in distributions:
                h = copy.deepcopy(acc[distribution])
                # NOTE(review): reweight_pu is True only when the distribution
                # name contains 'nopu'; plot() and from_coffea() in this file
                # pass the negation -- confirm which is intended.
                h = merge_extensions(h,
                                     acc,
                                     reweight_pu=('nopu' in distribution))
                scale_xs_lumi(h)
                h = merge_datasets(h)
                h = h.integrate('dataset', datare)
                h = h.integrate(h.axis('region'), region)

                fig, ax, _ = plot2d(h, xaxis='eta')

                # Region label (top left) and distribution label (bottom
                # right) inside the axes
                ax.text(0.,
                        1.,
                        region,
                        fontsize=10,
                        horizontalalignment='left',
                        verticalalignment='top',
                        color='white',
                        transform=ax.transAxes)
                ax.text(1.,
                        0.,
                        distribution,
                        fontsize=10,
                        horizontalalignment='right',
                        verticalalignment='bottom',
                        transform=ax.transAxes)
                # Luminosity and CMS labels above the axes
                fig.text(1.,
                         1.,
                         f'{lumi(year)} fb$^{{-1}}$ ({year})',
                         fontsize=14,
                         horizontalalignment='right',
                         verticalalignment='bottom',
                         transform=ax.transAxes)
                fig.text(0.,
                         1.,
                         '$\\bf{CMS}$ internal',
                         fontsize=14,
                         horizontalalignment='left',
                         verticalalignment='bottom',
                         transform=ax.transAxes)

                outname = pjoin(outdir, f'{region}_{distribution}_{year}.pdf')
                fig.savefig(outname)
                # Release the figure so repeated iterations do not pile up
                # open figures.
                plt.close(fig)
                print(f'Created file {outname}')
def plot_recoil(acc,
                xmax=1e3,
                ymin=0,
                ymax=1.1,
                region_tag="1m",
                dataset='SingleMuon',
                year=2018,
                tag="test",
                distribution="recoil",
                axis_name=None,
                noscale=False,
                jeteta_config=None,
                output_format='pdf'):
    '''Plot numerator / denominator spectra and the resulting efficiency.

    Three files are written to ``./output/{tag}``: the overlaid spectra of
    the ``tr_{region_tag}_num`` and ``tr_{region_tag}_den`` regions, a text
    table produced by ``content_table`` and the num/den efficiency plot.

    :param acc: Accumulator (processor output)
    :param xmax: Upper x-axis limit of the efficiency plot
    :param ymin: Lower y-axis limit of the efficiency plot
    :param ymax: Upper y-axis limit of the efficiency plot
    :param region_tag: Region tag used to build the ``tr_*`` region names
    :type region_tag: string
    :param dataset: Dataset name without the year suffix
    :type dataset: string
    :param year: Data-taking year
    :type year: int
    :param tag: Name of the output sub-directory, also passed to ``trgname``
    :type tag: string
    :param distribution: Name of the histogram to plot
    :type distribution: string
    :param axis_name: Axis to rebin; defaults to ``distribution``
    :type axis_name: string
    :param noscale: If True, skip the cross-section scaling
    :type noscale: bool
    :param jeteta_config: Optional suffix appended to the region names
    :type jeteta_config: string
    :param output_format: File extension used for both figures
    :type output_format: string
    '''
    # Select and prepare histogram
    h = copy.deepcopy(acc[distribution])
    # NOTE(review): reweight_pu is True only when the distribution name
    # contains 'nopu'; plot() and from_coffea() in this file pass the
    # negation -- confirm which is intended.
    h = merge_extensions(h,
                         acc,
                         reweight_pu=('nopu' in distribution),
                         noscale=noscale)
    if not noscale:
        scale_xs_lumi(h)
    h = merge_datasets(h)

    # Rebinning
    axis_name = distribution if not axis_name else axis_name
    if 'photon' in distribution:
        newbin = hist.Bin(
            axis_name, f"{axis_name} (GeV)",
            np.array(
                list(range(0, 250, 10)) + list(range(250, 400, 50)) +
                list(range(400, 1100, 100))))
    elif distribution == 'mjj':
        newbin = hist.Bin(
            axis_name, r'$M_{jj}$ (GeV)',
            np.array(
                list(range(200, 600, 200)) + list(range(600, 1500, 300)) +
                [1500, 2000, 2750, 3500]))
    else:
        newbin = hist.Bin(
            axis_name, f"{axis_name} (GeV)",
            np.array(list(range(0, 500, 25)) + list(range(500, 1100, 100))))
    h = h.rebin(h.axis(axis_name), newbin)
    ds = f'{dataset}_{year}'

    # Pick dataset and regions
    h = h.integrate(h.axis('dataset'), ds)
    region_suffix = f'_{jeteta_config}' if jeteta_config else ''
    hnum = h.integrate(h.axis('region'),
                       f'tr_{region_tag}_num{region_suffix}')
    hden = h.integrate(h.axis('region'),
                       f'tr_{region_tag}_den{region_suffix}')

    # Recoil plot
    try:
        fig, ax, _ = hist.plot1d(hnum, binwnorm=True)
    except KeyError:
        # Print the regions that are available, then give up on this plot.
        pprint(h.axis('region').identifiers())
        print(f'ERROR: {region_tag}, {dataset}, {year}')
        return
    hist.plot1d(hden, ax=ax, clear=False, binwnorm=True)
    ax.set_yscale('log')
    ax.set_ylim(0.1, 1e6)

    outdir = f"./output/{tag}"
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    outname = f'{region_tag}{"_noscale_" if noscale else "_"}{distribution}_{dataset}_{year}{"_"+jeteta_config if jeteta_config else ""}'

    fig.savefig(pjoin(outdir, f'{outname}.{output_format}'))
    with open(pjoin(outdir, f'table_{outname}.txt'), "w") as f:
        f.write(content_table(hnum, hden, axis_name) + "\n")
    plt.close(fig)

    # Efficiency plot
    fig, ax, _ = hist.plotratio(hnum,
                                hden,
                                guide_opts={},
                                unc='clopper-pearson',
                                error_opts=markers('data'))
    ax.set_ylim(ymin, ymax)
    ax.set_xlim(0, xmax)
    ax.set_ylabel("Efficiency")

    # Labels are placed in axes coordinates.
    ax.text(1.,
            1.,
            r"%.1f fb$^{-1}$ (13 TeV)" % lumi_by_region(region_tag, year),
            fontsize=16,
            horizontalalignment='right',
            verticalalignment='bottom',
            transform=ax.transAxes)
    ax.text(1.,
            0.95,
            f'{jeteta_config if jeteta_config else ""}',
            fontsize=12,
            horizontalalignment='right',
            verticalalignment='bottom',
            transform=ax.transAxes)
    ax.text(0.,
            1.,
            f'{region_tag}, {year}',
            fontsize=16,
            horizontalalignment='left',
            verticalalignment='bottom',
            transform=ax.transAxes)
    ax.text(1.,
            0.,
            f'{trgname(year, tag)}',
            fontsize=10,
            horizontalalignment='right',
            verticalalignment='bottom',
            transform=ax.transAxes)

    # Guide lines: a vertical marker at 215 GeV for region tags containing
    # 'g_', and a horizontal line at 95% efficiency.
    if 'g_' in region_tag:
        ax.plot([215, 215], [0.8, 1.1], 'r-')
    ax.plot([0, xmax], [0.95, 0.95], 'r-')

    # Honour output_format here as well (previously hard-coded to '.pdf',
    # unlike the spectrum plot above).
    fig.savefig(pjoin(outdir, f'eff_{outname}.{output_format}'))
    plt.close(fig)
def plot(args):
    '''Make data / MC plots for the VBF regions.

    :param args: Parsed arguments; ``args.inpath`` is the input directory,
                 ``args.region`` and ``args.distribution`` are regular
                 expressions selecting which regions and distributions to plot
    '''
    indir = os.path.abspath(args.inpath)

    # The processor output is stored in an
    # 'accumulator', which in our case is
    # just a dictionary holding all the histograms
    # Put all your *coffea files into 'indir' and
    # pass the directory as an argument here.
    # All input files in the directory will
    # automatically be found, merged and read.
    # The merging only happens the first time
    # you run over a specific set of inputs.
    acc = dir_archive(args.inpath, serialized=True, compression=0, memsize=1e3)

    # Get a settings dictionary that details
    # which plots to make for each region,
    # what the axis limits are, etc
    # Can add plots by extending the dictionary
    # Or modify axes ranges, etc
    settings = plot_settings()

    # Distributions that have already been merged and scaled in `acc`
    merged = set()

    # Separate plots per year
    for year in [2017, 2018]:
        # The data to be used for each region
        # Muon regions use MET,
        # electron+photon regions use EGamma
        # ( EGamma = SingleElectron+SinglePhoton for 2017)
        data = {
            'sr_vbf': f'MET_{year}',
            'cr_1m_vbf': f'MET_{year}',
            'cr_2m_vbf': f'MET_{year}',
            'cr_1e_vbf': f'EGamma_{year}',
            'cr_2e_vbf': f'EGamma_{year}',
            'cr_g_vbf': f'EGamma_{year}',
        }

        # Same for MC selection
        # Match datasets by regular expressions
        # Here for LO V samples (HT binned)
        mc_lo = {
            'sr_vbf':
            re.compile(
                f'(ZJetsToNuNu.*|EW.*|Top_FXFX.*|Diboson.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'
            ),
            'cr_1m_vbf':
            re.compile(
                f'(EWKW.*|Top_FXFX.*|Diboson.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'
            ),
            'cr_1e_vbf':
            re.compile(
                f'(EWKW.*|Top_FXFX.*|Diboson.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'
            ),
            'cr_2m_vbf':
            re.compile(
                f'(EWKZ.*ZToLL.*|Top_FXFX.*|Diboson.*|.*DYJetsToLL_M-50_HT_MLM.*).*{year}'
            ),
            'cr_2e_vbf':
            re.compile(
                f'(EWKZ.*ZToLL.*|Top_FXFX.*|Diboson.*|.*DYJetsToLL_M-50_HT_MLM.*).*{year}'
            ),
            'cr_g_vbf':
            re.compile(
                f'(GJets_(DR-0p4|SM).*|QCD_data.*|WJetsToLNu.*HT.*).*{year}'),
        }

        # Load ingredients from cache
        acc.load('sumw')
        acc.load('sumw_pileup')
        acc.load('nevents')

        # Data / MC plots are made here
        # Loop over all regions
        for region in mc_lo:
            if not re.match(args.region, region):
                continue
            # Plot ratio pads for all regions (now that we're unblinded)
            ratio = True

            # Make separate output directory for each region
            outdir = f'./output/{os.path.basename(indir)}/{region}'

            # Settings for this region
            plotset = settings[region]

            # Loop over the distributions
            for distribution in plotset:
                if not re.match(args.distribution, distribution):
                    continue

                # Load from cache; merge and scale only once per distribution
                if distribution not in merged:
                    acc.load(distribution)

                    if distribution not in acc.keys():
                        print(
                            f"WARNING: Distribution {distribution} not found in input files."
                        )
                        continue
                    acc[distribution] = merge_extensions(
                        acc[distribution],
                        acc,
                        reweight_pu=not ('nopu' in distribution))
                    scale_xs_lumi(acc[distribution])
                    acc[distribution] = merge_datasets(acc[distribution])
                    acc[distribution].axis('dataset').sorting = 'integral'
                    merged.add(distribution)
                try:
                    # The heavy lifting of making a plot is hidden
                    # in make_plot. We call it once using the LO MC
                    imc = mc_lo[region]
                    # In the photon region, every distribution except recoil
                    # swaps the 'QCD_data' pattern for 'QCD.*HT'.
                    if "cr_g" in region and distribution != "recoil":
                        imc = re.compile(
                            imc.pattern.replace('QCD_data', 'QCD.*HT'))
                    make_plot(acc,
                              region=region,
                              distribution=distribution,
                              year=year,
                              data=data[region],
                              mc=imc,
                              ylim=plotset[distribution].get('ylim', None),
                              xlim=plotset[distribution].get('xlim', None),
                              tag='losf',
                              outdir=outdir,
                              output_format='pdf',
                              ratio=ratio)
                except KeyError:
                    # Best effort: a KeyError raised while plotting skips
                    # this distribution.
                    continue
def _select_region_sum(histogram, dataset_regex, region):
    '''Pick datasets by regex, select one region and sum over the datasets.'''
    selected = histogram[re.compile(dataset_regex)]
    return selected.integrate('region', region).integrate('dataset')


def _export_uncertainties(f, h_unc, prefix, year, skip):
    '''Write one TH1 per 'uncertainty' entry whose name has no `skip` token.'''
    for unc in map(str, h_unc.axis('uncertainty').identifiers()):
        if any(token in unc for token in skip):
            continue
        h = h_unc.integrate(h_unc.axis('uncertainty'), unc)
        f[f'{prefix}_{unc}_{year}'] = export1d(h)


def _export_ewk_variations(f, h_nominal, h_noewk, prefix, year):
    '''Write the EWK down (no-EWK) and mirrored up (2*nominal - no-EWK) shapes.'''
    # Down variation: the histogram without the EWK correction.
    f[f'{prefix}_unc_w_ewkcorr_overz_common_down_{year}'] = export1d(h_noewk)

    # Up variation: nominal + (nominal - no-EWK).  Work on copies so the
    # caller's histograms are left untouched.
    h_diff = h_nominal.copy()
    h_minus_noewk = h_noewk.copy()
    h_minus_noewk.scale(-1)
    h_diff.add(h_minus_noewk)
    h_up = h_nominal.copy()
    h_up.add(h_diff)
    f[f'{prefix}_unc_w_ewkcorr_overz_common_up_{year}'] = export1d(h_up)


def from_coffea(inpath, outfile):
    '''Export nominal and varied mjj shapes from a coffea archive to ROOT.

    :param inpath: Directory holding the processor output
    :type inpath: string
    :param outfile: Path of the ROOT file to (re)create
    :type outfile: string

    For 2017 and 2018 the nominal ``mjj`` shapes, the entries of the
    ``mjj_unc`` histogram's ``uncertainty`` axis and the EWK-correction
    up/down shapes derived from ``mjj_noewk`` are written for the QCD and
    EWK V+jets / photon+jets samples.
    '''
    acc = dir_archive(
        inpath,
        serialized=True,
        compression=0,
        memsize=1e3,
    )

    # Merging, scaling, etc
    acc.load('sumw')
    acc.load('sumw_pileup')
    acc.load('nevents')
    mjj_ax = hist.Bin('mjj', r'$M_{jj}$ (GeV)',
                      [200, 400, 600, 900, 1200, 1500, 2000, 2750, 3500, 5000])
    for distribution in ['mjj', 'mjj_unc', 'mjj_noewk']:
        acc.load(distribution)
        acc[distribution] = merge_extensions(
            acc[distribution],
            acc,
            reweight_pu=not ('nopu' in distribution)
        )
        scale_xs_lumi(acc[distribution])
        acc[distribution] = merge_datasets(acc[distribution])
        acc[distribution] = acc[distribution].rebin(
            acc[distribution].axis('mjj'), mjj_ax)

    pprint(acc[distribution].axis('dataset').identifiers())

    f = uproot.recreate(outfile)
    for year in [2017, 2018]:
        # QCD V
        h_z = _select_region_sum(acc['mjj'], f'ZJetsToNuNu.*HT.*{year}', 'sr_vbf')
        f[f'z_qcd_mjj_nominal_{year}'] = export1d(h_z)

        h_w = _select_region_sum(acc['mjj'], f'WJetsToLNu.*HT.*{year}', 'sr_vbf')
        f[f'w_qcd_mjj_nominal_{year}'] = export1d(h_w)

        h_ph = _select_region_sum(acc['mjj'], f'GJets_DR-0p4.*HT.*{year}', 'cr_g_vbf')
        f[f'gjets_qcd_mjj_nominal_{year}'] = export1d(h_ph)

        # Scale + PDF variations for QCD Z
        h_z_unc = _select_region_sum(acc['mjj_unc'], f'ZJ.*HT.*{year}', 'sr_vbf')
        _export_uncertainties(f, h_z_unc, 'z_qcd_mjj', year,
                              skip=('goverz', 'ewkcorr'))

        # EWK variations for QCD Z
        h_z_noewk = _select_region_sum(acc['mjj_noewk'], f'ZJetsToNuNu.*HT.*{year}', 'sr_vbf')
        _export_ewk_variations(f, h_z, h_z_noewk, 'z_qcd_mjj', year)

        # EWK variations for QCD W
        h_w_noewk = _select_region_sum(acc['mjj_noewk'], f'WJetsToLNu.*HT.*{year}', 'sr_vbf')
        _export_ewk_variations(f, h_w, h_w_noewk, 'w_qcd_mjj', year)

        # Scale + PDF variations for QCD photons
        h_ph_unc = _select_region_sum(acc['mjj_unc'], f'GJets_DR-0p4.*HT.*{year}', 'cr_g_vbf')
        _export_uncertainties(f, h_ph_unc, 'gjets_qcd_mjj', year,
                              skip=('zoverw', 'ewkcorr'))

        # EWK variations for QCD photons
        h_ph_noewk = _select_region_sum(acc['mjj_noewk'], f'GJets_DR-0p4.*HT.*{year}', 'cr_g_vbf')
        _export_ewk_variations(f, h_ph, h_ph_noewk, 'gjets_qcd_mjj', year)

        # EWK V
        h_z = _select_region_sum(acc['mjj'], f'.*EWKZ.*{year}', 'sr_vbf')
        f[f'z_ewk_mjj_nominal_{year}'] = export1d(h_z)

        h_w = _select_region_sum(acc['mjj'], f'.*EWKW.*{year}', 'sr_vbf')
        f[f'w_ewk_mjj_nominal_{year}'] = export1d(h_w)

        h_ph = _select_region_sum(acc['mjj'], f'GJets_SM_5f_EWK.*{year}', 'cr_g_vbf')
        f[f'gjets_ewk_mjj_nominal_{year}'] = export1d(h_ph)
        print(h_ph.values())

        # Scale + PDF variations for EWK Z
        h_z_unc = _select_region_sum(acc['mjj_unc'], f'.*EWKZ.*{year}', 'sr_vbf')
        _export_uncertainties(f, h_z_unc, 'z_ewk_mjj', year,
                              skip=('goverz', 'ewkcorr'))

        # Scale + PDF variations for EWK photons
        h_ph_unc = _select_region_sum(acc['mjj_unc'], f'GJets_SM.*{year}', 'cr_g_vbf')
        _export_uncertainties(f, h_ph_unc, 'gjets_ewk_mjj', year,
                              skip=('zoverw', 'ewkcorr'))