def make_signal_plot(sigh, title=None, overflow='over', yscale='log'):
    """Plot ``sigh`` with one overlaid curve per ``dataset`` and return ``(fig, ax)``.

    After a tight autoscale the upper y limit is pushed up so the visible
    span grows by 20% (measured in log10 space when ``yscale`` is ``'log'``),
    leaving headroom above the curves. Axis labels are right-aligned, the
    title is left-aligned and a fixed luminosity label is written above the
    top-right corner of the axes.
    """
    import matplotlib.pyplot as plt
    import numpy as np
    from coffea import hist

    fig, ax = plt.subplots(1, 1, figsize=(8, 6))
    hist.plot1d(sigh, overlay='dataset', ax=ax, overflow=overflow)

    ax.set_yscale(yscale)
    ax.autoscale(axis='both', tight=True)

    # Stretch only the upper edge; the lower edge stays where autoscale put it.
    lower, upper = ax.get_ylim()
    if yscale == 'linear':
        upper = (upper - lower) * 1.2 + lower
    if yscale == 'log':
        log_lower = np.log10(lower)
        upper = 10**(((np.log10(upper) - log_lower) * 1.2) + log_lower)
    ax.set_ylim(lower, upper)

    ax.set_xlabel(ax.get_xlabel(), x=1, ha='right')
    ax.set_ylabel(ax.get_ylabel(), y=1.0, ha="right")
    ax.set_title(title, x=0.0, ha="left")
    ax.text(1, 1, '59.74/fb (13TeV)',
            ha='right', va='bottom', transform=ax.transAxes)

    return fig, ax
def plot_rebsmear_prediction(acc_rs, h_qcd, distribution='mjj', dataset='JetHT_2017', region='cr_vbf_qcd'):
    '''Overlay the rebalance-and-smear prediction with the data - MC estimate from VBF.

    ``acc_rs`` provides the rebalance-and-smear histogram for ``distribution``;
    ``h_qcd`` is drawn on top of it. Both are normalised by bin width and the
    figure is written to ``test.pdf`` in the working directory.
    '''
    acc_rs.load(distribution)

    # Merge the JetHT datasets together
    merged = rs_merge_datasets(acc_rs[distribution])

    # Apply the configured binning when one exists for this distribution.
    if distribution in BINNINGS:
        target_axis = BINNINGS[distribution]
        merged = merged.rebin(target_axis.name, target_axis)

    selected = merged.integrate('region', region)[dataset]

    fig, ax = plt.subplots()
    hist.plot1d(selected, ax=ax, overlay='dataset', binwnorm=1)
    # clear=False keeps the first histogram on the axes.
    hist.plot1d(h_qcd, ax=ax, binwnorm=1, clear=False)

    ax.set_yscale('log')
    ax.set_ylim(1e-4, 1e2)

    fig.savefig('test.pdf')
def kinematic_selection_plot(hname, btype, dataset_name, selection_name, yscale="linear"):
    """Plot one histogram for a single dataset and selection and save it as PNG.

    The histogram is looked up as ``histograms[btype][hname]``, reduced to
    ``dataset_name`` and ``selection_name``, and written to
    ``{figure_directory}/{hname}_{btype}_{dataset_tag}_{selection_name}.png``.

    :param hname: key of the histogram inside ``histograms[btype]``
    :param btype: key of the histogram group inside ``histograms``
    :param dataset_name: identifier integrated over on the ``dataset`` axis
    :param selection_name: identifier integrated over on the ``selection`` axis
    :param yscale: matplotlib y-axis scale applied to the plot
    """
    print(
        f"Making plot: {hname}, {btype}, {dataset_name}, {selection_name}, {yscale}"
    )
    source = histograms[btype][hname]

    fig, ax = plt.subplots(1, 1, figsize=(10, 7))
    print(source.axes())
    print(source.axis("dataset").identifiers())
    print(source.axis("selection").identifiers())

    # Pass ax explicitly so the drawing always lands on the figure saved
    # below, independent of how plot1d picks axes when none is given.
    hist.plot1d(
        source.integrate("dataset", [(dataset_name)]).integrate(
            "selection", [(selection_name)]),
        ax=ax,
        error_opts={
            "marker": ".",
            "linestyle": "none",
            "markersize": 10.,
            "color": "k",
            "elinewidth": 1
        })
    # The yscale argument used to be accepted but never applied.
    ax.set_yscale(yscale)

    if "inclusive" in dataset_name:
        dataset_tag = "inclusive"
    elif "probefilter" in dataset_name:
        dataset_tag = "probefilter"
    else:
        # Previously dataset_tag stayed unbound here and savefig raised;
        # fall back to the full dataset name for the file name.
        dataset_tag = dataset_name

    plt.tight_layout()
    fig.savefig(
        f"{figure_directory}/{hname}_{btype}_{dataset_tag}_{selection_name}.png"
    )
    plt.close(fig)
def plot_ht_dist(acc, regex, tag):
    '''Plot the LHE HT distribution for the datasets matching ``regex``.

    The ``lhe_ht`` histogram is loaded from the accumulator, passed through
    the extension merging, cross-section/luminosity scaling and dataset
    merging helpers, rebinned to 50 bins in [0, 4000] and saved as
    ``./output/{tag}_lhe_ht.pdf``.
    '''
    acc.load('lhe_ht')
    ht_hist = merge_extensions(acc['lhe_ht'], acc, reweight_pu=False)
    scale_xs_lumi(ht_hist)
    ht_hist = merge_datasets(ht_hist)

    # Choose the relevant dataset(s)
    ht_hist = ht_hist[re.compile(regex)]

    ht_hist = ht_hist.rebin('ht', hist.Bin('ht', r'$H_T \ (GeV)$', 50, 0, 4000))

    # Plot the HT distribution
    fig, ax = plt.subplots(1, 1)
    hist.plot1d(ht_hist, ax=ax, overflow='all', binwnorm=True, overlay='dataset')

    y_low, y_high = 1e-3, 1e6
    ax.set_yscale('log')
    ax.set_ylim(y_low, y_high)

    # Vertical marker at HT = 600 spanning the full y range for gjets tags.
    if 'gjets' in tag:
        ax.plot([600, 600], [y_low, y_high])

    if not os.path.exists('./output'):
        os.mkdir('output')

    fig.savefig(f'./output/{tag}_lhe_ht.pdf')
def plot(hist_mc=None, hist_data=None, xlim=[], xscale="", xlabel="", ylim=[], yscale="", ylabel="", data_selection="", mc_selection="", savetag=""):
    """Draw data and MC overlaid, with a data/MC ratio panel underneath.

    Both histograms are reduced to the requested selection, added into one
    histogram and plotted per ``dataset`` in the upper panel. The lower panel
    shows the ratio of the ``Run2018`` dataset to the
    ``Bu2KJpsi2KMuMu_probefilter`` dataset. The figure is written to
    ``{figure_directory}/{savetag}.png``.

    :param hist_mc: MC histogram with ``selection`` and ``dataset`` axes
    :param hist_data: data histogram with ``selection`` and ``dataset`` axes
    :param xlim: x range for both panels; left automatic when empty
    :param xscale: x scale name for both panels; left as default when empty
    :param xlabel: x axis label for both panels
    :param ylim: y range of the upper panel; left automatic when empty
    :param yscale: y scale name of the upper panel; left as default when empty
    :param ylabel: y axis label of the upper panel
    :param data_selection: selection integrated over in ``hist_data``
    :param mc_selection: selection integrated over in ``hist_mc``
    :param savetag: stem of the output file name

    The ``[]`` defaults are kept for interface compatibility; they are only
    read, never modified.
    """
    def _given(limits):
        # Treat None and empty sequences alike as "not specified".
        return limits is not None and len(limits) > 0

    hist_mc = hist_mc.integrate("selection", mc_selection)
    print(hist_data.axis("selection").identifiers())
    hist_data = hist_data.integrate("selection", data_selection)

    print(hist_data)
    print(hist_data.values())
    # NOTE(review): MC is not normalised to data here. The earlier attempt
    # called .sum() on the dict returned by values(), which raised.

    # Add in two steps so this works whether or not add() returns the histogram.
    hist_all = copy.deepcopy(hist_data)
    hist_all.add(hist_mc)

    # Two stacked panels: subplots(1, 1) yields a single Axes, which cannot
    # be indexed as ax[0] / ax[1].
    fig, (ax, rax) = plt.subplots(2, 1, figsize=(10, 12))

    hist.plot1d(hist_all, overlay="dataset", ax=ax)
    if _given(xlim):
        ax.set_xlim(xlim)
    if xscale:
        ax.set_xscale(xscale)
    ax.set_xlabel(xlabel)
    if _given(ylim):
        ax.set_ylim(ylim)
    if yscale:
        ax.set_yscale(yscale)
    ax.set_ylabel(ylabel)

    hist.plotratio(
        num=hist_all.integrate("dataset", "Run2018"),
        den=hist_all.integrate("dataset", "Bu2KJpsi2KMuMu_probefilter"),
        unc='num',
        ax=rax)
    if _given(xlim):
        rax.set_xlim(xlim)
    if xscale:
        rax.set_xscale(xscale)
    rax.set_xlabel(xlabel)
    rax.set_ylabel("Data / MC")

    fig.savefig(f"{figure_directory}/{savetag}.png")
def test_plot1d():
    """Exercise ``hist.plot1d`` on lepton pt, stacked and as a density plot.

    Returns the figure that holds the stacked plot.
    """
    # Select a non-interactive backend before anything is drawn.
    import matplotlib.pyplot as plt
    plt.switch_backend("agg")

    from coffea import hist

    # Lepton pt for all eta.
    lepton_pt = fill_lepton_kinematics().integrate("eta", overflow="under")

    stack_fill = {"alpha": 0.5, "edgecolor": (0, 0, 0, 0.3)}
    ax = hist.plot1d(
        lepton_pt,
        overlay="flavor",
        stack=True,
        fill_opts=stack_fill,
    )

    # The yields differ a lot between flavors; compare the shapes instead.
    lepton_pt.label = "Density"
    hist.plot1d(lepton_pt, overlay="flavor", density=True)

    return ax.figure
def test_plot1d():
    """Exercise ``hist.plot1d`` on lepton pt, stacked and as a density plot.

    This variant unpacks ``(fig, ax, primitives)`` from ``plot1d`` and tweaks
    the returned legend primitive.
    """
    from coffea import hist

    # Select a non-interactive backend before pyplot is imported.
    import matplotlib as mpl
    mpl.use('Agg')
    import matplotlib.pyplot as plt

    # Lepton pt for all eta.
    lepton_pt = fill_lepton_kinematics().integrate("eta", overflow='under')

    stack_fill = {'alpha': .5, 'edgecolor': (0, 0, 0, 0.3)}
    fig, ax, primitives = hist.plot1d(lepton_pt,
                                      overlay="flavor",
                                      stack=True,
                                      fill_opts=stack_fill)

    # The matplotlib primitives are returned so they can be adjusted afterwards.
    primitives['legend'].shadow = True

    # The yields differ a lot between flavors; compare the shapes instead.
    lepton_pt.label = "Density"
    fig, ax, primitives = hist.plot1d(lepton_pt,
                                      overlay="flavor",
                                      density=True)
def plot_lhe_v_pt(acc, tag, regex, outputrootfile, pttype):
    """Plot the generator-level V pt per dataset with an NLO/LO ratio panel.

    Datasets whose name matches ``.*LHE.*`` are taken as the NLO sample and
    those matching ``.*HT.*`` as the LO sample. The lower panel shows their
    ratio together with the old k-factor for ``tag`` and that k-factor
    multiplied by ``pdfwgt_sf``. The figure is saved under ``./output/`` and
    the per-bin ratio plus bin edges are stored in ``outputrootfile[tag]``.
    """
    outdir = './output/'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    fig, (ax, rax) = plt.subplots(2,
                                  1,
                                  figsize=(7, 7),
                                  gridspec_kw={"height_ratios": (3, 1)},
                                  sharex=True)

    # 40 GeV bins below 800 GeV, 100 GeV bins from there up to 2 TeV.
    vpt_edges = list(range(80, 800, 40)) + list(range(800, 2000, 100))
    new_ax = hist.Bin('vpt', 'LHE V $p_{T}$ (GeV)', vpt_edges)

    # Only one distribution is processed.
    dist = 'gen_vpt'

    h = copy.deepcopy(acc[dist])
    h = h.integrate('type', pttype)
    h = h.rebin(h.axis('vpt'), new_ax)
    h = merge_extensions(h, acc, reweight_pu=False)
    scale_xs_lumi(h)
    h = merge_datasets(h)
    h = h[re.compile(regex)]
    h = h.integrate('weight_type', 'nominal')
    h = h.integrate('weight_index', slice(-0.5, 0.5))

    hist.plot1d(h,
                overlay='dataset',
                overflow='all',
                binwnorm=True,
                ax=ax)

    lo = h[re.compile('.*HT.*')].integrate('dataset')
    nlo = h[re.compile('.*LHE.*')].integrate('dataset')

    hist.plotratio(nlo,
                   lo,
                   ax=rax,
                   denom_fill_opts={},
                   guide_opts={},
                   unc='num',
                   overflow='all',
                   error_opts=data_err_opts,
                   label='2017 NLO/LO ratio')

    # Reference k-factor, drawn at the centre of each of its bins.
    old = get_old_kfac(tag)
    old_x = 0.5 * (old.bins[:, 0] + old.bins[:, 1])
    rax.plot(old_x, old.values, 'ob-', label='2016 QCD k fac')
    rax.plot(old_x,
             old.values * pdfwgt_sf(old_x),
             'or-',
             label='2016 x ad-hoc DY pdfwgt SF')

    ax.set_yscale('log')
    ax.set_ylim(1e-3, 1e6)
    rax.set_ylim(0, 2)
    rax.legend()

    fig.savefig(pjoin(outdir, f'{tag}_{dist}.pdf'))

    # Store the scale factor as (values, edges).
    sf_x = lo.axis('vpt').edges()
    sf_y = nlo.values()[()] / lo.values()[()]
    outputrootfile[tag] = (sf_y, sf_x)
def plot_htmiss_before_and_after(outdir, infile, dataset_tag='jetht', plot_gen=True):
    '''Plot the HT-miss distribution before and after rebalancing.

    Reads ``htmiss_before`` and ``htmiss_after`` from ``infile`` and saves the
    overlay as ``htmiss_before_after_reb.pdf`` in ``outdir``. For the ``qcd``
    tag with ``plot_gen`` set, the GEN HT-miss distribution from a fixed
    coffea input file is drawn as well.
    '''
    rootfile = uproot.open(infile)
    stages = [
        (rootfile['htmiss_before'], 'Before rebalancing'),
        (rootfile['htmiss_after'], 'After rebalancing'),
    ]

    fig, ax = plt.subplots()
    for htmiss, stage_label in stages:
        hep.histplot(htmiss.values, htmiss.edges, ax=ax, label=stage_label)

    ax.set_xlabel(r'$H_T^{miss} \ (GeV)$', fontsize=14)
    ax.set_ylabel(r'Counts', fontsize=14)
    ax.set_yscale('log')
    ax.set_ylim(1e-1, 1e7)
    ax.legend(title='Rebalancing')

    ax.text(0.,
            1.,
            f'{tag_to_plottag(dataset_tag)} 2017',
            fontsize=14,
            ha='left',
            va='bottom',
            transform=ax.transAxes)

    # For QCD with plot_gen=True, also draw the GEN HT-miss distribution.
    if dataset_tag == 'qcd' and plot_gen:
        # Coffea file to take the GEN HT-miss distribution from
        acc = load('./input/qcd_QCD_HT700to1000-mg_new_pmx_2017.coffea')
        gen_hist = acc['gen_htmiss_noweight']
        gen_hist = gen_hist.integrate('dataset').integrate('region', 'inclusive')
        hist.plot1d(gen_hist, ax=ax, clear=False)

        # Give the entry labelled 'None' a readable name, then rebuild the legend.
        handles, labels = ax.get_legend_handles_labels()
        for handle, text in zip(handles, labels):
            if text == 'None':
                handle.set_label(r'GEN $H_T^{miss}$')
        ax.legend(handles=handles)

    outpath = pjoin(outdir, f'htmiss_before_after_reb.pdf')
    fig.savefig(outpath)
    plt.close(fig)
    print(f'File saved: {outpath}')
def make_full_prediction_distribution(trainoutdir, trainingdata, key):
    """Plot the BDT score distributions of the default and optimized models.

    The full training dataset is read from ``trainingdata`` (HDF key ``key``),
    both models are loaded from ``trainoutdir`` and evaluated on it, and one
    signal-versus-background density plot per model is written to
    ``trainoutdir`` as ``prediction_fulldist_<model>.pdf``.
    """
    print(
        f"Make predictions on full training dataset: {trainingdata} and model in {trainoutdir}"
    )
    from coffea import hist
    import xgboost as xgb

    variants = ("default", "optimized")

    ## full dataset
    df = pd.read_hdf(trainingdata, key)
    featurecols = [col for col in df.columns if col != 'label']
    dfull = xgb.DMatrix(df[featurecols], label=df['label'])

    ## default and optimized models
    boosters = {}
    for variant in variants:
        booster = xgb.Booster({"nthread": 16})
        booster.load_model(join(trainoutdir, f"model_{variant}/model.bin"))
        boosters[variant] = booster

    ## predictions
    scores = {variant: boosters[variant].predict(dfull) for variant in variants}

    ## making plots
    label_axis = hist.Cat('label', 'S/B')
    bdt_axis = hist.Bin('score', 'BDT score', 50, -10, 10)
    is_signal = df['label'].values.astype(bool)

    responses = {}
    for variant in variants:
        response = hist.Hist("norm. counts", label_axis, bdt_axis)
        response.fill(label='signal', score=scores[variant][is_signal])
        response.fill(label='background', score=scores[variant][~is_signal])
        responses[variant] = response

    for variant in variants:
        fig, ax = plt.subplots(figsize=(8, 6))
        hist.plot1d(responses[variant], overlay='label', ax=ax, density=True)
        ax.set_ylim(0, None)
        ax.set_title(f'{variant} BDT response on full dataset', x=0.0, ha="left")
        ax.set_xlabel(ax.get_xlabel(), x=1.0, ha="right")
        ax.set_ylabel(ax.get_ylabel(), y=1.0, ha="right")
        plt.savefig(join(trainoutdir, f"prediction_fulldist_{variant}.pdf"),
                    bbox_inches='tight')
        plt.close()
def pdf_plot(acc):
    """Compare the V pt spectrum for each PDF entry against the ``none`` entry.

    For each of the two hard-coded HT-binned datasets, every identifier on the
    ``pdf`` axis other than ``none`` is drawn in the upper panel and as a ratio
    to ``none`` in the lower panel. One PDF file per dataset is written to
    ``./output/pdfstudy/``.
    """
    outdir = './output/pdfstudy/'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    datasets = [
        'WJetsToLNu_HT_MLM_2017',
        'DYJetsToLL_M-50_HT_MLM_2017',
    ]

    for ds in datasets:
        fig, ax, rax = fig_ratio()

        h = acc['gen_vpt']
        coarse_vpt = hist.Bin("vpt", r"$p_{T}^{V}$ (GeV)", 10, 0, 2000)
        h = h.rebin(h.axis('vpt'), coarse_vpt)
        h = merge_extensions(h, acc, reweight_pu=False)
        scale_xs_lumi(h)
        h = merge_datasets(h)
        h = h.project(h.axis('dataset'), ds)

        for pdf in h.axis('pdf').identifiers():
            pdf_name = str(pdf)
            if pdf_name == 'none':
                continue

            # The shared error-bar options are recoloured for each PDF entry.
            data_err_opts['color'] = colors[pdf_name]

            hist.plot1d(h.project('pdf', pdf),
                        error_opts=data_err_opts,
                        ax=ax,
                        overflow='all',
                        clear=False)

            hist.plotratio(
                h.project('pdf', pdf),
                h.project('pdf', 'none'),
                ax=rax,
                denom_fill_opts={},
                guide_opts={},
                unc='num',
                overflow='all',
                error_opts=data_err_opts,
                clear=False,
            )

        ax.set_ylim(1e-3, 1e8)
        rax.set_ylim(0.9, 1.6)
        ax.set_yscale('log')

        # Relabel legend entries by position on the pdf axis.
        leg = ax.legend()
        for i, pdf in enumerate(h.axis('pdf').identifiers()):
            if str(pdf) == 'none':
                continue
            leg.get_texts()[i].set_text(str(pdf))

        fig.savefig(pjoin(outdir, f'{ds}.pdf'))
        plt.close(fig)
def reduce(folder):
    """Plot every histogram file found in ``folder``.

    Each entry of ``folder`` is loaded with ``load``, drawn with
    ``hist.plot1d`` and saved as ``<filename>.png`` in the current working
    directory.

    :param folder: directory whose entries are loaded one by one
    """
    filenames = os.listdir(folder)
    print(filenames)

    for filename in filenames:
        loaded = load(os.path.join(folder, filename))
        hist.plot1d(loaded)
        plt.savefig(filename + '.png')
        # Close the current figure so figures do not pile up in memory and
        # the next file starts from an empty canvas.
        plt.close()
def pdf_plot(acc):
    """Plot the recoil spectrum of GJets events in three photon pt bins.

    For 2017 and 2018 the ``photon_pt0_recoil`` histogram is rebinned, reduced
    to the ``GJets_HT_MLM_<year>`` dataset in the ``tr_g_notrig_num`` region
    and drawn with one curve per photon pt bin. Output goes to
    ``./output/photon_pt_cut/photon_pt_cut_<year>.pdf``.
    """
    outdir = './output/photon_pt_cut/'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    for year in [2017, 2018]:
        # Reuse the current figure, wiped clean.
        fig = plt.gcf()
        fig.clf()
        ax = plt.gca()

        h = copy.deepcopy(acc['photon_pt0_recoil'])

        pt_bins = hist.Bin("pt", r"$p_{T}^{\gamma}$ (GeV)", [0, 175, 215, 10000])
        h = h.rebin(h.axis('pt'), pt_bins)

        recoil_edges = (list(range(200, 500, 50))
                        + list(range(500, 1000, 100))
                        + list(range(1000, 2000, 250)))
        h = h.rebin(h.axis('recoil'), hist.Bin('recoil', 'recoil', recoil_edges))

        h = merge_extensions(h, acc, reweight_pu=False)
        scale_xs_lumi(h)
        h = merge_datasets(h)

        pprint(h.axis('dataset').identifiers())
        h = h.integrate(h.axis('dataset'), f'GJets_HT_MLM_{year}')
        h = h.integrate(h.axis('region'), 'tr_g_notrig_num')
        pprint(h)

        hist.plot1d(h,
                    overlay='pt',
                    ax=ax,
                    overflow='all',
                    clear=False)

        ax.set_ylim(0, 2e5)
        ax.set_xlim(200, 500)
        ax.set_ylabel('Expected GJets events (a.u.)')

        # Legend entries follow the three pt bins defined above.
        leg = ax.legend(['< 175', '175 - 215', '> 215'],
                        title='Photon $p_{T}$')

        ax.text(0.97, 0.65,
                'Photon CR, no trigger applied',
                fontsize=10,
                horizontalalignment='right',
                verticalalignment='bottom',
                transform=ax.transAxes)

        # Dashed vertical marker at recoil = 250.
        ax.plot([250, 250], [0, 1e8], '--', color='grey')

        fig.savefig(pjoin(outdir, f'photon_pt_cut_{year}.pdf'))
        plt.close(fig)
def draw(h1, Closure_bin):
    """Draw ``h1`` for one closure bin, summed over datasets, and save it as PNG.

    Axis limits, the luminosity value and the output file name come from the
    file-level names ``xmin``, ``xmax``, ``ymin``, ``ymax``, ``lumi_factor``,
    ``histname`` and ``file_name``.
    """
    plt.style.use(hep.style.CMS)
    plt.rcParams.update({
        "font.size": 14,
        "axes.titlesize": 18,
        "axes.labelsize": 18,
        "xtick.labelsize": 12,
        "ytick.labelsize": 12,
    })

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 7), sharex=True)

    # Marker-only style for the points.
    fake_error_opts = {
        "linestyle": "none",
        "marker": "+",
        "markersize": 10.0,
        "color": "royalblue",
        "elinewidth": 1,
    }

    # -- Draw hist
    selected = h1.integrate('Closure_bin', Closure_bin).sum('dataset')
    hist.plot1d(
        selected,
        ax=ax,
        clear=False,
        error_opts=fake_error_opts,
    )

    np.set_printoptions(suppress=True)

    ax.autoscale(axis="x", tight=True)
    ax.set_ylim(ymin, ymax)
    ax.set_xlim(xmin, xmax)
    ax.set_yscale('log')

    # Luminosity label above the top-right corner of the axes.
    lum = plt.text(
        1.0,
        1.0,
        r"%.2f fb$^{-1}$ (13 TeV)" % (lumi_factor),
        fontsize=16,
        horizontalalignment="right",
        verticalalignment="bottom",
        transform=ax.transAxes,
    )

    outname = histname + "_" + file_name + ".png"
    plt.savefig(outname)
def make_mc_plot(bkgh, sigh=None, title=None, overflow='over', yscale='log'):
    """Plot stacked backgrounds, optionally with signals on top, and return ``(fig, ax)``.

    ``bkgh`` is stacked per ``cat`` using the file-level ``fill_opts`` and
    ``error_opts``. When ``sigh`` is given it is overlaid per ``dataset`` and
    the legend is built from ``groupHandleLabel(ax)``. The upper y limit is
    stretched so the visible span grows by 20% (in log10 space for a log
    scale).
    """
    import matplotlib.pyplot as plt
    import numpy as np
    from coffea import hist

    fig, ax = plt.subplots(1, 1, figsize=(8, 6))
    hist.plot1d(bkgh,
                overlay='cat',
                ax=ax,
                clear=False,
                stack=True,
                overflow=overflow,
                line_opts=None,
                fill_opts=fill_opts,
                error_opts=error_opts)
    if sigh:
        hist.plot1d(sigh,
                    overlay='dataset',
                    ax=ax,
                    overflow=overflow,
                    clear=False)

    ax.set_yscale(yscale)
    ax.autoscale(axis='both', tight=True)

    # Stretch only the upper edge to leave headroom above the histograms.
    lower, upper = ax.get_ylim()
    if yscale == 'linear':
        upper = (upper - lower) * 1.2 + lower
    if yscale == 'log':
        log_lower = np.log10(lower)
        upper = 10**(((np.log10(upper) - log_lower) * 1.2) + log_lower)
    ax.set_ylim(lower, upper)

    ax.set_xlabel(ax.get_xlabel(), x=1, ha='right')
    ax.set_ylabel(ax.get_ylabel(), y=1.0, ha="right")
    ax.set_title(title, x=0.0, ha="left")
    ax.text(1, 1, '59.74/fb (13TeV)',
            ha='right', va='bottom', transform=ax.transAxes)

    if sigh:
        handles_and_labels = groupHandleLabel(ax)
        ax.legend(*handles_and_labels, prop={
            'size': 8,
        }, ncol=3)
    else:
        leg = ax.legend()

    return fig, ax
def plot_mhiggs(tag, y1, label1, y2, label2, title=''):
    """Overlay two Higgs-mass samples and save the plot as ``<tag>.pdf``.

    ``y1`` and ``y2`` are filled into a 60-bin histogram over [0, 300] under
    the process names ``label1`` and ``label2``. A ready-to-run ``imgcat``
    command for the output file is printed in green.
    """
    fig = plt.figure()

    process_axis = hist.Cat("process", title)
    mass_axis = hist.Bin("mhiggs", "Higgs mass [GeV]", 60, 0, 300)
    hmh = hist.Hist("Events", process_axis, mass_axis)

    for process, masses in ((label1, y1), (label2, y2)):
        hmh.fill(process=process, mhiggs=masses)

    ax = hist.plot1d(hmh, overlay="process", stack=False)

    filename = tag + '.pdf'
    fig.savefig(filename)
    cprint('imgcat ' + filename, 'green')
    return
def main():
    """Run the example processor on the local samples and plot ``MET_pt``.

    Returns the processor output.
    """
    overwrite = True

    # Inputs are defined in a dictionary
    # dataset : list of files
    fileset = {
        'tW_scattering': glob.glob("data/samples/*.root"),
    }

    # Names of the histograms to plot.
    histograms = ["MET_pt"]

    executor_args = {
        'workers': 1,
        'function_args': {
            'flatten': False
        }
    }
    output = processor.run_uproot_job(
        fileset,
        treename='Events',
        processor_instance=exampleProcessor(),
        executor=processor.futures_executor,
        executor_args=executor_args,
        chunksize=500000,
    )

    # Make a few plots
    outdir = "tmp_plots/"
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    for name in histograms:
        print(name)
        histogram = output[name]

        if name == 'MET_pt':
            # Rebin to 20 bins in [0, 200].
            met_bins = hist.Bin('pt', r'$E_T^{miss} \ (GeV)$', 20, 0, 200)
            histogram = histogram.rebin('pt', met_bins)

        # Unstacked overlay; density is switched off here.
        ax = hist.plot1d(histogram,
                         overlay="dataset",
                         density=False,
                         stack=False)
        ax.set_yscale('linear')  # can be log
        ax.figure.savefig(os.path.join(outdir, "{}_shape.pdf".format(name)))
        ax.clear()

    return output
def truth_nMuon():
    """Overlay the normalised truth-muon multiplicity for Bu/Bs, inclusive/probefilter.

    The ``nTruthMuon`` histograms in the file-level ``histograms`` mapping are
    scaled in place by the inverse of their per-dataset integral, relabelled
    and drawn onto one axes. The figure is saved as ``truth_nMuon.png`` in
    ``figure_directory``.
    """
    fig, ax = plt.subplots(1, 1, figsize=(10, 7))
    plt.style.use(mplhep.style.ROOT)

    btypes = ["Bu", "Bs"]

    # Normalize histograms
    for btype in btypes:
        h_nmuon = histograms[btype]["nTruthMuon"]
        normalizations = h_nmuon.integrate("nTruthMuon")
        # Keys of values() are tuples; the first element is used as the dataset key.
        sfs = {key[0]: 1. / total
               for key, total in normalizations.values().items()}
        h_nmuon.scale(sfs, axis="dataset")

    dataset_prefix = {"Bu": "Bu2KJpsi2KMuMu", "Bs": "Bs2PhiJpsi2KKMuMu"}

    for dataset_type in ["inclusive", "probefilter"]:
        print(dataset_type)
        for btype in btypes:
            print(btype)
            dataset_name = f"{dataset_prefix[btype]}_{dataset_type}"
            h_nmuon = histograms[btype]["nTruthMuon"]

            # Relabel the dataset bin so the legend shows a short name.
            h_nmuon.axis("dataset").index(
                dataset_name).label = f"{dataset_type}, {btype}"

            hist.plot1d(h_nmuon[([dataset_name]), :],
                        ax=ax,
                        clear=False,
                        overlay="dataset")

    plt.tight_layout()
    fig.savefig(f"{figure_directory}/truth_nMuon.png")
def plotHist(output, variable, xlabel):
    """Plot ``output[variable]`` with the Data and MC datasets normalised to unit area.

    The histogram is scaled in place by the inverse of the ``Data`` and ``MC``
    integrals for plotting and scaled back afterwards, so the caller's
    histogram ends up with its original normalisation.

    :param output: mapping from variable name to histogram
    :param variable: key in ``output``; also the name of the axis integrated over
    :param xlabel: label for the x axis
    """
    histogram = output[variable]

    integrals = histogram.integrate(variable).values()
    data_scale = integrals['Data', ]
    mc_scale = integrals['MC', ]

    # Explicit dicts keyed by dataset name; the tuple-keyed entries returned
    # by values() are not passed on to scale().
    histogram.scale({'Data': 1 / data_scale, 'MC': 1 / mc_scale},
                    axis='dataset')
    try:
        fig, ax = plt.subplots(figsize=(5, 5))
        ax = hist.plot1d(histogram, overlay='dataset')
        ax.set_xlabel(xlabel)
        ax.set_ylabel('')
    finally:
        # Undo the normalisation even if plotting fails, so the caller's
        # histogram is never left rescaled.
        histogram.scale({'Data': data_scale, 'MC': mc_scale},
                        axis='dataset')
def plot_ht_stitching(acc, tag, regex):
    """Plot the LHE HT spectrum of the datasets matching ``regex``.

    The ``lhe_ht`` histogram is copied, passed through the extension merging,
    cross-section/luminosity scaling and dataset merging helpers, and saved as
    ``./output/ht/{tag}_lhe_ht.pdf``.
    """
    outdir = './output/ht/'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Only one distribution is processed.
    dist = 'lhe_ht'

    h = copy.deepcopy(acc[dist])
    h = merge_extensions(h, acc)
    scale_xs_lumi(h)
    h = merge_datasets(h)

    selected = h[re.compile(regex)]
    fig, ax, _ = hist.plot1d(selected,
                             overlay='dataset',
                             overflow='all',
                             binwnorm=True)

    plt.yscale('log')
    plt.ylim(1e-3, 1e6)
    fig.savefig(pjoin(outdir, f'{tag}_{dist}.pdf'))
def main():
    """Plot the ``weights`` histogram from the input file as ``weights.pdf``.

    The file, region, regular expression and output path come from the
    command-line arguments.
    """
    args = commandline()
    output = load(args.file)

    weights = output['weights']
    weights = weights.integrate('dataset').integrate("region", args.region)
    weights = weights[re.compile(args.regex)]

    fill_opts = {'edgecolor': (0, 0, 0, 0.3), 'alpha': 0.8}
    fig, ax, _ = hist.plot1d(weights,
                             overlay='weight_type',
                             overflow='all',
                             fill_opts=fill_opts)

    fig.suptitle("Weights")
    ax.set_yscale('log')
    ax.set_ylim(0.1, 1e8)

    # An already existing output directory is fine.
    try:
        os.makedirs(args.outpath)
    except FileExistsError:
        pass

    fig.savefig(pjoin(args.outpath, "weights.pdf"))
    plt.close(fig)
def plot_resolution(tag, y_pred, y_true, y_ref=None):
    """Plot the distribution of ``y_pred - y_true`` with a Gaussian fit overlaid.

    When ``y_ref`` is given, ``y_ref - y_true`` is added as a second method
    ("CB") with its own fit. If ``y_true`` sums to zero, the axis is
    relabelled and re-ranged as a Higgs mass. The figure is saved as
    ``sigma_<tag>.pdf`` and a ready-to-run ``imgcat`` command is printed.

    :param tag: suffix used in the output file name
    :param y_pred: predicted values (method "DNN")
    :param y_true: true values
    :param y_ref: optional reference values (method "CB")
    """
    sigma_dnn = y_pred - y_true
    if y_ref is not None:
        sigma_cb = y_ref - y_true

    # The label matches the sign of the plotted quantity (prediction minus
    # truth); it previously read "True - Predicted".
    xmin, xmax, xlabel = -100, 350, "Predicted - True"
    if y_true.sum() == 0:
        xmin, xmax, xlabel = 50, 250, "Higgs mass [GeV]"

    hsigma = hist.Hist("Events", hist.Cat("method", "Reco method"),
                       hist.Bin("sigma", xlabel, 100, xmin, xmax))
    hsigma.fill(method="DNN", sigma=sigma_dnn)
    if y_ref is not None:
        hsigma.fill(method="CB", sigma=sigma_cb)

    fig = plt.figure()
    ax = hist.plot1d(hsigma, overlay="method", stack=False)

    # (histogram key, name used in the printout, line colour)
    fits = [("DNN", "DNN", "maroon")]
    if y_ref is not None:
        fits.append(("CB", "Ref.", "navy"))

    centers = hsigma.axis('sigma').centers()
    for method, printed_name, color in fits:
        fit_result, gauss, mask = fit_gauss(centers,
                                            hsigma.values()[(method, )])
        print('%s mean = %.2f, std = %.2f' %
              ((printed_name, ) + tuple(fit_result[1:3])))
        ax.plot(centers[mask],
                gauss,
                color=color,
                linewidth=1,
                label=r'Fitted function')

    filename = 'sigma_' + tag + '.pdf'
    fig.savefig(filename)
    cprint('imgcat ' + filename, 'green')
    return
def main():
    """Run the example processor on two remote samples and plot four histograms.

    Each histogram is drawn per dataset on a log scale and saved as a PDF
    under ``./tmp_plots``.
    """
    # Inputs are defined in a dictionary
    # dataset : list of files
    fileset = {
        'NonthDM': [
            "root://cms-xrd-global.cern.ch///store/mc/RunIISummer16NanoAODv4/NonthDMMonoJet_MX-1500_l1-2p_l2-0p04_13TeV-madgraph/NANOAODSIM/PUMoriond17_Nano14Dec2018_102X_mcRun2_asymptotic_v6-v1/260000/F78663A9-8E7F-B74E-8F6F-9C9A61A27AE5.root"
        ],
        "Znunu_ht600to800": [
            "root://cms-xrd-global.cern.ch///store/mc/RunIISummer16NanoAODv4/ZJetsToNuNu_HT-600To800_13TeV-madgraph/NANOAODSIM/PUMoriond17_Nano14Dec2018_102X_mcRun2_asymptotic_v6-v1/280000/F4921B81-C2E3-6546-9C00-D908A264FFD8.root",
        ]
    }

    # Run the processor
    executor_args = {
        'workers': 1,
        'function_args': {
            'flatten': False
        }
    }
    output = processor.run_uproot_job(
        fileset,
        treename='Events',
        processor_instance=exampleProcessor(),
        executor=processor.futures_executor,
        executor_args=executor_args,
        chunksize=500000,
    )

    # Make a few plots
    outdir = "./tmp_plots"
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    plot_names = ["met", "jet_pt", "new_variable", "jet_pt_met100"]
    for name in plot_names:
        fig, ax, _ = hist.plot1d(output[name], overlay="dataset")
        ax.set_yscale('log')
        ax.set_ylim(0.1, 1e5)
        fig.savefig(os.path.join(outdir, "{}.pdf".format(name)))
def debug_plot_output(output, region='inclusive', outdir='out', logscaley=True):
    """Dump all histograms as PDF.

    Plotting is best-effort: entries whose name starts with an underscore,
    entries that are not histograms, empty histograms, histograms that cannot
    be reduced to ``region`` and histograms that fail to plot are skipped.

    :param output: mapping from name to histogram (or other accumulator)
    :param region: identifier integrated over on the ``region`` axis
    :param outdir: directory the PDF files are written to
    :param logscaley: use a log y axis with range (0.1, 1e8); otherwise a
        linear axis with range (0.1, 1e3)
    """
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    for name in output.keys():
        if name.startswith("_"):
            continue

        # Skip empty histograms. The arrays are summed one by one because
        # np.sum over a dict view does not add up its arrays, which made
        # the previous emptiness check ineffective.
        try:
            total = sum(np.sum(v) for v in output[name].values().values())
        except Exception:
            # Not a histogram-like entry (e.g. a plain counter); skip it.
            continue
        if total == 0:
            continue

        try:
            h = output[name].integrate("region", region)
        except Exception:
            # No usable 'region' axis or region entry; skip it.
            continue

        print(name)
        try:
            fig, ax, _ = hist.plot1d(
                h,
                overlay='dataset',
                overflow='all',
            )
        except Exception:
            # Histogram shape not plottable by plot1d; skip it.
            continue

        fig.suptitle(f'{region}, {name}')
        if logscaley:
            ax.set_yscale('log')
            ax.set_ylim(0.1, 1e8)
        else:
            ax.set_ylim(0.1, 1e3)

        fig.savefig(os.path.join(outdir, f"{region}_{name}.pdf"))
        plt.close(fig)
def main():
    """Run (or fetch from cache) the example processor and plot its histograms.

    Every histogram is plotted twice: stacked as ``<name>.pdf`` and as an
    unstacked density overlay ``<name>_shape.pdf``, both under
    ``./tmp_plots``. Returns the processor output.
    """
    overwrite = True

    # load the config and the cache
    cfg = loadConfig()

    # Inputs are defined in a dictionary
    # dataset : list of files
    # (an alternative "ttbar" sample, SingleLeptFromT, used to be listed here)
    fileset = {
        'tW_scattering': glob.glob("/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/0p1p2/tW_scattering__nanoAOD/merged/*.root"),
        "TTW": glob.glob("/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/0p1p2/TTWJetsToLNu_TuneCP5_13TeV-amcatnloFXFX-madspin-pythia8__RunIIAutumn18NanoAODv6-Nano25Oct2019_102X_upgrade2018_realistic_v20_ext1-v1/merged/*.root") \
            + glob.glob("/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/0p1p2/TTWJetsToQQ_TuneCP5_13TeV-amcatnloFXFX-madspin-pythia8__RunIIAutumn18NanoAODv6-Nano25Oct2019_102X_upgrade2018_realistic_v20-v1/merged/*.root"),
        "ttbar": glob.glob("/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/0p1p3/TTJets_SingleLeptFromTbar_TuneCP5_13TeV-madgraphMLM-pythia8__RunIIAutumn18NanoAODv6-Nano25Oct2019_102X_upgrade2018_realistic_v20-v1/merged/*.root")
    }

    # Names of the histograms to plot.
    histograms = [
        "MET_pt", "Jet_pt", "Jet_eta", "Jet_pt_fwd", "W_pt_notFromTop",
        "GenJet_pt_fwd", "Spectator_pt", "Spectator_eta"
    ]
    histograms += [
        "Top_pt", "Top_eta", "Antitop_pt", "Antitop_eta", "W_pt", "W_eta",
        "N_b", "N_jet", "dijet_mass", "dijet_mass_bestW",
        "dijet_mass_secondW", "digenjet_mass", "dijet_deltaR"
    ]

    # initialize cache
    cache_dir = os.path.join(os.path.expandvars(cfg['caches']['base']),
                             cfg['caches']['simpleProcessor'])
    cache = dir_archive(cache_dir, serialized=True)
    if not overwrite:
        cache.load()

    # Reuse the cached output only if it was made with identical inputs.
    cache_is_current = (cfg == cache.get('cfg')
                        and histograms == cache.get('histograms')
                        and fileset == cache.get('fileset')
                        and cache.get('simple_output'))
    if cache_is_current:
        output = cache.get('simple_output')
    else:
        # Run the processor
        output = processor.run_uproot_job(
            fileset,
            treename='Events',
            processor_instance=exampleProcessor(),
            executor=processor.futures_executor,
            executor_args={
                'workers': 1,
                'function_args': {
                    'flatten': False
                }
            },
            chunksize=500000,
        )
        cache['fileset'] = fileset
        cache['cfg'] = cfg
        cache['histograms'] = histograms
        cache['simple_output'] = output
        cache.dump()

    # Make a few plots
    outdir = "./tmp_plots"
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # (density, stack, file name pattern): stacked yields first, then shapes.
    plot_variants = (
        (False, True, "{}.pdf"),
        (True, False, "{}_shape.pdf"),
    )

    for name in histograms:
        print(name)
        histogram = output[name]

        if name == 'MET_pt':
            # rebin
            met_bins = hist.Bin('pt', r'$E_T^{miss} \ (GeV)$', 20, 0, 200)
            histogram = histogram.rebin('pt', met_bins)
        if name == 'W_pt_notFromTop':
            # rebin
            w_pt_bins = hist.Bin('pt', r'$p_{T}(W) \ (GeV)$', 25, 0, 500)
            histogram = histogram.rebin('pt', w_pt_bins)

        for density, stack, pattern in plot_variants:
            ax = hist.plot1d(histogram,
                             overlay="dataset",
                             density=density,
                             stack=stack)
            ax.set_yscale('linear')  # can be log
            ax.figure.savefig(os.path.join(outdir, pattern.format(name)))
            ax.clear()

    return output
def make_plot(acc, region, distribution, year, data, mc, signal=None, outdir='./output/stack/', integrate=None, ylim=None, xlim=None, rylim=None, tag=None, output_format='pdf', ratio=True):
    """Creates a data vs MC comparison plot

    :param acc: Accumulator (processor output)
    :type acc: coffea.processor.accumulator
    :param region: region to integrate over, also used in the output file name
    :param distribution: key of the histogram in ``acc``
    :param year: year used for the luminosity label and the output file name
    :param data: dataset selector for data; falsy to skip data and the ratio
    :param mc: dataset selector for the stacked MC backgrounds
    :param signal: optional dataset selector for signal samples
    :param outdir: directory the plot files are written to
    :param integrate: optional ``(axis, low, high)`` to integrate an extra axis
    :param ylim: ``(low, high)``, ``"auto"``, or None to use the plot settings
    :param xlim: ``(low, high)`` or None to use the plot settings
    :param rylim: ``(low, high)`` for the ratio panel; default (0.5, 1.5)
    :param tag: optional tag inserted into the output file name
    :param output_format: comma-separated list of file extensions
    :param ratio: add a data / MC ratio panel below the main plot
    """
    # Rebin
    s = Style()
    h = copy.deepcopy(acc[distribution])
    assert (h)
    try:
        newax = s.get_binning(distribution, region)
        h = h.rebin(h.axis(newax.name), newax)
    except KeyError:
        pass

    # Integrate over an extra axis
    inte_tag = ""
    if integrate:
        (inte_axis, inte_low, inte_high) = integrate
        h = h.integrate(inte_axis, slice(inte_low, inte_high))
        inte_tag += "_" + inte_axis + "_" + str(inte_low) + "_" + str(
            inte_high)

    # Pick the region we want to look at
    h = h.integrate(h.axis('region'), region)

    # Add ratio plot at the bottom if specified (default)
    # Otherwise just plot the histogram
    if ratio:
        fig, (ax, rax) = plt.subplots(2,
                                      1,
                                      figsize=(7, 7),
                                      gridspec_kw={"height_ratios": (3, 1)},
                                      sharex=True)
    else:
        fig, ax = plt.subplots(1, 1, figsize=(7, 5))
        rax = None

    data_err_opts = {
        'linestyle': 'none',
        'marker': '.',
        'markersize': 10.,
        'color': 'k',
        'elinewidth': 1,
    }
    signal_err_opts = {
        'linestyle': '-',
        'color': 'crimson',
        'elinewidth': 1,
    }

    # Plot data
    if data:
        hist.plot1d(h[data],
                    overlay='dataset',
                    error_opts=data_err_opts,
                    ax=ax,
                    overflow='all',
                    binwnorm=1)

    if signal:
        hist.plot1d(h[signal],
                    overlay='dataset',
                    error_opts=signal_err_opts,
                    ax=ax,
                    overflow='all',
                    binwnorm=1,
                    clear=False)

    # Plot MC background samples
    hist.plot1d(h[mc],
                overlay='dataset',
                stack=True,
                clear=False,
                overflow='all',
                ax=ax,
                binwnorm=1)

    # Pick the legend labels for the channel once, on a copy, so the shared
    # legend_labels dictionaries are not modified. Channels other than the
    # known ones get only the common labels instead of an unbound name.
    channel = channel_name(region)
    if channel == 'VBF':
        legend_labels_to_use = dict(legend_labels['VBF'])
    elif channel in ['Monojet', 'Mono-V']:
        legend_labels_to_use = dict(legend_labels['Monojet/Mono-V'])
    else:
        legend_labels_to_use = {}
    legend_labels_to_use.update(legend_labels['Common'])

    # Apply correct colors to BG histograms and collect legend labels
    handles, labels = ax.get_legend_handles_labels()
    new_labels = []
    for handle, label in zip(handles, labels):
        col = None
        for k, v in colors.items():
            if re.match(k, label):
                col = v
                break

        if col:
            handle.set_color(col)
            handle.set_linestyle('-')
            handle.set_edgecolor('k')

        # The last matching pattern wins.
        l = None
        for k, v in legend_labels_to_use.items():
            if re.match(k, label):
                l = v
        new_labels.append(l if l else label)

    # Update legend
    try:
        region_name = s.region_names[region]
    except KeyError:
        region_name = region
    ax.legend(title=region_name, ncol=2, handles=handles, labels=new_labels)

    # Ratio plot: only when a ratio panel exists (rax is None otherwise).
    if data and ratio:
        hist.plotratio(h[data].integrate('dataset'),
                       h[mc].integrate('dataset'),
                       ax=rax,
                       denom_fill_opts={},
                       guide_opts={},
                       unc='num',
                       overflow='all',
                       error_opts=data_err_opts)

    ax.text(1.,
            0.,
            distribution,
            fontsize=10,
            horizontalalignment='right',
            verticalalignment='bottom',
            transform=ax.transAxes)
    fig.text(0.,
             1.,
             '$\\bf{CMS}$ internal',
             fontsize=14,
             horizontalalignment='left',
             verticalalignment='bottom',
             transform=ax.transAxes)
    fig.text(1.,
             1.,
             f'{channel_name(region)}, {lumi(year):.1f} fb$^{{-1}}$ ({year})',
             fontsize=14,
             horizontalalignment='right',
             verticalalignment='bottom',
             transform=ax.transAxes)

    # Aesthetics
    ax.set_yscale("log")
    ax.set_ylabel('Events / GeV')

    # Narrow the settings down to this region and distribution where present.
    plot_settings = style.plot_settings()
    if region in plot_settings.keys():
        plot_settings = plot_settings[region]
    if distribution in plot_settings.keys():
        plot_settings = plot_settings[distribution]

    if ylim:
        if ylim == "auto":
            width = np.diff([x for x in h.axes()
                             if "dataset" not in x.name][0].edges())
            vmc = h[mc].integrate("dataset").values()[()] / width
            try:
                vdata = h[data].integrate("dataset").values()[()] / width
            except Exception:
                # No usable data selection: fall back to the MC values.
                vdata = vmc
            if signal:
                vsig = h[signal].integrate("dataset").values()[()] / width
            else:
                vsig = vmc

            # Lower edge from the smallest positive content; upper edge from
            # the largest content of any sample (signal used np.min before).
            ax.set_ylim(
                0.5 * min([
                    np.min(vmc[vmc > 0]),
                    np.min(vdata[vdata > 0]),
                    np.min(vsig[vsig > 0])
                ]),
                1e2 * max([np.max(vmc),
                           np.max(vdata),
                           np.max(vsig)]),
            )
        else:
            ax.set_ylim(ylim[0], ylim[1])
    elif 'ylim' in plot_settings.keys():
        ax.set_ylim(plot_settings['ylim'])
    else:
        ax.set_ylim(1e-1, 1e6)

    if xlim:
        ax.set_xlim(xlim[0], xlim[1])
    elif 'xlim' in plot_settings.keys():
        ax.set_xlim(plot_settings['xlim'])

    if ratio:
        if rylim:
            rax.set_ylim(*rylim)
        else:
            rax.set_ylim(0.5, 1.5)
        loc1 = matplotlib.ticker.MultipleLocator(base=0.2)
        loc2 = matplotlib.ticker.MultipleLocator(base=0.1)
        rax.yaxis.set_major_locator(loc1)
        rax.yaxis.set_minor_locator(loc2)
        rax.grid(axis='y', which='minor', linestyle='--')
        rax.grid(axis='y', which='major', linestyle='--')
        rax.set_ylabel('Data / MC')

    if not os.path.exists(outdir):
        os.makedirs(outdir)

    for form in output_format.split(','):
        outpath = pjoin(
            outdir,
            f"{region}_{distribution}{inte_tag}_{tag + '_' if tag else ''}{year}.{form}"
        )
        fig.savefig(outpath)
        print(f"Saved plot file in {outpath}")
    plt.close('all')
"should_transfer_files": "YES", "when_to_transfer_output": "ON_EXIT", "+DaskSchedulerAddress": '"129.93.183.33:8787"', }) cluster.adapt(minimum_jobs=5, maximum_jobs=100, maximum_memory="4 GB" ) # auto-scale between 5 and 100 jobs (maximum_memory="4 GB") client = Client(cluster) exe_args = { 'client': client, } output = processor.run_uproot_job(fileset, treename='Events', processor_instance=METProcessor(), executor=processor.dask_executor, executor_args=exe_args) # Generates a 1D histogram from the data output to the 'MET' key. fill_opts are optional, to fill the graph (default is a line). hist.plot1d(output['MET'], overlay='dataset', fill_opts={ 'edgecolor': (0, 0, 0, 0.3), 'alpha': 0.8 }) # Easy way to print all cutflow dict values. Can just do print(output['cutflow']["KEY_NAME"]) for one. for key, value in output['cutflow'].items(): print(key, value)
} outputs = {} outputs['data'] = processor.run_uproot_job(dataDS, treename='ffNtuplizer/ffNtuple', processor_instance=MuonTimingProcessor(region='CR', data_type='data'), executor=processor.futures_executor, executor_args=dict(workers=12, flatten=False), chunksize=500000, ) ## CHANNEL - 2mu2e fig, ax = plt.subplots(1,1,figsize=(8,6)) h = outputs['data']['ndsa'].integrate('channel', slice(1,2)) hist.plot1d(h, overlay='cat', ax=ax, overflow='over', error_opts=data_err_opts) ax.set_title('[2mu2e|CR] mu-type leptonjet N(dsa)', x=0.0, ha="left") ax.set_yscale('symlog') ax.autoscale(axis='both', tight=True) ax.text(1,1,'59.74/fb (13TeV)', ha='right', va='bottom', transform=ax.transAxes) ax.get_yaxis().set_major_locator(SymmetricalLogLocator(base=10., linthresh=1, subs=range(1,10))) ax.set_xlabel(ax.get_xlabel(), x=1.0, ha="right") ax.set_ylabel(ax.get_ylabel(), y=1.0, ha="right") fig.savefig(join(outdir, 'ndsa_CR_2mu2e.png')) fig.savefig(join(outdir, 'ndsa_CR_2mu2e.pdf')) plt.close(fig) fig, ax = plt.subplots(1,1,figsize=(8,6)) h = outputs['data']['mutiming'].integrate('channel', slice(1,2)) hist.plot1d(h, overlay='cat', ax=ax, overflow='over', error_opts=data_err_opts) ax.set_title('[2mu2e|CR] mu-type leptonjet mean timing', x=0.0, ha="left")
from coffea import hist from matplotlib import pyplot as plt # Load input output = load('monojet.coffea') h = output['genvpt_check'].project("dataset") oldax = h.axis("vpt") newax = hist.Bin("vpt", r"$p_{T}^{V}$ (GeV)", 25, 0, 2000) h = h.rebin(oldax, newax) fig, (ax, rax) = plt.subplots(2, 1, figsize=(7, 7), gridspec_kw={"height_ratios": (3, 1)}, sharex=True) fig, ax, _ = hist.plot1d(h, overlay='type', ax=ax) # Ratio plot data_err_opts = { 'linestyle': 'none', 'marker': '.', 'markersize': 10., 'color': 'k', 'elinewidth': 1, 'emarker': '_' } a = h.axis('type') h1 = h.project(a, "BU") h2 = h.project(a, "Nano") hist.plotratio(h2,
import lz4.frame as lz4f
import cloudpickle
from coffea import hist

# Read the pickled histogram collection from the lz4-compressed file.
# NOTE: unpickling executes code from the file; only open trusted inputs.
with lz4f.open("hists.cpkl.lz4", mode="r", compression_level=5) as stream:
    hists = cloudpickle.load(stream)

# Draw the 'sr_met' histogram per dataset on a log scale and save it.
sr_met = hists["sr_met"]
fig, ax, _ = hist.plot1d(sr_met, overlay="dataset")
ax.set_yscale('log')
ax.set_ylim(0.1, 1e5)
fig.savefig("test.pdf")