def test_export1d():
    """Round-trip a 1D coffea histogram through a ROOT file.

    A regular-binned histogram is filled, converted with ``export1d``,
    written with uproot and read back; the bin edges and values read from
    disk must equal those of the exported object.

    The scratch file is removed in a ``finally`` clause so that a failing
    assertion does not leave ``test_export1d.root`` behind.
    """
    import uproot
    import os
    from coffea.hist import export1d

    counts, test_eta, test_pt = dummy_jagged_eta_pt()
    h_regular_bins = hist.Hist("regular_joe", hist.Bin("x", "x", 20, 0, 200))
    h_regular_bins.fill(x=test_pt)

    hout = export1d(h_regular_bins)

    filename = 'test_export1d.root'

    try:
        # Leaving the ``with`` block closes the file, so no explicit
        # ``close()`` call is needed inside it.
        with uproot.create(filename) as fout:
            fout['regular_joe'] = hout

        with uproot.open(filename) as fin:
            hin = fin['regular_joe']

        assert (np.all(hin.edges == hout.edges))
        assert (np.all(hin.values == hout.values))

        # Drop the references to the read-back objects before the file is
        # deleted below.
        del hin
        del fin
    finally:
        if os.path.exists(filename):
            os.remove(filename)
def test_read_root_multiple_trees(self, table):
    """Reading a ROOT file holding several trees needs an explicit ``treename``.

    Without a tree name the read must raise ``ValueError`` with a message
    starting with 'Multiple trees found'; naming the tree must succeed.
    """
    import uproot

    # append hasn't been implemented in uproot 3 yet
    with utils.TemporaryFilename(suffix='.root') as root_path:
        # write a file with two trees: "a" (five int32 entries) and "b"
        with uproot.create(root_path) as outfile:
            outfile["a"] = uproot.newtree({"branch": "int32"})
            outfile["a"].extend({"branch": asarray([1, 2, 3, 4, 5])})
            outfile["b"] = uproot.newtree()

        # an unqualified read is ambiguous and has to be rejected
        with pytest.raises(ValueError) as excinfo:
            self.TABLE.read(root_path)
        message = str(excinfo.value)
        assert message.startswith('Multiple trees found')

        # selecting one tree explicitly must work
        self.TABLE.read(root_path, treename="a")
def write_hist(histfile: Path, name: str, hist: np.ndarray, bin_edges: np.ndarray) -> None:
    """Write a histogram and its bin edges to ``histfile``.

    The output format is chosen from the file suffix:

    * ``.npz``  -- compressed NumPy archive with the arrays ``name`` and
      ``"bin_edges"``.
    * ``.txt``  -- ``np.savetxt`` dump of ``hist``; the bin edges and ``name``
      go into the header.
    * ``.hdf5`` -- gzip-compressed datasets ``name`` and ``"bin_edges"``
      (requires h5py).
    * ``.root`` -- a TH1 when ``hist`` is one-dimensional, otherwise a TH2
      whose first axis uses integer edges ``0..hist.shape[0]``, stored
      under the key ``name`` (requires uproot and uproot_methods).

    Args:
        histfile: Destination path; must not exist yet.
        name: Key / dataset name under which the histogram is stored.
        hist: Histogram contents.
        bin_edges: Bin edges belonging to ``hist``.

    Raises:
        FileExistsError: ``histfile`` already exists.
        ImportError: ``.hdf5`` output was requested but h5py is not installed.
        ValueError: the suffix of ``histfile`` is not a supported format.
    """
    if histfile.exists():
        raise FileExistsError(f"Error writing {histfile}, already exists.")

    suffix = histfile.suffix
    if suffix == ".npz":
        np.savez_compressed(histfile, **{
            name: hist,
            "bin_edges": bin_edges,
        })
    elif suffix == ".txt":
        np.savetxt(
            histfile,
            hist,
            header="bin edges:\n" + str(bin_edges) + f"\n{name}:",
        )
    elif suffix == ".hdf5":
        # Guard only the import: a ModuleNotFoundError raised while actually
        # writing must not be reported as "h5py is missing".
        try:
            import h5py
        except ModuleNotFoundError as err:
            raise ImportError("Please install h5py to write hdf5 files") from err
        with h5py.File(histfile, "w") as f:
            f.create_dataset(name, data=hist, compression="gzip", compression_opts=9)
            f.create_dataset(
                "bin_edges",
                data=bin_edges,
                compression="gzip",
                compression_opts=9,
            )
    elif suffix == ".root":
        import uproot
        # TODO: Discard sumw2?
        if hist.ndim == 1:
            from uproot_methods.classes.TH1 import from_numpy
            h = from_numpy([hist, bin_edges])
        else:
            from uproot_methods.classes.TH2 import from_numpy
            h = from_numpy([hist, np.arange(0, hist.shape[0] + 1), bin_edges])
        with uproot.create(histfile) as f:
            f[name] = h
    else:
        raise ValueError(f"Unknown output format: {suffix}")
from coffea import hist from coffea.util import load, save import processmap hists_unmapped = load('hists_Hbb_create_2017.coffea') hists = {} for key, val in hists_unmapped.items(): if isinstance(val, hist.Hist): hists[key] = processmap.apply(val) if os.path.exists("templates.root"): os.remove("templates.root") fout = uproot.create("templates.root") nodata = re.compile("(?!data_obs)") h = hists['templates_signalregion'][nodata] lumi = 41.1 h.scale({p: lumi for p in h[nodata].identifiers('process')}, axis="process") for proc in h.identifiers('process'): for i, ptbin in enumerate(h.identifiers('AK8Puppijet0_pt')): for syst in h.identifiers('systematic'): mproj = (slice(None), 'all') systreal = syst fail_template = (h.project('process', proc) .project('AK8Puppijet0_isHadronicV', *mproj) .project('systematic', systreal) .project('AK8Puppijet0_pt', ptbin)
import uproot

# Build a small dummy ROOT file: every branch named in the parameter file is
# filled with ``num_lines`` constant entries.  The output file is opened in a
# context manager so it is closed (and flushed) even if the parameter file is
# missing or malformed.
with uproot.create("testdata/test.root") as h:
    h["deepntuplizer/tree"] = {}

    with open("config/data-params.json") as fh:
        params = json.load(fh)

    num_lines = 3

    # (key in the parameter file, constant fill value), in write order:
    # features are filled with 0, labels and spectators with 1.
    for group, fill in (("features", 0), ("labels", 1), ("spectators", 1)):
        for branch in params[group]:
            h["deepntuplizer/tree"][branch] = [fill] * num_lines
def get_bkg_templates(tmp_rname):
    '''
    Function that writes linearized mtt vs costheta distributions to root file.

    Loads the background coffea histograms named by the module-level
    ``bkg_fnames``, selects the 'mtt_vs_tlep_ctstar_abs' histogram and rebins
    its two dense axes according to ``linearize_binning``.  Then, for each
    lepton flavour ('Muon', 'Electron'), each jet multiplicity in
    ``njets_to_run`` and each systematic in ``sys_to_use``, it runs
    ``Plotter.QCD_Est`` on the linearized signal and sideband regions and
    writes every resulting process template to the ROOT file via
    ``hist.export1d``.  The same templates are collected in per-multiplicity
    dicts and stored with ``save``.

    Parameters
    ----------
    tmp_rname : name of the ROOT file to write.  Opened with
        ``uproot.recreate`` if the file already exists, otherwise with
        ``uproot.create``.

    Raises
    ------
    ValueError : if 'mtt_vs_tlep_ctstar_abs' is not among the loaded histograms.

    Uses names defined outside this function: ``bkg_fnames``,
    ``bkg_ttJets_fname``, ``linearize_binning``, ``njets_to_run``,
    ``sys_to_use``, ``templates_to_smooth``, ``args``, ``proj_dir``, ``jobid``
    and ``outdir``.
    '''
    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(bkg_fnames) if len(bkg_fnames) > 1 else load(bkg_fnames[0])

    ## get data lumi and scale MC by lumi
    # NOTE(review): data_lumi_year is not read anywhere else in this function,
    # and the file object returned by open() is never explicitly closed.
    data_lumi_year = prettyjson.loads(open('%s/inputs/lumis_data.json' % proj_dir).read())[args.year]

    # get correct hist and rebin
    hname_to_use = 'mtt_vs_tlep_ctstar_abs'
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
    ## rebin x axis
    # NOTE(review): if xrebinning is neither an ndarray nor a float/int,
    # new_xbins stays unbound and the rebin call below fails (same for y).
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)
    ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    rebin_histo = histo.rebin(yaxis_name, new_ybins)

    # NOTE(review): nbins is not used later in this function; len() here also
    # means the scalar float/int rebinning accepted above cannot get past this line.
    nbins = (len(xrebinning)-1)*(len(yrebinning)-1)

    ## scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
    ttJets_permcats = ['*right', '*matchable', '*unmatchable', '*other']
    names = [dataset for dataset in sorted(set([key[0] for key in hdict[hname_to_use].values().keys()]))] # get dataset names in hists
    ttJets_cats = [name for name in names if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats])] # gets ttJets(_PS)_other, ...

    # use ttJets events that don't have PS weights for dedicated sys samples in 2016
    if bkg_ttJets_fname is not None:
        ttJets_hdict = load(bkg_ttJets_fname)
        ttJets_histo = ttJets_hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat

        ## rebin x axis
        ttJets_histo = ttJets_histo.rebin(xaxis_name, new_xbins)
        ## rebin y axis
        ttJets_histo = ttJets_histo.rebin(yaxis_name, new_ybins)

        only_ttJets_names = [dataset for dataset in sorted(set([key[0] for key in ttJets_hdict[hname_to_use].values().keys()]))] # get dataset names in hists
        only_ttJets_cats = [name for name in only_ttJets_names if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats])] # gets ttJets(_PS)_other, ...

    ## make groups based on process
    process = hist.Cat("process", "Process", sorting='placement')
    process_cat = "dataset"

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    # NOTE(review): upfout is only closed at the very end of the function, so an
    # exception raised in the loops below leaves the ROOT file open.
    upfout = uproot.recreate(tmp_rname, compression=uproot.ZLIB(4)) if os.path.isfile(tmp_rname) else uproot.create(tmp_rname)

    # per-jet-multiplicity containers for the templates, keyed by lepton flavour
    if '3Jets' in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({'Muon' : {}, 'Electron' :{}})
    if '4PJets' in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({'Muon' : {}, 'Electron' :{}})

    for lep in ['Muon', 'Electron']:
        lepdir = 'mujets' if lep == 'Muon' else 'ejets'

        ## make groups based on process
        process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict='templates')

        lumi_correction = load('%s/Corrections/%s/MC_LumiWeights_IgnoreSigEvts.coffea' % (proj_dir, jobid))[args.year]['%ss' % lep]
        # scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
        if len(ttJets_cats) > 0:
            for tt_cat in ttJets_cats:
                ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS
                ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                # lumi_correction is extended in place with one entry per category
                lumi_correction.update({tt_cat: ttJets_eff_lumi})

        # work on a copy so rebin_histo can be reused for the next lepton flavour
        histo = rebin_histo.copy()
        histo.scale(lumi_correction, axis='dataset')
        histo = histo.group(process_cat, process, process_groups)[:, :, :, lep, :, :].integrate('leptype')

        # use ttJets events that don't have PS weights for dedicated sys samples in 2016
        if bkg_ttJets_fname is not None:
            if len(only_ttJets_cats) > 0:
                for tt_cat in only_ttJets_cats:
                    ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS
                    ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                    lumi_correction.update({tt_cat: ttJets_eff_lumi})

            tt_histo = ttJets_histo.copy()
            tt_histo.scale(lumi_correction, axis='dataset')
            tt_histo = tt_histo.group(process_cat, process, {'TT' : ['ttJets_right', 'ttJets_matchable', 'ttJets_unmatchable', 'ttJets_other']})[:, :, :, lep, :, :].integrate('leptype')

        for jmult in njets_to_run:
            # three 'nosys' sideband selections plus the btagPass/Tight selection
            # (all systematics kept) that are handed to Plotter.QCD_Est below
            iso_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagPass', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            btag_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Tight'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            double_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            sig_histo = Plotter.linearize_hist(histo[:, :, jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))

            # NOTE(review): the loop variable ``sys`` shadows the name of the
            # standard-library module inside this function.
            for sys in sys_to_use.keys():
                if sys not in histo.axis('sys')._sorted:
                    print('\n\n Systematic %s not available, skipping\n\n' % sys)
                    continue

                #set_trace()
                sysname, onlyTT = sys_to_use[sys]
                # 'LEP' placeholder becomes the first letter of lepdir ('m' or 'e')
                if 'LEP' in sysname:
                    sysname = sysname.replace('LEP', lepdir[0])

                qcd_est_histo = Plotter.QCD_Est(sig_reg=sig_histo, iso_sb=iso_sb, btag_sb=btag_sb, double_sb=double_sb, norm_type='Sideband', shape_region='BTAG', norm_region='BTAG', sys=sys)

                ## write nominal and systematic variations for each topology to file
                for proc in sorted(set([key[0] for key in qcd_est_histo.values().keys()])):
                    # systematics flagged onlyTT are written for the 'TT' process only
                    if (proc != 'TT') and onlyTT:
                        continue
                    # data_obs is written for the nominal ('nosys') case only
                    if (proc == 'data_obs') and not (sys == 'nosys'):
                        continue
                    name = proc+lepdir if proc == 'QCD' else proc
                    print(lep, jmult, sys, name)
                    outhname = '_'.join([jmult, lepdir, name]) if sys == 'nosys' else '_'.join([jmult, lepdir, name, sysname])
                    template_histo = qcd_est_histo[proc].integrate('process')
                    if (('ue' in sys) or ('hdamp' in sys) or ('mtop' in sys)) and (bkg_ttJets_fname is not None):
                        tt_lin_histo = Plotter.linearize_hist(tt_histo['TT', 'nosys', jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))
                        tt_lin_histo = tt_lin_histo['TT', 'nosys'].integrate('process').integrate('sys')
                        template_histo = substitute_ttJets(sys_histo=template_histo, ttJets_histo=tt_lin_histo, ttJets_PS_histo=sig_histo['TT', 'nosys'].integrate('process').integrate('sys'))

                    # the nominal template is looked up in the dict for the current
                    # jet multiplicity; presumably 'nosys' is processed before these
                    # systematics so that entry exists -- TODO confirm ordering of sys_to_use
                    if ((sys == 'mtop1695') or (sys == 'mtop1755')) and (templates_to_smooth[proc]):
                        template_histo = scale_mtop3gev(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo)
                        #set_trace()

                    if (sys != 'nosys') and (args.smooth) and (templates_to_smooth[proc]):
                        template_histo = smoothing(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo, nbinsx=len(xrebinning)-1, nbinsy=len(yrebinning)-1)#, debug=True if proc=='VV' else False)
                        #set_trace()

                    ## save template histos to coffea dict
                    # key is the process name for 'nosys', otherwise '<proc>_<sys>'
                    if jmult == '3Jets':
                        histo_dict_3j[lep][proc if sys == 'nosys' else '%s_%s' % (proc, sys)] = template_histo
                    if jmult == '4PJets':
                        histo_dict_4pj[lep][proc if sys == 'nosys' else '%s_%s' % (proc, sys)] = template_histo

                    ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

    # store the collected templates as coffea files; the file name gains a
    # 'smoothed' tag when args.smooth is set
    if '3Jets' in njets_to_run:
        coffea_out_3j = '%s/templates_lj_3Jets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_3Jets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
        save(histo_dict_3j, coffea_out_3j)
        print("%s written" % coffea_out_3j)
    if '4PJets' in njets_to_run:
        coffea_out_4pj = '%s/templates_lj_4PJets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_4PJets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
        save(histo_dict_4pj, coffea_out_4pj)
        print("%s written" % coffea_out_4pj)

    upfout.close()
    print('%s written' % tmp_rname)
def _trigger_legend_title(sel):
    """Return the legend title for the trigger selection named in ``sel``."""
    # 'triggermuonall' has to be tested before 'triggermuon': the latter is a
    # substring of the former, so testing it first made the
    # 'All trigger (muon)' title unreachable when ``sel`` is a string.
    titles = (
        ('triggermuonall', 'All trigger (muon)'),
        ('triggermuon', 'Muon trigger'),
        ('triggerhad', 'Hadronic trigger'),
        ('triggerelectronall', 'All trigger (electron)'),
    )
    for tag, title in titles:
        if tag in sel:
            return title
    return 'All trigger'


def drawStack(h, var_name, var_label, lumifb, sel, vars_cut, save=False, drawData=False):
    """Plot ``var_name`` as a background stack with the signals overlaid.

    Two PDFs are written: ``stack_<var_name>_lumi<lumifb>.pdf`` (linear y
    axis) and ``stack_<var_name>_lumi<lumifb>_log.pdf`` (log y axis).

    Args:
        h: Histogram with a 'process' axis, an axis named ``var_name`` and
            the axes named in ``vars_cut``; every other axis is summed over.
        var_name: Name of the axis to plot.
        var_label: Label assigned to that axis.
        lumifb: Luminosity in fb^-1, used in the plot text and file names.
        sel: Selection name; picks the legend title.
        vars_cut: Mapping of axis name to a ``(low, high)`` pair.  Axes other
            than ``var_name`` are integrated over that range; for
            ``var_name`` itself the range is applied as a slice.
        save: If true, also write one ``<process>_pass`` template per process
            to ``templates_<var_name>.root`` (replacing an existing file).
        drawData: If true, overlay ``x['data_obs_jetht']``.

    Uses names defined outside this function: ``privsig``, ``offsig``,
    ``bkg``, ``process_latex``, ``fill_opts``, ``err_opts`` and
    ``data_err_opts``.
    """
    # make cuts
    exceptions = ['process', var_name]
    exceptions.extend(vars_cut)
    x = h.sum(*[ax for ax in h.axes() if ax.name not in exceptions])
    for var, val in vars_cut.items():
        if var != var_name:
            x = x.integrate(var, slice(val[0], val[1]))
    if var_name in vars_cut:
        x = x[:, vars_cut[var_name][0]:vars_cut[var_name][1]]

    # numbers
    # scalesig is a display factor for the private signal overlay (it is also
    # appended to that process label below); scalehpt is presumably a Higgs-pT
    # correction (the original note read "higgs pT?") -- TODO confirm.
    scalesig = 5000.
    scalehpt = 0.29
    hsig = x[privsig]
    hsig.scale(scalesig)
    hsig.scale(scalehpt)

    all_bkg = sum(val.sum() for val in x[bkg].values().values())
    all_sig = sum(val.sum() for val in x[privsig].values().values())
    if all_bkg > 0:
        # np.sqrt instead of np.math.sqrt: the np.math alias is deprecated
        # since NumPy 1.25 and not available in NumPy 2.
        print('allbkg %.2f, allsig %.2f, s/sqrt(b) %.4f' % (all_bkg, all_sig, all_sig / np.sqrt(all_bkg)))

    # save
    if save:
        out = "templates_%s.root" % var_name
        if os.path.exists(out):
            os.remove(out)
        # context manager: the file is closed even if an export fails
        with uproot.create(out) as fout:
            for proc in x.identifiers('process'):
                pass_template = x.integrate('process', proc)
                name = "%s_pass" % (proc.name)
                if 'data_obs' in proc.name:
                    name = 'data_obs_pass'
                if 'hww_private' in proc.name:
                    print('scaling by hpt', proc)
                    pass_template.scale(scalehpt)
                fout[name] = hist.export1d(pass_template)

    # identifiers
    x.axis(var_name).label = var_label
    for ih, hkey in enumerate(x.identifiers('process')):
        name = process_latex[hkey.name]
        if 'hww_private' in hkey.name:
            name += ' x %i' % scalesig
        x.identifiers('process')[ih].label = name

    x.axis('process').sorting = 'integral'
    fig, ax = plt.subplots(1, 1, figsize=(8, 8))
    hist.plot1d(
        x[bkg],
        overlay='process',
        ax=ax,
        clear=False,
        stack=True,
        fill_opts=fill_opts,
        error_opts=err_opts,
    )
    if drawData:
        hist.plot1d(x['data_obs_jetht'], overlay="process", ax=ax, clear=False, error_opts=data_err_opts)
    hist.plot1d(x[offsig], ax=ax, overlay='process', clear=False, error_opts={
        'color': 'aquamarine',
        'linewidth': 2
    })
    hist.plot1d(hsig, ax=ax, overlay='process', clear=False, error_opts={
        'color': 'greenyellow',
        'linewidth': 2
    })

    ax.autoscale(axis='x', tight=True)
    # largest single background bin; sets the upper limit of the log plot
    maxval = 0
    for val in x[bkg].values().values():
        maxval = max(maxval, np.amax(val))
    ax.set_ylim(0, None)
    ax.ticklabel_format(axis='x', style='sci')

    old_handles, old_labels = ax.get_legend_handles_labels()
    ax.legend(handles=old_handles, labels=old_labels, title=_trigger_legend_title(sel))

    plt.text(1., 1., r"%i fb$^{-1}$ (13 TeV)" % lumifb, fontsize=16, horizontalalignment='right', verticalalignment='bottom', transform=ax.transAxes)
    fig.savefig("stack_%s_lumi%i.pdf" % (var_name, lumifb))

    # same figure again with a logarithmic y axis
    ax.set_ylim(0.01, 100 * maxval)
    ax.set_yscale('log')
    fig.savefig("stack_%s_lumi%i_log.pdf" % (var_name, lumifb))
parser.add_argument('fnames', type=str, nargs='+', help='Histogram files')
args = parser.parse_args()

# Convert every coffea histogram file given on the command line to a ROOT
# file holding one 1D histogram per (dataset, channel, histogram key).
for fname in args.fnames:
    outname = fname.replace('.coffea', '.root')
    if outname == fname:
        # The name contains no '.coffea', so the replace was a no-op; writing
        # to ``outname`` would delete and overwrite the input file itself.
        outname = fname + '.root'

    # Skip inputs whose ROOT file is already newer than the input.
    try:
        if os.path.getmtime(fname) < os.path.getmtime(outname):
            continue
    except FileNotFoundError:
        # no output yet (or no input: load() below reports that)
        pass

    print(f'Converting {fname}')
    hists = load(fname)

    if os.path.exists(outname):
        os.remove(outname)

    # Context manager: the output file is closed even if an export fails.
    with uproot.create(outname) as fout:
        # when the file holds a tuple, the histograms are its first element
        if isinstance(hists, tuple):
            hists = hists[0]

        for key, h in hists.items():
            if not isinstance(h, hist.Hist):
                continue
            for dataset in h.identifiers('dataset'):
                for channel in h.identifiers('channel'):
                    newhist = h.integrate('dataset', dataset).integrate(
                        'channel', channel)
                    hname = '{}_{}_{}'.format(dataset, channel, key)
                    fout[hname] = hist.export1d(newhist)