Exemplo n.º 1
0
def test_export1d():
    """Round-trip a 1D coffea histogram through a ROOT file.

    Fills a regularly binned histogram, converts it with ``export1d``,
    writes it with ``uproot.create``, reads it back with ``uproot.open`` and
    checks that edges and values survive unchanged. The temporary file is
    removed even when an assertion fails.
    """
    import uproot
    import os
    from coffea.hist import export1d

    counts, test_eta, test_pt = dummy_jagged_eta_pt()
    h_regular_bins = hist.Hist("regular_joe", hist.Bin("x", "x", 20, 0, 200))
    h_regular_bins.fill(x=test_pt)

    hout = export1d(h_regular_bins)

    filename = 'test_export1d.root'

    try:
        # The context manager closes the file; no explicit close() is needed.
        with uproot.create(filename) as fout:
            fout['regular_joe'] = hout

        with uproot.open(filename) as fin:
            hin = fin['regular_joe']

        # Evaluate the comparisons first so the file-backed objects can be
        # released before asserting (and before the file is deleted).
        edges_match = np.all(hin.edges == hout.edges)
        values_match = np.all(hin.values == hout.values)

        del hin
        del fin

        assert edges_match
        assert values_match
    finally:
        # Always clean up, otherwise a failing run leaves a stale file behind.
        if os.path.exists(filename):
            os.remove(filename)
 def test_read_root_multiple_trees(self, table):
     """Check reading a ROOT file that holds more than one tree.

     Without ``treename`` the read must raise ``ValueError`` with a message
     starting with 'Multiple trees found'; naming tree "a" must succeed.
     """
     import uproot
     # append hasn't been implemented in uproot 3 yet
     with utils.TemporaryFilename(suffix='.root') as path:
         # Build a file with two trees: "a" (one int32 branch) and an empty "b".
         with uproot.create(path) as rootfile:
             rootfile["a"] = uproot.newtree({"branch": "int32"})
             rootfile["a"].extend({"branch": asarray([1, 2, 3, 4, 5])})
             rootfile["b"] = uproot.newtree()
         with pytest.raises(ValueError) as excinfo:
             self.TABLE.read(path)
         message = str(excinfo.value)
         assert message.startswith('Multiple trees found')
         self.TABLE.read(path, treename="a")
Exemplo n.º 3
0
def write_hist(histfile: Path, name: str, hist: np.ndarray,
               bin_edges: np.ndarray) -> None:
    """Write a histogram and its bin edges to ``histfile``.

    The output format is selected from the file suffix:

    * ``.npz``  - compressed NumPy archive with keys ``name`` and "bin_edges"
    * ``.txt``  - ``np.savetxt`` dump of ``hist`` with the edges in the header
    * ``.hdf5`` - gzip-compressed datasets ``name`` and "bin_edges" (needs h5py)
    * ``.root`` - TH1 (1D ``hist``) or TH2 (otherwise) written through uproot

    Args:
        histfile: Destination path; must not exist yet.
        name: Key / dataset / object name under which ``hist`` is stored.
        hist: Histogram contents.
        bin_edges: Bin edges belonging to ``hist``.

    Raises:
        Exception: If ``histfile`` already exists, if h5py is required but not
            installed, or if the suffix is not one of the supported formats.
    """
    if histfile.exists():
        raise Exception(f"Error writing {histfile}, already exists.")

    suffix = histfile.suffix
    if suffix == ".npz":
        np.savez_compressed(histfile, **{
            name: hist,
            "bin_edges": bin_edges,
        })
    elif suffix == ".txt":
        np.savetxt(
            histfile,
            hist,
            header="bin edges:\n" + str(bin_edges) + f"\n{name}:",
        )
    elif suffix == ".hdf5":
        # Only the import is guarded, so unrelated errors raised while
        # writing are not mistaken for a missing dependency.
        try:
            import h5py
        except ModuleNotFoundError as err:
            raise Exception("Please install h5py to write hdf5 files") from err

        with h5py.File(histfile, "w") as f:
            f.create_dataset(name,
                             data=hist,
                             compression="gzip",
                             compression_opts=9)
            f.create_dataset(
                "bin_edges",
                data=bin_edges,
                compression="gzip",
                compression_opts=9,
            )
    elif suffix == ".root":
        import uproot

        # TODO: Discard sumw2?
        if hist.ndim == 1:
            from uproot_methods.classes.TH1 import from_numpy

            h = from_numpy([hist, bin_edges])
        else:
            from uproot_methods.classes.TH2 import from_numpy

            # The first axis is given unit-width bins 0..N; the second axis
            # uses the supplied bin edges.
            h = from_numpy([hist, np.arange(0, hist.shape[0] + 1), bin_edges])
        with uproot.create(histfile) as f:
            f[name] = h
    else:
        raise Exception(f"Unknown output format: {histfile.suffix}")
from coffea import hist
from coffea.util import load, save
import processmap

# Load the accumulated output of the processing step from a coffea file.
hists_unmapped = load('hists_Hbb_create_2017.coffea')


# Keep only the histogram entries, passing each one through processmap.apply.
hists = {}
for key, val in hists_unmapped.items():
    if isinstance(val, hist.Hist):
        hists[key] = processmap.apply(val)


# Start from a fresh output file: remove any previous templates.root first.
# NOTE(review): os, uproot and re are not imported in the visible part of
# this script - confirm they are imported elsewhere.
if os.path.exists("templates.root"):
    os.remove("templates.root")
fout = uproot.create("templates.root")

# Negative look-ahead pattern; presumably selects every process whose name
# does not start with "data_obs" - verify against coffea's regex indexing.
nodata = re.compile("(?!data_obs)")
h = hists['templates_signalregion'][nodata]
# NOTE(review): looks like an integrated luminosity used as scale factor;
# units are not stated here - confirm.
lumi = 41.1
# Scale every selected process by the same factor along the "process" axis.
h.scale({p: lumi for p in h[nodata].identifiers('process')}, axis="process")

for proc in h.identifiers('process'):
    for i, ptbin in enumerate(h.identifiers('AK8Puppijet0_pt')):
        for syst in h.identifiers('systematic'):
            mproj = (slice(None), 'all')
            systreal = syst
            fail_template = (h.project('process', proc)
                              .project('AK8Puppijet0_isHadronicV', *mproj)
                              .project('systematic', systreal)
                              .project('AK8Puppijet0_pt', ptbin)
import uproot

# Output file that receives the dummy tree.
h = uproot.create("testdata/test.root")

h["deepntuplizer/tree"] = {}

with open("config/data-params.json") as param_file:
    params = json.load(param_file)

num_lines = 3

# Fill one constant column per configured name, group by group:
# features are filled with 0, labels and spectators with 1.
for group, fill_value in (("features", 0), ("labels", 1), ("spectators", 1)):
    for branch in params[group]:
        h["deepntuplizer/tree"][branch] = [fill_value] * num_lines
Exemplo n.º 6
0
def get_bkg_templates(tmp_rname):
    '''
    Function that writes linearized mtt vs costheta distributions to root file.

    For every lepton flavour ('Muon', 'Electron') and every jet multiplicity in
    njets_to_run, the 'mtt_vs_tlep_ctstar_abs' histogram is rebinned, scaled by
    the lumi corrections, grouped by process and linearized. For each
    systematic in sys_to_use a QCD estimate is made with Plotter.QCD_Est and
    every resulting process template is written to the ROOT file `tmp_rname`
    via hist.export1d. The same templates are also collected per jet
    multiplicity and saved as coffea files under `outdir`.

    Parameters
    ----------
    tmp_rname : path of the ROOT file to (re)create.

    Besides its argument the function reads names that are not defined inside
    it: bkg_fnames, bkg_ttJets_fname, proj_dir, jobid, outdir, args,
    linearize_binning, njets_to_run, sys_to_use, templates_to_smooth.
    '''
    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(bkg_fnames) if len(bkg_fnames) > 1 else load(bkg_fnames[0])

        ## get data lumi and scale MC by lumi
    # NOTE(review): data_lumi_year is not used again in this function, and the
    # file opened here is never explicitly closed.
    data_lumi_year = prettyjson.loads(open('%s/inputs/lumis_data.json' % proj_dir).read())[args.year]

        # get correct hist and rebin
    hname_to_use = 'mtt_vs_tlep_ctstar_abs'
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat
    
    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
        ## rebin x axis
    # An array is taken as explicit bin edges, a number is passed to rebin as is.
    # NOTE(review): any other type leaves new_xbins unbound (NameError below).
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)
        ## rebin y axis
    # Same convention as for the x axis (new_ybins can be unbound likewise).
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    rebin_histo = histo.rebin(yaxis_name, new_ybins)
    
    # NOTE(review): nbins is not used afterwards, and len() raises TypeError
    # when the rebinning is given as a plain number (the branch accepted above).
    nbins = (len(xrebinning)-1)*(len(yrebinning)-1)
    
        ## scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
    ttJets_permcats = ['*right', '*matchable', '*unmatchable', '*other']
    names = [dataset for dataset in sorted(set([key[0] for key in hdict[hname_to_use].values().keys()]))] # get dataset names in hists
    ttJets_cats = [name for name in names if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats])] # gets ttJets(_PS)_other, ...

        # use ttJets events that don't have PS weights for dedicated sys samples in 2016    
    if bkg_ttJets_fname is not None:
        ttJets_hdict = load(bkg_ttJets_fname)
        ttJets_histo = ttJets_hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat
        
            ## rebin x axis
        ttJets_histo = ttJets_histo.rebin(xaxis_name, new_xbins)
            ## rebin y axis
        ttJets_histo = ttJets_histo.rebin(yaxis_name, new_ybins)
        
        only_ttJets_names = [dataset for dataset in sorted(set([key[0] for key in ttJets_hdict[hname_to_use].values().keys()]))] # get dataset names in hists
        only_ttJets_cats = [name for name in only_ttJets_names if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats])] # gets ttJets(_PS)_other, ...


        ## make groups based on process
    process = hist.Cat("process", "Process", sorting='placement')
    process_cat = "dataset"

        # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    # NOTE(review): compression is only specified on the recreate branch, and
    # the file is closed only at the very end, so an exception in between
    # leaves it open.
    upfout = uproot.recreate(tmp_rname, compression=uproot.ZLIB(4)) if os.path.isfile(tmp_rname) else uproot.create(tmp_rname)

    # One accumulator per requested jet multiplicity, keyed by lepton flavour.
    if '3Jets' in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({'Muon' : {}, 'Electron' :{}})
    if '4PJets' in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({'Muon' : {}, 'Electron' :{}})

    for lep in ['Muon', 'Electron']:
        lepdir = 'mujets' if lep == 'Muon' else 'ejets'
    
        ## make groups based on process
        process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict='templates')
        
        lumi_correction = load('%s/Corrections/%s/MC_LumiWeights_IgnoreSigEvts.coffea' % (proj_dir, jobid))[args.year]['%ss' % lep]
                # scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
        if len(ttJets_cats) > 0:
            for tt_cat in ttJets_cats:
                ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS
                ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                lumi_correction.update({tt_cat: ttJets_eff_lumi})
    
        # Work on a copy so the rebinned histogram stays unscaled for the next lepton.
        histo = rebin_histo.copy()
        histo.scale(lumi_correction, axis='dataset')
        histo = histo.group(process_cat, process, process_groups)[:, :, :, lep, :, :].integrate('leptype')

            # use ttJets events that don't have PS weights for dedicated sys samples in 2016    
        if bkg_ttJets_fname is not None:
            if len(only_ttJets_cats) > 0:
                for tt_cat in only_ttJets_cats:
                    ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS
                    ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                    lumi_correction.update({tt_cat: ttJets_eff_lumi})

            tt_histo = ttJets_histo.copy()
            tt_histo.scale(lumi_correction, axis='dataset')
            tt_histo = tt_histo.group(process_cat, process, {'TT' : ['ttJets_right', 'ttJets_matchable', 'ttJets_unmatchable', 'ttJets_other']})[:, :, :, lep, :, :].integrate('leptype')


        for jmult in njets_to_run:
            # Three 'nosys' sideband regions and the signal region (all
            # systematics kept), each linearized.
            iso_sb    = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagPass', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            btag_sb   = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Tight'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            double_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            sig_histo = Plotter.linearize_hist(histo[:, :, jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))
        
            for sys in sys_to_use.keys():
                if sys not in histo.axis('sys')._sorted:
                    print('\n\n   Systematic %s not available, skipping\n\n' % sys)
                    continue

                #set_trace()
                # sys_to_use maps each systematic to (output name, only-TT flag).
                sysname, onlyTT = sys_to_use[sys]
                # 'LEP' in the output name is replaced by the first letter of lepdir ('m' or 'e').
                if 'LEP' in sysname: sysname = sysname.replace('LEP', lepdir[0])
        
                qcd_est_histo = Plotter.QCD_Est(sig_reg=sig_histo, iso_sb=iso_sb, btag_sb=btag_sb, double_sb=double_sb, norm_type='Sideband', shape_region='BTAG', norm_region='BTAG', sys=sys)

                    ## write nominal and systematic variations for each topology to file
                for proc in sorted(set([key[0] for key in qcd_est_histo.values().keys()])):
                    # Skip non-TT processes for TT-only systematics, and data for anything but nominal.
                    if (proc != 'TT') and onlyTT: continue
                    if (proc == 'data_obs') and not (sys == 'nosys'): continue
                    name = proc+lepdir if proc == 'QCD' else proc
                    print(lep, jmult, sys, name)
                    outhname = '_'.join([jmult, lepdir, name]) if sys == 'nosys' else '_'.join([jmult, lepdir, name, sysname])
                    template_histo = qcd_est_histo[proc].integrate('process')
                    # For ue/hdamp/mtop variations with a dedicated ttJets file, the template is
                    # passed through substitute_ttJets together with the nominal TT shapes.
                    if (('ue' in sys) or ('hdamp' in sys) or ('mtop' in sys)) and (bkg_ttJets_fname is not None):
                        tt_lin_histo = Plotter.linearize_hist(tt_histo['TT', 'nosys', jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))
                        tt_lin_histo = tt_lin_histo['TT', 'nosys'].integrate('process').integrate('sys')
                        template_histo = substitute_ttJets(sys_histo=template_histo, ttJets_histo=tt_lin_histo, ttJets_PS_histo=sig_histo['TT', 'nosys'].integrate('process').integrate('sys'))

                    # The nominal template is looked up in the accumulator, so 'nosys' must have
                    # been stored for this proc before these branches run (depends on the
                    # iteration order of sys_to_use).
                    if ((sys == 'mtop1695') or (sys == 'mtop1755')) and (templates_to_smooth[proc]):
                        template_histo = scale_mtop3gev(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo)
                        #set_trace()

                    if (sys != 'nosys') and (args.smooth) and (templates_to_smooth[proc]):
                        template_histo = smoothing(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo, nbinsx=len(xrebinning)-1, nbinsy=len(yrebinning)-1)#, debug=True if proc=='VV' else False)
                        #set_trace()

                        ## save template histos to coffea dict
                    if jmult == '3Jets':
                        histo_dict_3j[lep][proc if sys == 'nosys' else '%s_%s' % (proc, sys)] = template_histo
                    if jmult == '4PJets':
                        histo_dict_4pj[lep][proc if sys == 'nosys' else '%s_%s' % (proc, sys)] = template_histo

                        ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

    # Persist the collected templates; the file name carries a 'smoothed' tag when args.smooth is set.
    if '3Jets' in njets_to_run:
        coffea_out_3j = '%s/templates_lj_3Jets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_3Jets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
        save(histo_dict_3j, coffea_out_3j)
        print("%s written" % coffea_out_3j)
    if '4PJets' in njets_to_run:
        coffea_out_4pj = '%s/templates_lj_4PJets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_4PJets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
        save(histo_dict_4pj, coffea_out_4pj)
        print("%s written" % coffea_out_4pj)

    
    upfout.close()
    print('%s written' % tmp_rname)
Exemplo n.º 7
0
def drawStack(h,
              var_name,
              var_label,
              lumifb,
              sel,
              vars_cut,
              save=False,
              drawData=False):
    """Draw a stacked background plot of ``var_name`` with signal overlays.

    The histogram is reduced to the 'process' and ``var_name`` axes after
    applying the ranges in ``vars_cut``. A background/signal yield summary is
    printed, templates are optionally written to a ROOT file, and the stack
    is saved as ``stack_<var_name>_lumi<lumifb>.pdf`` plus a log-scale copy.

    Args:
        h: Histogram with a 'process' axis and a ``var_name`` axis
            (presumably a coffea ``hist.Hist`` - confirm with callers).
        var_name: Name of the axis to plot.
        var_label: Label assigned to that axis.
        lumifb: Number shown in the luminosity text and used in the file names.
        sel: Selection description; searched for trigger tags to choose the
            legend title.
        vars_cut: Mapping ``axis name -> (low, high)`` of ranges to apply.
        save: If true, write per-process templates to
            ``templates_<var_name>.root``.
        drawData: If true, overlay the 'data_obs_jetht' process.

    Reads the module-level names ``privsig``, ``bkg``, ``offsig``,
    ``process_latex``, ``fill_opts``, ``err_opts`` and ``data_err_opts``.
    """
    # make cuts
    exceptions = ['process', var_name]
    exceptions.extend(vars_cut)
    x = h.sum(*[ax for ax in h.axes() if ax.name not in exceptions])
    for var, val in vars_cut.items():
        if var != var_name:
            x = x.integrate(var, slice(val[0], val[1]))
    if var_name in vars_cut:
        x = x[:, vars_cut[var_name][0]:vars_cut[var_name][1]]

    # numbers
    #x[privsig].scale(0.3799) # xsec?
    #x[privsig].scale(0.29) # higgs pT?

    scalesig = 5000.
    scalehpt = 0.29
    hsig = x[privsig]
    hsig.scale(scalesig)
    hsig.scale(scalehpt)

    all_bkg = 0
    for val in x[bkg].values().values():
        all_bkg += val.sum()

    # NOTE(review): the yield is taken from x[privsig], not from hsig, so it
    # may not include the scalesig/scalehpt factors applied above - confirm
    # this is intended.
    all_sig = 0
    for val in x[privsig].values().values():
        all_sig += val.sum()

    if all_bkg > 0:
        # np.sqrt instead of np.math.sqrt: the np.math alias has been removed
        # from NumPy.
        print('allbkg %.2f, allsig %.2f, s/sqrt(b) %.4f' %
              (all_bkg, all_sig, all_sig / np.sqrt(all_bkg)))

    # save
    if save:
        out = "templates_%s.root" % var_name
        if os.path.exists(out):
            os.remove(out)

        # The context manager closes the file even if an export fails.
        with uproot.create(out) as fout:
            for proc in x.identifiers('process'):
                pass_template = (x.integrate('process', proc))
                name = "%s_pass" % (proc.name)
                if 'data_obs' in proc.name:
                    name = 'data_obs_pass'
                if 'hww_private' in proc.name:
                    print('scaling by hpt', proc)
                    pass_template.scale(scalehpt)
                fout[name] = hist.export1d(pass_template)

    # identifiers
    xaxis = var_name
    x.axis(xaxis).label = var_label
    for ih, hkey in enumerate(x.identifiers('process')):
        name = process_latex[hkey.name]
        if 'hww_private' in hkey.name:
            name += ' x %i' % scalesig
        x.identifiers('process')[ih].label = name

    x.axis('process').sorting = 'integral'

    fig, ax = plt.subplots(1, 1, figsize=(8, 8))
    hist.plot1d(
        x[bkg],
        overlay='process',
        ax=ax,
        clear=False,
        stack=True,
        fill_opts=fill_opts,
        error_opts=err_opts,
    )
    if drawData:
        hist.plot1d(x['data_obs_jetht'],
                    overlay="process",
                    ax=ax,
                    clear=False,
                    error_opts=data_err_opts)

    hist.plot1d(x[offsig],
                ax=ax,
                overlay='process',
                clear=False,
                error_opts={
                    'color': 'aquamarine',
                    'linewidth': 2
                })
    hist.plot1d(hsig,
                ax=ax,
                overlay='process',
                clear=False,
                error_opts={
                    'color': 'greenyellow',
                    'linewidth': 2
                })

    ax.autoscale(axis='x', tight=True)
    maxval = 0
    for val in x[bkg].values().values():
        maxval = max(maxval, np.amax(val))

    ax.set_ylim(0, None)
    ax.ticklabel_format(axis='x', style='sci')
    old_handles, old_labels = ax.get_legend_handles_labels()

    # 'triggermuonall' must be tested before 'triggermuon': the shorter tag is
    # a substring of the longer one and would otherwise always win.
    legend_title = 'All trigger'
    for tag, title in (
            ('triggermuonall', 'All trigger (muon)'),
            ('triggermuon', 'Muon trigger'),
            ('triggerhad', 'Hadronic trigger'),
            ('triggerelectronall', 'All trigger (electron)'),
    ):
        if tag in sel:
            legend_title = title
            break
    ax.legend(handles=old_handles, labels=old_labels, title=legend_title)

    plt.text(1.,
             1.,
             r"%i fb$^{-1}$ (13 TeV)" % lumifb,
             fontsize=16,
             horizontalalignment='right',
             verticalalignment='bottom',
             transform=ax.transAxes)
    fig.savefig("stack_%s_lumi%i.pdf" % (var_name, lumifb))
    # Second copy with a logarithmic y axis and head-room above the stack.
    ax.set_ylim(0.01, 100 * maxval)
    ax.set_yscale('log')
    fig.savefig("stack_%s_lumi%i_log.pdf" % (var_name, lumifb))
Exemplo n.º 8
0
    parser.add_argument('fnames', type=str, nargs='+', help='Histogram files')
    args = parser.parse_args()

    # Convert every given coffea file into a ROOT file of 1D histograms.
    for fname in args.fnames:
        outname = fname.replace('.coffea', '.root')
        # Skip inputs whose output is already newer than the input; a missing
        # output (FileNotFoundError) simply means it must be produced.
        try:
            if os.path.getmtime(fname) < os.path.getmtime(outname):
                continue
        except FileNotFoundError:
            pass
        print(f'Converting {fname}')
        hists = load(fname)

        # Replace any stale output file.
        if os.path.exists(outname):
            os.remove(outname)
        # NOTE(review): fout is closed only at the end of the iteration; an
        # exception while exporting leaves it open.
        fout = uproot.create(outname)

        # When the loaded object is a tuple, its first element holds the histograms.
        if isinstance(hists, tuple):
            hists = hists[0]

        for key, h in hists.items():
            # Entries that are not histograms are ignored.
            if not isinstance(h, hist.Hist): continue
            # One exported histogram per (dataset, channel) pair, named
            # "<dataset>_<channel>_<key>".
            for dataset in h.identifiers('dataset'):
                for channel in h.identifiers('channel'):
                    newhist = h.integrate('dataset', dataset).integrate(
                        'channel', channel)
                    hname = '{}_{}_{}'.format(dataset, channel, key)
                    fout[hname] = hist.export1d(newhist)

        fout.close()