## get data lumi and scale MC by lumi data_lumi_year = prettyjson.loads( open('%s/inputs/lumis_data.json' % proj_dir).read())[args.year] lumi_correction = load( '%s/Corrections/%s/MC_LumiWeights_IgnoreSigEvts.coffea' % (proj_dir, jobid)) for hname in hdict.keys(): if hname == 'cutflow': continue hdict[hname].scale(lumi_correction[args.year]['%ss' % args.lepton], axis='dataset') ## make groups based on process process = hist.Cat("process", "Process", sorting='placement') process_cat = "dataset" process_groups = plt_tools.make_dataset_groups(args.lepton, args.year) #set_trace() for hname in hdict.keys(): if hname == 'cutflow': continue hdict[hname] = hdict[hname].group(process_cat, process, process_groups) ## make plots for hname in variables.keys(): if hname not in hdict.keys(): raise ValueError("%s not found in file" % hname) xtitle, rebinning, x_lims, withData, btag_splitting = variables[hname] if btag_splitting: histo = hdict[hname][:, :, :, args.lepton, :, :].integrate( 'leptype') # process, jmult, lepton, lepcat, btagregion
if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats]) ] # gets ttJets(_PS)_other, ... if len(ttJets_cats) > 0: for tt_cat in ttJets_cats: ttJets_lumi_topo = '_'.join( tt_cat.split('_')[:-2]) if 'sl_tau' in tt_cat else '_'.join( tt_cat.split('_') [:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo] lumi_correction.update({tt_cat: ttJets_eff_lumi}) ## make groups based on process process = hist.Cat("process", "Process", sorting='placement') process_cat = "dataset" process_groups = plt_tools.make_dataset_groups(args.lepton, args.year, samples=names) # scale and group hists by process for hname in hdict.keys(): if hname == 'cutflow': continue hdict[hname].scale(lumi_correction, axis='dataset') # scale hists hdict[hname] = hdict[hname].group(process_cat, process, process_groups) # group by process hdict[hname] = hdict[hname][:, :, args.lepton].integrate( 'leptype') # only pick out specified lepton ## make bp plots if ((args.plot == 'all') or (args.plot == 'bp')): for hname in bp_variables.keys(): if hname not in hdict.keys():
if len(ttJets_cats) > 0: for tt_cat in ttJets_cats: ttJets_lumi_topo = '_'.join( tt_cat.split('_')[:-2]) if 'sl_tau' in tt_cat else '_'.join( tt_cat.split('_') [:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS mu_lumi = lumi_correction['Muons'][ttJets_lumi_topo] el_lumi = lumi_correction['Electrons'][ttJets_lumi_topo] lumi_correction['Muons'].update({tt_cat: mu_lumi}) lumi_correction['Electrons'].update({tt_cat: el_lumi}) ## make groups based on process process = hist.Cat("process", "Process", sorting='placement') process_cat = "dataset" process_groups = plt_tools.make_dataset_groups('Muon', args.year, samples=names) ## make plots for hname in variables.keys(): if hname not in hdict.keys(): raise ValueError("%s not found in file" % hname) histo = hdict[hname] h_mu = histo[:, :, :, 'Muon', :].integrate('leptype') h_mu.scale(lumi_correction['Muons'], axis='dataset') h_el = histo[:, :, :, 'Electron', :].integrate('leptype') h_el.scale(lumi_correction['Electrons'], axis='dataset') h_tot = h_mu + h_el h_tot = h_tot.group(process_cat, process, process_groups) #set_trace()
def get_bkg_templates(tmp_rname):
    '''
    Write linearized mtt vs costheta background distributions to a root file.

    For each lepton flavour and jet multiplicity the 2D mtt vs |cos(theta*)|
    histogram is lumi-scaled, grouped by physics process, a data-driven QCD
    estimate is formed from the b-tag/isolation sidebands, and the nominal
    plus systematic templates are written both to `tmp_rname` (root format,
    via uproot) and to .coffea files in `outdir`.

    NOTE(review): relies on module-level configuration defined elsewhere in
    this file (bkg_fnames, bkg_ttJets_fname, linearize_binning, njets_to_run,
    sys_to_use, templates_to_smooth, proj_dir, jobid, outdir, args).

    Parameters
    ----------
    tmp_rname : str
        Path of the root file the templates are written to.
    '''
    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(bkg_fnames) if len(bkg_fnames) > 1 else load(bkg_fnames[0])

    # get correct hist and rebin
    hname_to_use = 'mtt_vs_tlep_ctstar_abs'
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
    ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)
    ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    rebin_histo = histo.rebin(yaxis_name, new_ybins)

    ## scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
    ttJets_permcats = ['*right', '*matchable', '*unmatchable', '*other']
    names = [dataset for dataset in sorted(set([key[0] for key in hdict[hname_to_use].values().keys()]))] # get dataset names in hists
    ttJets_cats = [name for name in names if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats])] # gets ttJets(_PS)_other, ...

    # use ttJets events that don't have PS weights for dedicated sys samples in 2016
    if bkg_ttJets_fname is not None:
        ttJets_hdict = load(bkg_ttJets_fname)
        ttJets_histo = ttJets_hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat
        ## rebin x axis
        ttJets_histo = ttJets_histo.rebin(xaxis_name, new_xbins)
        ## rebin y axis
        ttJets_histo = ttJets_histo.rebin(yaxis_name, new_ybins)
        only_ttJets_names = [dataset for dataset in sorted(set([key[0] for key in ttJets_hdict[hname_to_use].values().keys()]))] # get dataset names in hists
        only_ttJets_cats = [name for name in only_ttJets_names if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats])] # gets ttJets(_PS)_other, ...

    ## make groups based on process
    process = hist.Cat("process", "Process", sorting='placement')
    process_cat = "dataset"

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot.recreate(tmp_rname, compression=uproot.ZLIB(4)) if os.path.isfile(tmp_rname) else uproot.create(tmp_rname)
    if '3Jets' in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({'Muon' : {}, 'Electron' :{}})
    if '4PJets' in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({'Muon' : {}, 'Electron' :{}})

    # the lumi-correction file is the same for every lepton iteration:
    # load it once here instead of re-reading it from disk inside the loop
    lumi_weights = load('%s/Corrections/%s/MC_LumiWeights_IgnoreSigEvts.coffea' % (proj_dir, jobid))[args.year]

    for lep in ['Muon', 'Electron']:
        lepdir = 'mujets' if lep == 'Muon' else 'ejets'
        ## make groups based on process
        process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict='templates')
        lumi_correction = lumi_weights['%ss' % lep]
        # scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
        if len(ttJets_cats) > 0:
            for tt_cat in ttJets_cats:
                ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS
                ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                lumi_correction.update({tt_cat: ttJets_eff_lumi})

        histo = rebin_histo.copy()
        histo.scale(lumi_correction, axis='dataset')
        histo = histo.group(process_cat, process, process_groups)[:, :, :, lep, :, :].integrate('leptype')

        # use ttJets events that don't have PS weights for dedicated sys samples in 2016
        if bkg_ttJets_fname is not None:
            if len(only_ttJets_cats) > 0:
                for tt_cat in only_ttJets_cats:
                    ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS
                    ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                    lumi_correction.update({tt_cat: ttJets_eff_lumi})

            tt_histo = ttJets_histo.copy()
            tt_histo.scale(lumi_correction, axis='dataset')
            tt_histo = tt_histo.group(process_cat, process, {'TT' : ['ttJets_right', 'ttJets_matchable', 'ttJets_unmatchable', 'ttJets_other']})[:, :, :, lep, :, :].integrate('leptype')

        for jmult in njets_to_run:
            # sideband (inverted iso and/or btag) and signal regions, linearized to 1D
            iso_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagPass', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            btag_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Tight'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            double_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            sig_histo = Plotter.linearize_hist(histo[:, :, jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))

            for sys in sys_to_use.keys():
                if sys not in histo.axis('sys')._sorted:
                    print('\n\n Systematic %s not available, skipping\n\n' % sys)
                    continue
                sysname, onlyTT = sys_to_use[sys]
                if 'LEP' in sysname: sysname = sysname.replace('LEP', lepdir[0])

                qcd_est_histo = Plotter.QCD_Est(sig_reg=sig_histo, iso_sb=iso_sb, btag_sb=btag_sb, double_sb=double_sb, norm_type='Sideband', shape_region='BTAG', norm_region='BTAG', sys=sys)

                ## write nominal and systematic variations for each topology to file
                for proc in sorted(set([key[0] for key in qcd_est_histo.values().keys()])):
                    if (proc != 'TT') and onlyTT: continue
                    if (proc == 'data_obs') and not (sys == 'nosys'): continue
                    name = proc+lepdir if proc == 'QCD' else proc
                    print(lep, jmult, sys, name)
                    outhname = '_'.join([jmult, lepdir, name]) if sys == 'nosys' else '_'.join([jmult, lepdir, name, sysname])
                    template_histo = qcd_est_histo[proc].integrate('process')
                    # substitute dedicated ttJets samples (no PS weights) for ue/hdamp/mtop systematics
                    if (('ue' in sys) or ('hdamp' in sys) or ('mtop' in sys)) and (bkg_ttJets_fname is not None):
                        tt_lin_histo = Plotter.linearize_hist(tt_histo['TT', 'nosys', jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))
                        tt_lin_histo = tt_lin_histo['TT', 'nosys'].integrate('process').integrate('sys')
                        template_histo = substitute_ttJets(sys_histo=template_histo, ttJets_histo=tt_lin_histo, ttJets_PS_histo=sig_histo['TT', 'nosys'].integrate('process').integrate('sys'))

                    if ((sys == 'mtop1695') or (sys == 'mtop1755')) and (templates_to_smooth[proc]):
                        template_histo = scale_mtop3gev(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo)

                    if (sys != 'nosys') and (args.smooth) and (templates_to_smooth[proc]):
                        template_histo = smoothing(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo, nbinsx=len(xrebinning)-1, nbinsy=len(yrebinning)-1)

                    ## save template histos to coffea dict
                    if jmult == '3Jets':
                        histo_dict_3j[lep][proc if sys == 'nosys' else '%s_%s' % (proc, sys)] = template_histo
                    if jmult == '4PJets':
                        histo_dict_4pj[lep][proc if sys == 'nosys' else '%s_%s' % (proc, sys)] = template_histo

                    ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

    if '3Jets' in njets_to_run:
        coffea_out_3j = '%s/templates_lj_3Jets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_3Jets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
        save(histo_dict_3j, coffea_out_3j)
        print("%s written" % coffea_out_3j)
    if '4PJets' in njets_to_run:
        coffea_out_4pj = '%s/templates_lj_4PJets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_4PJets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
        save(histo_dict_4pj, coffea_out_4pj)
        print("%s written" % coffea_out_4pj)

    upfout.close()
    print('%s written' % tmp_rname)
def get_bkg_templates(tmp_rname):
    """
    Function that writes linearized mtt vs costheta distributions to root file.

    For each lepton flavour and jet multiplicity, the 2D mtt vs |cos(theta*)|
    background histogram is lumi-scaled, grouped by process, a sideband-based
    background estimate is applied, and nominal + systematic templates are
    written to `tmp_rname` (root) and to .coffea files in `outdir`.

    NOTE(review): depends on module-level configuration defined elsewhere in
    this file (bkg_fnames, linearize_binning, njets_to_run, lumi_corr_dict,
    btag_reg_names_dict, systematics, outdir, jobid, args) -- confirm these
    are set before calling.
    """
    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(
        bkg_fnames) if len(bkg_fnames) > 1 else load(bkg_fnames[0])

    # get correct hist and rebin
    hname_to_use = "mtt_vs_tlep_ctstar_abs"
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = linearize_binning
    # drop signal datasets; sparse axis order is noted in the trailing comment
    histo = hdict[hname_to_use][
        Plotter.
        nonsignal_samples]  # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
    ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)
    ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    rebin_histo = histo.rebin(yaxis_name, new_ybins)

    ## scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
    ttJets_permcats = [
        "*right", "*matchable", "*unmatchable", "*sl_tau", "*other"
    ]
    names = [
        dataset
        for dataset in sorted(set([key[0] for key in histo.values().keys()]))
    ]  # get dataset names in hists
    ttJets_cats = [
        name for name in names
        if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats])
    ]  # gets ttJets(_PS)_other, ...

    ## make groups based on process
    process = hist.Cat("process", "Process", sorting="placement")
    process_cat = "dataset"

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot3.recreate(tmp_rname, compression=uproot3.ZLIB(
        4)) if os.path.isfile(tmp_rname) else uproot3.create(tmp_rname)
    if "3Jets" in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({
            "Muon": {},
            "Electron": {}
        })
    if "4PJets" in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({
            "Muon": {},
            "Electron": {}
        })

    for lep in ["Muon", "Electron"]:
        # "NJETS" placeholder is substituted per jet multiplicity below
        orig_lepdir = "muNJETS" if lep == "Muon" else "eNJETS"
        #set_trace()
        ## make groups based on process
        process_groups = plt_tools.make_dataset_groups(lep,
                                                       args.year,
                                                       samples=names,
                                                       gdict="templates")
        #process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict="dataset")

        lumi_correction = lumi_corr_dict[args.year]["%ss" % lep]
        # scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
        # NOTE(review): this mutates the shared lumi_corr_dict sub-dict in place;
        # each lepton's sub-dict is only touched in its own iteration
        if len(ttJets_cats) > 0:
            for tt_cat in ttJets_cats:
                # strip the permutation-category suffix (two tokens for sl_tau)
                ttJets_lumi_topo = "_".join(tt_cat.split(
                    "_")[:-2]) if "sl_tau" in tt_cat else "_".join(
                        tt_cat.split("_")
                        [:-1])  # gets ttJets[SL, Had, DiLep] or ttJets_PS
                ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                lumi_correction.update({tt_cat: ttJets_eff_lumi})

        histo = rebin_histo.copy()
        histo.scale(lumi_correction, axis="dataset")
        histo = histo.group(process_cat, process,
                            process_groups)[:, :, :, lep, :, :].integrate("leptype")

        #set_trace()
        systs = sorted(set([key[1] for key in histo.values().keys()]))
        systs.insert(0, systs.pop(
            systs.index("nosys")))  # move "nosys" to the front

        # loop over each jet multiplicity
        for jmult in njets_to_run:
            lepdir = orig_lepdir.replace("NJETS", jmult.lower())

            # get sideband and signal region hists
            cen_sb_histo = Plotter.linearize_hist(
                histo[:, "nosys", jmult,
                      btag_reg_names_dict["Central"]["reg"]].integrate(
                          "jmult").integrate("btag").integrate("sys"))
            #up_sb_histo = histo[:, "nosys", jmult, btag_reg_names_dict["Up"]["reg"]].integrate("jmult").integrate("btag")
            #dw_sb_histo = histo[:, "nosys", jmult, btag_reg_names_dict["Down"]["reg"]].integrate("jmult").integrate("btag")
            sig_histo = Plotter.linearize_hist(
                histo[:, :, jmult,
                      btag_reg_names_dict["Signal"]["reg"]].integrate(
                          "jmult").integrate("btag"))

            # loop over each systematic
            for sys in systs:
                if sys not in systematics.template_sys_to_name[
                        args.year].keys():
                    continue

                # dedicated ttJets systematics are taken straight from the signal
                # region; everything else gets the sideband-based estimate
                sys_histo = sig_histo[:, sys].integrate(
                    "sys") if sys in systematics.ttJets_sys.values(
                    ) else Plotter.BKG_Est(
                        sig_reg=sig_histo[:, sys].integrate("sys"),
                        sb_reg=cen_sb_histo,
                        norm_type="SigMC",
                        sys=sys,
                        ignore_uncs=True)

                ## write nominal and systematic variations for each topology to file
                #for proc in sorted(set([key[0] for key in sig_histo.values().keys()])):
                for proc in sorted(
                        set([key[0] for key in sys_histo.values().keys()])):
                    if ("tt" not in proc) and (
                            sys in systematics.ttJets_sys.values()):
                        continue
                    #if (proc != "tt") and (sys in systematics.ttJets_sys.values()): continue
                    if (proc == "data_obs") and not (sys == "nosys"):
                        continue
                    # skip processes with no filled bins for this systematic
                    if not sys_histo[proc].values().keys():
                        #if not sig_histo[proc, sys].values().keys():
                        print(
                            f"Systematic {sys} for {lep} {jmult} {proc} not found, skipping"
                        )
                        continue

                    print(args.year, lep, jmult, sys, proc)
                    #set_trace()
                    # filter(None, ...) drops empty name components
                    outhname = "_".join(
                        list(
                            filter(None, [
                                proc, systematics.template_sys_to_name[
                                    args.year][sys][0], lepdir,
                                (args.year)[-2:]
                            ])))
                    if "LEP" in outhname:
                        outhname = outhname.replace(
                            "LEP",
                            "muon") if lep == "Muon" else outhname.replace(
                                "LEP", "electron")

                    template_histo = sys_histo[proc].integrate("process")
                    #template_histo = sig_histo[proc, sys].integrate("process").integrate("sys")

                    #set_trace()
                    ## save template histos to coffea dict
                    if jmult == "3Jets":
                        histo_dict_3j[lep][
                            f"{proc}_{sys}"] = template_histo.copy()
                    if jmult == "4PJets":
                        histo_dict_4pj[lep][
                            f"{proc}_{sys}"] = template_histo.copy()

                    ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

    if "3Jets" in njets_to_run:
        coffea_out_3j = os.path.join(
            outdir,
            f"test_raw_templates_lj_3Jets_bkg_{args.year}_{jobid}.coffea")
        save(histo_dict_3j, coffea_out_3j)
        print(f"{coffea_out_3j} written")
    if "4PJets" in njets_to_run:
        coffea_out_4pj = os.path.join(
            outdir,
            f"test_raw_templates_lj_4PJets_bkg_{args.year}_{jobid}.coffea")
        save(histo_dict_4pj, coffea_out_4pj)
        print(f"{coffea_out_4pj} written")

    upfout.close()
    print(f"{tmp_rname} written")
def get_sig_templates(tmp_rname):
    """
    Function that writes linearized mtt vs costheta signal distributions to root file.

    Only semileptonic signal decays ("TTJetsSL") are used. For each lepton
    flavour and jet multiplicity, the 2D mtt vs |cos(theta*)| histogram is
    lumi-scaled, grouped by process, linearized, and the nominal plus
    systematic templates are written to `tmp_rname` (root) and to .coffea
    files in `outdir`.

    NOTE(review): depends on module-level configuration defined elsewhere in
    this file (sig_fnames, linearize_binning, njets_to_run, lumi_corr_dict,
    systematics, outdir, jobid, args) -- confirm these are set before calling.

    Parameters
    ----------
    tmp_rname : str
        Path of the root file the templates are written to.
    """

    def widthTOname(width):
        # e.g. "2.5" -> "2p5", usable in template/histogram names
        return str(width).replace(".", "p")

    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(
        sig_fnames) if len(sig_fnames) > 1 else load(sig_fnames[0])

    # get correct hist and rebin
    hname_to_use = "mtt_vs_tlep_ctstar_abs"
    if hname_to_use not in hdict.keys():
        raise ValueError(f"{hname_to_use} not found in file")
    xrebinning, yrebinning = linearize_binning
    #xrebinning, yrebinning = mtt_ctstar_2d_binning
    histo = hdict[hname_to_use]  # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
    ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)

    ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    histo = histo.rebin(yaxis_name, new_ybins)
    # keep signal datasets in the b-tag signal region only
    rebin_histo = histo[Plotter.signal_samples, :, :, :,
                        "btagPass"].integrate("btag")

    names = [
        dataset for dataset in sorted(
            set([key[0] for key in rebin_histo.values().keys()]))
    ]  # get dataset names in hists

    signals = sorted(set([key[0] for key in rebin_histo.values().keys()]))
    signals = [sig for sig in signals if "TTJetsSL" in sig]  # only use SL decays

    systs = sorted(set([key[1] for key in rebin_histo.values().keys()]))
    systs.insert(0, systs.pop(systs.index("nosys")))  # move "nosys" to the front

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot3.recreate(tmp_rname, compression=uproot3.ZLIB(
        4)) if os.path.isfile(tmp_rname) else uproot3.create(tmp_rname)
    if "3Jets" in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({
            "Muon": {},
            "Electron": {}
        })
    if "4PJets" in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({
            "Muon": {},
            "Electron": {}
        })

    # write signal dists to temp file
    for lep in ["Muon", "Electron"]:
        # "NJETS" placeholder is substituted per jet multiplicity below
        orig_lepdir = "muNJETS" if lep == "Muon" else "eNJETS"

        # scale by lumi
        lumi_correction = lumi_corr_dict[args.year]["%ss" % lep]
        histo = rebin_histo.copy()
        histo.scale(lumi_correction, axis="dataset")
        process_groups = plt_tools.make_dataset_groups(lep,
                                                       args.year,
                                                       samples=names,
                                                       gdict="templates")
        histo = histo.group(
            "dataset", hist.Cat("process", "Process", sorting="placement"),
            process_groups)

        for jmult in njets_to_run:
            lepdir = orig_lepdir.replace("NJETS", jmult.lower())

            lin_histo = Plotter.linearize_hist(
                histo[:, :, jmult, lep].integrate("jmult").integrate("leptype"))
            for signal in signals:
                # interference samples carry an extra weight token, e.g.
                # AtoTTJetsSL_M500_W2p5_Int_neg vs ..._Res
                if "Int" in signal:
                    boson, mass, width, pI, wt = tuple(signal.split("_"))
                else:
                    boson, mass, width, pI = tuple(signal.split("_"))
                sub_name = "_".join([
                    "%s%s" % (boson[0], mass[1:]),
                    "relw%s" % widthTOname(width).split("W")[-1],
                    pI.lower(), wt
                ]) if pI == "Int" else "_".join([
                    "%s%s" % (boson[0], mass[1:]),
                    "relw%s" % widthTOname(width).split("W")[-1],
                    pI.lower()
                ])

                for sys in systs:
                    if sys not in systematics.template_sys_to_name[
                            args.year].keys():
                        continue
                    # skip systematics with no filled bins for this signal
                    if not lin_histo[signal, sys].values().keys():
                        print(
                            f"Systematic {sys} for {lep} {jmult} {signal} not found, skipping"
                        )
                        continue

                    print(args.year, lep, jmult, sub_name, sys)
                    # filter(None, ...) drops empty name components
                    outhname = "_".join(
                        list(
                            filter(None, [
                                sub_name, systematics.template_sys_to_name[
                                    args.year][sys][0], lepdir,
                                (args.year)[-2:]
                            ])))
                    if "LEP" in outhname:
                        outhname = outhname.replace(
                            "LEP",
                            "muon") if lep == "Muon" else outhname.replace(
                                "LEP", "electron")

                    template_histo = lin_histo[signal, sys].integrate(
                        "process").integrate("sys")

                    ## save template histos to coffea dict
                    if jmult == "3Jets":
                        histo_dict_3j[lep][
                            f"{signal}_{sys}"] = template_histo.copy()
                    if jmult == "4PJets":
                        histo_dict_4pj[lep][
                            f"{signal}_{sys}"] = template_histo.copy()

                    ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

    if "3Jets" in njets_to_run:
        coffea_out_3j = os.path.join(
            outdir,
            f"test_raw_templates_lj_3Jets_sig_{args.year}_{jobid}.coffea")
        save(histo_dict_3j, coffea_out_3j)
        print(f"{coffea_out_3j} written")
    if "4PJets" in njets_to_run:
        coffea_out_4pj = os.path.join(
            outdir,
            f"test_raw_templates_lj_4PJets_sig_{args.year}_{jobid}.coffea")
        save(histo_dict_4pj, coffea_out_4pj)
        print(f"{coffea_out_4pj} written")

    upfout.close()
    print(f"{tmp_rname} written")
name for name in names if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats]) ] # gets ttJets(_PS)_other, ... if len(ttJets_cats) > 0: for tt_cat in ttJets_cats: ttJets_lumi_topo = '_'.join( tt_cat.split('_')[:-2]) if 'sl_tau' in tt_cat else '_'.join( tt_cat.split('_') [:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS mu_lumi = lumi_correction[year]['Muons'][ttJets_lumi_topo] el_lumi = lumi_correction[year]['Electrons'][ttJets_lumi_topo] lumi_correction[year]['Muons'].update({tt_cat: mu_lumi}) lumi_correction[year]['Electrons'].update({tt_cat: el_lumi}) ## make groups based on process process_groups = plt_tools.make_dataset_groups( 'Muon', year, samples=names) # works when only MC present mthad_fit_range = (0.9, 2.2 ) ## chosen because ~95% of events fall in this range #mthad_fit_range = (0.9, 3.5) for hname in variables.keys(): if not hname in hdict.keys(): raise ValueError("Hist %s not found" % hname) #set_trace() histo = hdict[hname] ## rescale hist by lumi for muons and electrons separately and then combine h_mu = histo[:, 'Muon'].integrate('leptype') h_mu.scale(lumi_correction[year]['Muons'], axis='dataset') h_el = histo[:, 'Electron'].integrate('leptype') h_el.scale(lumi_correction[year]['Electrons'], axis='dataset')