Example #1
def do_run(args):
    """Run the analysis locally."""
    # Run over all files associated with the dataset
    if args.datasrc == 'das':
        fileset = files_from_das(regex=args.dataset)
    else:
        fileset = files_from_eos(regex=args.dataset)

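    # Run the coffea processor over the fileset; the futures executor parallelizes across 'workers' local processes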
    output = run_uproot_job_nanoaod(
        fileset,
        treename='Events',
        processor_instance=choose_processor(args)(),
        executor=processor.futures_executor,
        executor_args={
            'workers': args.jobs,
            'flatten': True
        },
        chunksize=500000,
    )

    # Save output
    if not os.path.exists(args.outpath):
        os.makedirs(args.outpath)
    outpath = pjoin(args.outpath, f"monojet_{args.dataset}.coffea")
    save(output, outpath)
Example #2
def do_worker(args):
    """Run the analysis on a worker node."""
    # Run over all files associated with the dataset

    # Create output directory
    args.outpath = os.path.abspath(args.outpath)
    if not os.path.exists(args.outpath):
        os.makedirs(args.outpath)

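    # Build the fileset from the plain-text file list, converting each entry to an XRootD path via xrootd_format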
    with open(args.filelist, "r") as f:
        files = [xrootd_format(x.strip()) for x in f.readlines()]
    fileset = {args.dataset: files}

    ndatasets = len(fileset)
    nfiles = sum([len(x) for x in fileset.values()])
    print(f"Running over {ndatasets} datasets with a total of {nfiles} files.")

    output = run_uproot_job_nanoaod(
        fileset,
        treename='Events',
        processor_instance=choose_processor(args)(),
        executor=processor.futures_executor,
        executor_args={
            'workers': args.jobs,
            'flatten': True
        },
        chunksize=500000,
    )

    # Save output
    if not os.path.exists(args.outpath):
        os.makedirs(args.outpath)
    outpath = pjoin(args.outpath,
                    f"monojet_{args.dataset}_{args.chunk}.coffea")
    save(output, outpath)
Example #3
def cmerge(output_file, input_files, force=False):
    print("cmerge(output_file={}, input_files={}".format(
        output_file, input_files))
    if os.path.isfile(output_file) and not force:
        raise ValueError(
            "Output file {} already exists. Use option force to overwrite.".
            format(output_file))
    output = None
    for input_file in input_files:
        this_content = util.load(input_file)
        # Merge datasets to save space
        keys = list(this_content.keys())
        for key in keys:
            if "Bcands" in key or "cutflow" in key:
                continue
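            # For histograms with a 'dataset' axis, group the per-subjob dataset categories into one category per run period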
            if type(this_content[key]).__name__ == "Hist":
                if "dataset" in [x.name for x in this_content[key].axes()]:
                    subjobs = this_content[key].axis("dataset").identifiers()
                    mapping = {}
                    for subjob in subjobs:
                        runp = re_subjob.search(subjob.name).group()
                        if runp not in mapping:
                            mapping[runp] = []
                        mapping[runp].append(subjob.name)
                    this_content[key] = this_content[key].group(
                        "dataset", hist.Cat("dataset", "Primary dataset"),
                        mapping)

        if not output:
            output = this_content
        else:
            output.add(this_content)
    print(f"Saving output to {output_file}")
    util.save(output, output_file)
Example #4
def merge(folder, variable=None, exclude=None):

    lists = {}
    for filename in os.listdir(folder):
        if '.reduced' not in filename: continue
        if filename.split('--')[0] not in lists:
            lists[filename.split('--')[0]] = []
        lists[filename.split('--')[0]].append(folder + '/' + filename)

    for var in lists.keys():
        tmp = {}
        if variable is not None and var not in variable: continue
        if exclude is not None and var in exclude: continue
        print(lists[var])
        for filename in lists[var]:
            print('Opening:', filename)
            hin = load(filename)
            if var not in tmp: tmp[var] = [hin[var]]
            else: tmp[var].append(hin[var])
            del hin
        print(tmp)
        for k in tmp:
            tmp_arr = futuresum(np.array(tmp[k]))
            hists = {}
            hists[k] = tmp_arr[0]
            print(hists)
            save(hists, folder + '/' + k + '.merged')
Example #5
def merger(name):
    tstart = time.time()

    # find the directory named as the analyzer name
    if (subprocess.run("find output/ -type d -name '" + name + "'", shell=True, stdout=subprocess.PIPE).stdout.decode("utf-8") == ''):
        raise Exception("Directory not found!")
    print("Merging files in output/" + name)

    # find the files in the directory
    files = subprocess.run("find output/" + name + "/ -type f -name '" + name + "*' -not -path *merged*", shell=True, stdout=subprocess.PIPE)
    file_list = files.stdout.decode("utf-8").splitlines()
    if len(file_list) == 0:
        raise Exception("no files in directory!")

    # Load the files into the accumulator and merge them
    for idx, f in tqdm(enumerate(file_list), desc="Merging", unit=" files", total=len(file_list)):
        if (idx == 0): 
            acc = load(file_list[0])
        else:
            acc += load(f)
        os.system("rm -rf " + f)

    # Finally, save the merged file into the merged/ folder
    print("Saving as output/" + name + "/merged/" + name + "_merged.coffea")
    os.system("mkdir -p output/" + name + "/merged")
    save(acc, "output/" + name + "/merged/" + name + "_merged.coffea")

    elapsed = round(time.time() - tstart, 2)
    print(f"Merge finished in: {elapsed} s")
Example #6
def reduce(folder,_dataset=None,variable=None):

     lists = {}
     for filename in os.listdir(folder):
          if '.futures' not in filename: continue
          if filename.split("____")[0] not in lists: lists[filename.split("____")[0]] = []
          lists[filename.split("____")[0]].append(folder+'/'+filename)
          
     for pdi in lists.keys():
          if _dataset is not None and _dataset not in pdi: continue
          tmp={}
          for filename in lists[pdi]:
               print('Opening:',filename)
               hin = load(filename)
               for k in hin.keys():
                    if variable is not None and k!=variable: continue
                    print('Considering variable',k)
                    if k not in tmp: tmp[k]=[hin[k]]
                    else: tmp[k].append(hin[k])
               del hin
          for k in tmp:
               tmp_arr=futuresum(np.array(tmp[k]))
               hists = {}
               hists[k]=tmp_arr[0]
               dataset = hist.Cat("dataset", "dataset", sorting='placement')
               dataset_cats = ("dataset",)
               dataset_map = OrderedDict()
               for d in hists[k].identifiers('dataset'):
                    if d.name.split("____")[0] not in dataset_map: dataset_map[d.name.split("____")[0]] = (d.name.split("____")[0]+"*",)
               hists[k] = hists[k].group(dataset_cats, dataset, dataset_map)
               print(hists)
               save(hists, folder+'/'+k+'--'+pdi+'.reduced')
Example #7
def main():

    if len(sys.argv) != 3:
        print("Enter year and index")
        return 

    year = sys.argv[1]
    index = sys.argv[2]

#    cluster = LPCCondorCluster(transfer_input_files="boostedhiggs")
#    cluster.adapt(minimum=1, maximum=200)
#    client = Client(cluster)

    from coffea import processor, util, hist
    from boostedhiggs import HbbProcessor

    infiles=subprocess.getoutput("ls infiles-split/"+year+"_"+str(index)+".json").split()

    uproot.open.defaults["xrootd_handler"] = uproot.source.xrootd.MultithreadedXRootDSource

    for this_file in infiles:
        print(this_file)

        p = HbbProcessor(year=year,tagger='v2')
        args = {'savemetrics':True, 'schema':NanoAODSchema}
        
        out, metrics = processor.run_uproot_job(str(this_file), 'Events', p, processor.futures_executor, args, chunksize=10000) 

        print(f"Output: {out}")
        print(f"Metrics: {metrics}")

        outfile = 'outfiles/'+str(year)+'_'+str(index)+'.coffea'
        util.save(out, outfile)

    return
Example #8
def make_pileup(args):
    with open(args.samplejson) as fin:
        samplefiles = json.load(fin)
    sample = samplefiles[args.sample]

    filelist = []
    for dataset, files in sample.items():
        if dataset == 'JetHT' or dataset == 'SingleMuon':
            continue
        for file in files:
            filelist.append((dataset, file))

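    # Accumulators that collect the per-dataset pileup distributions and sums of generator weights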
    final_accumulator = processor.dict_accumulator({
        'pileup':
        processor.dict_accumulator(),
        'sumw':
        processor.dict_accumulator(),
    })
    processor.futures_executor(filelist,
                               get_pileup,
                               final_accumulator,
                               workers=args.workers)

    save(final_accumulator['pileup'], 'correction_files/pileup_mc.coffea')
    save(final_accumulator['sumw'], 'correction_files/sumw_mc.coffea')
Example #9
    def _analyze_file(self, file, treename='Events', flatten=True):
        '''
        Analyze a single file. Reads the "Events" tree and converts it into a LazyDataFrame.
        This df is then passed onto the RSPostProcessor.
        '''
        t = uproot.open(file)[treename]
        df = LazyDataFrame(t, flatten=flatten)

        # Dataset name from the filename
        df['dataset'] = re.sub(r'_rebalanced_tree_(\d+).root', '',
                               os.path.basename(file))
        df['is_data'] = is_data(df['dataset'])

        ichunk = re.findall('tree_(\d+).root', os.path.basename(file))[0]

        if not df['is_data']:
            df['sumw'], df['sumw2'] = self._read_sumw_sumw2(file)

        # Process the dataframe!
        processor_instance = RSPostProcessor()
        out = processor_instance.process(df)

        # Save the output file
        outpath = pjoin(self.outdir,
                        f'rebsmear_{df["dataset"]}_{ichunk}.coffea')
        save(out, outpath)
Example #10
def smooth_bkg_templates(fnames_to_run):
    """
    Function that writes linearized mtt vs costheta distributions to root file.
    """
    if "3Jets" in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({
            "Muon": {},
            "Electron": {}
        })
    if "4PJets" in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({
            "Muon": {},
            "Electron": {}
        })

    #set_trace()
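    # Loop over the input template files, smooth each systematic template, and collect the results per lepton channel and njet category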
    for bkg_file in fnames_to_run:
        hdict = load(bkg_file)
        jmult = "3Jets" if "3Jets" in os.path.basename(bkg_file) else "4PJets"
        for lep in hdict.keys():
            for tname, orig_template in hdict[lep].items():
                #set_trace()

                proc = tname.split("_")[0] if "data_obs" not in tname else "data_obs"
                sys = sorted(filter(None, tname.split(f"{proc}_")))[0]
                #if sys == "nosys": continue
                print(lep, jmult, sys, proc)

                # perform smoothing
                smoothed_histo = hdict[lep][f"{proc}_nosys"].copy() if sys == "nosys" else Plotter.smoothing_mttbins(
                    nosys=hdict[lep][f"{proc}_nosys"],
                    systematic=orig_template,
                    mtt_centers=mtt_centers,
                    nbinsx=len(linearize_binning[0]) - 1,
                    nbinsy=len(linearize_binning[1]) - 1)

                ## save template histos to coffea dict
                if jmult == "3Jets":
                    histo_dict_3j[lep][tname] = smoothed_histo.copy()
                if jmult == "4PJets":
                    histo_dict_4pj[lep][tname] = smoothed_histo.copy()

    #set_trace()
    if "3Jets" in njets_to_run:
        coffea_out_3j = os.path.join(
            input_dir,
            f"test_smoothed_templates_lj_3Jets_bkg_{args.year}_{jobid}.coffea")
        save(histo_dict_3j, coffea_out_3j)
        print(f"{coffea_out_3j} written")
    if "4PJets" in njets_to_run:
        coffea_out_4pj = os.path.join(
            input_dir,
            f"test_smoothed_templates_lj_4PJets_bkg_{args.year}_{jobid}.coffea"
        )
        save(histo_dict_4pj, coffea_out_4pj)
        print(f"{coffea_out_4pj} written")
Example #11
def test_loadsave():
    filename = 'testprocessor.coffea'
    try:
        aprocessor = NanoEventsProcessor()
        save(aprocessor, filename)
        newprocessor = load(filename)
        assert 'pt' in newprocessor.accumulator
        assert newprocessor.accumulator['pt'].compatible(aprocessor.accumulator['pt'])
    finally:
        if os.path.exists(filename):
            os.remove(filename)
Example #12
def getPlots(args):
    lumifb = float(args.lumi)
    tag = args.tag
    savename = args.savetag

    odir = 'plots/%s/' % tag
    os.system('mkdir -p %s' % odir)
    pwd = os.getcwd()

    # open hists
    hists_unmapped = load('%s.coffea' % args.hists)
    os.chdir(odir)

    # map to hists
    hists_mapped = {}
    #print(hists_unmapped,hists_mapped)
    for key, val in hists_unmapped.items():

        if isinstance(val, hist.Hist):
            print(key, val)
            hists_mapped[key] = processmap.apply(val)
    # normalize to lumi
    print('normalizing mc to %.1f fb^-1 lumi' % lumifb)
    for h in hists_mapped.values():
        h.scale(
            {
                p: lumifb
                for p in h.identifiers('process')
                if 'JetHT' not in str(p) and 'SingleMuon' not in str(p)
            },
            axis="process")
    h.scale({'tt': 0.8}, axis="process")
    h.scale({'tttoleptonic': 0.8}, axis="process")
    h.scale({'tttosemileptonic': 0.8}, axis="process")
    h.scale({'tttohadronic': 0.8}, axis="process")
    # properties
    hist_name = args.hist
    var_name = args.var
    var_label = r"$%s$" % args.varlabel
    vars_cut = {}
    if (len(args.sel) % 3 == 0):
        for vi in range(int(len(args.sel) / 3)):
            vars_cut[args.sel[vi * 3]] = [
                float(args.sel[vi * 3 + 1]),
                float(args.sel[vi * 3 + 2])
            ]
    h = hists_mapped[hist_name]

    save(h, 'test.coffea')
    drawSolo(h, args.hist, var_name, var_label, args.title, lumifb, vars_cut,
             args.regions, savename, args.plotData, args.plotDensity)

    os.chdir(pwd)
Example #13
def main():

    fileset = {
        "VBF_HToInvisible_M125_pow_pythia8_2017" : [
            "root://cmsxrootd.fnal.gov//store/user/aandreas/nanopost/03Sep20v7/VBF_HToInvisible_M125_13TeV_TuneCP5_powheg_pythia8/VBF_HToInvisible_M125_pow_pythia8_2017/200925_184136/0000/tree_1.root"
        ]
    }

    years = list(set(map(extract_year, fileset.keys())))
    assert(len(years)==1)

    args = parse_commandline()
    processor_class = args.processor

    if processor_class == 'monojet':
        from bucoffea.monojet import monojetProcessor
        processorInstance = monojetProcessor()
    elif processor_class == 'vbfhinv':
        from bucoffea.vbfhinv import vbfhinvProcessor
        processorInstance = vbfhinvProcessor()
    elif processor_class == 'lhe':
        from bucoffea.gen.lheVProcessor import lheVProcessor
        processorInstance = lheVProcessor()
    elif args.processor == 'purity':
        from bucoffea.photon_purity import photonPurityProcessor
        processorInstance = photonPurityProcessor()
    elif args.processor == 'sumw':
        from bucoffea.gen import mcSumwProcessor
        processorInstance = mcSumwProcessor()

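    # Prepend an XRootD redirector to bare /store/... logical file names so they can be opened remotely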
    for dataset, filelist in fileset.items():
        newlist = []
        for file in filelist:
            if file.startswith("/store/"):
                newlist.append("root://cms-xrd-global.cern.ch//" + file)
            else: newlist.append(file)
        fileset[dataset] = newlist

    for dataset, filelist in fileset.items():
        tmp = {dataset:filelist}
        output = run_uproot_job_nanoaod(tmp,
                                    treename='Runs' if args.processor=='sumw' else 'Events',
                                    processor_instance=processorInstance,
                                    executor=processor.futures_executor,
                                    executor_args={'workers': 4, 'flatten': True},
                                    chunksize=500000,
                                    )
        save(output, f"{processor_class}_{dataset}.coffea")
        # Debugging / testing output
        # debug_plot_output(output)
        print_cutflow(output, outfile=f'{processor_class}_cutflow_{dataset}.txt')
Example #14
def cmerge(output_file, input_files, force=False):
    print("cmerge(output_file={}, input_files={}".format(
        output_file, input_files))
    if os.path.isfile(output_file) and not force:
        raise ValueError(
            "Output file {} already exists. Use option force to overwrite.".
            format(output_file))
    output = None
    for input_file in input_files:
        if not output:
            output = util.load(input_file)
        else:
            output.add(util.load(input_file))
    print(f"Saving output to {output_file}")
    util.save(output, output_file)
Example #15
def main():

    if len(sys.argv) != 3:
        print("Enter year and index")
        return

    year = sys.argv[1]
    index = sys.argv[2]

    #    cluster = LPCCondorCluster(transfer_input_files="boostedhiggs")
    #    cluster.adapt(minimum=1, maximum=200)
    #    client = Client(cluster)

    from coffea import processor, util, hist
    from boostedhiggs import VHProcessor

    infiles = subprocess.getoutput("ls infiles-split/" + year + "_" +
                                   str(index) + ".json").split()

    uproot.open.defaults[
        "xrootd_handler"] = uproot.source.xrootd.MultithreadedXRootDSource

    for this_file in infiles:
        print(this_file)

        p = VHProcessor(year=year, jet_arbitration='ddcvb', btagV2=True)
        #        args = {'client': client, 'savemetrics':True, 'schema':NanoAODSchema, 'align_clusters':True, 'retries': 1}
        args = {'savemetrics': True, 'schema': NanoAODSchema, 'retries': 1}

        #        print("Waiting for at least one worker...")
        #        client.wait_for_workers(1)
        #        out, metrics = processor.run_uproot_job(str(this_file), 'Events', p, processor.dask_executor, args, chunksize=10000)

        out, metrics = processor.run_uproot_job(str(this_file),
                                                'Events',
                                                p,
                                                processor.futures_executor,
                                                args,
                                                chunksize=10000)

        print(f"Output: {out}")
        print(f"Metrics: {metrics}")

        outfile = 'outfiles/' + str(year) + '_' + str(index) + '.coffea'
        util.save(out, outfile)

    return
Example #16
def postprocess(folder):

    variables = []
    for filename in os.listdir(folder):
        if '.merged' not in filename: continue
        #if '--' not in filename: continue
        if filename.split('.')[0] not in variables:
            variables.append(filename.split('.')[0])

    hists = {}
    for variable in variables:
        filename = folder + '/' + variable + '.merged'
        print('Opening:', filename)
        hin = load(filename)
        hists.update(hin)
    print(hists)
    save(hists, folder + '.merged')
Example #17
def plotter(name, analysis_type):
    print("Starting plots creation")

    print("Saving histograms in output/" + name)

    tstart = time.time()

    p = HistogramingProcessor(name, analysis_type)
    files = ["output/" + name + "/" + name + ".coffea"]
    out = p.accumulator.identity()
    for f in tqdm(files, desc="Processing", unit=" files", total=len(files)):
        out += p.process(f, analysis_type)

    save(out, "output/" + name + "/" + name + "_hists.coffea")

    elapsed = round(time.time() - tstart, 2)

    print(f"Finished in: {elapsed} s")
Example #18
def main(args):
    datasets = {}
    with open(args.fileset) as f:
        temp = json.load(f)
        for dsgroup, datasetlist in temp.items():
            if dsgroup != args.year: continue
            datasets = datasetlist

    ds = args.ds
    files = {}
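    # Select the requested dataset and, if nsplit > 1, keep only the isplit-th slice of its file list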
    for process, processds in datasets.items():
        for ids, flist in processds.items():
            if ids != ds: continue
            if args.nsplit != 1:
                lSplit = slice_it(flist, args.nsplit)
                for iL, iList in enumerate(lSplit):
                    if iL == args.isplit:
                        files[ds] = {'files': iList, 'treename': 'Events'}
            else:
                files[ds] = {'files': flist, 'treename': 'Events'}

    p = HwwProcessor(year=args.year,
                     trigger=args.trigger,
                     channel=args.channel,
                     regions=args.regions)

    exe_config = {
        'workers': 4,
        'savemetrics': True,
        'nano': True,
    }

    output, metrics = processor.run_uproot_job(files, 'Events', p,
                                               processor.iterative_executor,
                                               exe_config)
    util.save(
        output, 'output_%s_%iof%i_condor.coffea' %
        (ds.replace('/', '-'), args.isplit, args.nsplit))
Example #19
from coffea import hist, lookup_tools
from coffea.util import load, save
from coffea.hist import plot
import os

corrections = load('data/corrections.coffea')

# add gruddt correction derived with 2017 QCD
shift_hist = load(os.path.join(os.path.dirname(__file__), 'data', 'ddtmap_gru_QCD_debug_5.coffea'))
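# Extract the DDT map contents and axis edges, then wrap them in a dense_lookup indexed by jet pt and rho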
values = shift_hist.values(overflow='none')[()]
rho_bins = shift_hist.axis("jet_rho").edges(overflow='none')
pt_bins = shift_hist.axis("jet_pt").edges(overflow='none')
corrections['2017_gruddt_rho_pt'] = lookup_tools.dense_lookup.dense_lookup(values, (pt_bins,rho_bins))

shift_hist = load(os.path.join(os.path.dirname(__file__), 'data', 'ddtmap_n2_QCD_debug_5.coffea'))
values = shift_hist.values(overflow='none')[()]
rho_bins = shift_hist.axis("jet_rho").edges(overflow='none')
pt_bins = shift_hist.axis("jet_pt").edges(overflow='none')
corrections['2017_n2b1_rho_pt'] = lookup_tools.dense_lookup.dense_lookup(values, (pt_bins,rho_bins))

shift_hist = load(os.path.join(os.path.dirname(__file__), 'data', 'ddtmap_27Jul20_v3_in.coffea'))
values = shift_hist.values(overflow='none')[()]
rho_bins = shift_hist.axis("jet_rho").edges(overflow='none')
pt_bins = shift_hist.axis("jet_pt").edges(overflow='none')
corrections['2017_inddt_rho_pt'] = lookup_tools.dense_lookup.dense_lookup(values, (pt_bins,rho_bins))

save(corrections, 'data/corrections_4.coffea')
Example #20
args = parser.parse_args()

ttPS_dict = load(args.ttJets_PS_file)
tt_dict = load(args.ttJets_file)
output_dict = tt_dict.copy()

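# Regex masks: everything except the ttJets_PS datasets, and everything except the nominal 'nosys' systematic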
nonTTPS_mask = re.compile('(?!ttJets_PS*)')
sys_mask = re.compile('(?!nosys)')

for hname in ttPS_dict.keys():
    if 'cutflow' in hname: continue
    print(hname)
    histo = output_dict[hname]
    ps_histo = ttPS_dict[hname]

    ## get ttJets_PS hists for systematic variations
    tt_ps_sys_dict = ps_histo['ttJets_PS*', sys_mask, :, :, :, :]
    ## get all non ttJets_PS hists
    nonTTPS_dict = ps_histo[nonTTPS_mask, :, :, :, :, :]

    #set_trace()
    # add hists to ttJets hist
    histo.add(nonTTPS_dict)
    histo.add(tt_ps_sys_dict)

outname = args.output_fname if args.output_fname.endswith(
    '.coffea') else '%s.coffea' % args.output_fname
output_acc = processor.dict_accumulator(output_dict)
save(output_acc, outname)
print('%s written' % outname)
Example #21
    mask = ~(pt==np.nan)#just a complicated way to initialize a jagged array with the needed shape to True
    mask = ((pt>10)&(abs(eta)<2.5)&(tight_id==4)&(tightCharge)) # Trigger: HLT_Ele27_WPTight_Gsf_v
    return mask

def isGoodJet(pt, eta, jet_id, jetPtCut=30):
    mask = (pt>jetPtCut) & (abs(eta)<2.4) & ((jet_id&2)==2)
    return mask

def isMuonMVA(pt, eta, dxy, dz, miniIso, sip3D, mvaTTH, mediumPrompt, tightCharge, jetDeepB=0, minpt=15):
  mask = (pt>minpt)&(abs(eta)<2.4)&(abs(dxy)<0.05)&(abs(dz)<0.1)&(miniIso<0.4)&(sip3D<5)&(mvaTTH>0.55)&(mediumPrompt)&(tightCharge==2)&(jetDeepB<0.1522)
  return mask

def isElecMVA(pt, eta, dxy, dz, miniIso, sip3D, mvaTTH, elecMVA, lostHits, convVeto, tightCharge, jetDeepB=0, minpt=15):
  miniIsoCut = 0.085 # Tight
  mask = (pt>minpt)&(abs(eta)<2.4)&(abs(dxy)<0.05)&(abs(dz)<0.1)&(miniIso<miniIsoCut)&(sip3D<8)&(mvaTTH>0.125)&(elecMVA>0.80)&(jetDeepB<0.1522)&(lostHits<1)&(convVeto)&(tightCharge==2)
  return mask 

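# Collect the object-ID helper functions into a dict and persist it with coffea's save()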
ids = {}
ids['isTightMuonPOG'] = isTightMuonPOG
ids['isTightElectronPOG'] = isTightElectronPOG
ids['isMuonMVA'] = isMuonMVA
ids['isElecMVA'] = isElecMVA
ids['isGoodJet'] = isGoodJet

if not os.path.isdir(outdir): os.system('mkdir -p ' + outdir)
save(ids, outdir+outname+'.coffea')
Example #22

# Jet ID flags: bit 1 is loose (always false in 2017 since it does not exist), bit 2 is tight, bit 3 is tightLepVeto
# POG uses the tight jet ID as the standard jet ID


def isGoodJet(pt, eta, jet_id, nhf, nef, chf, cef):
    mask = (pt > 25) & (abs(eta) < 2.4) & ((jet_id & 2) == 2) & (nhf < 0.8) & (
        nef < 0.99) & (chf > 0.1) & (cef < 0.99)
    return mask


def isHEMJet(pt, eta, phi):
    mask = (pt > 30) & ((eta > -3.0) & (eta < -1.3)) & ((phi > -1.57) &
                                                        (phi < -0.87))
    return mask


ids = {}
ids['isLooseElectron'] = isLooseElectron
ids['isTightElectron'] = isTightElectron
ids['isLooseMuon'] = isLooseMuon
ids['isTightMuon'] = isTightMuon
ids['isLooseTau'] = isLooseTau
ids['isLoosePhoton'] = isLoosePhoton
ids['isTightPhoton'] = isTightPhoton
ids['isGoodJet'] = isGoodJet
ids['isGoodFatJet'] = isGoodFatJet
ids['isHEMJet'] = isHEMJet
save(ids, 'data/ids.coffea')
Example #23
    sig_hists={}
    data_hists={}
    for key in hists.keys():
        bkg_hists[key] = hists[key].group(cats, process, bkg_map)
        sig_hists[key] = hists[key].group(cats, process, sig_map)
        data_hists[key] = hists[key].group(cats, process, data_map)
    print('Histograms grouped')

    return bkg_hists, sig_hists, data_hists

if __name__ == '__main__':
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option('-f', '--file', help='file', dest='file')
    parser.add_option('-d', '--directory', help='directory', dest='directory')
    (options, args) = parser.parse_args()

    if options.directory: 
        bkg_hists, sig_hists, data_hists = scale_directory(options.directory)
        name = options.directory
    if options.file: 
        bkg_hists, sig_hists, data_hists = scale_file(options.file)
        name = options.file.split(".")[0]

    hists={
        'bkg': bkg_hists,
        'sig': sig_hists,
        'data': data_hists
    }
    save(hists,name+'.scaled')
Example #24
def save_corrections(year):
    corrections = {}

    # golden json
    if year == '2016':
        corrections[
            'golden'] = '/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions16/13TeV/ReReco/Final/Cert_271036-284044_13TeV_ReReco_07Aug2017_Collisions16_JSON.txt'
    if year == '2017':
        corrections[
            'golden'] = '/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions17/13TeV/ReReco/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON_v1.txt'
    if year == '2018':
        corrections[
            'golden'] = '/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions18/13TeV/ReReco/Cert_314472-325175_13TeV_17SeptEarlyReReco2018ABC_PromptEraD_Collisions18_JSON.txt'

    # cross sections
    corrections['xsec'] = xsec
    # manually add the test samples
    corrections['xsec']['DY'] = 6077.22
    corrections['xsec']['HZZ'] = 43.92 * 2.64e-02 * (3.3658e-2 * 3)**2
    corrections['xsec']['DoubleMuon'] = 1.

    extractor = lookup_tools.extractor()
    # electron
    # POG
    if year == '2016':
        extractor.add_weight_sets([
            'electron_id_Veto_ * data/scalefactors/electron/2016/2016_ElectronWPVeto_Fall17V2.root',
            'electron_id_Loose_ * data/scalefactors/electron/2016/2016LegacyReReco_ElectronLoose_Fall17V2.root',
            'electron_id_Medium_ * data/scalefactors/electron/2016/2016LegacyReReco_ElectronMedium_Fall17V2.root',
            'electron_id_Tight_ * data/scalefactors/electron/2016/2016LegacyReReco_ElectronTight_Fall17V2.root',
            'electron_id_MVA80_ * data/scalefactors/electron/2016/2016LegacyReReco_ElectronMVA80_Fall17V2.root',
            'electron_id_MVA90_ * data/scalefactors/electron/2016/2016LegacyReReco_ElectronMVA90_Fall17V2.root',
            'electron_id_MVA80noiso_ * data/scalefactors/electron/2016/2016LegacyReReco_ElectronMVA80noiso_Fall17V2.root',
            'electron_id_MVA90noiso_ * data/scalefactors/electron/2016/2016LegacyReReco_ElectronMVA90noiso_Fall17V2.root',
        ])
    elif year == '2017':
        extractor.add_weight_sets([
            'electron_id_Veto_ * data/scalefactors/electron/2017/2017_ElectronWPVeto_Fall17V2.root',
            'electron_id_Loose_ * data/scalefactors/electron/2017/2017_ElectronLoose.root',
            'electron_id_Medium_ * data/scalefactors/electron/2017/2017_ElectronMedium.root',
            'electron_id_Tight_ * data/scalefactors/electron/2017/2017_ElectronTight.root',
            'electron_id_MVA80_ * data/scalefactors/electron/2017/2017_ElectronMVA80.root',
            'electron_id_MVA90_ * data/scalefactors/electron/2017/2017_ElectronMVA90.root',
            'electron_id_MVA80noiso_ * data/scalefactors/electron/2017/2017_ElectronMVA80noiso.root',
            'electron_id_MVA90noiso_ * data/scalefactors/electron/2017/2017_ElectronMVA90noiso.root',
        ])
    elif year == '2018':
        extractor.add_weight_sets([
            'electron_id_Veto_ * data/scalefactors/electron/2018/2018_ElectronWPVeto_Fall17V2.root',
            'electron_id_Loose_ * data/scalefactors/electron/2018/2018_ElectronLoose.root',
            'electron_id_Medium_ * data/scalefactors/electron/2018/2018_ElectronMedium.root',
            'electron_id_Tight_ * data/scalefactors/electron/2018/2018_ElectronTight.root',
            'electron_id_MVA80_ * data/scalefactors/electron/2018/2018_ElectronMVA80.root',
            'electron_id_MVA90_ * data/scalefactors/electron/2018/2018_ElectronMVA90.root',
            'electron_id_MVA80noiso_ * data/scalefactors/electron/2018/2018_ElectronMVA80noiso.root',
            'electron_id_MVA90noiso_ * data/scalefactors/electron/2018/2018_ElectronMVA90noiso.root',
        ])

    # HZZ
    extractor.add_weight_sets([
        # electron reco
        f'electron_reco_ * data/scalefactors/electron/{year}/Ele_Reco_{year}.root',
        # electron hzz id
        f'electron_hzz_id_nogap_ * data/scalefactors/electron/{year}/ElectronSF_Legacy_{year}_NoGap.root',
        f'electron_hzz_id_gap_ * data/scalefactors/electron/{year}/ElectronSF_Legacy_{year}_Gap.root',
    ])

    # muon
    # POG
    if year == '2016':
        extractor.add_weight_sets([
            # id
            'muon_id_ * data/scalefactors/muon/2016/EfficienciesStudies_2016_legacy_rereco_rootfiles_RunBCDEF_SF_ID.root',
            'muon_id_2_ * data/scalefactors/muon/2016/EfficienciesStudies_2016_legacy_rereco_rootfiles_RunGH_SF_ID.root',
            # iso
            'muon_iso_ * data/scalefactors/muon/2016/EfficienciesStudies_2016_legacy_rereco_rootfiles_RunBCDEF_SF_ISO.root',
            'muon_iso_2_ * data/scalefactors/muon/2016/EfficienciesStudies_2016_legacy_rereco_rootfiles_RunGH_SF_ISO.root',
            # jpsi
            'muon_id_jpsi_ * data/scalefactors/muon/2016/EfficienciesStudies_2016_legacy_rereco_Jpsi_rootfiles_RunBCDEF_SF_ID.root',
            'muon_id_jpsi_2_ * data/scalefactors/muon/2016/EfficienciesStudies_2016_legacy_rereco_Jpsi_rootfiles_RunGH_SF_ID.root',
        ])
    elif year == '2017':
        extractor.add_weight_sets([
            # id
            'muon_id_ * data/scalefactors/muon/2017/EfficienciesStudies_2017_rootfiles_RunBCDEF_SF_ID.root',
            # iso
            'muon_iso_ * data/scalefactors/muon/2017/EfficienciesStudies_2017_rootfiles_RunBCDEF_SF_ISO.root',
            # jpsi
            'muon_id_jpsi_ * data/scalefactors/muon/2017/EfficienciesStudies_2017_rootfiles_RunBCDEF_SF_ID_JPsi.root',
        ])
    elif year == '2018':
        extractor.add_weight_sets([
            # id
            'muon_id_ * data/scalefactors/muon/2018/EfficienciesStudies_2018_rootfiles_RunABCD_SF_ID.root',
            # iso
            'muon_iso_ * data/scalefactors/muon/2018/EfficienciesStudies_2018_rootfiles_RunABCD_SF_ISO.root',
            # jpsi
            'muon_id_jpsi_ * data/scalefactors/muon/2018/EfficienciesStudies_2018_Jpsi_rootfiles_RunABCD_SF_ID.root',
        ])

    extractor.finalize()
    evaluator = extractor.make_evaluator()

    # EGamma POG corrections
    idnums = [
        'Veto',
        'Loose',
        'Medium',
        'Tight',
        'MVA80',
        'MVA90',
        'MVA80noiso',
        'MVA90noiso',
    ]

    for idnum in idnums:
        corrections[f'electron_id_{idnum}'] = evaluator[
            f'electron_id_{idnum}_EGamma_SF2D']

    # HZZ corrections
    corrections['electron_reco'] = evaluator['electron_reco_EGamma_SF2D']
    corrections['electron_hzz_id_nogap'] = evaluator[
        'electron_hzz_id_nogap_EGamma_SF2D']
    corrections['electron_hzz_id_gap'] = evaluator[
        'electron_hzz_id_gap_EGamma_SF2D']

    # Muon POG corrections
    if year == '2016':
        idnums = [
            'LooseID',
            'MediumID',
            'TightID',
            'HighPtID',
        ]
        iddenom = 'genTracks'
        effvars = 'eta_pt'
        highpt_effvars = 'eta_pair_newTuneP_probe_pt'
        iso_num_denoms = [
            ('LooseRelTkIso', 'HighPtIDandIPCut'),
            ('TightRelIso', 'MediumID'),
            ('TightRelIso', 'TightIDandIPCut'),
            ('LooseRelIso', 'LooseID'),
            ('LooseRelIso', 'MediumID'),
            ('LooseRelIso', 'TightIDandIPCut'),
        ]
        jpsinums = [
            'LooseID',
            'MediumID',
            'TightID',
            'SoftID',
        ]
        jpsidenom = 'genTracks'
        jpsieffvars = 'pt_abseta'
    elif year in ['2017', '2018']:
        idnums = [
            'LooseID',
            'MediumID',
            'MediumPromptID',
            'TightID',
            'SoftID',
            'HighPtID',
            'TrkHighPtID',
        ]
        iddenom = 'genTracks'
        if year == '2018':
            iddenom = 'TrackerMuons'
        effvars = 'pt_abseta'
        highpt_effvars = 'pair_newTuneP_probe_pt_abseta'
        iso_num_denoms = [
            ('LooseRelTkIso', 'HighPtIDandIPCut'),
            ('LooseRelTkIso', 'TrkHighPtID'),
            ('TightRelTkIso', 'HighPtIDandIPCut'),
            ('TightRelTkIso', 'TrkHighPtID'),
            ('TightRelIso', 'MediumID'),
            ('TightRelIso', 'TightIDandIPCut'),
            ('LooseRelIso', 'LooseID'),
            ('LooseRelIso', 'MediumID'),
            ('LooseRelIso', 'TightIDandIPCut'),
        ]
        jpsinums = [
            'LooseID',
            'MediumID',
            'TightID',
            'SoftID',
        ]
        jpsidenom = 'genTracks'
        jpsieffvars = 'pt_abseta'

    lumi2016_BCDEF = 19.721 / (16.146 + 19.721)
    lumi2016_GH = 16.146 / (16.146 + 19.721)

    for idnum in idnums:
        histkey = f'NUM_{idnum}_DEN_{iddenom}_{effvars}'
        if idnum in ['HighPtID', 'TrkHighPtID']:
            histkey = f'NUM_{idnum}_DEN_{iddenom}_{highpt_effvars}'
        corrections[f'muon_id_{idnum}'] = evaluator[f'muon_id_{histkey}']
        if year == '2016':
            corrections[f'muon_id_{idnum}']._values *= lumi2016_BCDEF
            corrections[f'muon_id_{idnum}']._values += evaluator[
                f'muon_id_2_{histkey}']._values * lumi2016_GH

    for isonum, isodenom in iso_num_denoms:
        histkey = f'NUM_{isonum}_DEN_{isodenom}_{effvars}'
        if isodenom in ['HighPtIDandIPCut', 'TrkHighPtID']:
            histkey = f'NUM_{isonum}_DEN_{isodenom}_{highpt_effvars}'
        corrections[f'muon_iso_{isonum}_{isodenom}'] = evaluator[
            f'muon_iso_{histkey}']
        if year == '2016':
            corrections[
                f'muon_iso_{isonum}_{isodenom}']._values *= lumi2016_BCDEF
            corrections[f'muon_iso_{isonum}_{isodenom}']._values += evaluator[
                f'muon_iso_2_{histkey}']._values * lumi2016_GH

    for jpsinum in jpsinums:
        histkey = f'NUM_{jpsinum}_DEN_{jpsidenom}_{jpsieffvars}'
        corrections[f'muon_id_jpsi_{jpsinum}'] = evaluator[
            f'muon_id_jpsi_{histkey}']
        if year == '2016':
            corrections[f'muon_id_jpsi_{jpsinum}']._values *= lumi2016_BCDEF
            corrections[f'muon_id_jpsi_{jpsinum}']._values += evaluator[
                f'muon_id_jpsi_2_{histkey}']._values * lumi2016_GH

    # pileup
    # from NanoAOD tools
    # 2016 has a bug
    #with uproot.open(f'data/pileup/dataPileup{year}.root') as f:
    #    norm = lambda x: x/x.sum()
    #    edges = f['pileup'].edges
    #    dataPileup = norm(f['pileup'].values)
    #    dataPileupUp = norm(f['pileup_plus'].values)
    #    dataPileupDown = norm(f['pileup_minus'].values)
    #with uproot.open(f'data/pileup/mcPileup{year}.root') as f:
    #    mcPileup = f['pu_mc'].values
    #def zeropad(a,n):
    #    _a = np.zeros(n)
    #    _a[:len(a)] = a
    #    return _a
    #nmax = max(len(dataPileup),len(mcPileup))
    #dataPileup = zeropad(dataPileup,nmax)
    #mcPileup = zeropad(mcPileup,nmax)
    #mask = (mcPileup>0)
    #pileupRatio = dataPileup.copy()
    #pileupRatioUp = dataPileupUp.copy()
    #pileupRatioDown = dataPileupDown.copy()
    #pileupRatio[mask] /= mcPileup[mask]
    #pileupRatioUp[mask] /= mcPileup[mask]
    #pileupRatioDown[mask] /= mcPileup[mask]
    # from HZZ
    with uproot.open(f'data/pileup/pu_weights_{year}.root') as f:
        edges = f['weights'].edges
        pileupRatio = f['weights'].values
        pileupRatioUp = f['weights_varUp'].values
        pileupRatioDown = f['weights_varDn'].values

    corrections['pileupWeight'] = lookup_tools.dense_lookup.dense_lookup(
        pileupRatio, edges)
    corrections['pileupWeightUp'] = lookup_tools.dense_lookup.dense_lookup(
        pileupRatioUp, edges)
    corrections['pileupWeightDown'] = lookup_tools.dense_lookup.dense_lookup(
        pileupRatioDown, edges)

    # rochester correction
    tag = 'roccor.Run2.v3'
    fname = f'data/rochester/{tag}/RoccoR{year}.txt'
    corrections[
        'rochester_data'] = lookup_tools.txt_converters.convert_rochester_file(
            fname, loaduncs=True)

    save(corrections, f'corrections/corrections_{year}.coffea')
Example #25
def main():
  import argparse
  parser = argparse.ArgumentParser(description='Create dict with files and options')
  parser.add_argument('cfgfile'           , default=''           , help = 'Config file with dataset names')
  parser.add_argument('--pretend','-p'    , action='store_true'  , help = 'Create the files but not send the jobs')
  parser.add_argument('--test','-t'       , action='store_true'  , help = 'Sends only one or two jobs, as a test')
  parser.add_argument('--verbose','-v'    , action='store_true'  , help = 'Activate the verbosing')
  parser.add_argument('--path'            , default=''           , help = 'Path to look for nanoAOD')
  parser.add_argument('--sample','-s'     , default=''           , help = 'Sample(s) to process')
  parser.add_argument('--xsec','-x'       , default='xsec'       , help = 'Cross section')
  parser.add_argument('--year','-y'       , default=-1           , help = 'Year')
  parser.add_argument('--options'         , default=''           , help = 'Options to pass to your analysis')
  parser.add_argument('--treename'        , default='Events'     , help = 'Name of the tree')
  parser.add_argument('--nFiles'          , default=None         , help = 'Number of max files (for the moment, only applies for DAS)')

  args, unknown = parser.parse_known_args()
  cfgfile     = args.cfgfile
  verbose     = args.verbose
  pretend     = args.pretend
  dotest      = args.test
  sample      = args.sample
  path        = args.path
  options     = args.options
  xsec        = args.xsec
  year        = args.year
  treeName    = args.treename

  samplefiles = {}
  fileopt = {}
  xsecdic = {}
  sampdic = {}

  if not os.path.isfile(cfgfile) and os.path.isfile(cfgfile+'.cfg'): cfgfile+='.cfg'
  f = open(cfgfile)
  lines = f.readlines()
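  # Parse the config file: 'key: value' lines set global options, any other non-comment line defines a sample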
  for l in lines:
    l = l.replace(' ', '')
    l = l.replace('\n', '')
    if l.startswith('#'): continue
    if '#' in l: l = l.split('#')[0]
    if l == '': continue
    if l.endswith(':'): l = l[:-1]
    if not ':' in l:
      if l in ['path', 'verbose', 'pretend', 'test', 'options', 'xsec', 'year', 'treeName']: continue
      else: samplefiles[l]=l
    else:
      lst = l.split(':')
      key = lst[0]
      val = lst[1] if lst[1] != '' else lst[0]
      if   key == 'pretend'   : pretend   = 1
      elif key == 'verbose'   : verbose   = int(val) if val.isdigit() else 1
      elif key == 'test'      : dotest    = 1
      elif key == 'path'      :
        path      = val
        if len(lst) > 2: 
          for v in lst[2:]: path += ':'+v
      elif key == 'options'   : options   = val
      elif key == 'xsec'      : xsec      = val
      elif key == 'year'      : year      = int(val)
      elif key == 'treeName'  : treeName  = val
      else:
        fileopt[key] = ''#options
        if len(lst) >= 3: fileopt[key] += lst[2]
        samplefiles[key] = val

  # Re-assign arguments...
  aarg = sys.argv
  if '--pretend' in aarg or '-p' in aarg : pretend     = args.pretend
  if '--test'    in aarg or '-t' in aarg : dotest      = args.test
  if args.path       != ''       : path        = args.path
  if args.options    != ''       : options     = args.options
  if args.xsec       != 'xsec'   : xsec        = args.xsec
  if args.year       != -1       : year        = args.year
  if args.treename   != 'Events' : treeName    = args.treename
  if args.verbose    != 0        : verbose     = int(args.verbose)
  xsecdic = loadxsecdic(xsec, verbose)

  for sname in samplefiles.keys():
    sampdic[sname] = {}
    sampdic[sname]['xsec']       = xsecdic[sname] if sname in xsecdic.keys() else 1
    sampdic[sname]['year']       = year
    sampdic[sname]['treeName']   = treeName
    if 'DAS' in options:
      dataset = samplefiles[sname]
      nFiles = int(fileopt[sname]) if fileopt[sname]!='' else None
      #dicFiles = GetDatasetFromDAS(dataset, nFiles, options='file', withRedirector='root://cms-xrd-global.cern.ch/')
      dicFiles = GetDatasetFromDAS(dataset, nFiles, options='file', withRedirector=path)
      nEvents, nGenEvents, nSumOfWeights, isData = GetAllInfoFromFile(dicFiles['files'], sampdic[sname]['treeName'])
      files          = dicFiles['files']
      nEvents        = dicFiles['events']
      fileOptions = ''
    else:
      files = GetFiles(path, samplefiles[sname])
      nEvents, nGenEvents, nSumOfWeights, isData = GetAllInfoFromFile(files, sampdic[sname]['treeName'])
      extraOption = GetOptions(path, files[0].split('/')[-1])
      fileOptions = fileopt[sname]+','+extraOption
    sampdic[sname]['options']    = fileOptions
    sampdic[sname]['files']      = files
    sampdic[sname]['nEvents']       = nEvents
    sampdic[sname]['nGenEvents']    = nGenEvents
    sampdic[sname]['nSumOfWeights'] = nSumOfWeights
    sampdic[sname]['isData']        = isData

  if verbose:
    for sname in samplefiles.keys():
      print('>> '+sname)
      print('   - isData?    : %s'   %('YES' if sampdic[sname]['isData'] else 'NO'))
      print('   - year       : %i'   %sampdic[sname]['year'])
      print('   - xsec       : %1.3f'%sampdic[sname]['xsec'])
      print('   - options    : %s'   %sampdic[sname]['options'])
      print('   - tree       : %s'   %sampdic[sname]['treeName'])
      print('   - nEvents    : %i'   %sampdic[sname]['nEvents'])
      print('   - nGenEvents : %i'   %sampdic[sname]['nGenEvents'])
      print('   - SumWeights : %i'   %sampdic[sname]['nSumOfWeights'])
      print('   - nFiles     : %i'   %len(sampdic[sname]['files']))
      for fname in sampdic[sname]['files']: print('     %s'%fname)
  save(sampdic, '.samples.coffea')

  return sampdic
Example #26
            'METFactory' : data_met_factory,
        }

        # MC
            # get jec, junc, jr, jersf
    MC_JECcorrector = FactorizedJetCorrector(**{name: Jetevaluator[name] for name in ['%s_%s_%s' % (jec_mc_tag, level, jet_type) for level in jec_levels_MC]})
    MC_JECuncertainties = JetCorrectionUncertainty(**{name:Jetevaluator[name] for name in Jetevaluator.keys() if name.startswith('%s_%s_%s' % (jec_mc_tag, jecfiles['Unc'], jet_type))})
    MC_JER = JetResolution(**{name:Jetevaluator[name] for name in ['%s_MC_%s_%s' % (jer_tag, jerfiles[year]['JER'], jet_type)]})
    MC_JERsf = JetResolutionScaleFactor(**{name:Jetevaluator[name] for name in ['%s_MC_%s_%s' % (jer_tag, jerfiles[year]['JERSF'], jet_type)]})
        # make JEC stack of all corrections
    #print("JER/JERSF set to None")
    #set_trace()
    #MC_JECStack = JECStack({}, jec=MC_JECcorrector, junc=MC_JECuncertainties, jer=None, jersf=None)
    MC_JECStack = JECStack({}, jec=MC_JECcorrector, junc=MC_JECuncertainties, jer=MC_JER, jersf=MC_JERsf)
        # make jet and met factory
    MC_name_map = make_name_map(MC_JECStack, isMC=True)
    MC_jet_factory = CorrectedJetsFactory(MC_name_map, MC_JECStack)
    MC_met_factory = CorrectedMETFactory(MC_name_map)
    jet_corrections[year]['MC'] = {
        'JetsFactory' : MC_jet_factory,
        'METFactory' : MC_met_factory,
    }
    print('Jet corrections for %s saved' % year)


fname = os.path.join(proj_dir, 'Corrections', base_jobid, 'JetMETCorrections_UncSources.coffea') if args.split_uncs else os.path.join(proj_dir, 'Corrections', base_jobid, 'JetMETCorrections.coffea')
if args.test: fname = os.path.join(proj_dir, 'test_jetmet.coffea')

save(jet_corrections, fname)
print('\n%s written' % fname)
Example #27
    if (i + chunk_size < len(names)):
        flist = [util.load(x) for x in names[i:i + chunk_size]]
    else:
        flist = [util.load(x) for x in names[i:]]

    for key in flist[0]:
        if isinstance(flist[0][key], hist.Hist):
            for fi in range(1, len(flist)):
                flist[0][key].add(flist[fi][key])
        else:
            for fi in range(1, len(flist)):
                flist[0][key] = flist[0][key] + flist[fi][key]

    print(flist[0])
    flist[0]['templates'] = flist[0]['templates'].sum('pt', overflow='allnan')
    util.save(flist[0], '%s/hists_sum_%i.coffea' % (indir, i))

    for f in flist:
        del f

    chunk_names.append('%s/hists_sum_%i.coffea' % (indir, i))

print(chunk_names)

flist = [util.load(x) for x in chunk_names]

for key in flist[0]:
    if isinstance(flist[0][key], hist.Hist):
        for fi in range(1, len(flist)):
            flist[0][key].add(flist[fi][key])
    else:
Example #28
    for k, v in output['nevents'].items():
        if k in dataset_nevents:
            dataset_nevents[k] += v
        else:
            dataset_nevents[k] = v
        total_events += v

    print("Cutflow:")
    for dataset, d1 in output["reco_cutflow"].items():
        print(f"\tDataset={dataset}")
        print(f"\t\tnevents => {dataset_nevents[dataset]}")
        for cut_name, cut_npass in d1.items():
            print(
                f"\t\t{cut_name} => {cut_npass} = {cut_npass / dataset_nevents[dataset]}"
            )

    print("Truth cutflow:")
    for dataset, d1 in output["truth_cutflow"].items():
        print(f"\tDataset={dataset}")
        print(f"\t\tnevents => {dataset_nevents[dataset]}")
        for cut_name, cut_npass in d1.items():
            print(
                f"\t\t{cut_name} => {cut_npass} = {cut_npass / d1['inclusive']}"
            )

    util.save(output, f"MCEfficiencyHistograms.coffea")

    print("Total time: {} seconds".format(ts_end - ts_start))
    print("Total rate: {} Hz".format(total_events / (ts_end - ts_start)))
    print("Total nevents: {}".format(total_events))
Example #29
    Muon_dxy
    Muon_dz
    Muon_tightCharge
    Muon_mvaTTH
    Jet_pt
    Jet_eta
    Jet_phi
    Jet_mass
    Jet_btagDeepB
    Jet_btagDeepFlavB
    Jet_jetId
    Jet_neHEF
    Jet_neEmEF
    Jet_chHEF
    Jet_chEmEF
    GenPart_pt
    GenPart_eta
    GenPart_phi
    GenPart_mass
    GenPart_pdgId
    GenPart_status
    GenPart_statusFlags
    GenPart_genPartIdxMother
    PV_npvs
 
    '''.split()

    topprocessor = AnalysisProcessor(samples, objects, selection, corrections,
                                     functions, columns)
    save(topprocessor, outpath + 'topeft.coffea')
Example #30
def run(args):
    tstart = time.time()
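    # Number of generated events for each MC sample, passed to TTGammaProcessor below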
    mcEventYields = {
        'DYjetsM10to50_2016': 35114961.0,
        'DYjetsM50_2016': 146280395.0,
        'GJets_HT40To100_2016': 9326139.0,
        'GJets_HT100To200_2016': 10104155.0,
        'GJets_HT200To400_2016': 20527506.0,
        'GJets_HT400To600_2016': 5060070.0,
        'GJets_HT600ToInf_2016': 5080857.0,
        'QCD_Pt20to30_Ele_2016': 9241500.0,
        'QCD_Pt30to50_Ele_2016': 11508842.0,
        'QCD_Pt50to80_Ele_2016': 45789059.0,
        'QCD_Pt80to120_Ele_2016': 77800204.0,
        'QCD_Pt120to170_Ele_2016': 75367655.0,
        'QCD_Pt170to300_Ele_2016': 11105095.0,
        'QCD_Pt300toInf_Ele_2016': 7090318.0,
        'QCD_Pt20to30_Mu_2016': 31878740.0,
        'QCD_Pt30to50_Mu_2016': 29936360.0,
        'QCD_Pt50to80_Mu_2016': 19662175.0,
        'QCD_Pt80to120_Mu_2016': 23686772.0,
        'QCD_Pt120to170_Mu_2016': 7897731.0,
        'QCD_Pt170to300_Mu_2016': 17350231.0,
        'QCD_Pt300to470_Mu_2016': 49005976.0,
        'QCD_Pt470to600_Mu_2016': 19489276.0,
        'QCD_Pt600to800_Mu_2016': 9981311.0,
        'QCD_Pt800to1000_Mu_2016': 19940747.0,
        'QCD_Pt1000toInf_Mu_2016': 13608903.0,
        'ST_s_channel_2016': 6137801.0,
        'ST_tW_channel_2016': 4945734.0,
        'ST_tbarW_channel_2016': 4942374.0,
        'ST_tbar_channel_2016': 17780700.0,
        'ST_t_channel_2016': 31848000.0,
        'TTGamma_Dilepton_2016': 5728644.0,
        'TTGamma_Hadronic_2016': 5635346.0,
        'TTGamma_SingleLept_2016': 10991612.0,
        'TTWtoLNu_2016': 2716249.0,
        'TTWtoQQ_2016': 430310.0,
        'TTZtoLL_2016': 6420825.0,
        'TTbarPowheg_Dilepton_2016': 67339946.0,
        'TTbarPowheg_Hadronic_2016': 67963984.0,
        'TTbarPowheg_Semilept_2016': 106438920.0,
        'W1jets_2016': 45283121.0,
        'W2jets_2016': 60438768.0,
        'W3jets_2016': 59300029.0,
        'W4jets_2016': 29941394.0,
        'WGamma_01J_5f_2016': 6103817.0,
        'ZGamma_01J_5f_lowMass_2016': 9696539.0,
        'WW_2016': 7982180.0,
        'WZ_2016': 3997571.0,
        'ZZ_2016': 1988098.0
    }

    outputData = processor.run_uproot_job(
        setup_fileset(args),
        treename='Events',
        processor_instance=TTGammaProcessor(mcEventYields=mcEventYields),
        executor=processor.iterative_executor,
        executor_args={
            'flatten': True,
            'status': args.debug,
        },
        chunksize=args.chunksize,
        maxchunks=args.maxchunks,
    )

    util.save(outputData, args.output)

    elapsed = time.time() - tstart
    if args.debug:
        print("Total time: %.1f seconds" % elapsed)
        print("Total rate: %.1f events / second" %
              (outputData['EventCount'].value / elapsed))