Example #1
def analyze_data_function(data, parameters):
    ret = Results()

    num_events = data["num_events"]
    muons = data["Muon"]
    mu_pt = nplib.sqrt(muons.Px**2 + muons.Py**2)
    muons.attrs_data["pt"] = mu_pt

    mask_events = nplib.ones(muons.numevents(), dtype=nplib.bool_)
    mask_muons_passing_pt = muons.pt > parameters["muons_ptcut"]
    num_muons_event = kernels.sum_in_offsets(backend, muons.offsets,
                                             mask_muons_passing_pt,
                                             mask_events, muons.masks["all"],
                                             nplib.int8)
    mask_events_dimuon = num_muons_event == 2

    #get the leading muon pt in events that have exactly two muons
    inds = nplib.zeros(num_events, dtype=nplib.int32)
    leading_muon_pt = kernels.get_in_offsets(backend, muons.offsets, muons.pt,
                                             inds, mask_events_dimuon,
                                             mask_muons_passing_pt)

    #compute a weighted histogram
    weights = nplib.ones(num_events, dtype=nplib.float32)
    bins = nplib.linspace(0, 300, 101, dtype=nplib.float32)
    hist_muons_pt = Histogram(*kernels.histogram_from_vector(
        backend, leading_muon_pt[mask_events_dimuon],
        weights[mask_events_dimuon], bins))

    #save it to the output
    ret["hist_leading_muon_pt"] = hist_muons_pt
    return ret
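For orientation, here is a minimal NumPy sketch of what the two jagged-array kernels used above compute, assuming the hepaccelerate convention that `offsets` has length num_events + 1 and indexes into flat per-object arrays; the `_ref` names and exact signatures are illustrative, not the library's API.

import numpy as np

def sum_in_offsets_ref(offsets, content, mask_events, mask_objects, dtype):
    # Sum `content` per event, counting only unmasked objects in unmasked events.
    out = np.zeros(len(offsets) - 1, dtype=dtype)
    for iev in range(len(offsets) - 1):
        if not mask_events[iev]:
            continue
        lo, hi = offsets[iev], offsets[iev + 1]
        out[iev] = (content[lo:hi] * mask_objects[lo:hi]).sum()
    return out

def get_in_offsets_ref(offsets, content, indices, mask_events, mask_objects):
    # Pick the indices[iev]-th unmasked object of each event (0 = leading).
    out = np.zeros(len(offsets) - 1, dtype=content.dtype)
    for iev in range(len(offsets) - 1):
        if not mask_events[iev]:
            continue
        lo, hi = offsets[iev], offsets[iev + 1]
        sel = np.flatnonzero(mask_objects[lo:hi])
        if indices[iev] < len(sel):
            out[iev] = content[lo + sel[indices[iev]]]
    return out

# Two events: one with two muons passing the pt cut, one with a single muon.
offsets = np.array([0, 2, 3])
pt = np.array([50.0, 30.0, 25.0], dtype=np.float32)
passing = pt > 20
evs = np.array([True, True])
print(sum_in_offsets_ref(offsets, passing, evs, passing, np.int8))        # [2 1]
print(get_in_offsets_ref(offsets, pt, np.zeros(2, dtype=np.int32), evs, passing))  # [50. 25.]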
Example #2
def load_and_analyze(args_tuple):
    fn, args, dataset, entrystart, entrystop, ismc, ichunk = args_tuple
    this_worker = get_worker_wrapper()
    NUMPY_LIB, backend = hepaccelerate.choose_backend(args.use_cuda)

    print("Loading {0}".format(fn))
    ds, timing_results = load_dataset(
        args.datapath,
        fn,
        ismc,
        args.nthreads,
        args.skim,
        NUMPY_LIB,
        backend,
        entrystart,
        entrystop,
    )
    t0 = time.time()
    ret = run_analysis(ds, "{0}_{1}".format(dataset, ichunk),
                       this_worker.dnnmodel, args.use_cuda, ismc)
    t1 = time.time()
    ret["timing"] = Results(timing_results)
    ret["timing"]["run_analysis"] = t1 - t0
    ret["timing"]["num_events"] = ds.numevents()
    return ret
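load_and_analyze expects pre-built (file, args, dataset, entrystart, entrystop, ismc, ichunk) tuples; a hedged sketch of how such an arglist might be assembled (make_arglist, chunk_events, and the per-file entry count are assumptions, not the project's actual driver logic):

def make_arglist(files, args, dataset, num_entries, ismc, chunk_events=500_000):
    # one (file, args, dataset, entrystart, entrystop, ismc, ichunk) tuple per chunk
    arglist = []
    ichunk = 0
    for fn in files:
        for start in range(0, num_entries, chunk_events):
            stop = min(start + chunk_events, num_entries)
            arglist.append((fn, args, dataset, start, stop, ismc, ichunk))
            ichunk += 1
    return arglist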
Example #3
def merge_partial_results(dataset_name: str, dataset_era: str, outpath: str,
                          outpath_partial: str):
    """Merges the output from separate jobs for each dataset.
    
    Args:
        dataset_name (str): Name of the dataset
        dataset_era (str): Dataset era
        outpath (str): Directory with the output results
        outpath_partial (str): Directory with the partial input results
    """
    results = []
    partial_results = glob.glob(
        outpath_partial + "/{0}_{1}_*.pkl".format(dataset_name, dataset_era))
    print("Merging {0} partial results for dataset {1}_{2}".format(
        len(partial_results), dataset_name, dataset_era))

    #Load all the partial results
    for res_file in partial_results:
        res = pickle.load(open(res_file, "rb"))
        results += [res]

    #Merge the partial results
    results = sum(results, Results({}))

    #Create output directory if it does not exist
    try:
        os.makedirs(outpath + "/results")
    except FileExistsError:
        print("Output directory {}/results already exists".format(outpath))

    result_filename = outpath + "/results/{0}_{1}.pkl".format(
        dataset_name, dataset_era)
    print("Saving results to {0}".format(result_filename))
    with open(result_filename, "wb") as fi:
        pickle.dump(results, fi, protocol=pickle.HIGHEST_PROTOCOL)
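As a side note, the try/except around os.makedirs can be replaced by the exist_ok flag on Python 3.2+; a brief usage sketch with placeholder dataset and directory names:

import os

# equivalent to the try/except above, but never raises on an existing directory
os.makedirs(os.path.join("out", "results"), exist_ok=True)

# hypothetical call; partial pickles named "dy_2018_*.pkl" would live under out_partial/
merge_partial_results("dy", "2018", "out", "out_partial")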
Example #4
def create_variated_histos(hdict,
                           baseline="nominal",
                           variations=shape_systematics):

    if baseline not in hdict:
        raise KeyError("baseline histogram missing")

    #hbase = copy.deepcopy(hdict[baseline])
    hbase = hdict[baseline]
    ret = Results(OrderedDict())
    ret["nominal"] = hbase
    for variation in variations:
        for vdir in ["up", "down"]:
            #print("create_variated_histos", variation, vdir)
            sname = "{0}__{1}".format(variation, vdir)
            if sname.endswith("__up"):
                sname2 = sname.replace("__up", "Up")
            elif sname.endswith("__down"):
                sname2 = sname.replace("__down", "Down")

            if sname not in hdict:
                #print("systematic", sname, "not found, taking baseline")
                hret = hbase
            else:
                hret = hdict[sname]
            ret[sname2] = hret
    return ret
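The renaming above maps "<variation>__up" to "<variation>Up" (and likewise for Down), which is the histogram-name suffix convention used by combine-style datacards; a tiny self-check of that mapping (the systematic names are placeholders):

for variation in ["jes", "pu"]:
    for vdir, suffix in [("up", "Up"), ("down", "Down")]:
        sname = "{0}__{1}".format(variation, vdir)
        assert sname.replace("__" + vdir, suffix) == variation + suffix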
Example #5
def create_variated_histos(proc,
                           hdict,
                           baseline="nominal",
                           variations=shape_systematics):

    if baseline not in hdict:
        raise KeyError("baseline histogram missing")

    #hbase = copy.deepcopy(hdict[baseline])
    hbase = hdict[baseline]
    ret = Results(OrderedDict())
    ret["nominal"] = hbase
    for variation in variations:
        for vdir in ["up", "down"]:
            #print("create_variated_histos", variation, vdir)
            sname = "{0}__{1}".format(variation, vdir)
            if sname.endswith("__up"):
                sname2 = sname.replace("__up", "Up")
            elif sname.endswith("__down"):
                sname2 = sname.replace("__down", "Down")

            if sname not in hdict:
                #print("systematic", sname, "not found, taking baseline")
                hret = hbase
            else:
                hret = hdict[sname]
            ret[sname2] = hret
    if (('DYLHEScaleWeight' in variations)
            or ('EWZLHEScaleWeight' in variations)):
        h_lhe = []
        h_nom_up = copy.deepcopy(hbase)
        h_nom_down = copy.deepcopy(hbase)
        for i in range(9):
            sname = 'LHEScaleWeight__{0}'.format(i)
            h_lhe.append(hdict[sname])
        for k in range(len(h_lhe[0].contents)):
            for i in range(9):
                if (h_lhe[i].contents[k] > h_nom_up.contents[k]):
                    h_nom_up.contents[k] = h_lhe[i].contents[k]
                if (h_lhe[i].contents[k] < h_nom_down.contents[k]):
                    h_nom_down.contents[k] = h_lhe[i].contents[k]
        #remove the normalization aspect from QCD scale
        sum_nom_up = np.sum(h_nom_up.contents)
        sum_nom_down = np.sum(h_nom_down.contents)
        for k in range(len(h_nom_up.contents)):
            h_nom_up.contents[k] = h_nom_up.contents[k] * np.sum(
                hbase.contents) / sum_nom_up
            h_nom_down.contents[k] = h_nom_down.contents[k] * np.sum(
                hbase.contents) / sum_nom_down

        if ('dy' in proc):
            ret['DYLHEScaleWeightUp'] = h_nom_up
            ret['DYLHEScaleWeightDown'] = h_nom_down
        elif ('ewk' in proc):
            ret['EWZLHEScaleWeightUp'] = h_nom_up
            ret['EWZLHEScaleWeightDown'] = h_nom_down
    return ret
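The per-bin loops above take a bin-wise envelope of the nine LHE scale variations and then rescale the envelope to the nominal integral, keeping only the shape effect. A vectorized NumPy sketch of the same computation (the array shapes are assumptions):

import numpy as np

def scale_envelope(nominal, lhe_variations):
    # nominal: shape (nbins,); lhe_variations: shape (9, nbins)
    up = np.maximum(nominal, lhe_variations.max(axis=0)).astype(float)
    down = np.minimum(nominal, lhe_variations.min(axis=0)).astype(float)
    # remove the normalization effect: rescale each envelope to the nominal integral
    up *= nominal.sum() / up.sum()
    down *= nominal.sum() / down.sum()
    return up, down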
Example #6
def create_datacard(dict_procs, parameter_name, all_processes, histname,
                    baseline, variations, weight_xs):

    ret = Results(OrderedDict())
    event_counts = {}
    hists_mc = []
    for pid, pid_procs in proc_grps:
        event_counts[pid] = 0
    for proc in all_processes:
        #print("create_datacard", proc)
        rr = dict_procs[proc]
        _variations = variations

        #don't produce variated histograms for data
        if proc == "data":
            _variations = []

        variated_histos = create_variated_histos(proc, rr, baseline,
                                                 _variations)

        for syst_name, histo in variated_histos.items():
            if proc != "data":
                histo = histo * weight_xs[proc]

            if syst_name == "nominal":
                found_proc = 0
                for pid, pid_procs in proc_grps:

                    if proc in pid_procs:
                        event_counts[pid] += np.sum(histo.contents)
                        found_proc = 1
                        #print(pid,proc, syst_name, np.sum(histo.contents))

                if proc != "data":
                    hists_mc += [histo]
                if found_proc == 0:
                    event_counts[proc] = np.sum(histo.contents)
            #create histogram name for combine datacard

            hist_name = "{0}__{2}".format(proc, histname, syst_name)
            if hist_name == "data__nominal":
                hist_name = "data_obs"
            hist_name = hist_name.replace("__nominal", "")

            ret[hist_name] = copy.deepcopy(histo)
    assert (len(hists_mc) > 0)
    hist_mc_tot = copy.deepcopy(hists_mc[0])
    for h in hists_mc[1:]:  # add the remaining MC histograms to the total
        hist_mc_tot += h
    ret["data_fake"] = hist_mc_tot
    ret_g = group_samples_datacard(ret, proc_grps)
    return ret_g, event_counts
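The histogram keys built above follow the "<proc>__<syst>" pattern, with "__nominal" stripped and the data histogram renamed to "data_obs" as combine expects; a compact restatement with self-checks:

def datacard_hist_name(proc, syst_name):
    name = "{0}__{1}".format(proc, syst_name)
    if name == "data__nominal":
        return "data_obs"
    return name.replace("__nominal", "")

assert datacard_hist_name("dy", "nominal") == "dy"
assert datacard_hist_name("dy", "jesUp") == "dy__jesUp"
assert datacard_hist_name("data", "nominal") == "data_obs"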
Example #7
def merge_partial_results(dataset_name, dataset_era, outpath):
    results = []
    partial_results = glob.glob(outpath + "/{0}_{1}_*.pkl".format(dataset_name, dataset_era))
    print("Merging {0} partial results for dataset {1}_{2}".format(len(partial_results), dataset_name, dataset_era))
    for res_file in partial_results:
        res = pickle.load(open(res_file, "rb"))
        results += [res]
    results = sum(results, Results({}))
    try:
        os.makedirs(args.out + "/results")
    except FileExistsError as e:
        pass
    result_filename = args.out + "/results/{0}_{1}.pkl".format(dataset_name, dataset_era)
    print("Saving results to {0}".format(result_filename))
    with open(result_filename, "wb") as fi:
        pickle.dump(results, fi, protocol=pickle.HIGHEST_PROTOCOL) 
    return
Example #8
def run_analysis(dataset, out, dnnmodel, use_cuda, ismc):
    from keras.backend.tensorflow_backend import set_session

    this_worker = get_worker_wrapper()
    NUMPY_LIB = this_worker.NUMPY_LIB
    backend = this_worker.backend
    hists = {}
    histo_bins = {
        "nmu": np.array([0, 1, 2, 3], dtype=np.float32),
        "njet": np.array([0, 1, 2, 3, 4, 5, 6, 7], dtype=np.float32),
        "mu_pt": np.linspace(0, 300, 20),
        "mu_eta": np.linspace(-5, 5, 20),
        "mu_phi": np.linspace(-5, 5, 20),
        "mu_iso": np.linspace(0, 1, 20),
        "mu_charge": np.array([-1, 0, 1], dtype=np.float32),
        "met_pt": np.linspace(0, 200, 20),
        "jet_pt": np.linspace(0, 400, 20),
        "jet_eta": np.linspace(-5, 5, 20),
        "jet_phi": np.linspace(-5, 5, 20),
        "jet_btag": np.linspace(0, 1, 20),
        "dnnpred_m": np.linspace(0, 1, 20),
        "dnnpred_s": np.linspace(0, 0.2, 20),
        "inv_mass": np.linspace(150, 200, 20),
        "sumpt": np.linspace(0, 1000, 20),
    }

    t0 = time.time()

    i = 0

    mu = dataset.structs["Muon"][i]
    el = dataset.structs["Electron"][i]
    jets = dataset.structs["Jet"][i]
    evvars = dataset.eventvars[i]

    mu.hepaccelerate_backend = backend
    el.hepaccelerate_backend = backend
    jets.hepaccelerate_backend = backend

    evs_all = NUMPY_LIB.ones(dataset.numevents(), dtype=NUMPY_LIB.bool_)

    print("Lepton selection")
    sel_mu, sel_ev_mu = get_selected_muons(mu, 40, 20, 2.4, 0.1)
    sel_ev_mu = sel_ev_mu & (evvars["HLT_IsoMu24"] == True)
    mu.masks["selected"] = sel_mu
    sel_el, sel_ev_el = get_selected_electrons(el, 40, 20, 2.4, 0.1)
    el.masks["selected"] = sel_el

    nmu = ha_kernels.sum_in_offsets(
        backend,
        mu.offsets,
        mu.masks["selected"],
        evs_all,
        mu.masks["all"],
        dtype=NUMPY_LIB.int32,
    )
    nel = ha_kernels.sum_in_offsets(
        backend,
        el.offsets,
        el.masks["selected"],
        evs_all,
        el.masks["all"],
        dtype=NUMPY_LIB.int32,
    )

    # get contiguous arrays of the first two muons for all events
    mu1 = mu.select_nth(0, object_mask=sel_mu)
    mu2 = mu.select_nth(1, object_mask=sel_mu)
    el1 = el.select_nth(0, object_mask=sel_el)
    el2 = el.select_nth(1, object_mask=sel_el)

    weight_ev_mu = apply_lepton_corrections(mu, sel_mu,
                                            this_worker.electron_weights)
    weight_ev_el = apply_lepton_corrections(el, sel_el,
                                            this_worker.electron_weights)

    weights = {"nominal": weight_ev_mu * weight_ev_el}

    weights_jet = {}
    for k in weights.keys():
        weights_jet[k] = NUMPY_LIB.zeros_like(jets.pt)
        ha_kernels.broadcast(backend, jets.offsets, weights["nominal"],
                             weights_jet[k])

    all_jecs = [("nominal", "", None)]
    if ismc:
        for i in range(this_worker.jecs_up.shape[1]):
            all_jecs += [(i, "up", this_worker.jecs_up[:, i])]
            all_jecs += [(i, "down", this_worker.jecs_down[:, i])]

    jets_pt_orig = NUMPY_LIB.copy(jets.pt)

    # per-event histograms
    fill_histograms_several(
        hists,
        "nominal",
        "hist__all__",
        [
            (evvars["MET_pt"], "met_pt", histo_bins["met_pt"]),
        ],
        evs_all,
        weights,
        use_cuda,
    )

    fill_histograms_several(
        hists,
        "nominal",
        "hist__all__",
        [
            (jets.pt, "jets_pt", histo_bins["jet_pt"]),
        ],
        jets.masks["all"],
        weights_jet,
        use_cuda,
    )

    print("Jet selection")
    # loop over the jet corrections
    for ijec, sdir, jec in all_jecs:
        systname = "nominal"
        if ijec != "nominal":
            systname = ("jec{0}".format(ijec), sdir)

        if jec is not None:
            jet_pt_corr = apply_jec(jets_pt_orig, this_worker.jecs_bins, jec)
            # compute the corrected jet pt
            jets.pt = jets_pt_orig * NUMPY_LIB.abs(jet_pt_corr)
        print("jec", ijec, sdir, jets.pt.mean())

        # get selected jets
        sel_jet, sel_bjet = select_jets(jets, mu, el, sel_mu, sel_el, 40, 2.0,
                                        0.3, 0.4)

        # compute the number of jets per event
        njet = ha_kernels.sum_in_offsets(
            backend,
            jets.offsets,
            sel_jet,
            evs_all,
            jets.masks["all"],
            dtype=NUMPY_LIB.int32,
        )
        nbjet = ha_kernels.sum_in_offsets(
            backend,
            jets.offsets,
            sel_bjet,
            evs_all,
            jets.masks["all"],
            dtype=NUMPY_LIB.int32,
        )

        inv_mass_3j = NUMPY_LIB.zeros(jets.numevents(),
                                      dtype=NUMPY_LIB.float32)
        best_comb_3j = NUMPY_LIB.zeros((jets.numevents(), 3),
                                       dtype=NUMPY_LIB.int32)

        if use_cuda:
            this_worker.kernels.comb_3_invmass_closest[32, 256](
                jets.pt,
                jets.eta,
                jets.phi,
                jets.mass,
                jets.offsets,
                172.0,
                inv_mass_3j,
                best_comb_3j,
            )
            cuda.synchronize()
        else:
            this_worker.kernels.comb_3_invmass_closest(
                jets.pt,
                jets.eta,
                jets.phi,
                jets.mass,
                jets.offsets,
                172.0,
                inv_mass_3j,
                best_comb_3j,
            )

        best_btag = NUMPY_LIB.zeros(jets.numevents(), dtype=NUMPY_LIB.float32)
        if use_cuda:
            this_worker.kernels.max_val_comb[32, 1024](jets.btag, jets.offsets,
                                                       best_comb_3j, best_btag)
            cuda.synchronize()
        else:
            this_worker.kernels.max_val_comb(jets.btag, jets.offsets,
                                             best_comb_3j, best_btag)

        # get the events with at least three jets
        sel_ev_jet = njet >= 3
        sel_ev_bjet = nbjet >= 1

        selected_events = (sel_ev_mu | sel_ev_el) & sel_ev_jet & sel_ev_bjet
        print("Selected {0} events".format(selected_events.sum()))

        # get contiguous vectors of the first two jet data
        jet1 = jets.select_nth(0, object_mask=sel_jet)
        jet2 = jets.select_nth(1, object_mask=sel_jet)
        jet3 = jets.select_nth(2, object_mask=sel_jet)

        # create a mask vector for the first two jets
        first_two_jets = NUMPY_LIB.zeros_like(sel_jet)
        inds = NUMPY_LIB.zeros_like(evs_all, dtype=NUMPY_LIB.int32)
        targets = NUMPY_LIB.ones_like(evs_all, dtype=NUMPY_LIB.int32)
        inds[:] = 0
        ha_kernels.set_in_offsets(
            backend,
            jets.offsets,
            first_two_jets,
            inds,
            targets,
            selected_events,
            sel_jet,
        )
        inds[:] = 1
        ha_kernels.set_in_offsets(
            backend,
            jets.offsets,
            first_two_jets,
            inds,
            targets,
            selected_events,
            sel_jet,
        )

        # compute the invariant mass of the first two jets
        dijet_inv_mass, dijet_pt = compute_inv_mass(jets, selected_events,
                                                    sel_jet & first_two_jets,
                                                    use_cuda)

        sumpt_jets = ha_kernels.sum_in_offsets(backend, jets.offsets, jets.pt,
                                               selected_events, sel_jet)

        # create a keras-like array
        arr = NUMPY_LIB.vstack([
            nmu,
            nel,
            njet,
            dijet_inv_mass,
            dijet_pt,
            mu1["pt"],
            mu1["eta"],
            mu1["phi"],
            mu1["charge"],
            mu1["pfRelIso03_all"],
            mu2["pt"],
            mu2["eta"],
            mu2["phi"],
            mu2["charge"],
            mu2["pfRelIso03_all"],
            el1["pt"],
            el1["eta"],
            el1["phi"],
            el1["charge"],
            el1["pfRelIso03_all"],
            el2["pt"],
            el2["eta"],
            el2["phi"],
            el2["charge"],
            el2["pfRelIso03_all"],
            jet1["pt"],
            jet1["eta"],
            jet1["phi"],
            jet1["btag"],
            jet2["pt"],
            jet2["eta"],
            jet2["phi"],
            jet2["btag"],
            inv_mass_3j,
            best_btag,
            sumpt_jets,
        ]).T

        # print("evaluating DNN model")
        with this_worker.graph.as_default():
            set_session(this_worker.session)
            pred = dnnmodel.eval(arr, use_cuda)
            pred = NUMPY_LIB.vstack(pred).T
            pred_m = NUMPY_LIB.mean(pred, axis=1)
            pred_s = NUMPY_LIB.std(pred, axis=1)

        fill_histograms_several(
            hists,
            systname,
            "hist__nmu1_njetge3_nbjetge1__",
            [
                (pred_m, "pred_m", histo_bins["dnnpred_m"]),
                (pred_s, "pred_s", histo_bins["dnnpred_s"]),
                (nmu, "nmu", histo_bins["nmu"]),
                (nel, "nel", histo_bins["nmu"]),
                (njet, "njet", histo_bins["njet"]),
                (mu1["pt"], "mu1_pt", histo_bins["mu_pt"]),
                (mu1["eta"], "mu1_eta", histo_bins["mu_eta"]),
                (mu1["phi"], "mu1_phi", histo_bins["mu_phi"]),
                (mu1["charge"], "mu1_charge", histo_bins["mu_charge"]),
                (mu1["pfRelIso03_all"], "mu1_iso", histo_bins["mu_iso"]),
                (mu2["pt"], "mu2_pt", histo_bins["mu_pt"]),
                (mu2["eta"], "mu2_eta", histo_bins["mu_eta"]),
                (mu2["phi"], "mu2_phi", histo_bins["mu_phi"]),
                (mu2["charge"], "mu2_charge", histo_bins["mu_charge"]),
                (mu2["pfRelIso03_all"], "mu2_iso", histo_bins["mu_iso"]),
                (el1["pt"], "el1_pt", histo_bins["mu_pt"]),
                (el1["eta"], "el1_eta", histo_bins["mu_eta"]),
                (el1["phi"], "el1_phi", histo_bins["mu_phi"]),
                (el1["charge"], "el1_charge", histo_bins["mu_charge"]),
                (el1["pfRelIso03_all"], "el1_iso", histo_bins["mu_iso"]),
                (el2["pt"], "el2_pt", histo_bins["mu_pt"]),
                (el2["eta"], "el2_eta", histo_bins["mu_eta"]),
                (el2["phi"], "el2_phi", histo_bins["mu_phi"]),
                (el2["charge"], "el2_charge", histo_bins["mu_charge"]),
                (el2["pfRelIso03_all"], "el2_iso", histo_bins["mu_iso"]),
                (jet1["pt"], "j1_pt", histo_bins["jet_pt"]),
                (jet1["eta"], "j1_eta", histo_bins["jet_eta"]),
                (jet1["phi"], "j1_phi", histo_bins["jet_phi"]),
                (jet1["btag"], "j1_btag", histo_bins["jet_btag"]),
                (jet2["pt"], "j2_pt", histo_bins["jet_pt"]),
                (jet2["eta"], "j2_eta", histo_bins["jet_eta"]),
                (jet2["phi"], "j2_phi", histo_bins["jet_phi"]),
                (jet2["btag"], "j2_btag", histo_bins["jet_btag"]),
                (inv_mass_3j, "inv_mass_3j", histo_bins["inv_mass"]),
                (best_btag, "best_btag", histo_bins["jet_btag"]),
                (sumpt_jets, "sumpt", histo_bins["sumpt"]),
            ],
            selected_events,
            weights,
            use_cuda,
        )

        # save the array for the first jet correction scenario only
        if save_arrays and ijec == 0:
            outfile_arr = "{0}_arrs.npy".format(out)
            print("Saving array with shape {0} to {1}".format(
                arr.shape, outfile_arr))
            with open(outfile_arr, "wb") as fi:
                np.save(fi, NUMPY_LIB.asnumpy(arr))

    t1 = time.time()

    res = Results({})
    for hn in hists.keys():
        hists[hn] = Results(hists[hn])
    res["hists"] = Results(hists)
    res["numevents"] = dataset.numevents()

    speed = dataset.numevents() / (t1 - t0)
    print("run_analysis: {0:.2E} events in {1:.2f} seconds, speed {2:.2E} Hz".
          format(dataset.numevents(), t1 - t0, speed))
    return res
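The NUMPY_LIB.vstack([...]).T pattern above turns a list of per-event scalars into a (num_events, num_features) matrix for batched DNN evaluation; a minimal NumPy illustration:

import numpy as np

# three events, three features
nmu = np.array([1, 2, 0])
njet = np.array([3, 4, 5])
mu1_pt = np.array([55.0, 40.0, 0.0])

arr = np.vstack([nmu, njet, mu1_pt]).T  # rows = events, columns = features
assert arr.shape == (3, 3)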
Example #9
    if args.dask_server == "debug":
        ret = map(load_and_analyze, arglist)
    else:
        futures = client.map(load_and_analyze, arglist, retries=3)
        ret = [fut.result() for fut in futures]

    walltime_t1 = time.time()

    print("Merging outputs")
    hists = {ds[0]: [] for ds in datasets}
    numevents = {ds[0]: 0 for ds in datasets}
    for r, _args in zip(ret, arglist):
        rh = r["hists"]
        ds = _args[2]
        hists[ds] += [Results(r["hists"])]
        numevents[ds] += r["numevents"]

    timing = sum([r["timing"] for r in ret], Results({}))
    timing["cuda"] = use_cuda
    timing["njec"] = args.njec
    timing["nthreads"] = args.nthreads
    timing["walltime"] = walltime_t1 - walltime_t0

    for k, v in hists.items():
        hists[k] = sum(hists[k], Results({}))

    print("Writing output pkl")
    with open(args.out, "wb") as fi:
        pickle.dump({
            "hists": hists,
Example #10
def analyze_data(data,
                 sample,
                 NUMPY_LIB=None,
                 parameters={},
                 samples_info={},
                 is_mc=True,
                 lumimask=None,
                 cat=False,
                 DNN=False,
                 DNN_model=None,
                 jets_met_corrected=True):
    #Output structure that will be returned and added up among the files.
    #Should be relatively small.
    ret = Results()

    muons = data["Muon"]
    electrons = data["Electron"]
    scalars = data["eventvars"]
    jets = data["Jet"]

    nEvents = muons.numevents()
    indices = {}
    indices["leading"] = NUMPY_LIB.zeros(nEvents, dtype=NUMPY_LIB.int32)
    indices["subleading"] = NUMPY_LIB.ones(nEvents, dtype=NUMPY_LIB.int32)

    mask_events = NUMPY_LIB.ones(nEvents, dtype=NUMPY_LIB.bool_)

    # apply event cleaning and PV selection
    flags = [
        "Flag_goodVertices", "Flag_globalSuperTightHalo2016Filter",
        "Flag_HBHENoiseFilter", "Flag_HBHENoiseIsoFilter",
        "Flag_EcalDeadCellTriggerPrimitiveFilter", "Flag_BadPFMuonFilter",
        "Flag_BadChargedCandidateFilter", "Flag_ecalBadCalibFilter"
    ]
    if not is_mc:
        flags.append("Flag_eeBadScFilter")
    for flag in flags:
        mask_events = mask_events & scalars[flag]
    mask_events = mask_events & (scalars["PV_npvsGood"] > 0)
    #mask_events = vertex_selection(scalars, mask_events)

    # apply object selection for muons, electrons, jets
    good_muons, veto_muons = lepton_selection(muons, parameters["muons"])
    good_electrons, veto_electrons = lepton_selection(electrons,
                                                      parameters["electrons"])
    good_jets = jet_selection(jets, muons,
                              (veto_muons | good_muons), parameters["jets"],
                              jets_met_corrected) & jet_selection(
                                  jets, electrons,
                                  (veto_electrons | good_electrons),
                                  parameters["jets"], jets_met_corrected)
    bjets = good_jets & (
        getattr(jets, parameters["btagging algorithm"]) >
        parameters["btagging WP"][parameters["btagging algorithm"]])

    # apply basic event selection -> individual categories cut later
    nleps = NUMPY_LIB.add(
        ha.sum_in_offsets(muons, good_muons, mask_events, muons.masks["all"],
                          NUMPY_LIB.int8),
        ha.sum_in_offsets(electrons, good_electrons, mask_events,
                          electrons.masks["all"], NUMPY_LIB.int8))
    nMuons = ha.sum_in_offsets(muons, good_muons, mask_events,
                               muons.masks["all"], NUMPY_LIB.int8)
    nElectrons = ha.sum_in_offsets(electrons, good_electrons, mask_events,
                                   electrons.masks["all"], NUMPY_LIB.int8)

    lepton_veto = NUMPY_LIB.add(
        ha.sum_in_offsets(muons, veto_muons, mask_events, muons.masks["all"],
                          NUMPY_LIB.int8),
        ha.sum_in_offsets(electrons, veto_electrons, mask_events,
                          electrons.masks["all"], NUMPY_LIB.int8))
    njets = ha.sum_in_offsets(jets, good_jets, mask_events, jets.masks["all"],
                              NUMPY_LIB.int8)

    btags = ha.sum_in_offsets(jets, bjets, mask_events, jets.masks["all"],
                              NUMPY_LIB.int8)
    if jets_met_corrected:
        #met = (scalars["MET_pt_nom"] > 20)
        met = (scalars["METFixEE2017_pt_nom"] > 20)
    else:
        met = (scalars["MET_pt"] > 20)

    # trigger logic
    # needs update for different years!
    trigger_el = (scalars["HLT_Ele35_WPTight_Gsf"]
                  | scalars["HLT_Ele28_eta2p1_WPTight_Gsf_HT150"]) & (
                      nleps == 1) & (nElectrons == 1)
    trigger_mu = (scalars["HLT_IsoMu27"]) & (nleps == 1) & (nMuons == 1)
    if not is_mc:
        if "SingleMuon" in sample:
            trigger_el = NUMPY_LIB.zeros(nEvents, dtype=NUMPY_LIB.bool_)
        if "SingleElectron" in sample:
            trigger_mu = NUMPY_LIB.zeros(nEvents, dtype=NUMPY_LIB.bool_)
    mask_events = mask_events & (trigger_el | trigger_mu)

    mask_events = mask_events & (nleps == 1) & (lepton_veto == 0) & (
        njets >= 4) & (btags >= 2) & met

    ### calculation of all needed variables
    var = {}

    var["njets"] = njets
    var["btags"] = btags
    var["nleps"] = nleps

    pt_label = "pt_nom" if jets_met_corrected else "pt"
    variables = [
        ("jet", jets, good_jets, "leading", [pt_label, "eta"]),
        ("bjet", jets, bjets, "leading", [pt_label, "eta"]),
    ]

    # special role of lepton
    var["leading_lepton_pt"] = NUMPY_LIB.maximum(
        ha.get_in_offsets(muons.pt, muons.offsets, indices["leading"],
                          mask_events, good_muons),
        ha.get_in_offsets(electrons.pt, electrons.offsets, indices["leading"],
                          mask_events, good_electrons))
    var["leading_lepton_eta"] = NUMPY_LIB.maximum(
        ha.get_in_offsets(muons.eta, muons.offsets, indices["leading"],
                          mask_events, good_muons),
        ha.get_in_offsets(electrons.eta, electrons.offsets, indices["leading"],
                          mask_events, good_electrons))

    # all other variables
    for v in variables:
        calculate_variable_features(v, mask_events, indices, var)

    #synch
    #mask = (scalars["event"] == 2895765)

    # calculate weights for MC samples
    weights = {}
    weights["nominal"] = NUMPY_LIB.ones(nEvents, dtype=NUMPY_LIB.float32)

    if is_mc:
        weights["nominal"] = weights["nominal"] * scalars[
            "genWeight"] * parameters["lumi"] * samples_info[sample][
                "XS"] / samples_info[sample]["ngen_weight"]

        # pu corrections
        #pu_weights = compute_pu_weights(parameters["pu_corrections_target"], weights["nominal"], scalars["Pileup_nTrueInt"], scalars["PV_npvsGood"])
        pu_weights = compute_pu_weights(parameters["pu_corrections_target"],
                                        weights["nominal"],
                                        scalars["Pileup_nTrueInt"],
                                        scalars["Pileup_nTrueInt"])
        weights["nominal"] = weights["nominal"] * pu_weights
        var["pu_weights"] = pu_weights

        # lepton SF corrections
        electron_weights = compute_lepton_weights(
            electrons, (electrons.deltaEtaSC + electrons.eta), electrons.pt,
            mask_events, good_electrons, evaluator,
            ["el_triggerSF", "el_recoSF", "el_idSF"])
        muon_weights = compute_lepton_weights(
            muons, muons.pt, NUMPY_LIB.abs(muons.eta), mask_events, good_muons,
            evaluator, ["mu_triggerSF", "mu_isoSF", "mu_idSF"])
        weights["nominal"] = weights["nominal"] * muon_weights * electron_weights

        # btag SF corrections
        btag_weights = compute_btag_weights(jets, mask_events, good_jets,
                                            parameters["btag_SF_target"],
                                            jets_met_corrected,
                                            parameters["btagging algorithm"])
        var["btag_weights"] = btag_weights
        weights["nominal"] = weights["nominal"] * btag_weights

    #in case of data: check if event is in golden lumi file
    if not is_mc and lumimask is not None:
        mask_lumi = lumimask(scalars["run"], scalars["luminosityBlock"])
        mask_events = mask_events & mask_lumi

    #evaluate DNN
    if DNN:
        DNN_pred = evaluate_DNN(jets, good_jets, electrons, good_electrons,
                                muons, good_muons, scalars, mask_events,
                                nEvents, DNN, DNN_model)

    # in case of tt+jets -> split in ttbb, tt2b, ttb, ttcc, ttlf
    processes = {}
    if sample.startswith("TTTo"):  #Changed for TTV samples
        ttCls = scalars["genTtbarId"] % 100
        processes["ttbb"] = mask_events & (ttCls >= 53) & (ttCls <= 56)
        processes["tt2b"] = mask_events & (ttCls == 52)
        processes["ttb"] = mask_events & (ttCls == 51)
        processes["ttcc"] = mask_events & (ttCls >= 41) & (ttCls <= 45)
        ttHF = ((ttCls >= 53) &
                (ttCls <= 56)) | (ttCls == 52) | (ttCls == 51) | (
                    (ttCls >= 41) & (ttCls <= 45))
        processes["ttlf"] = mask_events & NUMPY_LIB.invert(ttHF)
    else:
        processes["unsplit"] = mask_events

    for p in processes.keys():

        mask_events_split = processes[p]

        # Categories
        categories = {}
        categories["sl_jge4_tge2"] = mask_events_split
        categories["sl_jge4_tge3"] = mask_events_split & (btags >= 3)
        categories["sl_jge4_tge4"] = mask_events_split & (btags >= 4)

        categories["sl_j4_tge3"] = mask_events_split & (njets
                                                        == 4) & (btags >= 3)
        categories["sl_j5_tge3"] = mask_events_split & (njets
                                                        == 5) & (btags >= 3)
        categories["sl_jge6_tge3"] = mask_events_split & (njets >= 6) & (btags
                                                                         >= 3)

        categories["sl_j4_t3"] = mask_events_split & (njets == 4) & (btags
                                                                     == 3)
        categories["sl_j4_tge4"] = mask_events_split & (njets
                                                        == 4) & (btags >= 4)
        categories["sl_j5_t3"] = mask_events_split & (njets == 5) & (btags
                                                                     == 3)
        categories["sl_j5_tge4"] = mask_events_split & (njets
                                                        == 5) & (btags >= 4)
        categories["sl_jge6_t3"] = mask_events_split & (njets >= 6) & (btags
                                                                       == 3)
        categories["sl_jge6_tge4"] = mask_events_split & (njets >= 6) & (btags
                                                                         >= 4)

        #print("sl_j4_t3", scalars["event"][categories["sl_j4_t3"]], len(scalars["event"][categories["sl_j4_t3"]]))
        #print("sl_j5_t3", scalars["event"][categories["sl_j5_t3"]], len(scalars["event"][categories["sl_j5_t3"]]))
        #print("sl_jge6_t3", scalars["event"][categories["sl_jge6_t3"]], len(scalars["event"][categories["sl_jge6_t3"]]))
        #print("sl_j4_tge4", scalars["event"][categories["sl_j4_tge4"]], len(scalars["event"][categories["sl_j4_tge4"]]))
        #print("sl_j5_tge4", scalars["event"][categories["sl_j5_tge4"]], len(scalars["event"][categories["sl_j5_tge4"]]))
        #print("sl_jge6_tge4", scalars["event"][categories["sl_jge6_tge4"]], len(scalars["event"][categories["sl_jge6_tge4"]]))

        if not isinstance(cat, list):
            cat = [cat]
        for c in cat:
            cut = categories[c]
            cut_name = c

            if p == "unsplit":
                if "Run" in sample:
                    name = "data" + "_" + cut_name
                else:
                    name = samples_info[sample]["process"] + "_" + cut_name
            else:
                name = p + "_" + cut_name

            # create histograms filled with weighted events
            for k in var.keys():
                if k not in histogram_settings:
                    raise Exception(
                        "please add variable {0} to definitions_analysis.py".
                        format(k))
                hist = Histogram(*ha.histogram_from_vector(
                    var[k][cut], weights["nominal"][cut],
                    NUMPY_LIB.linspace(histogram_settings[k][0],
                                       histogram_settings[k][1],
                                       histogram_settings[k][2])))
                ret["hist_{0}_{1}".format(name, k)] = hist

            if DNN:
                if DNN == "mass_fit":
                    hist_DNN = Histogram(*ha.histogram_from_vector(
                        DNN_pred[cut], weights["nominal"][cut],
                        NUMPY_LIB.linspace(0., 300., 30)))
                    # the zoomed histogram only exists in the mass-fit mode,
                    # so store it inside this branch to avoid a NameError
                    hist_DNN_zoom = Histogram(*ha.histogram_from_vector(
                        DNN_pred[cut], weights["nominal"][cut],
                        NUMPY_LIB.linspace(0., 170., 30)))
                    ret["hist_{0}_DNN_zoom".format(name)] = hist_DNN_zoom
                else:
                    hist_DNN = Histogram(*ha.histogram_from_vector(
                        DNN_pred[cut], weights["nominal"][cut],
                        NUMPY_LIB.linspace(0., 1., 16)))
                ret["hist_{0}_DNN".format(name)] = hist_DNN

    #TODO: implement JECs

    ## To display properties of a single event
    #evts = [5991859]
    #mask = NUMPY_LIB.zeros_like(mask_events)
    #for iev in evts:
    #  mask |= (scalars["event"] == iev)
    ##import pdb
    ##pdb.set_trace()
    #print("mask", mask)
    #print('nevt', scalars["event"][mask])
    #print('pass sel', mask_events[mask])
    #print('nleps', nleps[mask])
    #print('njets', njets[mask])
    ##print('met', scalars['MET_pt_nom'][mask])
    ##print('lep_pt', leading_lepton_pt[mask])
    ##print('jet_pt', leading_jet_pt[mask])
    ##print('lep_eta', leading_lepton_eta[mask])
    #print('pu_weight', pu_weights[mask])
    #print('btag_weight', btag_weights[mask])
    #print('lep_weight', muon_weights[mask] * electron_weights[mask])
    #print('nevents', np.count_nonzero(mask_events))

    #np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})
    #for evt in evts:
    #    evt_idx = NUMPY_LIB.where( scalars["event"] == evt )[0][0]
    #    start = jets.offsets[evt_idx]
    #    stop  = jets.offsets[evt_idx+1]
    #    print(f'!!! EVENT {evt} !!!')
    #    print(f'njets good {njets[evt_idx]}, total {stop-start}')
    #    #print('jets mask', nonbjets[start:stop])
    #    print('jets pt', jets.pt_nom[start:stop])
    #    print('jets eta', jets.eta[start:stop])
    #    print('jets btag', getattr(jets, parameters["btagging algorithm"])[start:stop])
    #    print('jet Id', jets.jetId[start:stop]),
    #    print('jet puId', jets.puId[start:stop])

    return ret
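The tt+jets splitting above uses the standard genTtbarId % 100 convention (51 to 56 for extra b quarks, 41 to 45 for extra charm); a compact NumPy check of the masks:

import numpy as np

ttCls = np.array([0, 41, 45, 51, 52, 53, 56])
ttbb = (ttCls >= 53) & (ttCls <= 56)
tt2b = ttCls == 52
ttb = ttCls == 51
ttcc = (ttCls >= 41) & (ttCls <= 45)
ttlf = ~(ttbb | tt2b | ttb | ttcc)  # everything without extra heavy flavour
assert ttlf.tolist() == [True, False, False, False, False, False, False]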
Example #11
    filenames = None
    if args.filelist is not None:
        filenames = [l.strip() for l in open(args.filelist).readlines()]
    else:
        filenames = args.filenames

    print("Number of files:", len(filenames))

    for fn in filenames:
        if not fn.endswith(".root"):
            print(fn)
            raise Exception(
                "Must supply ROOT filename, but got {0}".format(fn))

    results = Results()

    for ibatch, files_in_batch in enumerate(
            chunks(filenames, args.files_per_batch)):
        #define our dataset
        structs = ["Jet", "Muon", "Electron"]
        #dataset = NanoAODDataset(files_in_batch, arrays_objects + arrays_event, "Events", structs, arrays_event)
        dataset = NanoAODDataset(files_in_batch, arrays_objects + arrays_event,
                                 "Events", structs, arrays_event)
        dataset.get_cache_dir = lambda fn, loc=args.cache_location: os.path.join(
            loc, fn)

        if not args.from_cache:
            #Load data from ROOT files
            dataset.preload(nthreads=args.nthreads, verbose=True)
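chunks is not defined in this snippet; a common implementation consistent with its use here (split the file list into batches of args.files_per_batch) would be:

def chunks(seq, size):
    # yield successive batches of at most `size` items from seq
    for i in range(0, len(seq), size):
        yield seq[i:i + size]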
Example #12
def analyze_data_function(data, parameters):
    ret = Results()
    ha = parameters["ha"]
    num_events = data["num_events"]
    lep = data["Lep"]
    lep.hepaccelerate_backend = ha
    lep.attrs_data["pt"] = lep.lep_pt
    lep.attrs_data["eta"] = lep.lep_eta
    lep.attrs_data["phi"] = lep.lep_phi
    lep.attrs_data["charge"] = lep.lep_charge
    lep.attrs_data["type"] = lep.lep_type

    lep_mass = np.zeros_like(lep["pt"], dtype=nplib.float32)
    lep_mass = np.where(lep["type"] == 11, 0.511, lep_mass)
    lep_mass = np.where(lep["type"] == 13, 105.65837, lep_mass)

    lep.attrs_data["mass"] = lep_mass
    mask_events = nplib.ones(lep.numevents(), dtype=nplib.bool_)

    lep_ele = lep["type"] == 11
    lep_muon = lep["type"] == 13

    ele_Iso = np.logical_and(
        lep_ele,
        np.logical_and(lep.lep_ptcone30 / lep.pt < 0.15,
                       lep.lep_etcone20 / lep.pt < 0.20))
    muon_Iso = np.logical_and(
        lep_muon,
        np.logical_and(lep.lep_ptcone30 / lep.pt < 0.15,
                       lep.lep_etcone20 / lep.pt < 0.30))
    pass_iso = np.logical_or(ele_Iso, muon_Iso)
    lep.attrs_data["pass_iso"] = pass_iso

    num_lep_event = kernels.sum_in_offsets(
        backend,
        lep.offsets,
        lep.masks["all"],
        mask_events,
        lep.masks["all"],
        nplib.int8,
    )
    mask_events_4lep = num_lep_event == 4

    lep_attrs = ["pt", "eta", "phi", "charge", "type", "mass",
                 "pass_iso"]  #, "ptcone30", "etcone20"]

    lep0 = lep.select_nth(0,
                          mask_events_4lep,
                          lep.masks["all"],
                          attributes=lep_attrs)
    lep1 = lep.select_nth(1,
                          mask_events_4lep,
                          lep.masks["all"],
                          attributes=lep_attrs)
    lep2 = lep.select_nth(2,
                          mask_events_4lep,
                          lep.masks["all"],
                          attributes=lep_attrs)
    lep3 = lep.select_nth(3,
                          mask_events_4lep,
                          lep.masks["all"],
                          attributes=lep_attrs)

    mask_event_sumchg_zero = (lep0["charge"] + lep1["charge"] +
                              lep2["charge"] + lep3["charge"] == 0)
    sum_lep_type = lep0["type"] + lep1["type"] + lep2["type"] + lep3["type"]
    all_pass_iso = (lep0["pass_iso"] & lep1["pass_iso"] & lep2["pass_iso"]
                    & lep3["pass_iso"])

    mask_event_sum_lep_type = np.logical_or(
        (sum_lep_type == 44),
        np.logical_or((sum_lep_type == 48), (sum_lep_type == 52)))
    mask_events = mask_events & mask_event_sumchg_zero & mask_events_4lep & mask_event_sum_lep_type & all_pass_iso

    mask_lep1_passing_pt = lep1["pt"] > parameters["leading_lep_ptcut"]
    mask_lep2_passing_pt = lep2["pt"] > parameters["lep_ptcut"]

    mask_events = mask_events & mask_lep1_passing_pt & mask_lep2_passing_pt

    l0 = to_cartesian(lep0)
    l1 = to_cartesian(lep1)
    l2 = to_cartesian(lep2)
    l3 = to_cartesian(lep3)

    llll = {k: l0[k] + l1[k] + l2[k] + l3[k] for k in ["px", "py", "pz", "e"]}

    llll_sph = to_spherical(llll)

    llll_sph["mass"] = llll_sph["mass"] / 1000.  # Convert to GeV

    #import pdb;pdb.set_trace();
    # compute a weighted histogram
    weights = nplib.ones(num_events, dtype=nplib.float32)
    ## Add xsec weights based on sample name
    if parameters["is_mc"]:
        weights = (data['eventvars']['mcWeight'] *
                   data['eventvars']['scaleFactor_PILEUP'] *
                   data['eventvars']['scaleFactor_ELE'] *
                   data['eventvars']['scaleFactor_MUON'] *
                   data['eventvars']['scaleFactor_LepTRIGGER'])
        info = infofile.infos[parameters["sample"]]
        weights *= (lumi * 1000 * info["xsec"]) / (info["sumw"] *
                                                   info["red_eff"])

    bins = nplib.linspace(110, 150, 11, dtype=nplib.float32)
    hist_m4lep = Histogram(*kernels.histogram_from_vector(
        backend,
        llll_sph["mass"][mask_events],
        weights[mask_events],
        bins,
    ))
    # save it to the output
    ret["hist_m4lep"] = hist_m4lep
    return ret
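to_cartesian and to_spherical are not shown in this snippet; a NumPy sketch of the four-vector conversions they would plausibly implement (the dict keys match the usage above; the inputs are in MeV here, hence the later division by 1000):

import numpy as np

def to_cartesian(lep):
    # pt/eta/phi/mass -> px/py/pz/e, elementwise over events
    pt, eta, phi, mass = (lep[k] for k in ["pt", "eta", "phi", "mass"])
    px, py, pz = pt * np.cos(phi), pt * np.sin(phi), pt * np.sinh(eta)
    e = np.sqrt(px ** 2 + py ** 2 + pz ** 2 + mass ** 2)
    return {"px": px, "py": py, "pz": pz, "e": e}

def to_spherical(v):
    # invert back to (pt, mass); only "mass" is used by the analysis above
    p2 = v["px"] ** 2 + v["py"] ** 2 + v["pz"] ** 2
    return {"pt": np.sqrt(v["px"] ** 2 + v["py"] ** 2),
            "mass": np.sqrt(np.maximum(v["e"] ** 2 - p2, 0.0))}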
Example #13
def analyze_data(data, sample, NUMPY_LIB=None, parameters={}, samples_info={}, is_mc=True, lumimask=None, cat=False, boosted=False, DNN=False, DNN_model=None):
    #Output structure that will be returned and added up among the files.
    #Should be relatively small.
    ret = Results()

    muons = data["Muon"]
    electrons = data["Electron"]
    scalars = data["eventvars"]
    jets = data["Jet"]

    nEvents = muons.numevents()

    mask_events = NUMPY_LIB.ones(nEvents, dtype=NUMPY_LIB.bool_)

    # apply event cleaning, PV selection and trigger selection
    flags = [
        "Flag_goodVertices", "Flag_globalSuperTightHalo2016Filter", "Flag_HBHENoiseFilter", "Flag_HBHENoiseIsoFilter", "Flag_EcalDeadCellTriggerPrimitiveFilter", "Flag_BadPFMuonFilter", "Flag_BadChargedCandidateFilter", "Flag_ecalBadCalibFilter"]
    if not is_mc:
        flags.append("Flag_eeBadScFilter")
    for flag in flags:
        mask_events = mask_events & scalars[flag]
    if args.year.startswith('2016'):
        trigger = (scalars["HLT_Ele27_WPTight_Gsf"] | scalars["HLT_IsoMu24"]  | scalars["HLT_IsoTkMu24"])
    else:
        trigger = (scalars["HLT_Ele35_WPTight_Gsf"] | scalars["HLT_Ele28_eta2p1_WPTight_Gsf_HT150"] | scalars["HLT_IsoMu27"])
    mask_events = mask_events & trigger
    mask_events = mask_events & (scalars["PV_npvsGood"]>0)
    #mask_events = vertex_selection(scalars, mask_events)

    # apply object selection for muons, electrons, jets
    good_muons, veto_muons = lepton_selection(muons, parameters["muons"])
    good_electrons, veto_electrons = lepton_selection(electrons, parameters["electrons"])
    good_jets = jet_selection(jets, muons, (veto_muons | good_muons), parameters["jets"]) & jet_selection(jets, electrons, (veto_electrons | good_electrons) , parameters["jets"])
    bjets = good_jets & (getattr(jets, parameters["btagging algorithm"]) > parameters["btagging WP"])

    # apply basic event selection -> individual categories cut later
    nleps =  NUMPY_LIB.add(ha.sum_in_offsets(muons, good_muons, mask_events, muons.masks["all"], NUMPY_LIB.int8), ha.sum_in_offsets(electrons, good_electrons, mask_events, electrons.masks["all"], NUMPY_LIB.int8))
    lepton_veto = NUMPY_LIB.add(ha.sum_in_offsets(muons, veto_muons, mask_events, muons.masks["all"], NUMPY_LIB.int8), ha.sum_in_offsets(electrons, veto_electrons, mask_events, electrons.masks["all"], NUMPY_LIB.int8))
    njets = ha.sum_in_offsets(jets, good_jets, mask_events, jets.masks["all"], NUMPY_LIB.int8)
    btags = ha.sum_in_offsets(jets, bjets, mask_events, jets.masks["all"], NUMPY_LIB.int8)
    met = (scalars["MET_pt"] > 20)

    # apply basic event definition (inverted for boosted analysis)
    if boosted:
      mask_events = mask_events & (nleps == 1) & (lepton_veto == 0) & NUMPY_LIB.invert( (njets >= 4) & (btags >=2) ) & met
    else:
      mask_events = mask_events & (nleps == 1) & (lepton_veto == 0) & (njets >= 4) & (btags >=2) & met

    ### check overlap between AK4 and AK8 jets: if (based on tau32 and tau21) the AK8 jet is a t/H/W candidate remove the AK4 jet, otherwise remove the AK8 jet
    if boosted:

      fatjets = data["FatJet"]
      genparts = data["GenPart"]

      # get fatjets
      good_fatjets = jet_selection(fatjets, muons, (veto_muons | good_muons), parameters["fatjets"]) & jet_selection(fatjets, electrons, (veto_electrons | good_electrons), parameters["fatjets"])
      bfatjets = good_fatjets & (fatjets.btagHbb > parameters["bbtagging WP"]) 

      fatjets.tau32 = NUMPY_LIB.divide(fatjets.tau3, fatjets.tau2)
      fatjets.tau21 = NUMPY_LIB.divide(fatjets.tau2, fatjets.tau1)
      jets_to_keep = ha.mask_overlappingAK4(jets, good_jets, fatjets, good_fatjets, 1.2, tau32cut=parameters["fatjets"]["tau32cut"], tau21cut=parameters["fatjets"]["tau21cut"])
      non_overlapping_fatjets = ha.mask_deltar_first(fatjets, good_fatjets, jets, good_jets, 1.2)

      good_jets &= jets_to_keep
      good_fatjets &= non_overlapping_fatjets | (fatjets.tau32 < parameters["fatjets"]["tau32cut"]) | (fatjets.tau21 < parameters["fatjets"]["tau21cut"]) #we keep fat jets which are not overlapping, or if they are either a top or W/H candidate

      top_candidates = (fatjets.tau32 < parameters["fatjets"]["tau32cut"])
      WH_candidates = (fatjets.tau32 > parameters["fatjets"]["tau32cut"]) & (fatjets.tau21 < parameters["fatjets"]["tau21cut"])
      bjets = good_jets & (jets.btagDeepB > parameters["btagging WP"])
      njets = ha.sum_in_offsets(jets, good_jets, mask_events, jets.masks["all"], NUMPY_LIB.int8)
      btags = ha.sum_in_offsets(jets, bjets, mask_events, jets.masks["all"], NUMPY_LIB.int8)

      bbtags = ha.sum_in_offsets(fatjets, bfatjets, mask_events, fatjets.masks["all"], NUMPY_LIB.int8)
      ntop_candidates = ha.sum_in_offsets(fatjets, top_candidates, mask_events, fatjets.masks["all"], NUMPY_LIB.int8)
      nWH_candidates = ha.sum_in_offsets(fatjets, WH_candidates, mask_events, fatjets.masks["all"], NUMPY_LIB.int8)

      ### 2 fat jets from H and W, 2 b jets from the tops
      #mask_events &= (nWH_candidates > 1) & (btags > 1)
      ### 1 top candidate and 1 H candidate, and 1 b jet from the leptonic top
      mask_events &= (ntop_candidates > 0) & (nWH_candidates > 0) & (btags > 0)

    ### calculation of all needed variables
    var = {}

    var["njets"] = njets
    var["btags"] = btags
    var["nleps"] = nleps
    if boosted:
      higgs = (genparts.pdgId == 25) & (genparts.status==62)
      tops  = ( (genparts.pdgId == 6) | (genparts.pdgId == -6) ) & (genparts.status==62)
      var["nfatjets"] = ha.sum_in_offsets(fatjets, good_fatjets, mask_events, fatjets.masks["all"], NUMPY_LIB.int8)
      var["ntop_candidates"] = ha.sum_in_offsets(fatjets, tops, mask_events, fatjets.masks["all"], NUMPY_LIB.int8)

    indices = {}    
    indices["leading"] = NUMPY_LIB.zeros(nEvents, dtype=NUMPY_LIB.int32)
    indices["subleading"] = NUMPY_LIB.ones(nEvents, dtype=NUMPY_LIB.int32)
    if boosted:
      indices["inds_WHcandidates"] = ha.index_in_offsets(fatjets.btagHbb, fatjets.offsets, 1, mask_events, WH_candidates)


    variables = [
        ("jet", jets, good_jets, "leading", ["pt", "eta"]),
        ("bjet", jets, bjets, "leading", ["pt", "eta"]),
    ]

    if boosted:
        variables += [
            ("fatjet", fatjets, good_fatjets, "leading",["pt", "eta", "mass", "msoftdrop", "tau32", "tau21"]),
            ("fatjet", fatjets, good_fatjets, "subleading",["pt", "eta", "mass", "msoftdrop", "tau32", "tau21"]),
            ("top_candidate", fatjets, top_candidates, "leading", ["pt", "eta", "mass", "msoftdrop", "tau32", "tau21"]),
            ("WH_candidate", fatjets, WH_candidates, "inds_WHcandidates", ["pt", "eta", "mass", "msoftdrop", "tau32", "tau21"]),
            ("higgs", genparts, higgs, "leading", ["pt", "eta"]),
            ("tops", genparts, tops, "leading", ["pt", "eta"])
    ]

    # special role of lepton
    var["leading_lepton_pt"] = NUMPY_LIB.maximum(ha.get_in_offsets(muons.pt, muons.offsets, indices["leading"], mask_events, good_muons), ha.get_in_offsets(electrons.pt, electrons.offsets, indices["leading"], mask_events, good_electrons))
    var["leading_lepton_eta"] = NUMPY_LIB.maximum(ha.get_in_offsets(muons.eta, muons.offsets, indices["leading"], mask_events, good_muons), ha.get_in_offsets(electrons.eta, electrons.offsets, indices["leading"], mask_events, good_electrons))

    # all other variables
    for v in variables:
        calculate_variable_features(v, mask_events, indices, var)


    # calculate weights for MC samples
    weights = {}
    weights["nominal"] = NUMPY_LIB.ones(nEvents, dtype=NUMPY_LIB.float32)

    if is_mc:
        weights["nominal"] = weights["nominal"] * scalars["genWeight"] * parameters["lumi"] * samples_info[sample]["XS"] / samples_info[sample]["ngen_weight"]

        # pu corrections
        pu_weights = compute_pu_weights(parameters["pu_corrections_target"], weights["nominal"], scalars["Pileup_nTrueInt"], scalars["PV_npvsGood"])
        weights["nominal"] = weights["nominal"] * pu_weights

        # lepton SF corrections
        electron_weights = compute_lepton_weights(electrons, electrons.pt, (electrons.deltaEtaSC + electrons.eta), mask_events, good_electrons, evaluator, ["el_triggerSF", "el_recoSF", "el_idSF"])
        muon_weights = compute_lepton_weights(muons, muons.pt, NUMPY_LIB.abs(muons.eta), mask_events, good_muons, evaluator, ["mu_triggerSF", "mu_isoSF", "mu_idSF"])
        weights["nominal"] = weights["nominal"] * muon_weights * electron_weights

        # btag SF corrections
        btag_weights = compute_btag_weights(jets, mask_events, good_jets, evaluator)
        weights["nominal"] = weights["nominal"] * btag_weights

    #in case of data: check if event is in golden lumi file
    if not is_mc and lumimask is not None:
        mask_lumi = lumimask(scalars["run"], scalars["luminosityBlock"])
        mask_events = mask_events & mask_lumi

    #evaluate DNN
    if DNN:
        DNN_pred = evaluate_DNN(jets, good_jets, electrons, good_electrons, muons, good_muons, scalars, mask_events, DNN, DNN_model)

    # in case of tt+jets -> split in ttbb, tt2b, ttb, ttcc, ttlf
    processes = {}
    if sample.startswith("TT"):
        ttCls = scalars["genTtbarId"]%100
        processes["ttbb"] = mask_events & (ttCls >=53) & (ttCls <=56)
        processes["tt2b"] = mask_events & (ttCls ==52)
        processes["ttb"] = mask_events & (ttCls ==51)
        processes["ttcc"] = mask_events & (ttCls >=41) & (ttCls <=45)
        ttHF =  ((ttCls >=53) & (ttCls <=56)) | (ttCls ==52) | (ttCls ==51) | ((ttCls >=41) & (ttCls <=45))
        processes["ttlf"] = mask_events & NUMPY_LIB.invert(ttHF)
    else:
        processes["unsplit"] = mask_events

    for p in processes.keys():

        mask_events_split = processes[p]

        # Categories
        categories = {}
        if not boosted:
          categories["sl_jge4_tge2"] = mask_events_split
          categories["sl_jge4_tge3"] = mask_events_split & (btags >=3)

          categories["sl_j4_tge3"] = mask_events_split & (njets ==4) & (btags >=3)
          categories["sl_j5_tge3"] = mask_events_split & (njets ==5) & (btags >=3)
          categories["sl_jge6_tge3"] = mask_events_split & (njets >=6) & (btags >=3)

          categories["sl_j4_t3"] = mask_events_split & (njets ==4) & (btags ==3)
          categories["sl_j4_tge4"] = mask_events_split & (njets ==4) & (btags >=4)
          categories["sl_j5_t3"] = mask_events_split & (njets ==5) & (btags ==3)
          categories["sl_j5_tge4"] = mask_events_split & (njets ==5) & (btags >=4)
          categories["sl_jge6_t3"] = mask_events_split & (njets >=6) & (btags ==3)
          categories["sl_jge6_tge4"] = mask_events_split & (njets >=6) & (btags >=4)
        
        if not isinstance(cat, list):
            cat = [cat] 
        for c in cat:
            cut = categories[c]
            cut_name = c

            if p=="unsplit":
                if "Run" in sample:
                    name = "data" + "_" + cut_name
                else:
                    name = samples_info[sample]["process"] + "_" + cut_name
            else:
                name = p + "_" + cut_name

            # create histograms filled with weighted events
            for k in var.keys():
                if k not in histogram_settings:
                    raise Exception("please add variable {0} to config_analysis.py".format(k))
                hist = Histogram(*ha.histogram_from_vector(var[k][cut], weights["nominal"][cut], NUMPY_LIB.linspace(histogram_settings[k][0], histogram_settings[k][1], histogram_settings[k][2])))
                ret["hist_{0}_{1}".format(name, k)] = hist

            if DNN:
                if DNN.endswith("multiclass"):
                    class_pred = NUMPY_LIB.argmax(DNN_pred, axis=1)
                    for n, n_name in zip([0,1,2,3,4,5], ["ttH", "ttbb", "tt2b", "ttb", "ttcc", "ttlf"]):
                        node = (class_pred == n)
                        DNN_node = DNN_pred[:,n]
                        hist_DNN = Histogram(*ha.histogram_from_vector(DNN_node[(cut & node)], weights["nominal"][(cut & node)], NUMPY_LIB.linspace(0.,1.,16)))
                        ret["hist_{0}_DNN_{1}".format(name, n_name)] = hist_DNN
                        hist_DNN_ROC = Histogram(*ha.histogram_from_vector(DNN_node[(cut & node)], weights["nominal"][(cut & node)], NUMPY_LIB.linspace(0.,1.,1000)))
                        ret["hist_{0}_DNN_ROC_{1}".format(name, n_name)] = hist_DNN_ROC

                else:
                    hist_DNN = Histogram(*ha.histogram_from_vector(DNN_pred[cut], weights["nominal"][cut], NUMPY_LIB.linspace(0.,1.,16)))
                    ret["hist_{0}_DNN".format(name)] = hist_DNN
                    hist_DNN_ROC = Histogram(*ha.histogram_from_vector(DNN_pred[cut], weights["nominal"][cut], NUMPY_LIB.linspace(0.,1.,1000)))
                    ret["hist_{0}_DNN_ROC".format(name)] = hist_DNN_ROC


    #TODO: implement JECs

    return ret
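The multiclass branch above routes each event to its argmax node before histogramming the corresponding score column; a minimal NumPy illustration of that routing:

import numpy as np

DNN_pred = np.array([[0.7, 0.2, 0.1],
                     [0.1, 0.8, 0.1]])      # (events, classes)
class_pred = np.argmax(DNN_pred, axis=1)    # winning node per event
for n in range(DNN_pred.shape[1]):
    node = class_pred == n                  # events assigned to node n
    DNN_node = DNN_pred[:, n]               # node-n score for all events
    # histogram DNN_node[cut & node] per category, as in the code above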
Example #14
        arrays_event += ["PV_npvsGood", "Pileup_nTrueInt", "genWeight"]

    filenames = None
    if args.filelist is not None:
        filenames = [l.strip() for l in open(args.filelist).readlines()]
    else:
        filenames = args.filenames

    print("Number of files:", len(filenames))

    for fn in filenames:
        if not fn.endswith(".root"):
            print(fn)
            raise Exception("Must supply ROOT filename, but got {0}".format(fn))

    results = Results()


    for ibatch, files_in_batch in enumerate(chunks(filenames, args.files_per_batch)):
        #define our dataset
        structs = ["Jet", "Muon", "Electron"]
        if args.boosted:
          structs += ["FatJet", "GenPart"]  # extend the list, not append a nested list
        dataset = NanoAODDataset(files_in_batch, arrays_objects + arrays_event, "Events", structs, arrays_event)
        dataset.get_cache_dir = lambda fn,loc=args.cache_location: os.path.join(loc, fn)

        if not args.from_cache:
            #Load data from ROOT files
            dataset.preload(nthreads=args.nthreads, verbose=True)

            #prepare the object arrays on the host or device
Example #15
import numpy as np
from hepaccelerate.utils import Histogram, Results
from glob import glob
import json,os,argparse
from pdb import set_trace

flist = glob('results/201*/v12/met20_btagDDBvL086/nominal/btagEfficiencyMaps/out_btagEfficiencyMaps_*json')

def divide(h1,h2):
  contents    = h1.contents/h2.contents
  contents_w2 = h1.contents_w2/h2.contents_w2
  edges       = h1.edges
  return Histogram(contents, contents_w2, edges)

for fn in flist:
  with open(fn) as f:
    data = json.load(f)
  for h in data:
    data[h] = Histogram( *data[h].values() )

  for flav in ['b','l','lc']:
    for var in ['central','updown']:
      data[f'eff_flav{flav}_{var}'] = divide( data[f'btags_flav{flav}_{var}'], data[f'total_flav{flav}_{var}'] )

  ret = Results(data)
  ret.save_json(fn)
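divide above emits warnings and produces inf/NaN for empty denominator bins; a guarded variant, reusing np and Histogram from the imports at the top of this example (an assumption, not part of the original script):

def divide_safe(h1, h2):
  # leave bins with an empty denominator at zero instead of inf/NaN
  num, den = np.asarray(h1.contents), np.asarray(h2.contents)
  num_w2, den_w2 = np.asarray(h1.contents_w2), np.asarray(h2.contents_w2)
  contents = np.divide(num, den, out=np.zeros_like(num, dtype=float), where=den != 0)
  contents_w2 = np.divide(num_w2, den_w2, out=np.zeros_like(num_w2, dtype=float), where=den_w2 != 0)
  return Histogram(contents, contents_w2, h1.edges)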