Example #1
def analyze_data_function(data, parameters):
    ret = Results()

    num_events = data["num_events"]
    muons = data["Muon"]
    mu_pt = nplib.sqrt(muons.Px**2 + muons.Py**2)
    muons.attrs_data["pt"] = mu_pt

    mask_events = nplib.ones(muons.numevents(), dtype=nplib.bool_)
    mask_muons_passing_pt = muons.pt > parameters["muons_ptcut"]
    num_muons_event = kernels.sum_in_offsets(backend, muons.offsets,
                                             mask_muons_passing_pt,
                                             mask_events, muons.masks["all"],
                                             nplib.int8)
    mask_events_dimuon = num_muons_event == 2

    #get the leading muon pt in events that have exactly two muons
    inds = nplib.zeros(num_events, dtype=nplib.int32)
    leading_muon_pt = kernels.get_in_offsets(backend, muons.offsets, muons.pt,
                                             inds, mask_events_dimuon,
                                             mask_muons_passing_pt)

    #compute a weighted histogram
    weights = nplib.ones(num_events, dtype=nplib.float32)
    bins = nplib.linspace(0, 300, 101, dtype=nplib.float32)
    hist_muons_pt = Histogram(*kernels.histogram_from_vector(
        backend, leading_muon_pt[mask_events_dimuon],
        weights[mask_events_dimuon], bins))

    #save it to the output
    ret["hist_leading_muon_pt"] = hist_muons_pt
    return ret
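For orientation, here is a minimal NumPy sketch of what the two jagged-array kernels used above compute, assuming the hepaccelerate convention that `offsets` has length num_events + 1 and indexes into flat per-object arrays; the `_ref` names and exact signatures are illustrative, not the library's API.

import numpy as np

def sum_in_offsets_ref(offsets, content, mask_events, mask_objects, dtype):
    # Sum `content` per event, counting only unmasked objects in unmasked events.
    out = np.zeros(len(offsets) - 1, dtype=dtype)
    for iev in range(len(offsets) - 1):
        if not mask_events[iev]:
            continue
        lo, hi = offsets[iev], offsets[iev + 1]
        out[iev] = (content[lo:hi] * mask_objects[lo:hi]).sum()
    return out

def get_in_offsets_ref(offsets, content, indices, mask_events, mask_objects):
    # Pick the indices[iev]-th unmasked object of each event (0 = leading).
    out = np.zeros(len(offsets) - 1, dtype=content.dtype)
    for iev in range(len(offsets) - 1):
        if not mask_events[iev]:
            continue
        lo, hi = offsets[iev], offsets[iev + 1]
        sel = np.flatnonzero(mask_objects[lo:hi])
        if indices[iev] < len(sel):
            out[iev] = content[lo + sel[indices[iev]]]
    return out

# Two events: one with two muons passing the pt cut, one with a single muon.
offsets = np.array([0, 2, 3])
pt = np.array([50.0, 30.0, 25.0], dtype=np.float32)
passing = pt > 20
evs = np.array([True, True])
print(sum_in_offsets_ref(offsets, passing, evs, passing, np.int8))        # [2 1]
print(get_in_offsets_ref(offsets, pt, np.zeros(2, dtype=np.int32), evs, passing))  # [50. 25.]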
Example #2
def load_and_analyze(args_tuple):
    fn, args, dataset, entrystart, entrystop, ismc, ichunk = args_tuple
    this_worker = get_worker_wrapper()
    NUMPY_LIB, backend = hepaccelerate.choose_backend(args.use_cuda)

    print("Loading {0}".format(fn))
    ds, timing_results = load_dataset(
        args.datapath,
        fn,
        ismc,
        args.nthreads,
        args.skim,
        NUMPY_LIB,
        backend,
        entrystart,
        entrystop,
    )
    t0 = time.time()
    ret = run_analysis(ds, "{0}_{1}".format(dataset, ichunk),
                       this_worker.dnnmodel, args.use_cuda, ismc)
    t1 = time.time()
    ret["timing"] = Results(timing_results)
    ret["timing"]["run_analysis"] = t1 - t0
    ret["timing"]["num_events"] = ds.numevents()
    return ret
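load_and_analyze expects pre-built (file, args, dataset, entrystart, entrystop, ismc, ichunk) tuples; a hedged sketch of how such an arglist might be assembled (make_arglist, chunk_events, and the per-file entry count are assumptions, not the project's actual driver logic):

def make_arglist(files, args, dataset, num_entries, ismc, chunk_events=500_000):
    # one (file, args, dataset, entrystart, entrystop, ismc, ichunk) tuple per chunk
    arglist = []
    ichunk = 0
    for fn in files:
        for start in range(0, num_entries, chunk_events):
            stop = min(start + chunk_events, num_entries)
            arglist.append((fn, args, dataset, start, stop, ismc, ichunk))
            ichunk += 1
    return arglist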
Example #3
def merge_partial_results(dataset_name: str, dataset_era: str, outpath: str,
                          outpath_partial: str):
    """Merges the output from separate jobs for each dataset.
    
    Args:
        dataset_name (str): Name of the dataset
        dataset_era (str): Dataset era
        outpath (str): Directory with the output results
        outpath_partial (str): Directory with the partial input results
    """
    results = []
    partial_results = glob.glob(
        outpath_partial + "/{0}_{1}_*.pkl".format(dataset_name, dataset_era))
    print("Merging {0} partial results for dataset {1}_{2}".format(
        len(partial_results), dataset_name, dataset_era))

    #Load all the partial results
    for res_file in partial_results:
        res = pickle.load(open(res_file, "rb"))
        results += [res]

    #Merge the partial results
    results = sum(results, Results({}))

    #Create output directory if it does not exist
    try:
        os.makedirs(outpath + "/results")
    except FileExistsError:
        print("Output directory {}/results already exists".format(outpath))

    result_filename = outpath + "/results/{0}_{1}.pkl".format(
        dataset_name, dataset_era)
    print("Saving results to {0}".format(result_filename))
    with open(result_filename, "wb") as fi:
        pickle.dump(results, fi, protocol=pickle.HIGHEST_PROTOCOL)
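As a side note, the try/except around os.makedirs can be replaced by the exist_ok flag on Python 3.2+; a brief usage sketch with placeholder dataset and directory names:

import os

# equivalent to the try/except above, but never raises on an existing directory
os.makedirs(os.path.join("out", "results"), exist_ok=True)

# hypothetical call; partial pickles named "dy_2018_*.pkl" would live under out_partial/
merge_partial_results("dy", "2018", "out", "out_partial")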
Example #4
def create_variated_histos(hdict,
                           baseline="nominal",
                           variations=shape_systematics):

    if baseline not in hdict:
        raise KeyError("baseline histogram missing")

    #hbase = copy.deepcopy(hdict[baseline])
    hbase = hdict[baseline]
    ret = Results(OrderedDict())
    ret["nominal"] = hbase
    for variation in variations:
        for vdir in ["up", "down"]:
            #print("create_variated_histos", variation, vdir)
            sname = "{0}__{1}".format(variation, vdir)
            if sname.endswith("__up"):
                sname2 = sname.replace("__up", "Up")
            elif sname.endswith("__down"):
                sname2 = sname.replace("__down", "Down")

            if sname not in hdict:
                #print("systematic", sname, "not found, taking baseline")
                hret = hbase
            else:
                hret = hdict[sname]
            ret[sname2] = hret
    return ret
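The renaming above maps "<variation>__up" to "<variation>Up" (and likewise for Down), which is the histogram-name suffix convention used by combine-style datacards; a tiny self-check of that mapping (the systematic names are placeholders):

for variation in ["jes", "pu"]:
    for vdir, suffix in [("up", "Up"), ("down", "Down")]:
        sname = "{0}__{1}".format(variation, vdir)
        assert sname.replace("__" + vdir, suffix) == variation + suffix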
Example #5
def create_variated_histos(proc,
                           hdict,
                           baseline="nominal",
                           variations=shape_systematics):

    if baseline not in hdict:
        raise KeyError("baseline histogram missing")

    #hbase = copy.deepcopy(hdict[baseline])
    hbase = hdict[baseline]
    ret = Results(OrderedDict())
    ret["nominal"] = hbase
    for variation in variations:
        for vdir in ["up", "down"]:
            #print("create_variated_histos", variation, vdir)
            sname = "{0}__{1}".format(variation, vdir)
            if sname.endswith("__up"):
                sname2 = sname.replace("__up", "Up")
            elif sname.endswith("__down"):
                sname2 = sname.replace("__down", "Down")

            if sname not in hdict:
                #print("systematic", sname, "not found, taking baseline")
                hret = hbase
            else:
                hret = hdict[sname]
            ret[sname2] = hret
    if (('DYLHEScaleWeight' in variations)
            or ('EWZLHEScaleWeight' in variations)):
        h_lhe = []
        h_nom_up = copy.deepcopy(hbase)
        h_nom_down = copy.deepcopy(hbase)
        for i in range(9):
            sname = 'LHEScaleWeight__{0}'.format(i)
            h_lhe.append(hdict[sname])
        for k in range(len(h_lhe[0].contents)):
            for i in range(9):
                if (h_lhe[i].contents[k] > h_nom_up.contents[k]):
                    h_nom_up.contents[k] = h_lhe[i].contents[k]
                if (h_lhe[i].contents[k] < h_nom_down.contents[k]):
                    h_nom_down.contents[k] = h_lhe[i].contents[k]
        #remove the normalization aspect from QCD scale
        sum_nom_up = np.sum(h_nom_up.contents)
        sum_nom_down = np.sum(h_nom_down.contents)
        for k in range(len(h_nom_up.contents)):
            h_nom_up.contents[k] = h_nom_up.contents[k] * np.sum(
                hbase.contents) / sum_nom_up
            h_nom_down.contents[k] = h_nom_down.contents[k] * np.sum(
                hbase.contents) / sum_nom_down

        if ('dy' in proc):
            ret['DYLHEScaleWeightUp'] = h_nom_up
            ret['DYLHEScaleWeightDown'] = h_nom_down
        elif ('ewk' in proc):
            ret['EWZLHEScaleWeightUp'] = h_nom_up
            ret['EWZLHEScaleWeightDown'] = h_nom_down
    return ret
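The per-bin loops above take a bin-wise envelope of the nine LHE scale variations and then rescale the envelope to the nominal integral, keeping only the shape effect. A vectorized NumPy sketch of the same computation (the array shapes are assumptions):

import numpy as np

def scale_envelope(nominal, lhe_variations):
    # nominal: shape (nbins,); lhe_variations: shape (9, nbins)
    up = np.maximum(nominal, lhe_variations.max(axis=0)).astype(float)
    down = np.minimum(nominal, lhe_variations.min(axis=0)).astype(float)
    # remove the normalization effect: rescale each envelope to the nominal integral
    up *= nominal.sum() / up.sum()
    down *= nominal.sum() / down.sum()
    return up, down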
Example #6
def create_datacard(dict_procs, parameter_name, all_processes, histname,
                    baseline, variations, weight_xs):

    ret = Results(OrderedDict())
    event_counts = {}
    hists_mc = []
    for pid, pid_procs in proc_grps:
        event_counts[pid] = 0
    for proc in all_processes:
        #print("create_datacard", proc)
        rr = dict_procs[proc]
        _variations = variations

        #don't produce variated histograms for data
        if proc == "data":
            _variations = []

        variated_histos = create_variated_histos(proc, rr, baseline,
                                                 _variations)

        for syst_name, histo in variated_histos.items():
            if proc != "data":
                histo = histo * weight_xs[proc]

            if syst_name == "nominal":
                found_proc = 0
                for pid, pid_procs in proc_grps:

                    if proc in pid_procs:
                        event_counts[pid] += np.sum(histo.contents)
                        found_proc = 1
                        #print(pid,proc, syst_name, np.sum(histo.contents))

                if proc != "data":
                    hists_mc += [histo]
                if found_proc == 0:
                    event_counts[proc] = np.sum(histo.contents)
            #create histogram name for combine datacard

            hist_name = "{0}__{2}".format(proc, histname, syst_name)
            if hist_name == "data__nominal":
                hist_name = "data_obs"
            hist_name = hist_name.replace("__nominal", "")

            ret[hist_name] = copy.deepcopy(histo)
    assert (len(hists_mc) > 0)
    hist_mc_tot = copy.deepcopy(hists_mc[0])
    for h in hists_mc[1:]:  # add the remaining MC histograms to the total
        hist_mc_tot += h
    ret["data_fake"] = hist_mc_tot
    ret_g = group_samples_datacard(ret, proc_grps)
    return ret_g, event_counts
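The histogram keys built above follow the "<proc>__<syst>" pattern, with "__nominal" stripped and the data histogram renamed to "data_obs" as combine expects; a compact restatement with self-checks:

def datacard_hist_name(proc, syst_name):
    name = "{0}__{1}".format(proc, syst_name)
    if name == "data__nominal":
        return "data_obs"
    return name.replace("__nominal", "")

assert datacard_hist_name("dy", "nominal") == "dy"
assert datacard_hist_name("dy", "jesUp") == "dy__jesUp"
assert datacard_hist_name("data", "nominal") == "data_obs"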
Example #7
def merge_partial_results(dataset_name, dataset_era, outpath):
    results = []
    partial_results = glob.glob(outpath + "/{0}_{1}_*.pkl".format(dataset_name, dataset_era))
    print("Merging {0} partial results for dataset {1}_{2}".format(len(partial_results), dataset_name, dataset_era))
    for res_file in partial_results:
        res = pickle.load(open(res_file, "rb"))
        results += [res]
    results = sum(results, Results({}))
    try:
        os.makedirs(args.out + "/results")
    except FileExistsError as e:
        pass
    result_filename = args.out + "/results/{0}_{1}.pkl".format(dataset_name, dataset_era)
    print("Saving results to {0}".format(result_filename))
    with open(result_filename, "wb") as fi:
        pickle.dump(results, fi, protocol=pickle.HIGHEST_PROTOCOL) 
    return
Example #8
def run_analysis(dataset, out, dnnmodel, use_cuda, ismc):
    from keras.backend.tensorflow_backend import set_session

    this_worker = get_worker_wrapper()
    NUMPY_LIB = this_worker.NUMPY_LIB
    backend = this_worker.backend
    hists = {}
    histo_bins = {
        "nmu": np.array([0, 1, 2, 3], dtype=np.float32),
        "njet": np.array([0, 1, 2, 3, 4, 5, 6, 7], dtype=np.float32),
        "mu_pt": np.linspace(0, 300, 20),
        "mu_eta": np.linspace(-5, 5, 20),
        "mu_phi": np.linspace(-5, 5, 20),
        "mu_iso": np.linspace(0, 1, 20),
        "mu_charge": np.array([-1, 0, 1], dtype=np.float32),
        "met_pt": np.linspace(0, 200, 20),
        "jet_pt": np.linspace(0, 400, 20),
        "jet_eta": np.linspace(-5, 5, 20),
        "jet_phi": np.linspace(-5, 5, 20),
        "jet_btag": np.linspace(0, 1, 20),
        "dnnpred_m": np.linspace(0, 1, 20),
        "dnnpred_s": np.linspace(0, 0.2, 20),
        "inv_mass": np.linspace(150, 200, 20),
        "sumpt": np.linspace(0, 1000, 20),
    }

    t0 = time.time()

    i = 0

    mu = dataset.structs["Muon"][i]
    el = dataset.structs["Electron"][i]
    jets = dataset.structs["Jet"][i]
    evvars = dataset.eventvars[i]

    mu.hepaccelerate_backend = backend
    el.hepaccelerate_backend = backend
    jets.hepaccelerate_backend = backend

    evs_all = NUMPY_LIB.ones(dataset.numevents(), dtype=NUMPY_LIB.bool_)

    print("Lepton selection")
    sel_mu, sel_ev_mu = get_selected_muons(mu, 40, 20, 2.4, 0.1)
    sel_ev_mu = sel_ev_mu & (evvars["HLT_IsoMu24"] == True)
    mu.masks["selected"] = sel_mu
    sel_el, sel_ev_el = get_selected_electrons(el, 40, 20, 2.4, 0.1)
    el.masks["selected"] = sel_el

    nmu = ha_kernels.sum_in_offsets(
        backend,
        mu.offsets,
        mu.masks["selected"],
        evs_all,
        mu.masks["all"],
        dtype=NUMPY_LIB.int32,
    )
    nel = ha_kernels.sum_in_offsets(
        backend,
        el.offsets,
        el.masks["selected"],
        evs_all,
        el.masks["all"],
        dtype=NUMPY_LIB.int32,
    )

    # get contiguous arrays of the first two muons for all events
    mu1 = mu.select_nth(0, object_mask=sel_mu)
    mu2 = mu.select_nth(1, object_mask=sel_mu)
    el1 = el.select_nth(0, object_mask=sel_el)
    el2 = el.select_nth(1, object_mask=sel_el)

    weight_ev_mu = apply_lepton_corrections(mu, sel_mu,
                                            this_worker.electron_weights)
    weight_ev_el = apply_lepton_corrections(el, sel_el,
                                            this_worker.electron_weights)

    weights = {"nominal": weight_ev_mu * weight_ev_el}

    weights_jet = {}
    for k in weights.keys():
        weights_jet[k] = NUMPY_LIB.zeros_like(jets.pt)
        ha_kernels.broadcast(backend, jets.offsets, weights["nominal"],
                             weights_jet[k])

    all_jecs = [("nominal", "", None)]
    if ismc:
        for i in range(this_worker.jecs_up.shape[1]):
            all_jecs += [(i, "up", this_worker.jecs_up[:, i])]
            all_jecs += [(i, "down", this_worker.jecs_down[:, i])]

    jets_pt_orig = NUMPY_LIB.copy(jets.pt)

    # per-event histograms
    fill_histograms_several(
        hists,
        "nominal",
        "hist__all__",
        [
            (evvars["MET_pt"], "met_pt", histo_bins["met_pt"]),
        ],
        evs_all,
        weights,
        use_cuda,
    )

    fill_histograms_several(
        hists,
        "nominal",
        "hist__all__",
        [
            (jets.pt, "jets_pt", histo_bins["jet_pt"]),
        ],
        jets.masks["all"],
        weights_jet,
        use_cuda,
    )

    print("Jet selection")
    # loop over the jet corrections
    for ijec, sdir, jec in all_jecs:
        systname = "nominal"
        if ijec != "nominal":
            systname = ("jec{0}".format(ijec), sdir)

        if jec is not None:
            jet_pt_corr = apply_jec(jets_pt_orig, this_worker.jecs_bins, jec)
            # compute the corrected jet pt
            jets.pt = jets_pt_orig * NUMPY_LIB.abs(jet_pt_corr)
        print("jec", ijec, sdir, jets.pt.mean())

        # get selected jets
        sel_jet, sel_bjet = select_jets(jets, mu, el, sel_mu, sel_el, 40, 2.0,
                                        0.3, 0.4)

        # compute the number of jets per event
        njet = ha_kernels.sum_in_offsets(
            backend,
            jets.offsets,
            sel_jet,
            evs_all,
            jets.masks["all"],
            dtype=NUMPY_LIB.int32,
        )
        nbjet = ha_kernels.sum_in_offsets(
            backend,
            jets.offsets,
            sel_bjet,
            evs_all,
            jets.masks["all"],
            dtype=NUMPY_LIB.int32,
        )

        inv_mass_3j = NUMPY_LIB.zeros(jets.numevents(),
                                      dtype=NUMPY_LIB.float32)
        best_comb_3j = NUMPY_LIB.zeros((jets.numevents(), 3),
                                       dtype=NUMPY_LIB.int32)

        if use_cuda:
            this_worker.kernels.comb_3_invmass_closest[32, 256](
                jets.pt,
                jets.eta,
                jets.phi,
                jets.mass,
                jets.offsets,
                172.0,
                inv_mass_3j,
                best_comb_3j,
            )
            cuda.synchronize()
        else:
            this_worker.kernels.comb_3_invmass_closest(
                jets.pt,
                jets.eta,
                jets.phi,
                jets.mass,
                jets.offsets,
                172.0,
                inv_mass_3j,
                best_comb_3j,
            )

        best_btag = NUMPY_LIB.zeros(jets.numevents(), dtype=NUMPY_LIB.float32)
        if use_cuda:
            this_worker.kernels.max_val_comb[32, 1024](jets.btag, jets.offsets,
                                                       best_comb_3j, best_btag)
            cuda.synchronize()
        else:
            this_worker.kernels.max_val_comb(jets.btag, jets.offsets,
                                             best_comb_3j, best_btag)

        # get the events with at least three jets
        sel_ev_jet = njet >= 3
        sel_ev_bjet = nbjet >= 1

        selected_events = (sel_ev_mu | sel_ev_el) & sel_ev_jet & sel_ev_bjet
        print("Selected {0} events".format(selected_events.sum()))

        # get contiguous vectors of the first two jet data
        jet1 = jets.select_nth(0, object_mask=sel_jet)
        jet2 = jets.select_nth(1, object_mask=sel_jet)
        jet3 = jets.select_nth(2, object_mask=sel_jet)

        # create a mask vector for the first two jets
        first_two_jets = NUMPY_LIB.zeros_like(sel_jet)
        inds = NUMPY_LIB.zeros_like(evs_all, dtype=NUMPY_LIB.int32)
        targets = NUMPY_LIB.ones_like(evs_all, dtype=NUMPY_LIB.int32)
        inds[:] = 0
        ha_kernels.set_in_offsets(
            backend,
            jets.offsets,
            first_two_jets,
            inds,
            targets,
            selected_events,
            sel_jet,
        )
        inds[:] = 1
        ha_kernels.set_in_offsets(
            backend,
            jets.offsets,
            first_two_jets,
            inds,
            targets,
            selected_events,
            sel_jet,
        )

        # compute the invariant mass of the first two jets
        dijet_inv_mass, dijet_pt = compute_inv_mass(jets, selected_events,
                                                    sel_jet & first_two_jets,
                                                    use_cuda)

        sumpt_jets = ha_kernels.sum_in_offsets(backend, jets.offsets, jets.pt,
                                               selected_events, sel_jet)

        # create a keras-like array
        arr = NUMPY_LIB.vstack([
            nmu,
            nel,
            njet,
            dijet_inv_mass,
            dijet_pt,
            mu1["pt"],
            mu1["eta"],
            mu1["phi"],
            mu1["charge"],
            mu1["pfRelIso03_all"],
            mu2["pt"],
            mu2["eta"],
            mu2["phi"],
            mu2["charge"],
            mu2["pfRelIso03_all"],
            el1["pt"],
            el1["eta"],
            el1["phi"],
            el1["charge"],
            el1["pfRelIso03_all"],
            el2["pt"],
            el2["eta"],
            el2["phi"],
            el2["charge"],
            el2["pfRelIso03_all"],
            jet1["pt"],
            jet1["eta"],
            jet1["phi"],
            jet1["btag"],
            jet2["pt"],
            jet2["eta"],
            jet2["phi"],
            jet2["btag"],
            inv_mass_3j,
            best_btag,
            sumpt_jets,
        ]).T

        # print("evaluating DNN model")
        with this_worker.graph.as_default():
            set_session(this_worker.session)
            pred = dnnmodel.eval(arr, use_cuda)
            pred = NUMPY_LIB.vstack(pred).T
            pred_m = NUMPY_LIB.mean(pred, axis=1)
            pred_s = NUMPY_LIB.std(pred, axis=1)

        fill_histograms_several(
            hists,
            systname,
            "hist__nmu1_njetge3_nbjetge1__",
            [
                (pred_m, "pred_m", histo_bins["dnnpred_m"]),
                (pred_s, "pred_s", histo_bins["dnnpred_s"]),
                (nmu, "nmu", histo_bins["nmu"]),
                (nel, "nel", histo_bins["nmu"]),
                (njet, "njet", histo_bins["njet"]),
                (mu1["pt"], "mu1_pt", histo_bins["mu_pt"]),
                (mu1["eta"], "mu1_eta", histo_bins["mu_eta"]),
                (mu1["phi"], "mu1_phi", histo_bins["mu_phi"]),
                (mu1["charge"], "mu1_charge", histo_bins["mu_charge"]),
                (mu1["pfRelIso03_all"], "mu1_iso", histo_bins["mu_iso"]),
                (mu2["pt"], "mu2_pt", histo_bins["mu_pt"]),
                (mu2["eta"], "mu2_eta", histo_bins["mu_eta"]),
                (mu2["phi"], "mu2_phi", histo_bins["mu_phi"]),
                (mu2["charge"], "mu2_charge", histo_bins["mu_charge"]),
                (mu2["pfRelIso03_all"], "mu2_iso", histo_bins["mu_iso"]),
                (el1["pt"], "el1_pt", histo_bins["mu_pt"]),
                (el1["eta"], "el1_eta", histo_bins["mu_eta"]),
                (el1["phi"], "el1_phi", histo_bins["mu_phi"]),
                (el1["charge"], "el1_charge", histo_bins["mu_charge"]),
                (el1["pfRelIso03_all"], "el1_iso", histo_bins["mu_iso"]),
                (el2["pt"], "el2_pt", histo_bins["mu_pt"]),
                (el2["eta"], "el2_eta", histo_bins["mu_eta"]),
                (el2["phi"], "el2_phi", histo_bins["mu_phi"]),
                (el2["charge"], "el2_charge", histo_bins["mu_charge"]),
                (el2["pfRelIso03_all"], "el2_iso", histo_bins["mu_iso"]),
                (jet1["pt"], "j1_pt", histo_bins["jet_pt"]),
                (jet1["eta"], "j1_eta", histo_bins["jet_eta"]),
                (jet1["phi"], "j1_phi", histo_bins["jet_phi"]),
                (jet1["btag"], "j1_btag", histo_bins["jet_btag"]),
                (jet2["pt"], "j2_pt", histo_bins["jet_pt"]),
                (jet2["eta"], "j2_eta", histo_bins["jet_eta"]),
                (jet2["phi"], "j2_phi", histo_bins["jet_phi"]),
                (jet2["btag"], "j2_btag", histo_bins["jet_btag"]),
                (inv_mass_3j, "inv_mass_3j", histo_bins["inv_mass"]),
                (best_btag, "best_btag", histo_bins["jet_btag"]),
                (sumpt_jets, "sumpt", histo_bins["sumpt"]),
            ],
            selected_events,
            weights,
            use_cuda,
        )

        # save the array for the first jet correction scenario only
        if save_arrays and ijec == 0:
            outfile_arr = "{0}_arrs.npy".format(out)
            print("Saving array with shape {0} to {1}".format(
                arr.shape, outfile_arr))
            with open(outfile_arr, "wb") as fi:
                np.save(fi, NUMPY_LIB.asnumpy(arr))

    t1 = time.time()

    res = Results({})
    for hn in hists.keys():
        hists[hn] = Results(hists[hn])
    res["hists"] = Results(hists)
    res["numevents"] = dataset.numevents()

    speed = dataset.numevents() / (t1 - t0)
    print("run_analysis: {0:.2E} events in {1:.2f} seconds, speed {2:.2E} Hz".
          format(dataset.numevents(), t1 - t0, speed))
    return res
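The NUMPY_LIB.vstack([...]).T pattern above turns a list of per-event scalars into a (num_events, num_features) matrix for batched DNN evaluation; a minimal NumPy illustration:

import numpy as np

# three events, three features
nmu = np.array([1, 2, 0])
njet = np.array([3, 4, 5])
mu1_pt = np.array([55.0, 40.0, 0.0])

arr = np.vstack([nmu, njet, mu1_pt]).T  # rows = events, columns = features
assert arr.shape == (3, 3)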
Example #9
    if args.dask_server == "debug":
        ret = map(load_and_analyze, arglist)
    else:
        futures = client.map(load_and_analyze, arglist, retries=3)
        ret = [fut.result() for fut in futures]

    walltime_t1 = time.time()

    print("Merging outputs")
    hists = {ds[0]: [] for ds in datasets}
    numevents = {ds[0]: 0 for ds in datasets}
    for r, _args in zip(ret, arglist):
        rh = r["hists"]
        ds = _args[2]
        hists[ds] += [Results(r["hists"])]
        numevents[ds] += r["numevents"]

    timing = sum([r["timing"] for r in ret], Results({}))
    timing["cuda"] = use_cuda
    timing["njec"] = args.njec
    timing["nthreads"] = args.nthreads
    timing["walltime"] = walltime_t1 - walltime_t0

    for k, v in hists.items():
        hists[k] = sum(hists[k], Results({}))

    print("Writing output pkl")
    with open(args.out, "wb") as fi:
        pickle.dump({
            "hists": hists,
Example #10
def analyze_data(data,
                 sample,
                 NUMPY_LIB=None,
                 parameters={},
                 samples_info={},
                 is_mc=True,
                 lumimask=None,
                 cat=False,
                 DNN=False,
                 DNN_model=None,
                 jets_met_corrected=True):
    #Output structure that will be returned and added up among the files.
    #Should be relatively small.
    ret = Results()

    muons = data["Muon"]
    electrons = data["Electron"]
    scalars = data["eventvars"]
    jets = data["Jet"]

    nEvents = muons.numevents()
    indices = {}
    indices["leading"] = NUMPY_LIB.zeros(nEvents, dtype=NUMPY_LIB.int32)
    indices["subleading"] = NUMPY_LIB.ones(nEvents, dtype=NUMPY_LIB.int32)

    mask_events = NUMPY_LIB.ones(nEvents, dtype=NUMPY_LIB.bool_)

    # apply event cleaning and PV selection
    flags = [
        "Flag_goodVertices", "Flag_globalSuperTightHalo2016Filter",
        "Flag_HBHENoiseFilter", "Flag_HBHENoiseIsoFilter",
        "Flag_EcalDeadCellTriggerPrimitiveFilter", "Flag_BadPFMuonFilter",
        "Flag_BadChargedCandidateFilter", "Flag_ecalBadCalibFilter"
    ]
    if not is_mc:
        flags.append("Flag_eeBadScFilter")
    for flag in flags:
        mask_events = mask_events & scalars[flag]
    mask_events = mask_events & (scalars["PV_npvsGood"] > 0)
    #mask_events = vertex_selection(scalars, mask_events)

    # apply object selection for muons, electrons, jets
    good_muons, veto_muons = lepton_selection(muons, parameters["muons"])
    good_electrons, veto_electrons = lepton_selection(electrons,
                                                      parameters["electrons"])
    good_jets = jet_selection(jets, muons,
                              (veto_muons | good_muons), parameters["jets"],
                              jets_met_corrected) & jet_selection(
                                  jets, electrons,
                                  (veto_electrons | good_electrons),
                                  parameters["jets"], jets_met_corrected)
    bjets = good_jets & (
        getattr(jets, parameters["btagging algorithm"]) >
        parameters["btagging WP"][parameters["btagging algorithm"]])

    # apply basic event selection -> individual categories cut later
    nleps = NUMPY_LIB.add(
        ha.sum_in_offsets(muons, good_muons, mask_events, muons.masks["all"],
                          NUMPY_LIB.int8),
        ha.sum_in_offsets(electrons, good_electrons, mask_events,
                          electrons.masks["all"], NUMPY_LIB.int8))
    nMuons = ha.sum_in_offsets(muons, good_muons, mask_events,
                               muons.masks["all"], NUMPY_LIB.int8)
    nElectrons = ha.sum_in_offsets(electrons, good_electrons, mask_events,
                                   electrons.masks["all"], NUMPY_LIB.int8)

    lepton_veto = NUMPY_LIB.add(
        ha.sum_in_offsets(muons, veto_muons, mask_events, muons.masks["all"],
                          NUMPY_LIB.int8),
        ha.sum_in_offsets(electrons, veto_electrons, mask_events,
                          electrons.masks["all"], NUMPY_LIB.int8))
    njets = ha.sum_in_offsets(jets, good_jets, mask_events, jets.masks["all"],
                              NUMPY_LIB.int8)

    btags = ha.sum_in_offsets(jets, bjets, mask_events, jets.masks["all"],
                              NUMPY_LIB.int8)
    if jets_met_corrected:
        #met = (scalars["MET_pt_nom"] > 20)
        met = (scalars["METFixEE2017_pt_nom"] > 20)
    else:
        met = (scalars["MET_pt"] > 20)

    # trigger logic
    # needs update for different years!
    trigger_el = (scalars["HLT_Ele35_WPTight_Gsf"]
                  | scalars["HLT_Ele28_eta2p1_WPTight_Gsf_HT150"]) & (
                      nleps == 1) & (nElectrons == 1)
    trigger_mu = (scalars["HLT_IsoMu27"]) & (nleps == 1) & (nMuons == 1)
    if not is_mc:
        if "SingleMuon" in sample:
            trigger_el = NUMPY_LIB.zeros(nEvents, dtype=NUMPY_LIB.bool_)
        if "SingleElectron" in sample:
            trigger_mu = NUMPY_LIB.zeros(nEvents, dtype=NUMPY_LIB.bool_)
    mask_events = mask_events & (trigger_el | trigger_mu)

    mask_events = mask_events & (nleps == 1) & (lepton_veto == 0) & (
        njets >= 4) & (btags >= 2) & met

    ### calculation of all needed variables
    var = {}

    var["njets"] = njets
    var["btags"] = btags
    var["nleps"] = nleps

    pt_label = "pt_nom" if jets_met_corrected else "pt"
    variables = [
        ("jet", jets, good_jets, "leading", [pt_label, "eta"]),
        ("bjet", jets, bjets, "leading", [pt_label, "eta"]),
    ]

    # special role of lepton
    var["leading_lepton_pt"] = NUMPY_LIB.maximum(
        ha.get_in_offsets(muons.pt, muons.offsets, indices["leading"],
                          mask_events, good_muons),
        ha.get_in_offsets(electrons.pt, electrons.offsets, indices["leading"],
                          mask_events, good_electrons))
    var["leading_lepton_eta"] = NUMPY_LIB.maximum(
        ha.get_in_offsets(muons.eta, muons.offsets, indices["leading"],
                          mask_events, good_muons),
        ha.get_in_offsets(electrons.eta, electrons.offsets, indices["leading"],
                          mask_events, good_electrons))

    # all other variables
    for v in variables:
        calculate_variable_features(v, mask_events, indices, var)

    #synch
    #mask = (scalars["event"] == 2895765)

    # calculate weights for MC samples
    weights = {}
    weights["nominal"] = NUMPY_LIB.ones(nEvents, dtype=NUMPY_LIB.float32)

    if is_mc:
        weights["nominal"] = weights["nominal"] * scalars[
            "genWeight"] * parameters["lumi"] * samples_info[sample][
                "XS"] / samples_info[sample]["ngen_weight"]

        # pu corrections
        #pu_weights = compute_pu_weights(parameters["pu_corrections_target"], weights["nominal"], scalars["Pileup_nTrueInt"], scalars["PV_npvsGood"])
        pu_weights = compute_pu_weights(parameters["pu_corrections_target"],
                                        weights["nominal"],
                                        scalars["Pileup_nTrueInt"],
                                        scalars["Pileup_nTrueInt"])
        weights["nominal"] = weights["nominal"] * pu_weights
        var["pu_weights"] = pu_weights

        # lepton SF corrections
        electron_weights = compute_lepton_weights(
            electrons, (electrons.deltaEtaSC + electrons.eta), electrons.pt,
            mask_events, good_electrons, evaluator,
            ["el_triggerSF", "el_recoSF", "el_idSF"])
        muon_weights = compute_lepton_weights(
            muons, muons.pt, NUMPY_LIB.abs(muons.eta), mask_events, good_muons,
            evaluator, ["mu_triggerSF", "mu_isoSF", "mu_idSF"])
        weights["nominal"] = weights["nominal"] * muon_weights * electron_weights

        # btag SF corrections
        btag_weights = compute_btag_weights(jets, mask_events, good_jets,
                                            parameters["btag_SF_target"],
                                            jets_met_corrected,
                                            parameters["btagging algorithm"])
        var["btag_weights"] = btag_weights
        weights["nominal"] = weights["nominal"] * btag_weights

    #in case of data: check if event is in golden lumi file
    if not is_mc and lumimask is not None:
        mask_lumi = lumimask(scalars["run"], scalars["luminosityBlock"])
        mask_events = mask_events & mask_lumi

    #evaluate DNN
    if DNN:
        DNN_pred = evaluate_DNN(jets, good_jets, electrons, good_electrons,
                                muons, good_muons, scalars, mask_events,
                                nEvents, DNN, DNN_model)

    # in case of tt+jets -> split in ttbb, tt2b, ttb, ttcc, ttlf
    processes = {}
    if sample.startswith("TTTo"):  #Changed for TTV samples
        ttCls = scalars["genTtbarId"] % 100
        processes["ttbb"] = mask_events & (ttCls >= 53) & (ttCls <= 56)
        processes["tt2b"] = mask_events & (ttCls == 52)
        processes["ttb"] = mask_events & (ttCls == 51)
        processes["ttcc"] = mask_events & (ttCls >= 41) & (ttCls <= 45)
        ttHF = ((ttCls >= 53) &
                (ttCls <= 56)) | (ttCls == 52) | (ttCls == 51) | (
                    (ttCls >= 41) & (ttCls <= 45))
        processes["ttlf"] = mask_events & NUMPY_LIB.invert(ttHF)
    else:
        processes["unsplit"] = mask_events

    for p in processes.keys():

        mask_events_split = processes[p]

        # Categories
        categories = {}
        categories["sl_jge4_tge2"] = mask_events_split
        categories["sl_jge4_tge3"] = mask_events_split & (btags >= 3)
        categories["sl_jge4_tge4"] = mask_events_split & (btags >= 4)

        categories["sl_j4_tge3"] = mask_events_split & (njets
                                                        == 4) & (btags >= 3)
        categories["sl_j5_tge3"] = mask_events_split & (njets
                                                        == 5) & (btags >= 3)
        categories["sl_jge6_tge3"] = mask_events_split & (njets >= 6) & (btags
                                                                         >= 3)

        categories["sl_j4_t3"] = mask_events_split & (njets == 4) & (btags
                                                                     == 3)
        categories["sl_j4_tge4"] = mask_events_split & (njets
                                                        == 4) & (btags >= 4)
        categories["sl_j5_t3"] = mask_events_split & (njets == 5) & (btags
                                                                     == 3)
        categories["sl_j5_tge4"] = mask_events_split & (njets
                                                        == 5) & (btags >= 4)
        categories["sl_jge6_t3"] = mask_events_split & (njets >= 6) & (btags
                                                                       == 3)
        categories["sl_jge6_tge4"] = mask_events_split & (njets >= 6) & (btags
                                                                         >= 4)

        #print("sl_j4_t3", scalars["event"][categories["sl_j4_t3"]], len(scalars["event"][categories["sl_j4_t3"]]))
        #print("sl_j5_t3", scalars["event"][categories["sl_j5_t3"]], len(scalars["event"][categories["sl_j5_t3"]]))
        #print("sl_jge6_t3", scalars["event"][categories["sl_jge6_t3"]], len(scalars["event"][categories["sl_jge6_t3"]]))
        #print("sl_j4_tge4", scalars["event"][categories["sl_j4_tge4"]], len(scalars["event"][categories["sl_j4_tge4"]]))
        #print("sl_j5_tge4", scalars["event"][categories["sl_j5_tge4"]], len(scalars["event"][categories["sl_j5_tge4"]]))
        #print("sl_jge6_tge4", scalars["event"][categories["sl_jge6_tge4"]], len(scalars["event"][categories["sl_jge6_tge4"]]))

        if not isinstance(cat, list):
            cat = [cat]
        for c in cat:
            cut = categories[c]
            cut_name = c

            if p == "unsplit":
                if "Run" in sample:
                    name = "data" + "_" + cut_name
                else:
                    name = samples_info[sample]["process"] + "_" + cut_name
            else:
                name = p + "_" + cut_name

            # create histograms filled with weighted events
            for k in var.keys():
                if k not in histogram_settings:
                    raise Exception(
                        "please add variable {0} to definitions_analysis.py".
                        format(k))
                hist = Histogram(*ha.histogram_from_vector(
                    var[k][cut], weights["nominal"][cut],
                    NUMPY_LIB.linspace(histogram_settings[k][0],
                                       histogram_settings[k][1],
                                       histogram_settings[k][2])))
                ret["hist_{0}_{1}".format(name, k)] = hist

            if DNN:
                if DNN == "mass_fit":
                    hist_DNN = Histogram(*ha.histogram_from_vector(
                        DNN_pred[cut], weights["nominal"][cut],
                        NUMPY_LIB.linspace(0., 300., 30)))
                    # the zoomed histogram only exists in the mass-fit mode,
                    # so store it inside this branch to avoid a NameError
                    hist_DNN_zoom = Histogram(*ha.histogram_from_vector(
                        DNN_pred[cut], weights["nominal"][cut],
                        NUMPY_LIB.linspace(0., 170., 30)))
                    ret["hist_{0}_DNN_zoom".format(name)] = hist_DNN_zoom
                else:
                    hist_DNN = Histogram(*ha.histogram_from_vector(
                        DNN_pred[cut], weights["nominal"][cut],
                        NUMPY_LIB.linspace(0., 1., 16)))
                ret["hist_{0}_DNN".format(name)] = hist_DNN

    #TODO: implement JECs

    ## To display properties of a single event
    #evts = [5991859]
    #mask = NUMPY_LIB.zeros_like(mask_events)
    #for iev in evts:
    #  mask |= (scalars["event"] == iev)
    ##import pdb
    ##pdb.set_trace()
    #print("mask", mask)
    #print('nevt', scalars["event"][mask])
    #print('pass sel', mask_events[mask])
    #print('nleps', nleps[mask])
    #print('njets', njets[mask])
    ##print('met', scalars['MET_pt_nom'][mask])
    ##print('lep_pt', leading_lepton_pt[mask])
    ##print('jet_pt', leading_jet_pt[mask])
    ##print('lep_eta', leading_lepton_eta[mask])
    #print('pu_weight', pu_weights[mask])
    #print('btag_weight', btag_weights[mask])
    #print('lep_weight', muon_weights[mask] * electron_weights[mask])
    #print('nevents', np.count_nonzero(mask_events))

    #np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})
    #for evt in evts:
    #    evt_idx = NUMPY_LIB.where( scalars["event"] == evt )[0][0]
    #    start = jets.offsets[evt_idx]
    #    stop  = jets.offsets[evt_idx+1]
    #    print(f'!!! EVENT {evt} !!!')
    #    print(f'njets good {njets[evt_idx]}, total {stop-start}')
    #    #print('jets mask', nonbjets[start:stop])
    #    print('jets pt', jets.pt_nom[start:stop])
    #    print('jets eta', jets.eta[start:stop])
    #    print('jets btag', getattr(jets, parameters["btagging algorithm"])[start:stop])
    #    print('jet Id', jets.jetId[start:stop]),
    #    print('jet puId', jets.puId[start:stop])

    return ret
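The tt+jets splitting above uses the standard genTtbarId % 100 convention (51 to 56 for extra b quarks, 41 to 45 for extra charm); a compact NumPy check of the masks:

import numpy as np

ttCls = np.array([0, 41, 45, 51, 52, 53, 56])
ttbb = (ttCls >= 53) & (ttCls <= 56)
tt2b = ttCls == 52
ttb = ttCls == 51
ttcc = (ttCls >= 41) & (ttCls <= 45)
ttlf = ~(ttbb | tt2b | ttb | ttcc)  # everything without extra heavy flavour
assert ttlf.tolist() == [True, False, False, False, False, False, False]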
Example #11
    filenames = None
    if args.filelist is not None:
        filenames = [l.strip() for l in open(args.filelist).readlines()]
    else:
        filenames = args.filenames

    print("Number of files:", len(filenames))

    for fn in filenames:
        if not fn.endswith(".root"):
            print(fn)
            raise Exception(
                "Must supply ROOT filename, but got {0}".format(fn))

    results = Results()

    for ibatch, files_in_batch in enumerate(
            chunks(filenames, args.files_per_batch)):
        #define our dataset
        structs = ["Jet", "Muon", "Electron"]
        #dataset = NanoAODDataset(files_in_batch, arrays_objects + arrays_event, "Events", structs, arrays_event)
        dataset = NanoAODDataset(files_in_batch, arrays_objects + arrays_event,
                                 "Events", structs, arrays_event)
        dataset.get_cache_dir = lambda fn, loc=args.cache_location: os.path.join(
            loc, fn)

        if not args.from_cache:
            #Load data from ROOT files
            dataset.preload(nthreads=args.nthreads, verbose=True)
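chunks is not defined in this snippet; a common implementation consistent with its use here (split the file list into batches of args.files_per_batch) would be:

def chunks(seq, size):
    # yield successive batches of at most `size` items from seq
    for i in range(0, len(seq), size):
        yield seq[i:i + size]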
Example #12
def analyze_data_function(data, parameters):
    ret = Results()
    ha = parameters["ha"]
    num_events = data["num_events"]
    lep = data["Lep"]
    lep.hepaccelerate_backend = ha
    lep.attrs_data["pt"] = lep.lep_pt
    lep.attrs_data["eta"] = lep.lep_eta
    lep.attrs_data["phi"] = lep.lep_phi
    lep.attrs_data["charge"] = lep.lep_charge
    lep.attrs_data["type"] = lep.lep_type

    lep_mass = np.zeros_like(lep["pt"], dtype=nplib.float32)
    lep_mass = np.where(lep["type"] == 11, 0.511, lep_mass)
    lep_mass = np.where(lep["type"] == 13, 105.65837, lep_mass)

    lep.attrs_data["mass"] = lep_mass
    mask_events = nplib.ones(lep.numevents(), dtype=nplib.bool_)

    lep_ele = lep["type"] == 11
    lep_muon = lep["type"] == 13

    ele_Iso = np.logical_and(
        lep_ele,
        np.logical_and(lep.lep_ptcone30 / lep.pt < 0.15,
                       lep.lep_etcone20 / lep.pt < 0.20))
    muon_Iso = np.logical_and(
        lep_muon,
        np.logical_and(lep.lep_ptcone30 / lep.pt < 0.15,
                       lep.lep_etcone20 / lep.pt < 0.30))
    pass_iso = np.logical_or(ele_Iso, muon_Iso)
    lep.attrs_data["pass_iso"] = pass_iso

    num_lep_event = kernels.sum_in_offsets(
        backend,
        lep.offsets,
        lep.masks["all"],
        mask_events,
        lep.masks["all"],
        nplib.int8,
    )
    mask_events_4lep = num_lep_event == 4

    lep_attrs = ["pt", "eta", "phi", "charge", "type", "mass",
                 "pass_iso"]  #, "ptcone30", "etcone20"]

    lep0 = lep.select_nth(0,
                          mask_events_4lep,
                          lep.masks["all"],
                          attributes=lep_attrs)
    lep1 = lep.select_nth(1,
                          mask_events_4lep,
                          lep.masks["all"],
                          attributes=lep_attrs)
    lep2 = lep.select_nth(2,
                          mask_events_4lep,
                          lep.masks["all"],
                          attributes=lep_attrs)
    lep3 = lep.select_nth(3,
                          mask_events_4lep,
                          lep.masks["all"],
                          attributes=lep_attrs)

    mask_event_sumchg_zero = (lep0["charge"] + lep1["charge"] +
                              lep2["charge"] + lep3["charge"] == 0)
    sum_lep_type = lep0["type"] + lep1["type"] + lep2["type"] + lep3["type"]
    all_pass_iso = (lep0["pass_iso"] & lep1["pass_iso"] & lep2["pass_iso"]
                    & lep3["pass_iso"])

    mask_event_sum_lep_type = np.logical_or(
        (sum_lep_type == 44),
        np.logical_or((sum_lep_type == 48), (sum_lep_type == 52)))
    mask_events = mask_events & mask_event_sumchg_zero & mask_events_4lep & mask_event_sum_lep_type & all_pass_iso

    mask_lep1_passing_pt = lep1["pt"] > parameters["leading_lep_ptcut"]
    mask_lep2_passing_pt = lep2["pt"] > parameters["lep_ptcut"]

    mask_events = mask_events & mask_lep1_passing_pt & mask_lep2_passing_pt

    l0 = to_cartesian(lep0)
    l1 = to_cartesian(lep1)
    l2 = to_cartesian(lep2)
    l3 = to_cartesian(lep3)

    llll = {k: l0[k] + l1[k] + l2[k] + l3[k] for k in ["px", "py", "pz", "e"]}

    llll_sph = to_spherical(llll)

    llll_sph["mass"] = llll_sph["mass"] / 1000.  # Convert to GeV

    #import pdb;pdb.set_trace();
    # compute a weighted histogram
    weights = nplib.ones(num_events, dtype=nplib.float32)
    ## Add xsec weights based on sample name
    if parameters["is_mc"]:
        weights = (data['eventvars']['mcWeight'] *
                   data['eventvars']['scaleFactor_PILEUP'] *
                   data['eventvars']['scaleFactor_ELE'] *
                   data['eventvars']['scaleFactor_MUON'] *
                   data['eventvars']['scaleFactor_LepTRIGGER'])
        info = infofile.infos[parameters["sample"]]
        weights *= (lumi * 1000 * info["xsec"]) / (info["sumw"] *
                                                   info["red_eff"])

    bins = nplib.linspace(110, 150, 11, dtype=nplib.float32)
    hist_m4lep = Histogram(*kernels.histogram_from_vector(
        backend,
        llll_sph["mass"][mask_events],
        weights[mask_events],
        bins,
    ))
    # save it to the output
    ret["hist_m4lep"] = hist_m4lep
    return ret
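to_cartesian and to_spherical are not shown in this snippet; a NumPy sketch of the four-vector conversions they would plausibly implement (the dict keys match the usage above; the inputs are in MeV here, hence the later division by 1000):

import numpy as np

def to_cartesian(lep):
    # pt/eta/phi/mass -> px/py/pz/e, elementwise over events
    pt, eta, phi, mass = (lep[k] for k in ["pt", "eta", "phi", "mass"])
    px, py, pz = pt * np.cos(phi), pt * np.sin(phi), pt * np.sinh(eta)
    e = np.sqrt(px ** 2 + py ** 2 + pz ** 2 + mass ** 2)
    return {"px": px, "py": py, "pz": pz, "e": e}

def to_spherical(v):
    # invert back to (pt, mass); only "mass" is used by the analysis above
    p2 = v["px"] ** 2 + v["py"] ** 2 + v["pz"] ** 2
    return {"pt": np.sqrt(v["px"] ** 2 + v["py"] ** 2),
            "mass": np.sqrt(np.maximum(v["e"] ** 2 - p2, 0.0))}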
Example #13
def analyze_data(data, sample, NUMPY_LIB=None, parameters={}, samples_info={}, is_mc=True, lumimask=None, cat=False, boosted=False, DNN=False, DNN_model=None):
    #Output structure that will be returned and added up among the files.
    #Should be relatively small.
    ret = Results()

    muons = data["Muon"]
    electrons = data["Electron"]
    scalars = data["eventvars"]
    jets = data["Jet"]

    nEvents = muons.numevents()

    mask_events = NUMPY_LIB.ones(nEvents, dtype=NUMPY_LIB.bool_)

    # apply event cleaning, PV selection and trigger selection
    flags = [
        "Flag_goodVertices", "Flag_globalSuperTightHalo2016Filter", "Flag_HBHENoiseFilter", "Flag_HBHENoiseIsoFilter", "Flag_EcalDeadCellTriggerPrimitiveFilter", "Flag_BadPFMuonFilter", "Flag_BadChargedCandidateFilter", "Flag_ecalBadCalibFilter"]
    if not is_mc:
        flags.append("Flag_eeBadScFilter")
    for flag in flags:
        mask_events = mask_events & scalars[flag]
    if args.year.startswith('2016'):
        trigger = (scalars["HLT_Ele27_WPTight_Gsf"] | scalars["HLT_IsoMu24"]  | scalars["HLT_IsoTkMu24"])
    else:
        trigger = (scalars["HLT_Ele35_WPTight_Gsf"] | scalars["HLT_Ele28_eta2p1_WPTight_Gsf_HT150"] | scalars["HLT_IsoMu27"])
    mask_events = mask_events & trigger
    mask_events = mask_events & (scalars["PV_npvsGood"]>0)
    #mask_events = vertex_selection(scalars, mask_events)

    # apply object selection for muons, electrons, jets
    good_muons, veto_muons = lepton_selection(muons, parameters["muons"])
    good_electrons, veto_electrons = lepton_selection(electrons, parameters["electrons"])
    good_jets = jet_selection(jets, muons, (veto_muons | good_muons), parameters["jets"]) & jet_selection(jets, electrons, (veto_electrons | good_electrons) , parameters["jets"])
    bjets = good_jets & (getattr(jets, parameters["btagging algorithm"]) > parameters["btagging WP"])

    # apply basic event selection -> individual categories cut later
    nleps =  NUMPY_LIB.add(ha.sum_in_offsets(muons, good_muons, mask_events, muons.masks["all"], NUMPY_LIB.int8), ha.sum_in_offsets(electrons, good_electrons, mask_events, electrons.masks["all"], NUMPY_LIB.int8))
    lepton_veto = NUMPY_LIB.add(ha.sum_in_offsets(muons, veto_muons, mask_events, muons.masks["all"], NUMPY_LIB.int8), ha.sum_in_offsets(electrons, veto_electrons, mask_events, electrons.masks["all"], NUMPY_LIB.int8))
    njets = ha.sum_in_offsets(jets, good_jets, mask_events, jets.masks["all"], NUMPY_LIB.int8)
    btags = ha.sum_in_offsets(jets, bjets, mask_events, jets.masks["all"], NUMPY_LIB.int8)
    met = (scalars["MET_pt"] > 20)

    # apply basic event definition (inverted for boosted analysis)
    if boosted:
      mask_events = mask_events & (nleps == 1) & (lepton_veto == 0) & NUMPY_LIB.invert( (njets >= 4) & (btags >=2) ) & met
    else:
      mask_events = mask_events & (nleps == 1) & (lepton_veto == 0) & (njets >= 4) & (btags >=2) & met

    ### check overlap between AK4 and AK8 jets: if (based on tau32 and tau21) the AK8 jet is a t/H/W candidate remove the AK4 jet, otherwise remove the AK8 jet
    if boosted:

      fatjets = data["FatJet"]
      genparts = data["GenPart"]

      # get fatjets
      good_fatjets = jet_selection(fatjets, muons, (veto_muons | good_muons), parameters["fatjets"]) & jet_selection(fatjets, electrons, (veto_electrons | good_electrons), parameters["fatjets"])
      bfatjets = good_fatjets & (fatjets.btagHbb > parameters["bbtagging WP"]) 

      fatjets.tau32 = NUMPY_LIB.divide(fatjets.tau3, fatjets.tau2)
      fatjets.tau21 = NUMPY_LIB.divide(fatjets.tau2, fatjets.tau1)
      jets_to_keep = ha.mask_overlappingAK4(jets, good_jets, fatjets, good_fatjets, 1.2, tau32cut=parameters["fatjets"]["tau32cut"], tau21cut=parameters["fatjets"]["tau21cut"])
      non_overlapping_fatjets = ha.mask_deltar_first(fatjets, good_fatjets, jets, good_jets, 1.2)

      good_jets &= jets_to_keep
      good_fatjets &= non_overlapping_fatjets | (fatjets.tau32 < parameters["fatjets"]["tau32cut"]) | (fatjets.tau21 < parameters["fatjets"]["tau21cut"]) #we keep fat jets which are not overlapping, or if they are either a top or W/H candidate

      top_candidates = (fatjets.tau32 < parameters["fatjets"]["tau32cut"])
      WH_candidates = (fatjets.tau32 > parameters["fatjets"]["tau32cut"]) & (fatjets.tau21 < parameters["fatjets"]["tau21cut"])
      bjets = good_jets & (jets.btagDeepB > parameters["btagging WP"])
      njets = ha.sum_in_offsets(jets, good_jets, mask_events, jets.masks["all"], NUMPY_LIB.int8)
      btags = ha.sum_in_offsets(jets, bjets, mask_events, jets.masks["all"], NUMPY_LIB.int8)

      bbtags = ha.sum_in_offsets(fatjets, bfatjets, mask_events, fatjets.masks["all"], NUMPY_LIB.int8)
      ntop_candidates = ha.sum_in_offsets(fatjets, top_candidates, mask_events, fatjets.masks["all"], NUMPY_LIB.int8)
      nWH_candidates = ha.sum_in_offsets(fatjets, WH_candidates, mask_events, fatjets.masks["all"], NUMPY_LIB.int8)

      ### 2 fat jets from H and W, 2 b jets from the tops
      #mask_events &= (nWH_candidates > 1) & (btags > 1)
      ### 1 top candidate and 1 H candidate, and 1 b jet from the leptonic top
      mask_events &= (ntop_candidates > 0) & (nWH_candidates > 0) & (btags > 0)

    ### calculation of all needed variables
    var = {}

    var["njets"] = njets
    var["btags"] = btags
    var["nleps"] = nleps
    if boosted:
      higgs = (genparts.pdgId == 25) & (genparts.status==62)
      tops  = ( (genparts.pdgId == 6) | (genparts.pdgId == -6) ) & (genparts.status==62)
      var["nfatjets"] = ha.sum_in_offsets(fatjets, good_fatjets, mask_events, fatjets.masks["all"], NUMPY_LIB.int8)
      var["ntop_candidates"] = ha.sum_in_offsets(fatjets, tops, mask_events, fatjets.masks["all"], NUMPY_LIB.int8)

    indices = {}    
    indices["leading"] = NUMPY_LIB.zeros(nEvents, dtype=NUMPY_LIB.int32)
    indices["subleading"] = NUMPY_LIB.ones(nEvents, dtype=NUMPY_LIB.int32)
    if boosted:
      indices["inds_WHcandidates"] = ha.index_in_offsets(fatjets.btagHbb, fatjets.offsets, 1, mask_events, WH_candidates)


    variables = [
        ("jet", jets, good_jets, "leading", ["pt", "eta"]),
        ("bjet", jets, bjets, "leading", ["pt", "eta"]),
    ]

    if boosted:
        variables += [
            ("fatjet", fatjets, good_fatjets, "leading",["pt", "eta", "mass", "msoftdrop", "tau32", "tau21"]),
            ("fatjet", fatjets, good_fatjets, "subleading",["pt", "eta", "mass", "msoftdrop", "tau32", "tau21"]),
            ("top_candidate", fatjets, top_candidates, "leading", ["pt", "eta", "mass", "msoftdrop", "tau32", "tau21"]),
            ("WH_candidate", fatjets, WH_candidates, "inds_WHcandidates", ["pt", "eta", "mass", "msoftdrop", "tau32", "tau21"]),
            ("higgs", genparts, higgs, "leading", ["pt", "eta"]),
            ("tops", genparts, tops, "leading", ["pt", "eta"])
    ]

    # special role of lepton
    var["leading_lepton_pt"] = NUMPY_LIB.maximum(ha.get_in_offsets(muons.pt, muons.offsets, indices["leading"], mask_events, good_muons), ha.get_in_offsets(electrons.pt, electrons.offsets, indices["leading"], mask_events, good_electrons))
    var["leading_lepton_eta"] = NUMPY_LIB.maximum(ha.get_in_offsets(muons.eta, muons.offsets, indices["leading"], mask_events, good_muons), ha.get_in_offsets(electrons.eta, electrons.offsets, indices["leading"], mask_events, good_electrons))

    # all other variables
    for v in variables:
        calculate_variable_features(v, mask_events, indices, var)


    # calculate weights for MC samples
    weights = {}
    weights["nominal"] = NUMPY_LIB.ones(nEvents, dtype=NUMPY_LIB.float32)

    if is_mc:
        weights["nominal"] = weights["nominal"] * scalars["genWeight"] * parameters["lumi"] * samples_info[sample]["XS"] / samples_info[sample]["ngen_weight"]

        # pu corrections
        pu_weights = compute_pu_weights(parameters["pu_corrections_target"], weights["nominal"], scalars["Pileup_nTrueInt"], scalars["PV_npvsGood"])
        weights["nominal"] = weights["nominal"] * pu_weights

        # lepton SF corrections
        electron_weights = compute_lepton_weights(electrons, electrons.pt, (electrons.deltaEtaSC + electrons.eta), mask_events, good_electrons, evaluator, ["el_triggerSF", "el_recoSF", "el_idSF"])
        muon_weights = compute_lepton_weights(muons, muons.pt, NUMPY_LIB.abs(muons.eta), mask_events, good_muons, evaluator, ["mu_triggerSF", "mu_isoSF", "mu_idSF"])
        weights["nominal"] = weights["nominal"] * muon_weights * electron_weights

        # btag SF corrections
        btag_weights = compute_btag_weights(jets, mask_events, good_jets, evaluator)
        weights["nominal"] = weights["nominal"] * btag_weights

    #in case of data: check if event is in golden lumi file
    if not is_mc and lumimask is not None:
        mask_lumi = lumimask(scalars["run"], scalars["luminosityBlock"])
        mask_events = mask_events & mask_lumi

    #evaluate DNN
    if DNN:
        DNN_pred = evaluate_DNN(jets, good_jets, electrons, good_electrons, muons, good_muons, scalars, mask_events, DNN, DNN_model)

    # in case of tt+jets -> split in ttbb, tt2b, ttb, ttcc, ttlf
    processes = {}
    if sample.startswith("TT"):
        ttCls = scalars["genTtbarId"]%100
        processes["ttbb"] = mask_events & (ttCls >=53) & (ttCls <=56)
        processes["tt2b"] = mask_events & (ttCls ==52)
        processes["ttb"] = mask_events & (ttCls ==51)
        processes["ttcc"] = mask_events & (ttCls >=41) & (ttCls <=45)
        ttHF =  ((ttCls >=53) & (ttCls <=56)) | (ttCls ==52) | (ttCls ==51) | ((ttCls >=41) & (ttCls <=45))
        processes["ttlf"] = mask_events & NUMPY_LIB.invert(ttHF)
    else:
        processes["unsplit"] = mask_events

    for p in processes.keys():

        mask_events_split = processes[p]

        # Categories
        categories = {}
        if not boosted:
          categories["sl_jge4_tge2"] = mask_events_split
          categories["sl_jge4_tge3"] = mask_events_split & (btags >=3)

          categories["sl_j4_tge3"] = mask_events_split & (njets ==4) & (btags >=3)
          categories["sl_j5_tge3"] = mask_events_split & (njets ==5) & (btags >=3)
          categories["sl_jge6_tge3"] = mask_events_split & (njets >=6) & (btags >=3)

          categories["sl_j4_t3"] = mask_events_split & (njets ==4) & (btags ==3)
          categories["sl_j4_tge4"] = mask_events_split & (njets ==4) & (btags >=4)
          categories["sl_j5_t3"] = mask_events_split & (njets ==5) & (btags ==3)
          categories["sl_j5_tge4"] = mask_events_split & (njets ==5) & (btags >=4)
          categories["sl_jge6_t3"] = mask_events_split & (njets >=6) & (btags ==3)
          categories["sl_jge6_tge4"] = mask_events_split & (njets >=6) & (btags >=4)
        
        if not isinstance(cat, list):
            cat = [cat] 
        for c in cat:
            cut = categories[c]
            cut_name = c

            if p=="unsplit":
                if "Run" in sample:
                    name = "data" + "_" + cut_name
                else:
                    name = samples_info[sample]["process"] + "_" + cut_name
            else:
                name = p + "_" + cut_name

            # create histograms filled with weighted events
            for k in var.keys():
                if k not in histogram_settings:
                    raise Exception("please add variable {0} to config_analysis.py".format(k))
                hist = Histogram(*ha.histogram_from_vector(var[k][cut], weights["nominal"][cut], NUMPY_LIB.linspace(histogram_settings[k][0], histogram_settings[k][1], histogram_settings[k][2])))
                ret["hist_{0}_{1}".format(name, k)] = hist

            if DNN:
                if DNN.endswith("multiclass"):
                    class_pred = NUMPY_LIB.argmax(DNN_pred, axis=1)
                    for n, n_name in zip([0,1,2,3,4,5], ["ttH", "ttbb", "tt2b", "ttb", "ttcc", "ttlf"]):
                        node = (class_pred == n)
                        DNN_node = DNN_pred[:,n]
                        hist_DNN = Histogram(*ha.histogram_from_vector(DNN_node[(cut & node)], weights["nominal"][(cut & node)], NUMPY_LIB.linspace(0.,1.,16)))
                        ret["hist_{0}_DNN_{1}".format(name, n_name)] = hist_DNN
                        hist_DNN_ROC = Histogram(*ha.histogram_from_vector(DNN_node[(cut & node)], weights["nominal"][(cut & node)], NUMPY_LIB.linspace(0.,1.,1000)))
                        ret["hist_{0}_DNN_ROC_{1}".format(name, n_name)] = hist_DNN_ROC

                else:
                    hist_DNN = Histogram(*ha.histogram_from_vector(DNN_pred[cut], weights["nominal"][cut], NUMPY_LIB.linspace(0.,1.,16)))
                    ret["hist_{0}_DNN".format(name)] = hist_DNN
                    hist_DNN_ROC = Histogram(*ha.histogram_from_vector(DNN_pred[cut], weights["nominal"][cut], NUMPY_LIB.linspace(0.,1.,1000)))
                    ret["hist_{0}_DNN_ROC".format(name)] = hist_DNN_ROC


    #TODO: implement JECs

    return ret
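The multiclass branch above routes each event to its argmax node before histogramming the corresponding score column; a minimal NumPy illustration of that routing:

import numpy as np

DNN_pred = np.array([[0.7, 0.2, 0.1],
                     [0.1, 0.8, 0.1]])      # (events, classes)
class_pred = np.argmax(DNN_pred, axis=1)    # winning node per event
for n in range(DNN_pred.shape[1]):
    node = class_pred == n                  # events assigned to node n
    DNN_node = DNN_pred[:, n]               # node-n score for all events
    # histogram DNN_node[cut & node] per category, as in the code above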
Example #14
        arrays_event += ["PV_npvsGood", "Pileup_nTrueInt", "genWeight"]

    filenames = None
    if args.filelist is not None:
        filenames = [l.strip() for l in open(args.filelist).readlines()]
    else:
        filenames = args.filenames

    print("Number of files:", len(filenames))

    for fn in filenames:
        if not fn.endswith(".root"):
            print(fn)
            raise Exception("Must supply ROOT filename, but got {0}".format(fn))

    results = Results()


    for ibatch, files_in_batch in enumerate(chunks(filenames, args.files_per_batch)):
        #define our dataset
        structs = ["Jet", "Muon", "Electron"]
        if args.boosted:
          structs += ["FatJet", "GenPart"]  # extend the list, not append a nested list
        dataset = NanoAODDataset(files_in_batch, arrays_objects + arrays_event, "Events", structs, arrays_event)
        dataset.get_cache_dir = lambda fn,loc=args.cache_location: os.path.join(loc, fn)

        if not args.from_cache:
            #Load data from ROOT files
            dataset.preload(nthreads=args.nthreads, verbose=True)

            #prepare the object arrays on the host or device
Example #15
import numpy as np
from hepaccelerate.utils import Histogram, Results
from glob import glob
import json,os,argparse
from pdb import set_trace

flist = glob('results/201*/v12/met20_btagDDBvL086/nominal/btagEfficiencyMaps/out_btagEfficiencyMaps_*json')

def divide(h1,h2):
  contents    = h1.contents/h2.contents
  contents_w2 = h1.contents_w2/h2.contents_w2
  edges       = h1.edges
  return Histogram(contents, contents_w2, edges)

for fn in flist:
  with open(fn) as f:
    data = json.load(f)
  for h in data:
    data[h] = Histogram( *data[h].values() )

  for flav in ['b','l','lc']:
    for var in ['central','updown']:
      data[f'eff_flav{flav}_{var}'] = divide( data[f'btags_flav{flav}_{var}'], data[f'total_flav{flav}_{var}'] )

  ret = Results(data)
  ret.save_json(fn)
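divide above emits warnings and produces inf/NaN for empty denominator bins; a guarded variant, reusing np and Histogram from the imports at the top of this example (an assumption, not part of the original script):

def divide_safe(h1, h2):
  # leave bins with an empty denominator at zero instead of inf/NaN
  num, den = np.asarray(h1.contents), np.asarray(h2.contents)
  num_w2, den_w2 = np.asarray(h1.contents_w2), np.asarray(h2.contents_w2)
  contents = np.divide(num, den, out=np.zeros_like(num, dtype=float), where=den != 0)
  contents_w2 = np.divide(num_w2, den_w2, out=np.zeros_like(num_w2, dtype=float), where=den_w2 != 0)
  return Histogram(contents, contents_w2, h1.edges)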