def analyze_data_function(data, parameters):
    ret = Results()

    num_events = data["num_events"]
    muons = data["Muon"]
    mu_pt = nplib.sqrt(muons.Px**2 + muons.Py**2)
    muons.attrs_data["pt"] = mu_pt

    mask_events = nplib.ones(muons.numevents(), dtype=nplib.bool)
    mask_muons_passing_pt = muons.pt > parameters["muons_ptcut"]
    num_muons_event = kernels.sum_in_offsets(
        backend, muons.offsets, mask_muons_passing_pt,
        mask_events, muons.masks["all"], nplib.int8)
    mask_events_dimuon = num_muons_event == 2

    # get the leading muon pt in events that have exactly two muons
    inds = nplib.zeros(num_events, dtype=nplib.int32)
    leading_muon_pt = kernels.get_in_offsets(
        backend, muons.offsets, muons.pt, inds,
        mask_events_dimuon, mask_muons_passing_pt)

    # compute a weighted histogram
    weights = nplib.ones(num_events, dtype=nplib.float32)
    bins = nplib.linspace(0, 300, 101, dtype=nplib.float32)
    hist_muons_pt = Histogram(*kernels.histogram_from_vector(
        backend, leading_muon_pt[mask_events_dimuon],
        weights[mask_events_dimuon], bins))

    # save it to the output
    ret["hist_leading_muon_pt"] = hist_muons_pt
    return ret
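# --- Usage sketch (not part of the original source) ---
# A hypothetical driver for analyze_data_function. It assumes the module
# globals used above (nplib, backend, kernels) are initialized via
# hepaccelerate.choose_backend, as done in load_and_analyze below, and that
# load_muon_data (a placeholder name) returns a dict with a jagged "Muon"
# struct and "num_events".
import hepaccelerate

nplib, backend = hepaccelerate.choose_backend(use_cuda=False)

data = load_muon_data()                      # hypothetical loader
parameters = {"muons_ptcut": 30.0}           # muon pt threshold in GeV
ret = analyze_data_function(data, parameters)
print(ret["hist_leading_muon_pt"].contents)  # weighted bin contents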
def load_and_analyze(args_tuple):
    fn, args, dataset, entrystart, entrystop, ismc, ichunk = args_tuple
    this_worker = get_worker_wrapper()
    NUMPY_LIB, backend = hepaccelerate.choose_backend(args.use_cuda)

    print("Loading {0}".format(fn))
    ds, timing_results = load_dataset(
        args.datapath, fn, ismc, args.nthreads, args.skim,
        NUMPY_LIB, backend, entrystart, entrystop)

    t0 = time.time()
    ret = run_analysis(ds, "{0}_{1}".format(dataset, ichunk),
                       this_worker.dnnmodel, args.use_cuda, ismc)
    t1 = time.time()

    ret["timing"] = Results(timing_results)
    ret["timing"]["run_analysis"] = t1 - t0
    ret["timing"]["num_events"] = ds.numevents()
    return ret
def merge_partial_results(dataset_name: str, dataset_era: str, outpath: str,
                          outpath_partial: str):
    """Merges the output from separate jobs for each dataset.

    Args:
        dataset_name (str): Name of the dataset
        dataset_era (str): Dataset era
        outpath (str): Directory with the output results
        outpath_partial (str): Directory with the partial input results
    """
    results = []
    partial_results = glob.glob(
        outpath_partial + "/{0}_{1}_*.pkl".format(dataset_name, dataset_era))
    print("Merging {0} partial results for dataset {1}_{2}".format(
        len(partial_results), dataset_name, dataset_era))

    # Load all the partial results
    for res_file in partial_results:
        res = pickle.load(open(res_file, "rb"))
        results += [res]

    # Merge the partial results
    results = sum(results, Results({}))

    # Create output directory if it does not exist
    try:
        os.makedirs(outpath + "/results")
    except FileExistsError:
        print("Output directory {} already exists".format(outpath))

    result_filename = outpath + "/results/{0}_{1}.pkl".format(
        dataset_name, dataset_era)
    print("Saving results to {0}".format(result_filename))
    with open(result_filename, "wb") as fi:
        pickle.dump(results, fi, protocol=pickle.HIGHEST_PROTOCOL)
def create_variated_histos(hdict, baseline="nominal",
                           variations=shape_systematics):
    if baseline not in hdict:
        raise KeyError("baseline histogram missing")

    hbase = hdict[baseline]
    ret = Results(OrderedDict())
    ret["nominal"] = hbase
    for variation in variations:
        for vdir in ["up", "down"]:
            # combine-style naming: "jes__up" -> "jesUp"
            sname = "{0}__{1}".format(variation, vdir)
            if sname.endswith("__up"):
                sname2 = sname.replace("__up", "Up")
            elif sname.endswith("__down"):
                sname2 = sname.replace("__down", "Down")

            if sname not in hdict:
                # systematic not found, take the baseline
                hret = hbase
            else:
                hret = hdict[sname]
            ret[sname2] = hret
    return ret
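# --- Illustration (not in the original source) ---
# With a toy hdict, create_variated_histos maps "jes__up"/"jes__down" style
# keys to combine-style "jesUp"/"jesDown" names and falls back to the
# baseline histogram when a variation is missing. h_nom and h_up are
# placeholder Histogram objects.
hdict = {"nominal": h_nom, "jes__up": h_up}      # "jes__down" is absent
v = create_variated_histos(hdict, variations=["jes"])
assert v["jesUp"] is h_up
assert v["jesDown"] is h_nom                      # missing -> baseline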
def create_variated_histos(proc, hdict, baseline="nominal",
                           variations=shape_systematics):
    if baseline not in hdict:
        raise KeyError("baseline histogram missing")

    hbase = hdict[baseline]
    ret = Results(OrderedDict())
    ret["nominal"] = hbase
    for variation in variations:
        for vdir in ["up", "down"]:
            sname = "{0}__{1}".format(variation, vdir)
            if sname.endswith("__up"):
                sname2 = sname.replace("__up", "Up")
            elif sname.endswith("__down"):
                sname2 = sname.replace("__down", "Down")

            if sname not in hdict:
                # systematic not found, take the baseline
                hret = hbase
            else:
                hret = hdict[sname]
            ret[sname2] = hret

    if ("DYLHEScaleWeight" in variations) or ("EWZLHEScaleWeight" in variations):
        # build the bin-by-bin envelope of the 9 LHE scale variations
        h_lhe = []
        h_nom_up = copy.deepcopy(hbase)
        h_nom_down = copy.deepcopy(hbase)
        for i in range(9):
            sname = "LHEScaleWeight__{0}".format(i)
            h_lhe.append(hdict[sname])

        for k in range(len(h_lhe[0].contents)):
            for i in range(9):
                if h_lhe[i].contents[k] > h_nom_up.contents[k]:
                    h_nom_up.contents[k] = h_lhe[i].contents[k]
                if h_lhe[i].contents[k] < h_nom_down.contents[k]:
                    h_nom_down.contents[k] = h_lhe[i].contents[k]

        # remove the normalization aspect from the QCD scale variation
        sum_nom_up = np.sum(h_nom_up.contents)
        sum_nom_down = np.sum(h_nom_down.contents)
        for k in range(len(h_nom_up.contents)):
            h_nom_up.contents[k] = h_nom_up.contents[k] * np.sum(hbase.contents) / sum_nom_up
            h_nom_down.contents[k] = h_nom_down.contents[k] * np.sum(hbase.contents) / sum_nom_down

        if "dy" in proc:
            ret["DYLHEScaleWeightUp"] = h_nom_up
            ret["DYLHEScaleWeightDown"] = h_nom_down
        elif "ewk" in proc:
            ret["EWZLHEScaleWeightUp"] = h_nom_up
            ret["EWZLHEScaleWeightDown"] = h_nom_down
    return ret
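# --- Vectorized sketch (not in the original source) ---
# The bin-by-bin envelope loops above can be written with numpy broadcasting.
# This equivalent helper assumes Histogram stores its bin values in a 1D
# `contents` array, and reproduces the normalized up/down scale envelope.
import numpy as np

def lhe_scale_envelope(hdict, hbase):
    # stack the 9 scale variations together with the nominal template
    stack = np.stack([hdict["LHEScaleWeight__{0}".format(i)].contents
                      for i in range(9)] + [hbase.contents])
    env_up = np.max(stack, axis=0)
    env_down = np.min(stack, axis=0)
    # rescale so each envelope keeps the nominal normalization
    env_up = env_up * np.sum(hbase.contents) / np.sum(env_up)
    env_down = env_down * np.sum(hbase.contents) / np.sum(env_down)
    return env_up, env_down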
def create_datacard(dict_procs, parameter_name, all_processes, histname,
                    baseline, variations, weight_xs):
    ret = Results(OrderedDict())
    event_counts = {}
    hists_mc = []

    for pid, pid_procs in proc_grps:
        event_counts[pid] = 0

    for proc in all_processes:
        rr = dict_procs[proc]
        _variations = variations
        # don't produce variated histograms for data
        if proc == "data":
            _variations = []
        variated_histos = create_variated_histos(proc, rr, baseline, _variations)

        for syst_name, histo in variated_histos.items():
            if proc != "data":
                histo = histo * weight_xs[proc]
            if syst_name == "nominal":
                found_proc = 0
                for pid, pid_procs in proc_grps:
                    if proc in pid_procs:
                        event_counts[pid] += np.sum(histo.contents)
                        found_proc = 1
                if proc != "data":
                    hists_mc += [histo]
                if found_proc == 0:
                    event_counts[proc] = np.sum(histo.contents)

            # create the histogram name for the combine datacard
            hist_name = "{0}__{2}".format(proc, histname, syst_name)
            if hist_name == "data__nominal":
                hist_name = "data_obs"
            hist_name = hist_name.replace("__nominal", "")
            ret[hist_name] = copy.deepcopy(histo)

    assert len(hists_mc) > 0
    # sum all MC histograms into a pseudo-data histogram
    hist_mc_tot = copy.deepcopy(hists_mc[0])
    for h in hists_mc[1:]:
        hist_mc_tot += h
    ret["data_fake"] = hist_mc_tot

    ret_g = group_samples_datacard(ret, proc_grps)
    return ret_g, event_counts
def merge_partial_results(dataset_name, dataset_era, outpath):
    results = []
    partial_results = glob.glob(outpath + "/{0}_{1}_*.pkl".format(
        dataset_name, dataset_era))
    print("Merging {0} partial results for dataset {1}_{2}".format(
        len(partial_results), dataset_name, dataset_era))
    for res_file in partial_results:
        res = pickle.load(open(res_file, "rb"))
        results += [res]
    results = sum(results, Results({}))

    try:
        os.makedirs(args.out + "/results")
    except FileExistsError:
        pass

    result_filename = args.out + "/results/{0}_{1}.pkl".format(
        dataset_name, dataset_era)
    print("Saving results to {0}".format(result_filename))
    with open(result_filename, "wb") as fi:
        pickle.dump(results, fi, protocol=pickle.HIGHEST_PROTOCOL)
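# --- Usage sketch (not in the original source) ---
# Hypothetical call: merge all partial pickles matching
# "<outpath>/dy_2018_*.pkl" into "<args.out>/results/dy_2018.pkl".
# Note that this variant globs under the `outpath` argument but writes
# under the global `args.out`.
merge_partial_results("dy", "2018", args.out)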
def run_analysis(dataset, out, dnnmodel, use_cuda, ismc):
    from keras.backend.tensorflow_backend import set_session

    this_worker = get_worker_wrapper()
    NUMPY_LIB = this_worker.NUMPY_LIB
    backend = this_worker.backend

    hists = {}
    histo_bins = {
        "nmu": np.array([0, 1, 2, 3], dtype=np.float32),
        "njet": np.array([0, 1, 2, 3, 4, 5, 6, 7], dtype=np.float32),
        "mu_pt": np.linspace(0, 300, 20),
        "mu_eta": np.linspace(-5, 5, 20),
        "mu_phi": np.linspace(-5, 5, 20),
        "mu_iso": np.linspace(0, 1, 20),
        "mu_charge": np.array([-1, 0, 1], dtype=np.float32),
        "met_pt": np.linspace(0, 200, 20),
        "jet_pt": np.linspace(0, 400, 20),
        "jet_eta": np.linspace(-5, 5, 20),
        "jet_phi": np.linspace(-5, 5, 20),
        "jet_btag": np.linspace(0, 1, 20),
        "dnnpred_m": np.linspace(0, 1, 20),
        "dnnpred_s": np.linspace(0, 0.2, 20),
        "inv_mass": np.linspace(150, 200, 20),
        "sumpt": np.linspace(0, 1000, 20),
    }

    t0 = time.time()

    i = 0
    mu = dataset.structs["Muon"][i]
    el = dataset.structs["Electron"][i]
    jets = dataset.structs["Jet"][i]
    evvars = dataset.eventvars[i]

    mu.hepaccelerate_backend = backend
    el.hepaccelerate_backend = backend
    jets.hepaccelerate_backend = backend

    evs_all = NUMPY_LIB.ones(dataset.numevents(), dtype=NUMPY_LIB.bool)

    print("Lepton selection")
    sel_mu, sel_ev_mu = get_selected_muons(mu, 40, 20, 2.4, 0.1)
    sel_ev_mu = sel_ev_mu & (evvars["HLT_IsoMu24"] == True)
    mu.masks["selected"] = sel_mu

    sel_el, sel_ev_el = get_selected_electrons(el, 40, 20, 2.4, 0.1)
    el.masks["selected"] = sel_el

    nmu = ha_kernels.sum_in_offsets(
        backend, mu.offsets, mu.masks["selected"], evs_all, mu.masks["all"],
        dtype=NUMPY_LIB.int32)
    nel = ha_kernels.sum_in_offsets(
        backend, el.offsets, el.masks["selected"], evs_all, el.masks["all"],
        dtype=NUMPY_LIB.int32)

    # get contiguous arrays of the first two muons and electrons for all events
    mu1 = mu.select_nth(0, object_mask=sel_mu)
    mu2 = mu.select_nth(1, object_mask=sel_mu)
    el1 = el.select_nth(0, object_mask=sel_el)
    el2 = el.select_nth(1, object_mask=sel_el)

    weight_ev_mu = apply_lepton_corrections(mu, sel_mu, this_worker.electron_weights)
    weight_ev_el = apply_lepton_corrections(el, sel_el, this_worker.electron_weights)
    weights = {"nominal": weight_ev_mu * weight_ev_el}

    # broadcast the per-event weights to the per-jet level
    weights_jet = {}
    for k in weights.keys():
        weights_jet[k] = NUMPY_LIB.zeros_like(jets.pt)
        ha_kernels.broadcast(backend, jets.offsets, weights["nominal"], weights_jet[k])

    all_jecs = [("nominal", "", None)]
    if ismc:
        for i in range(this_worker.jecs_up.shape[1]):
            all_jecs += [(i, "up", this_worker.jecs_up[:, i])]
            all_jecs += [(i, "down", this_worker.jecs_down[:, i])]

    jets_pt_orig = NUMPY_LIB.copy(jets.pt)

    # per-event histograms
    fill_histograms_several(
        hists, "nominal", "hist__all__",
        [(evvars["MET_pt"], "met_pt", histo_bins["met_pt"])],
        evs_all, weights, use_cuda)
    fill_histograms_several(
        hists, "nominal", "hist__all__",
        [(jets.pt, "jets_pt", histo_bins["jet_pt"])],
        jets.masks["all"], weights_jet, use_cuda)

    print("Jet selection")
    # loop over the jet corrections
    for ijec, sdir, jec in all_jecs:
        systname = "nominal"
        if ijec != "nominal":
            systname = ("jec{0}".format(ijec), sdir)

        if jec is not None:
            jet_pt_corr = apply_jec(jets_pt_orig, this_worker.jecs_bins, jec)
            # compute the corrected jet pt
            jets.pt = jets_pt_orig * NUMPY_LIB.abs(jet_pt_corr)
        print("jec", ijec, sdir, jets.pt.mean())

        # get selected jets
        sel_jet, sel_bjet = select_jets(jets, mu, el, sel_mu, sel_el, 40, 2.0, 0.3, 0.4)

        # compute the number of jets and b-tagged jets per event
        njet = ha_kernels.sum_in_offsets(
            backend, jets.offsets, sel_jet, evs_all, jets.masks["all"],
            dtype=NUMPY_LIB.int32)
        nbjet = ha_kernels.sum_in_offsets(
            backend, jets.offsets, sel_bjet, evs_all, jets.masks["all"],
            dtype=NUMPY_LIB.int32)

        inv_mass_3j = NUMPY_LIB.zeros(jets.numevents(), dtype=NUMPY_LIB.float32)
        best_comb_3j = NUMPY_LIB.zeros((jets.numevents(), 3), dtype=NUMPY_LIB.int32)

        if use_cuda:
            this_worker.kernels.comb_3_invmass_closest[32, 256](
                jets.pt, jets.eta, jets.phi, jets.mass, jets.offsets, 172.0,
                inv_mass_3j, best_comb_3j)
            cuda.synchronize()
        else:
            this_worker.kernels.comb_3_invmass_closest(
                jets.pt, jets.eta, jets.phi, jets.mass, jets.offsets, 172.0,
                inv_mass_3j, best_comb_3j)

        best_btag = NUMPY_LIB.zeros(jets.numevents(), dtype=NUMPY_LIB.float32)
        if use_cuda:
            this_worker.kernels.max_val_comb[32, 1024](
                jets.btag, jets.offsets, best_comb_3j, best_btag)
            cuda.synchronize()
        else:
            this_worker.kernels.max_val_comb(
                jets.btag, jets.offsets, best_comb_3j, best_btag)

        # get the events with at least three jets and at least one b-tagged jet
        sel_ev_jet = njet >= 3
        sel_ev_bjet = nbjet >= 1

        selected_events = (sel_ev_mu | sel_ev_el) & sel_ev_jet & sel_ev_bjet
        print("Selected {0} events".format(selected_events.sum()))

        # get contiguous vectors of the first three jets' data
        jet1 = jets.select_nth(0, object_mask=sel_jet)
        jet2 = jets.select_nth(1, object_mask=sel_jet)
        jet3 = jets.select_nth(2, object_mask=sel_jet)

        # create a mask vector for the first two jets
        first_two_jets = NUMPY_LIB.zeros_like(sel_jet)
        inds = NUMPY_LIB.zeros_like(evs_all, dtype=NUMPY_LIB.int32)
        targets = NUMPY_LIB.ones_like(evs_all, dtype=NUMPY_LIB.int32)
        inds[:] = 0
        ha_kernels.set_in_offsets(
            backend, jets.offsets, first_two_jets, inds, targets,
            selected_events, sel_jet)
        inds[:] = 1
        ha_kernels.set_in_offsets(
            backend, jets.offsets, first_two_jets, inds, targets,
            selected_events, sel_jet)

        # compute the invariant mass of the first two jets
        dijet_inv_mass, dijet_pt = compute_inv_mass(
            jets, selected_events, sel_jet & first_two_jets, use_cuda)

        sumpt_jets = ha_kernels.sum_in_offsets(
            backend, jets.offsets, jets.pt, selected_events, sel_jet)

        # create a keras-like array
        arr = NUMPY_LIB.vstack([
            nmu, nel, njet, dijet_inv_mass, dijet_pt,
            mu1["pt"], mu1["eta"], mu1["phi"], mu1["charge"], mu1["pfRelIso03_all"],
            mu2["pt"], mu2["eta"], mu2["phi"], mu2["charge"], mu2["pfRelIso03_all"],
            el1["pt"], el1["eta"], el1["phi"], el1["charge"], el1["pfRelIso03_all"],
            el2["pt"], el2["eta"], el2["phi"], el2["charge"], el2["pfRelIso03_all"],
            jet1["pt"], jet1["eta"], jet1["phi"], jet1["btag"],
            jet2["pt"], jet2["eta"], jet2["phi"], jet2["btag"],
            inv_mass_3j, best_btag, sumpt_jets,
        ]).T

        # evaluate the DNN model
        with this_worker.graph.as_default():
            set_session(this_worker.session)
            pred = dnnmodel.eval(arr, use_cuda)
            pred = NUMPY_LIB.vstack(pred).T
            pred_m = NUMPY_LIB.mean(pred, axis=1)
            pred_s = NUMPY_LIB.std(pred, axis=1)

        fill_histograms_several(
            hists, systname, "hist__nmu1_njetge3_nbjetge1__",
            [
                (pred_m, "pred_m", histo_bins["dnnpred_m"]),
                (pred_s, "pred_s", histo_bins["dnnpred_s"]),
                (nmu, "nmu", histo_bins["nmu"]),
                (nel, "nel", histo_bins["nmu"]),
                (njet, "njet", histo_bins["njet"]),
                (mu1["pt"], "mu1_pt", histo_bins["mu_pt"]),
                (mu1["eta"], "mu1_eta", histo_bins["mu_eta"]),
                (mu1["phi"], "mu1_phi", histo_bins["mu_phi"]),
                (mu1["charge"], "mu1_charge", histo_bins["mu_charge"]),
                (mu1["pfRelIso03_all"], "mu1_iso", histo_bins["mu_iso"]),
                (mu2["pt"], "mu2_pt", histo_bins["mu_pt"]),
                (mu2["eta"], "mu2_eta", histo_bins["mu_eta"]),
                (mu2["phi"], "mu2_phi", histo_bins["mu_phi"]),
                (mu2["charge"], "mu2_charge", histo_bins["mu_charge"]),
                (mu2["pfRelIso03_all"], "mu2_iso", histo_bins["mu_iso"]),
                (el1["pt"], "el1_pt", histo_bins["mu_pt"]),
                (el1["eta"], "el1_eta", histo_bins["mu_eta"]),
                (el1["phi"], "el1_phi", histo_bins["mu_phi"]),
                (el1["charge"], "el1_charge", histo_bins["mu_charge"]),
                (el1["pfRelIso03_all"], "el1_iso", histo_bins["mu_iso"]),
                (el2["pt"], "el2_pt", histo_bins["mu_pt"]),
                (el2["eta"], "el2_eta", histo_bins["mu_eta"]),
                (el2["phi"], "el2_phi", histo_bins["mu_phi"]),
                (el2["charge"], "el2_charge", histo_bins["mu_charge"]),
                (el2["pfRelIso03_all"], "el2_iso", histo_bins["mu_iso"]),
                (jet1["pt"], "j1_pt", histo_bins["jet_pt"]),
                (jet1["eta"], "j1_eta", histo_bins["jet_eta"]),
                (jet1["phi"], "j1_phi", histo_bins["jet_phi"]),
                (jet1["btag"], "j1_btag", histo_bins["jet_btag"]),
                (jet2["pt"], "j2_pt", histo_bins["jet_pt"]),
                (jet2["eta"], "j2_eta", histo_bins["jet_eta"]),
                (jet2["phi"], "j2_phi", histo_bins["jet_phi"]),
                (jet2["btag"], "j2_btag", histo_bins["jet_btag"]),
                (inv_mass_3j, "inv_mass_3j", histo_bins["inv_mass"]),
                (best_btag, "best_btag", histo_bins["jet_btag"]),
                (sumpt_jets, "sumpt", histo_bins["sumpt"]),
            ],
            selected_events, weights, use_cuda)

        # save the array for the first jet correction scenario only
        if save_arrays and ijec == 0:
            outfile_arr = "{0}_arrs.npy".format(out)
            print("Saving array with shape {0} to {1}".format(
                arr.shape, outfile_arr))
            with open(outfile_arr, "wb") as fi:
                np.save(fi, NUMPY_LIB.asnumpy(arr))

    t1 = time.time()

    res = Results({})
    for hn in hists.keys():
        hists[hn] = Results(hists[hn])
    res["hists"] = Results(hists)
    res["numevents"] = dataset.numevents()

    speed = dataset.numevents() / (t1 - t0)
    print("run_analysis: {0:.2E} events in {1:.2f} seconds, speed {2:.2E} Hz".format(
        dataset.numevents(), t1 - t0, speed))
    return res
if args.dask_server == "debug":
    ret = map(load_and_analyze, arglist)
else:
    futures = client.map(load_and_analyze, arglist, retries=3)
    ret = [fut.result() for fut in futures]

walltime_t1 = time.time()

print("Merging outputs")
hists = {ds[0]: [] for ds in datasets}
numevents = {ds[0]: 0 for ds in datasets}
for r, _args in zip(ret, arglist):
    ds = _args[2]
    hists[ds] += [Results(r["hists"])]
    numevents[ds] += r["numevents"]

timing = sum([r["timing"] for r in ret], Results({}))
timing["cuda"] = use_cuda
timing["njec"] = args.njec
timing["nthreads"] = args.nthreads
timing["walltime"] = walltime_t1 - walltime_t0

for k, v in hists.items():
    hists[k] = sum(hists[k], Results({}))

print("Writing output pkl")
with open(args.out, "wb") as fi:
    pickle.dump({
        "hists": hists,
        "numevents": numevents,
        "timing": timing,
    }, fi)
def analyze_data(data, sample, NUMPY_LIB=None, parameters={}, samples_info={},
                 is_mc=True, lumimask=None, cat=False, DNN=False,
                 DNN_model=None, jets_met_corrected=True):
    # Output structure that will be returned and added up among the files.
    # Should be relatively small.
    ret = Results()

    muons = data["Muon"]
    electrons = data["Electron"]
    scalars = data["eventvars"]
    jets = data["Jet"]

    nEvents = muons.numevents()

    indices = {}
    indices["leading"] = NUMPY_LIB.zeros(nEvents, dtype=NUMPY_LIB.int32)
    indices["subleading"] = NUMPY_LIB.ones(nEvents, dtype=NUMPY_LIB.int32)

    mask_events = NUMPY_LIB.ones(nEvents, dtype=NUMPY_LIB.bool)

    # apply event cleaning and PV selection
    flags = [
        "Flag_goodVertices", "Flag_globalSuperTightHalo2016Filter",
        "Flag_HBHENoiseFilter", "Flag_HBHENoiseIsoFilter",
        "Flag_EcalDeadCellTriggerPrimitiveFilter", "Flag_BadPFMuonFilter",
        "Flag_BadChargedCandidateFilter", "Flag_ecalBadCalibFilter"
    ]
    if not is_mc:
        flags.append("Flag_eeBadScFilter")
    for flag in flags:
        mask_events = mask_events & scalars[flag]
    mask_events = mask_events & (scalars["PV_npvsGood"] > 0)
    #mask_events = vertex_selection(scalars, mask_events)

    # apply object selection for muons, electrons, jets
    good_muons, veto_muons = lepton_selection(muons, parameters["muons"])
    good_electrons, veto_electrons = lepton_selection(electrons, parameters["electrons"])
    good_jets = jet_selection(jets, muons, (veto_muons | good_muons),
                              parameters["jets"], jets_met_corrected) & \
                jet_selection(jets, electrons, (veto_electrons | good_electrons),
                              parameters["jets"], jets_met_corrected)
    bjets = good_jets & (
        getattr(jets, parameters["btagging algorithm"]) >
        parameters["btagging WP"][parameters["btagging algorithm"]])

    # apply basic event selection -> individual categories cut later
    nleps = NUMPY_LIB.add(
        ha.sum_in_offsets(muons, good_muons, mask_events,
                          muons.masks["all"], NUMPY_LIB.int8),
        ha.sum_in_offsets(electrons, good_electrons, mask_events,
                          electrons.masks["all"], NUMPY_LIB.int8))
    nMuons = ha.sum_in_offsets(muons, good_muons, mask_events,
                               muons.masks["all"], NUMPY_LIB.int8)
    nElectrons = ha.sum_in_offsets(electrons, good_electrons, mask_events,
                                   electrons.masks["all"], NUMPY_LIB.int8)
    lepton_veto = NUMPY_LIB.add(
        ha.sum_in_offsets(muons, veto_muons, mask_events,
                          muons.masks["all"], NUMPY_LIB.int8),
        ha.sum_in_offsets(electrons, veto_electrons, mask_events,
                          electrons.masks["all"], NUMPY_LIB.int8))
    njets = ha.sum_in_offsets(jets, good_jets, mask_events,
                              jets.masks["all"], NUMPY_LIB.int8)
    btags = ha.sum_in_offsets(jets, bjets, mask_events,
                              jets.masks["all"], NUMPY_LIB.int8)

    if jets_met_corrected:
        #met = (scalars["MET_pt_nom"] > 20)
        met = (scalars["METFixEE2017_pt_nom"] > 20)
    else:
        met = (scalars["MET_pt"] > 20)

    # trigger logic
    # needs an update for different years!
    trigger_el = (scalars["HLT_Ele35_WPTight_Gsf"] |
                  scalars["HLT_Ele28_eta2p1_WPTight_Gsf_HT150"]) & \
                 (nleps == 1) & (nElectrons == 1)
    trigger_mu = (scalars["HLT_IsoMu27"]) & (nleps == 1) & (nMuons == 1)
    if not is_mc:
        # avoid double counting events across the single-lepton data streams
        if "SingleMuon" in sample:
            trigger_el = NUMPY_LIB.zeros(nEvents, dtype=NUMPY_LIB.bool)
        if "SingleElectron" in sample:
            trigger_mu = NUMPY_LIB.zeros(nEvents, dtype=NUMPY_LIB.bool)
    mask_events = mask_events & (trigger_el | trigger_mu)

    mask_events = mask_events & (nleps == 1) & (lepton_veto == 0) & \
                  (njets >= 4) & (btags >= 2) & met

    ### calculation of all needed variables
    var = {}
    var["njets"] = njets
    var["btags"] = btags
    var["nleps"] = nleps

    if jets_met_corrected:
        pt_label = "pt_nom"
    else:
        pt_label = "pt"
    variables = [
        ("jet", jets, good_jets, "leading", [pt_label, "eta"]),
        ("bjet", jets, bjets, "leading", [pt_label, "eta"]),
    ]

    # special role of lepton
    var["leading_lepton_pt"] = NUMPY_LIB.maximum(
        ha.get_in_offsets(muons.pt, muons.offsets, indices["leading"],
                          mask_events, good_muons),
        ha.get_in_offsets(electrons.pt, electrons.offsets, indices["leading"],
                          mask_events, good_electrons))
    var["leading_lepton_eta"] = NUMPY_LIB.maximum(
        ha.get_in_offsets(muons.eta, muons.offsets, indices["leading"],
                          mask_events, good_muons),
        ha.get_in_offsets(electrons.eta, electrons.offsets, indices["leading"],
                          mask_events, good_electrons))

    # all other variables
    for v in variables:
        calculate_variable_features(v, mask_events, indices, var)

    #synch
    #mask = (scalars["event"] == 2895765)

    # calculate weights for MC samples
    weights = {}
    weights["nominal"] = NUMPY_LIB.ones(nEvents, dtype=NUMPY_LIB.float32)

    if is_mc:
        weights["nominal"] = weights["nominal"] * scalars["genWeight"] * \
            parameters["lumi"] * samples_info[sample]["XS"] / \
            samples_info[sample]["ngen_weight"]

        # pu corrections
        #pu_weights = compute_pu_weights(parameters["pu_corrections_target"], weights["nominal"], scalars["Pileup_nTrueInt"], scalars["PV_npvsGood"])
        pu_weights = compute_pu_weights(parameters["pu_corrections_target"],
                                        weights["nominal"],
                                        scalars["Pileup_nTrueInt"],
                                        scalars["Pileup_nTrueInt"])
        weights["nominal"] = weights["nominal"] * pu_weights
        var["pu_weights"] = pu_weights

        # lepton SF corrections
        electron_weights = compute_lepton_weights(
            electrons, (electrons.deltaEtaSC + electrons.eta), electrons.pt,
            mask_events, good_electrons, evaluator,
            ["el_triggerSF", "el_recoSF", "el_idSF"])
        muon_weights = compute_lepton_weights(
            muons, muons.pt, NUMPY_LIB.abs(muons.eta), mask_events,
            good_muons, evaluator, ["mu_triggerSF", "mu_isoSF", "mu_idSF"])
        weights["nominal"] = weights["nominal"] * muon_weights * electron_weights

        # btag SF corrections
        btag_weights = compute_btag_weights(jets, mask_events, good_jets,
                                            parameters["btag_SF_target"],
                                            jets_met_corrected,
                                            parameters["btagging algorithm"])
        var["btag_weights"] = btag_weights
        weights["nominal"] = weights["nominal"] * btag_weights

    # in case of data: check if event is in golden lumi file
    if not is_mc and not (lumimask is None):
        mask_lumi = lumimask(scalars["run"], scalars["luminosityBlock"])
        mask_events = mask_events & mask_lumi

    # evaluate DNN
    if DNN:
        DNN_pred = evaluate_DNN(jets, good_jets, electrons, good_electrons,
                                muons, good_muons, scalars, mask_events,
                                nEvents, DNN, DNN_model)

    # in case of tt+jets -> split in ttbb, tt2b, ttb, ttcc, ttlf
    processes = {}
    if sample.startswith("TTTo"):  # changed for TTV samples
        ttCls = scalars["genTtbarId"] % 100
        processes["ttbb"] = mask_events & (ttCls >= 53) & (ttCls <= 56)
        processes["tt2b"] = mask_events & (ttCls == 52)
        processes["ttb"] = mask_events & (ttCls == 51)
        processes["ttcc"] = mask_events & (ttCls >= 41) & (ttCls <= 45)
        ttHF = ((ttCls >= 53) & (ttCls <= 56)) | (ttCls == 52) | \
               (ttCls == 51) | ((ttCls >= 41) & (ttCls <= 45))
        processes["ttlf"] = mask_events & NUMPY_LIB.invert(ttHF)
    else:
        processes["unsplit"] = mask_events

    for p in processes.keys():
        mask_events_split = processes[p]

        # Categories
        categories = {}
        categories["sl_jge4_tge2"] = mask_events_split
        categories["sl_jge4_tge3"] = mask_events_split & (btags >= 3)
        categories["sl_jge4_tge4"] = mask_events_split & (btags >= 4)
        categories["sl_j4_tge3"] = mask_events_split & (njets == 4) & (btags >= 3)
        categories["sl_j5_tge3"] = mask_events_split & (njets == 5) & (btags >= 3)
        categories["sl_jge6_tge3"] = mask_events_split & (njets >= 6) & (btags >= 3)
        categories["sl_j4_t3"] = mask_events_split & (njets == 4) & (btags == 3)
        categories["sl_j4_tge4"] = mask_events_split & (njets == 4) & (btags >= 4)
        categories["sl_j5_t3"] = mask_events_split & (njets == 5) & (btags == 3)
        categories["sl_j5_tge4"] = mask_events_split & (njets == 5) & (btags >= 4)
        categories["sl_jge6_t3"] = mask_events_split & (njets >= 6) & (btags == 3)
        categories["sl_jge6_tge4"] = mask_events_split & (njets >= 6) & (btags >= 4)

        #print("sl_j4_t3", scalars["event"][categories["sl_j4_t3"]], len(scalars["event"][categories["sl_j4_t3"]]))
        #print("sl_j5_t3", scalars["event"][categories["sl_j5_t3"]], len(scalars["event"][categories["sl_j5_t3"]]))
        #print("sl_jge6_t3", scalars["event"][categories["sl_jge6_t3"]], len(scalars["event"][categories["sl_jge6_t3"]]))
        #print("sl_j4_tge4", scalars["event"][categories["sl_j4_tge4"]], len(scalars["event"][categories["sl_j4_tge4"]]))
        #print("sl_j5_tge4", scalars["event"][categories["sl_j5_tge4"]], len(scalars["event"][categories["sl_j5_tge4"]]))
        #print("sl_jge6_tge4", scalars["event"][categories["sl_jge6_tge4"]], len(scalars["event"][categories["sl_jge6_tge4"]]))

        if not isinstance(cat, list):
            cat = [cat]
        for c in cat:
            cut = categories[c]
            cut_name = c

            if p == "unsplit":
                if "Run" in sample:
                    name = "data" + "_" + cut_name
                else:
                    name = samples_info[sample]["process"] + "_" + cut_name
            else:
                name = p + "_" + cut_name

            # create histograms filled with weighted events
            for k in var.keys():
                if k not in histogram_settings.keys():
                    raise Exception(
                        "please add variable {0} to definitions_analysis.py".format(k))
                hist = Histogram(*ha.histogram_from_vector(
                    var[k][cut], weights["nominal"][cut],
                    NUMPY_LIB.linspace(histogram_settings[k][0],
                                       histogram_settings[k][1],
                                       histogram_settings[k][2])))
                ret["hist_{0}_{1}".format(name, k)] = hist

            if DNN:
                if DNN == "mass_fit":
                    hist_DNN = Histogram(*ha.histogram_from_vector(
                        DNN_pred[cut], weights["nominal"][cut],
                        NUMPY_LIB.linspace(0., 300., 30)))
                    hist_DNN_zoom = Histogram(*ha.histogram_from_vector(
                        DNN_pred[cut], weights["nominal"][cut],
                        NUMPY_LIB.linspace(0., 170., 30)))
                    # the zoomed histogram only exists for the mass fit
                    ret["hist_{0}_DNN_zoom".format(name)] = hist_DNN_zoom
                else:
                    hist_DNN = Histogram(*ha.histogram_from_vector(
                        DNN_pred[cut], weights["nominal"][cut],
                        NUMPY_LIB.linspace(0., 1., 16)))
                ret["hist_{0}_DNN".format(name)] = hist_DNN

    #TODO: implement JECs

    ## To display properties of a single event
    #evts = [5991859]
    #mask = NUMPY_LIB.zeros_like(mask_events)
    #for iev in evts:
    #    mask |= (scalars["event"] == iev)
    #print("mask", mask)
    #print('nevt', scalars["event"][mask])
    #print('pass sel', mask_events[mask])
    #print('nleps', nleps[mask])
    #print('njets', njets[mask])
    ##print('met', scalars['MET_pt_nom'][mask])
    ##print('lep_pt', leading_lepton_pt[mask])
    ##print('jet_pt', leading_jet_pt[mask])
    ##print('lep_eta', leading_lepton_eta[mask])
    #print('pu_weight', pu_weights[mask])
    #print('btag_weight', btag_weights[mask])
    #print('lep_weight', muon_weights[mask] * electron_weights[mask])
    #print('nevents', np.count_nonzero(mask_events))
    #np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})
    #for evt in evts:
    #    evt_idx = NUMPY_LIB.where(scalars["event"] == evt)[0][0]
    #    start = jets.offsets[evt_idx]
    #    stop = jets.offsets[evt_idx + 1]
    #    print(f'!!! EVENT {evt} !!!')
    #    print(f'njets good {njets[evt_idx]}, total {stop-start}')
    #    #print('jets mask', nonbjets[start:stop])
    #    print('jets pt', jets.pt_nom[start:stop])
    #    print('jets eta', jets.eta[start:stop])
    #    print('jets btag', getattr(jets, parameters["btagging algorithm"])[start:stop])
    #    print('jet Id', jets.jetId[start:stop])
    #    print('jet puId', jets.puId[start:stop])

    return ret
filenames = None
if args.filelist is not None:
    filenames = [l.strip() for l in open(args.filelist).readlines()]
else:
    filenames = args.filenames
print("Number of files:", len(filenames))

for fn in filenames:
    if not fn.endswith(".root"):
        print(fn)
        raise Exception("Must supply ROOT filename, but got {0}".format(fn))

results = Results()
for ibatch, files_in_batch in enumerate(chunks(filenames, args.files_per_batch)):
    # define our dataset
    structs = ["Jet", "Muon", "Electron"]
    dataset = NanoAODDataset(files_in_batch, arrays_objects + arrays_event,
                             "Events", structs, arrays_event)
    dataset.get_cache_dir = lambda fn, loc=args.cache_location: os.path.join(loc, fn)

    if not args.from_cache:
        # Load data from ROOT files
        dataset.preload(nthreads=args.nthreads, verbose=True)
def analyze_data_function(data, parameters):
    ret = Results()
    ha = parameters["ha"]
    num_events = data["num_events"]

    lep = data["Lep"]
    lep.hepaccelerate_backend = ha
    lep.attrs_data["pt"] = lep.lep_pt
    lep.attrs_data["eta"] = lep.lep_eta
    lep.attrs_data["phi"] = lep.lep_phi
    lep.attrs_data["charge"] = lep.lep_charge
    lep.attrs_data["type"] = lep.lep_type

    # assign the lepton masses (in MeV) according to the PDG type
    lep_mass = np.zeros_like(lep["pt"], dtype=nplib.float32)
    lep_mass = np.where(lep["type"] == 11, 0.511, lep_mass)
    lep_mass = np.where(lep["type"] == 13, 105.65837, lep_mass)
    lep.attrs_data["mass"] = lep_mass

    mask_events = nplib.ones(lep.numevents(), dtype=nplib.bool)

    # lepton isolation cuts, separately for electrons and muons
    lep_ele = lep["type"] == 11
    lep_muon = lep["type"] == 13
    ele_Iso = np.logical_and(
        lep_ele, np.logical_and(lep.lep_ptcone30 / lep.pt < 0.15,
                                lep.lep_etcone20 / lep.pt < 0.20))
    muon_Iso = np.logical_and(
        lep_muon, np.logical_and(lep.lep_ptcone30 / lep.pt < 0.15,
                                 lep.lep_etcone20 / lep.pt < 0.30))
    pass_iso = np.logical_or(ele_Iso, muon_Iso)
    lep.attrs_data["pass_iso"] = pass_iso

    # select events with exactly four leptons
    num_lep_event = kernels.sum_in_offsets(
        backend, lep.offsets, lep.masks["all"], mask_events,
        lep.masks["all"], nplib.int8)
    mask_events_4lep = num_lep_event == 4

    lep_attrs = ["pt", "eta", "phi", "charge", "type", "mass", "pass_iso"]  # , "ptcone30", "etcone20"
    lep0 = lep.select_nth(0, mask_events_4lep, lep.masks["all"], attributes=lep_attrs)
    lep1 = lep.select_nth(1, mask_events_4lep, lep.masks["all"], attributes=lep_attrs)
    lep2 = lep.select_nth(2, mask_events_4lep, lep.masks["all"], attributes=lep_attrs)
    lep3 = lep.select_nth(3, mask_events_4lep, lep.masks["all"], attributes=lep_attrs)

    mask_event_sumchg_zero = (lep0["charge"] + lep1["charge"] +
                              lep2["charge"] + lep3["charge"] == 0)
    sum_lep_type = lep0["type"] + lep1["type"] + lep2["type"] + lep3["type"]
    all_pass_iso = (lep0["pass_iso"] & lep1["pass_iso"] &
                    lep2["pass_iso"] & lep3["pass_iso"])
    # allowed flavour combinations: 4e (44), 2e2mu (48), 4mu (52)
    mask_event_sum_lep_type = np.logical_or(
        (sum_lep_type == 44),
        np.logical_or((sum_lep_type == 48), (sum_lep_type == 52)))

    mask_events = (mask_events & mask_event_sumchg_zero & mask_events_4lep &
                   mask_event_sum_lep_type & all_pass_iso)

    mask_lep1_passing_pt = lep1["pt"] > parameters["leading_lep_ptcut"]
    mask_lep2_passing_pt = lep2["pt"] > parameters["lep_ptcut"]
    mask_events = mask_events & mask_lep1_passing_pt & mask_lep2_passing_pt

    # build the four-lepton system and compute its invariant mass
    l0 = to_cartesian(lep0)
    l1 = to_cartesian(lep1)
    l2 = to_cartesian(lep2)
    l3 = to_cartesian(lep3)
    llll = {k: l0[k] + l1[k] + l2[k] + l3[k] for k in ["px", "py", "pz", "e"]}
    llll_sph = to_spherical(llll)
    llll_sph["mass"] = llll_sph["mass"] / 1000.  # convert to GeV

    # compute a weighted histogram
    weights = nplib.ones(num_events, dtype=nplib.float32)
    # add cross-section weights based on the sample name
    if parameters["is_mc"]:
        weights = (data["eventvars"]["mcWeight"] *
                   data["eventvars"]["scaleFactor_PILEUP"] *
                   data["eventvars"]["scaleFactor_ELE"] *
                   data["eventvars"]["scaleFactor_MUON"] *
                   data["eventvars"]["scaleFactor_LepTRIGGER"])
        info = infofile.infos[parameters["sample"]]
        weights *= (lumi * 1000 * info["xsec"]) / (info["sumw"] * info["red_eff"])

    bins = nplib.linspace(110, 150, 11, dtype=nplib.float32)
    hist_m4lep = Histogram(*kernels.histogram_from_vector(
        backend, llll_sph["mass"][mask_events], weights[mask_events], bins))

    # save it to the output
    ret["hist_m4lep"] = hist_m4lep
    return ret
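# --- Kinematics sketch (not in the original source) ---
# to_cartesian and to_spherical are helpers defined elsewhere in this code;
# minimal versions consistent with their use above would convert between
# (pt, eta, phi, mass) and (px, py, pz, e) using standard collider
# kinematics. The names below are hypothetical stand-ins.
import numpy as np

def to_cartesian_sketch(lep):
    px = lep["pt"] * np.cos(lep["phi"])
    py = lep["pt"] * np.sin(lep["phi"])
    pz = lep["pt"] * np.sinh(lep["eta"])
    e = np.sqrt(px**2 + py**2 + pz**2 + lep["mass"]**2)
    return {"px": px, "py": py, "pz": pz, "e": e}

def to_spherical_sketch(v):
    # invariant mass of the summed four-vector: m^2 = E^2 - |p|^2
    p2 = v["px"]**2 + v["py"]**2 + v["pz"]**2
    return {"mass": np.sqrt(np.maximum(v["e"]**2 - p2, 0.0))}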
def analyze_data(data, sample, NUMPY_LIB=None, parameters={}, samples_info={},
                 is_mc=True, lumimask=None, cat=False, boosted=False,
                 DNN=False, DNN_model=None):
    # Output structure that will be returned and added up among the files.
    # Should be relatively small.
    ret = Results()

    muons = data["Muon"]
    electrons = data["Electron"]
    scalars = data["eventvars"]
    jets = data["Jet"]

    nEvents = muons.numevents()

    mask_events = NUMPY_LIB.ones(nEvents, dtype=NUMPY_LIB.bool)

    # apply event cleaning, PV selection and trigger selection
    flags = [
        "Flag_goodVertices", "Flag_globalSuperTightHalo2016Filter",
        "Flag_HBHENoiseFilter", "Flag_HBHENoiseIsoFilter",
        "Flag_EcalDeadCellTriggerPrimitiveFilter", "Flag_BadPFMuonFilter",
        "Flag_BadChargedCandidateFilter", "Flag_ecalBadCalibFilter"
    ]
    if not is_mc:
        flags.append("Flag_eeBadScFilter")
    for flag in flags:
        mask_events = mask_events & scalars[flag]
    if args.year.startswith("2016"):
        trigger = (scalars["HLT_Ele27_WPTight_Gsf"] |
                   scalars["HLT_IsoMu24"] | scalars["HLT_IsoTkMu24"])
    else:
        trigger = (scalars["HLT_Ele35_WPTight_Gsf"] |
                   scalars["HLT_Ele28_eta2p1_WPTight_Gsf_HT150"] |
                   scalars["HLT_IsoMu27"])
    mask_events = mask_events & trigger
    mask_events = mask_events & (scalars["PV_npvsGood"] > 0)
    #mask_events = vertex_selection(scalars, mask_events)

    # apply object selection for muons, electrons, jets
    good_muons, veto_muons = lepton_selection(muons, parameters["muons"])
    good_electrons, veto_electrons = lepton_selection(electrons, parameters["electrons"])
    good_jets = jet_selection(jets, muons, (veto_muons | good_muons), parameters["jets"]) & \
                jet_selection(jets, electrons, (veto_electrons | good_electrons), parameters["jets"])
    bjets = good_jets & (getattr(jets, parameters["btagging algorithm"]) >
                         parameters["btagging WP"])

    # apply basic event selection -> individual categories cut later
    nleps = NUMPY_LIB.add(
        ha.sum_in_offsets(muons, good_muons, mask_events,
                          muons.masks["all"], NUMPY_LIB.int8),
        ha.sum_in_offsets(electrons, good_electrons, mask_events,
                          electrons.masks["all"], NUMPY_LIB.int8))
    lepton_veto = NUMPY_LIB.add(
        ha.sum_in_offsets(muons, veto_muons, mask_events,
                          muons.masks["all"], NUMPY_LIB.int8),
        ha.sum_in_offsets(electrons, veto_electrons, mask_events,
                          electrons.masks["all"], NUMPY_LIB.int8))
    njets = ha.sum_in_offsets(jets, good_jets, mask_events,
                              jets.masks["all"], NUMPY_LIB.int8)
    btags = ha.sum_in_offsets(jets, bjets, mask_events,
                              jets.masks["all"], NUMPY_LIB.int8)
    met = (scalars["MET_pt"] > 20)

    # apply basic event definition (inverted for boosted analysis)
    if boosted:
        mask_events = mask_events & (nleps == 1) & (lepton_veto == 0) & \
            NUMPY_LIB.invert((njets >= 4) & (btags >= 2)) & met
    else:
        mask_events = mask_events & (nleps == 1) & (lepton_veto == 0) & \
            (njets >= 4) & (btags >= 2) & met

    ### check overlap between AK4 and AK8 jets: if (based on tau32 and tau21)
    ### the AK8 jet is a t/H/W candidate remove the AK4 jet, otherwise remove the AK8 jet
    if boosted:
        fatjets = data["FatJet"]
        genparts = data["GenPart"]

        # get fatjets
        good_fatjets = jet_selection(fatjets, muons, (veto_muons | good_muons), parameters["fatjets"]) & \
                       jet_selection(fatjets, electrons, (veto_electrons | good_electrons), parameters["fatjets"])
        bfatjets = good_fatjets & (fatjets.btagHbb > parameters["bbtagging WP"])

        fatjets.tau32 = NUMPY_LIB.divide(fatjets.tau3, fatjets.tau2)
        fatjets.tau21 = NUMPY_LIB.divide(fatjets.tau2, fatjets.tau1)

        jets_to_keep = ha.mask_overlappingAK4(
            jets, good_jets, fatjets, good_fatjets, 1.2,
            tau32cut=parameters["fatjets"]["tau32cut"],
            tau21cut=parameters["fatjets"]["tau21cut"])
        non_overlapping_fatjets = ha.mask_deltar_first(
            fatjets, good_fatjets, jets, good_jets, 1.2)

        good_jets &= jets_to_keep
        # we keep fat jets which are not overlapping, or which are either a top or W/H candidate
        good_fatjets &= non_overlapping_fatjets | \
            (fatjets.tau32 < parameters["fatjets"]["tau32cut"]) | \
            (fatjets.tau21 < parameters["fatjets"]["tau21cut"])

        top_candidates = (fatjets.tau32 < parameters["fatjets"]["tau32cut"])
        WH_candidates = (fatjets.tau32 > parameters["fatjets"]["tau32cut"]) & \
                        (fatjets.tau21 < parameters["fatjets"]["tau21cut"])
        bjets = good_jets & (jets.btagDeepB > parameters["btagging WP"])

        njets = ha.sum_in_offsets(jets, good_jets, mask_events,
                                  jets.masks["all"], NUMPY_LIB.int8)
        btags = ha.sum_in_offsets(jets, bjets, mask_events,
                                  jets.masks["all"], NUMPY_LIB.int8)
        bbtags = ha.sum_in_offsets(fatjets, bfatjets, mask_events,
                                   fatjets.masks["all"], NUMPY_LIB.int8)
        ntop_candidates = ha.sum_in_offsets(fatjets, top_candidates, mask_events,
                                            fatjets.masks["all"], NUMPY_LIB.int8)
        nWH_candidates = ha.sum_in_offsets(fatjets, WH_candidates, mask_events,
                                           fatjets.masks["all"], NUMPY_LIB.int8)

        ### 2 fat jets from H and W, 2 b jets from the tops
        #mask_events &= (nWH_candidates > 1) & (btags > 1)
        ### 1 top candidate and 1 H candidate, and 1 b jet from the leptonic top
        mask_events &= (ntop_candidates > 0) & (nWH_candidates > 0) & (btags > 0)

    ### calculation of all needed variables
    var = {}
    var["njets"] = njets
    var["btags"] = btags
    var["nleps"] = nleps
    if boosted:
        higgs = (genparts.pdgId == 25) & (genparts.status == 62)
        tops = ((genparts.pdgId == 6) | (genparts.pdgId == -6)) & (genparts.status == 62)
        var["nfatjets"] = ha.sum_in_offsets(fatjets, good_fatjets, mask_events,
                                            fatjets.masks["all"], NUMPY_LIB.int8)
        var["ntop_candidates"] = ha.sum_in_offsets(fatjets, tops, mask_events,
                                                   fatjets.masks["all"], NUMPY_LIB.int8)

    indices = {}
    indices["leading"] = NUMPY_LIB.zeros(nEvents, dtype=NUMPY_LIB.int32)
    indices["subleading"] = NUMPY_LIB.ones(nEvents, dtype=NUMPY_LIB.int32)
    if boosted:
        indices["inds_WHcandidates"] = ha.index_in_offsets(
            fatjets.btagHbb, fatjets.offsets, 1, mask_events, WH_candidates)

    variables = [
        ("jet", jets, good_jets, "leading", ["pt", "eta"]),
        ("bjet", jets, bjets, "leading", ["pt", "eta"]),
    ]
    if boosted:
        variables += [
            ("fatjet", fatjets, good_fatjets, "leading",
             ["pt", "eta", "mass", "msoftdrop", "tau32", "tau21"]),
            ("fatjet", fatjets, good_fatjets, "subleading",
             ["pt", "eta", "mass", "msoftdrop", "tau32", "tau21"]),
            ("top_candidate", fatjets, top_candidates, "leading",
             ["pt", "eta", "mass", "msoftdrop", "tau32", "tau21"]),
            ("WH_candidate", fatjets, WH_candidates, "inds_WHcandidates",
             ["pt", "eta", "mass", "msoftdrop", "tau32", "tau21"]),
            ("higgs", genparts, higgs, "leading", ["pt", "eta"]),
            ("tops", genparts, tops, "leading", ["pt", "eta"]),
        ]

    # special role of lepton
    var["leading_lepton_pt"] = NUMPY_LIB.maximum(
        ha.get_in_offsets(muons.pt, muons.offsets, indices["leading"],
                          mask_events, good_muons),
        ha.get_in_offsets(electrons.pt, electrons.offsets, indices["leading"],
                          mask_events, good_electrons))
    var["leading_lepton_eta"] = NUMPY_LIB.maximum(
        ha.get_in_offsets(muons.eta, muons.offsets, indices["leading"],
                          mask_events, good_muons),
        ha.get_in_offsets(electrons.eta, electrons.offsets, indices["leading"],
                          mask_events, good_electrons))

    # all other variables
    for v in variables:
        calculate_variable_features(v, mask_events, indices, var)

    # calculate weights for MC samples
    weights = {}
    weights["nominal"] = NUMPY_LIB.ones(nEvents, dtype=NUMPY_LIB.float32)

    if is_mc:
        weights["nominal"] = weights["nominal"] * scalars["genWeight"] * \
            parameters["lumi"] * samples_info[sample]["XS"] / \
            samples_info[sample]["ngen_weight"]

        # pu corrections
        pu_weights = compute_pu_weights(parameters["pu_corrections_target"],
                                        weights["nominal"],
                                        scalars["Pileup_nTrueInt"],
                                        scalars["PV_npvsGood"])
        weights["nominal"] = weights["nominal"] * pu_weights

        # lepton SF corrections
        electron_weights = compute_lepton_weights(
            electrons, electrons.pt, (electrons.deltaEtaSC + electrons.eta),
            mask_events, good_electrons, evaluator,
            ["el_triggerSF", "el_recoSF", "el_idSF"])
        muon_weights = compute_lepton_weights(
            muons, muons.pt, NUMPY_LIB.abs(muons.eta), mask_events,
            good_muons, evaluator, ["mu_triggerSF", "mu_isoSF", "mu_idSF"])
        weights["nominal"] = weights["nominal"] * muon_weights * electron_weights

        # btag SF corrections
        btag_weights = compute_btag_weights(jets, mask_events, good_jets, evaluator)
        weights["nominal"] = weights["nominal"] * btag_weights

    # in case of data: check if event is in golden lumi file
    if not is_mc and not (lumimask is None):
        mask_lumi = lumimask(scalars["run"], scalars["luminosityBlock"])
        mask_events = mask_events & mask_lumi

    # evaluate DNN
    if DNN:
        DNN_pred = evaluate_DNN(jets, good_jets, electrons, good_electrons,
                                muons, good_muons, scalars, mask_events,
                                DNN, DNN_model)

    # in case of tt+jets -> split in ttbb, tt2b, ttb, ttcc, ttlf
    processes = {}
    if sample.startswith("TT"):
        ttCls = scalars["genTtbarId"] % 100
        processes["ttbb"] = mask_events & (ttCls >= 53) & (ttCls <= 56)
        processes["tt2b"] = mask_events & (ttCls == 52)
        processes["ttb"] = mask_events & (ttCls == 51)
        processes["ttcc"] = mask_events & (ttCls >= 41) & (ttCls <= 45)
        ttHF = ((ttCls >= 53) & (ttCls <= 56)) | (ttCls == 52) | \
               (ttCls == 51) | ((ttCls >= 41) & (ttCls <= 45))
        processes["ttlf"] = mask_events & NUMPY_LIB.invert(ttHF)
    else:
        processes["unsplit"] = mask_events

    for p in processes.keys():
        mask_events_split = processes[p]

        # Categories
        categories = {}
        if not boosted:
            categories["sl_jge4_tge2"] = mask_events_split
            categories["sl_jge4_tge3"] = mask_events_split & (btags >= 3)
            categories["sl_j4_tge3"] = mask_events_split & (njets == 4) & (btags >= 3)
            categories["sl_j5_tge3"] = mask_events_split & (njets == 5) & (btags >= 3)
            categories["sl_jge6_tge3"] = mask_events_split & (njets >= 6) & (btags >= 3)
            categories["sl_j4_t3"] = mask_events_split & (njets == 4) & (btags == 3)
            categories["sl_j4_tge4"] = mask_events_split & (njets == 4) & (btags >= 4)
            categories["sl_j5_t3"] = mask_events_split & (njets == 5) & (btags == 3)
            categories["sl_j5_tge4"] = mask_events_split & (njets == 5) & (btags >= 4)
            categories["sl_jge6_t3"] = mask_events_split & (njets >= 6) & (btags == 3)
            categories["sl_jge6_tge4"] = mask_events_split & (njets >= 6) & (btags >= 4)

        if not isinstance(cat, list):
            cat = [cat]
        for c in cat:
            cut = categories[c]
            cut_name = c

            if p == "unsplit":
                if "Run" in sample:
                    name = "data" + "_" + cut_name
                else:
                    name = samples_info[sample]["process"] + "_" + cut_name
            else:
                name = p + "_" + cut_name

            # create histograms filled with weighted events
            for k in var.keys():
                if k not in histogram_settings.keys():
                    raise Exception(
                        "please add variable {0} to config_analysis.py".format(k))
                hist = Histogram(*ha.histogram_from_vector(
                    var[k][cut], weights["nominal"][cut],
                    NUMPY_LIB.linspace(histogram_settings[k][0],
                                       histogram_settings[k][1],
                                       histogram_settings[k][2])))
                ret["hist_{0}_{1}".format(name, k)] = hist

            if DNN:
                if DNN.endswith("multiclass"):
                    class_pred = NUMPY_LIB.argmax(DNN_pred, axis=1)
                    for n, n_name in zip([0, 1, 2, 3, 4, 5],
                                         ["ttH", "ttbb", "tt2b", "ttb", "ttcc", "ttlf"]):
                        node = (class_pred == n)
                        DNN_node = DNN_pred[:, n]
                        hist_DNN = Histogram(*ha.histogram_from_vector(
                            DNN_node[(cut & node)], weights["nominal"][(cut & node)],
                            NUMPY_LIB.linspace(0., 1., 16)))
                        ret["hist_{0}_DNN_{1}".format(name, n_name)] = hist_DNN
                        hist_DNN_ROC = Histogram(*ha.histogram_from_vector(
                            DNN_node[(cut & node)], weights["nominal"][(cut & node)],
                            NUMPY_LIB.linspace(0., 1., 1000)))
                        ret["hist_{0}_DNN_ROC_{1}".format(name, n_name)] = hist_DNN_ROC
                else:
                    hist_DNN = Histogram(*ha.histogram_from_vector(
                        DNN_pred[cut], weights["nominal"][cut],
                        NUMPY_LIB.linspace(0., 1., 16)))
                    ret["hist_{0}_DNN".format(name)] = hist_DNN
                    hist_DNN_ROC = Histogram(*ha.histogram_from_vector(
                        DNN_pred[cut], weights["nominal"][cut],
                        NUMPY_LIB.linspace(0., 1., 1000)))
                    ret["hist_{0}_DNN_ROC".format(name)] = hist_DNN_ROC

    #TODO: implement JECs
    return ret
arrays_event += ["PV_npvsGood", "Pileup_nTrueInt", "genWeight"]

filenames = None
if args.filelist is not None:
    filenames = [l.strip() for l in open(args.filelist).readlines()]
else:
    filenames = args.filenames
print("Number of files:", len(filenames))

for fn in filenames:
    if not fn.endswith(".root"):
        print(fn)
        raise Exception("Must supply ROOT filename, but got {0}".format(fn))

results = Results()
for ibatch, files_in_batch in enumerate(chunks(filenames, args.files_per_batch)):
    # define our dataset
    structs = ["Jet", "Muon", "Electron"]
    if args.boosted:
        # extend (not append) so the struct names stay a flat list
        structs += ["FatJet", "GenPart"]
    dataset = NanoAODDataset(files_in_batch, arrays_objects + arrays_event,
                             "Events", structs, arrays_event)
    dataset.get_cache_dir = lambda fn, loc=args.cache_location: os.path.join(loc, fn)

    if not args.from_cache:
        # Load data from ROOT files
        dataset.preload(nthreads=args.nthreads, verbose=True)
        # prepare the object arrays on the host or device
import numpy as np
from hepaccelerate.utils import Histogram, Results
from glob import glob
import json
import os
import argparse

flist = glob('results/201*/v12/met20_btagDDBvL086/nominal/btagEfficiencyMaps/out_btagEfficiencyMaps_*json')

def divide(h1, h2):
    contents = h1.contents / h2.contents
    contents_w2 = h1.contents_w2 / h2.contents_w2
    edges = h1.edges
    return Histogram(contents, contents_w2, edges)

for fn in flist:
    with open(fn) as f:
        data = json.load(f)
    for h in data:
        data[h] = Histogram(*data[h].values())
    # per-flavour b-tagging efficiency = (b-tagged counts) / (total counts)
    for flav in ['b', 'l', 'lc']:
        for var in ['central', 'updown']:
            data[f'eff_flav{flav}_{var}'] = divide(
                data[f'btags_flav{flav}_{var}'],
                data[f'total_flav{flav}_{var}'])
    ret = Results(data)
    ret.save_json(fn)
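# --- Illustration (not in the original source) ---
# Toy example of divide(): a per-bin efficiency from a "pass" and a "total"
# histogram; the Histogram(contents, contents_w2, edges) layout matches how
# divide() unpacks its inputs above.
num = Histogram(np.array([5., 10.]), np.array([5., 10.]), np.array([0., 1., 2.]))
den = Histogram(np.array([10., 20.]), np.array([10., 20.]), np.array([0., 1., 2.]))
eff = divide(num, den)
print(eff.contents)  # -> [0.5 0.5]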