def __init__(self):
    ## make binning for hists
    self.dataset_axis = hist.Cat("dataset", "Event Process")
    self.pu_nTrueInt_axis = hist.Bin("pu_nTrueInt", "nTrueInt", 100, 0, 100)
    self.pu_nPU_axis = hist.Bin("pu_nPU", "nPU", 100, 0, 100)

    ## make dictionary of hists
    histo_dict = {}
    histo_dict['PU_nTrueInt'] = hist.Hist("PU_nTrueInt", self.dataset_axis, self.pu_nTrueInt_axis)
    histo_dict['PU_nPU'] = hist.Hist("PU_nPU", self.dataset_axis, self.pu_nPU_axis)
    #set_trace()

    ## construct dictionary of dictionaries to hold meta info for each sample
    for sample in fileset.keys():
        if 'Int' in sample:
            histo_dict['%s_pos' % sample] = processor.defaultdict_accumulator(int)
            histo_dict['%s_pos_runs_to_lumis' % sample] = processor.value_accumulator(list)
            histo_dict['%s_neg' % sample] = processor.defaultdict_accumulator(int)
            histo_dict['%s_neg_runs_to_lumis' % sample] = processor.value_accumulator(list)
        else:
            histo_dict[sample] = processor.defaultdict_accumulator(int)
            histo_dict['%s_runs_to_lumis' % sample] = processor.value_accumulator(list)

    self._accumulator = processor.dict_accumulator(histo_dict)
    self.sample_name = ''

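# A minimal sketch (not in the original) of how the accumulator booked above might be
# filled inside process(). It assumes a NanoEvents-style input where the dataset name
# is available as events.metadata['dataset'] and the pileup quantities as the flat
# branches Pileup_nTrueInt and Pileup_nPU; adapt the names to the actual ntuple layout.
def process(self, events):
    output = self._accumulator.identity()
    self.sample_name = events.metadata['dataset']
    output['PU_nTrueInt'].fill(dataset=self.sample_name, pu_nTrueInt=events['Pileup_nTrueInt'])
    output['PU_nPU'].fill(dataset=self.sample_name, pu_nPU=events['Pileup_nPU'])
    return output
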
def test_accumulators():
    a = processor.value_accumulator(float)
    a += 3.0
    assert a.value == 3.0
    assert a.identity().value == 0.0

    a = processor.value_accumulator(partial(np.array, [2.0]))
    a += 3.0
    assert np.array_equal(a.value, np.array([5.0]))
    assert np.array_equal(a.identity().value, np.array([2.0]))

    lacc = processor.list_accumulator(range(4))
    lacc += [3]
    lacc += processor.list_accumulator([1, 2])
    assert lacc == [0, 1, 2, 3, 3, 1, 2]

    b = processor.set_accumulator({"apples", "oranges"})
    b += {"pears"}
    b += "grapes"
    assert b == {"apples", "oranges", "pears", "grapes"}

    c = processor.dict_accumulator({"num": a, "fruit": b})
    c["num"] += 2.0
    c += processor.dict_accumulator({
        "num2": processor.value_accumulator(int),
        "fruit": processor.set_accumulator({"apples", "cherries"}),
    })
    assert c["num2"].value == 0
    assert np.array_equal(c["num"].value, np.array([7.0]))
    assert c["fruit"] == {"apples", "oranges", "pears", "grapes", "cherries"}

    d = processor.defaultdict_accumulator(float)
    d["x"] = 0.0
    d["x"] += 4.0
    d["y"] += 5.0
    d["z"] += d["x"]
    d["x"] += d["y"]
    assert d["x"] == 9.0
    assert d["y"] == 5.0
    assert d["z"] == 4.0
    assert d["w"] == 0.0

    f = processor.defaultdict_accumulator(lambda: 2.0)
    f["x"] += 4.0
    assert f["x"] == 6.0
    f += f
    assert f["x"] == 12.0
    assert f["y"] == 2.0

    a = processor.column_accumulator(np.arange(6).reshape(2, 3))
    b = processor.column_accumulator(np.arange(12).reshape(4, 3))
    a += b
    assert a.value.sum() == 81

def test_accumulators():
    a = processor.value_accumulator(float)
    a += 3.
    assert a.value == 3.
    assert a.identity().value == 0.

    a = processor.value_accumulator(partial(np.array, [2.]))
    a += 3.
    assert np.array_equal(a.value, np.array([5.]))
    assert np.array_equal(a.identity().value, np.array([2.]))

    l = processor.list_accumulator(range(4))
    l += [3]
    l += processor.list_accumulator([1, 2])
    assert l == [0, 1, 2, 3, 3, 1, 2]

    b = processor.set_accumulator({'apples', 'oranges'})
    b += {'pears'}
    b += 'grapes'
    assert b == {'apples', 'oranges', 'pears', 'grapes'}

    c = processor.dict_accumulator({'num': a, 'fruit': b})
    c['num'] += 2.
    c += processor.dict_accumulator({
        'num2': processor.value_accumulator(int),
        'fruit': processor.set_accumulator({'apples', 'cherries'}),
    })
    assert c['num2'].value == 0
    assert np.array_equal(c['num'].value, np.array([7.]))
    assert c['fruit'] == {'apples', 'oranges', 'pears', 'grapes', 'cherries'}

    d = processor.defaultdict_accumulator(float)
    d['x'] = 0.
    d['x'] += 4.
    d['y'] += 5.
    d['z'] += d['x']
    d['x'] += d['y']
    assert d['x'] == 9.
    assert d['y'] == 5.
    assert d['z'] == 4.
    assert d['w'] == 0.

    e = d + c

    f = processor.defaultdict_accumulator(lambda: 2.)
    f['x'] += 4.
    assert f['x'] == 6.
    f += f
    assert f['x'] == 12.
    assert f['y'] == 2.

    a = processor.column_accumulator(np.arange(6).reshape(2, 3))
    b = processor.column_accumulator(np.arange(12).reshape(4, 3))
    a += b
    assert a.value.sum() == 81

def get_pileup(item):
    dataset, filename = item
    file = uproot.open(filename)

    puhist = file["Pu"]
    pileup = processor.value_accumulator(partial(np.zeros, puhist.values.size))
    pileup += puhist.values

    sumwhist = file["SumWeights"]
    sumw = processor.value_accumulator(int)
    sumw += sumwhist.values[0]

    return processor.dict_accumulator({
        'pileup': processor.dict_accumulator({dataset: pileup}),
        'sumw': processor.dict_accumulator({dataset: sumw}),
    })

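# A small usage sketch (an assumption, not from the original script): because get_pileup
# returns a dict_accumulator, the per-file results can be reduced with ordinary addition.
# The fileset layout {dataset: [filenames, ...]} is assumed purely for illustration.
final_pileup = None
for dataset, filenames in fileset.items():
    for fn in filenames:
        out = get_pileup((dataset, fn))
        final_pileup = out if final_pileup is None else final_pileup + out
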
def __init__(self, mcEventYields=None, jetSyst='nominal'):
    ################################
    # INITIALIZE COFFEA PROCESSOR
    ################################
    self.mcEventYields = mcEventYields

    if jetSyst not in ['nominal', 'JERUp', 'JERDown', 'JESUp', 'JESDown']:
        raise Exception(f'{jetSyst} is not in acceptable jet systematic types [nominal, JERUp, JERDown, JESUp, JESDown]')
    self.jetSyst = jetSyst

    dataset_axis = hist.Cat("dataset", "Dataset")
    lep_axis = hist.Cat("lepFlavor", "Lepton Flavor")
    systematic_axis = hist.Cat("systematic", "Systematic Uncertainty")

    m3_axis = hist.Bin("M3", r"$M_3$ [GeV]", 200, 0., 1000)
    mass_axis = hist.Bin("mass", r"$m_{\ell\gamma}$ [GeV]", 400, 0., 400)
    pt_axis = hist.Bin("pt", r"$p_{T}$ [GeV]", 200, 0., 1000)
    eta_axis = hist.Bin("eta", r"$\eta_{\gamma}$", 300, -1.5, 1.5)
    chIso_axis = hist.Bin("chIso", r"Charged Hadron Isolation", np.arange(-0.1, 20.001, .05))

    ## Define axis to keep track of photon category
    phoCategory_axis = hist.Bin("category", r"Photon Category", [1, 2, 3, 4, 5])
    phoCategory_axis.identifiers()[0].label = "Genuine Photon"
    phoCategory_axis.identifiers()[1].label = "Misidentified Electron"
    phoCategory_axis.identifiers()[2].label = "Hadronic Photon"
    phoCategory_axis.identifiers()[3].label = "Hadronic Fake"

    ### Accumulator for holding histograms
    self._accumulator = processor.dict_accumulator({
        # 3. ADD HISTOGRAMS
        ## book histograms for photon pt, eta, and charged hadron isolation
        #'photon_pt':
        #'photon_eta':
        #'photon_chIso':

        ## book histogram for photon/lepton mass in a 3j0t region
        #'photon_lepton_mass_3j0t':

        ## book histogram for M3 variable
        #'M3':

        'EventCount': processor.value_accumulator(int)
    })

    ext = extractor()
    ext.add_weight_sets([f"btag2016 * {cwd}/ScaleFactors/Btag/DeepCSV_2016LegacySF_V1.btag.csv"])
    ext.finalize()
    self.evaluator = ext.make_evaluator()

    self.ele_id_sf = util.load(f'{cwd}/ScaleFactors/MuEGammaScaleFactors/ele_id_sf.coffea')
    self.ele_id_err = util.load(f'{cwd}/ScaleFactors/MuEGammaScaleFactors/ele_id_err.coffea')
    self.ele_reco_sf = util.load(f'{cwd}/ScaleFactors/MuEGammaScaleFactors/ele_reco_sf.coffea')
    self.ele_reco_err = util.load(f'{cwd}/ScaleFactors/MuEGammaScaleFactors/ele_reco_err.coffea')

    self.mu_id_sf = util.load(f'{cwd}/ScaleFactors/MuEGammaScaleFactors/mu_id_sf.coffea')
    self.mu_id_err = util.load(f'{cwd}/ScaleFactors/MuEGammaScaleFactors/mu_id_err.coffea')
    self.mu_iso_sf = util.load(f'{cwd}/ScaleFactors/MuEGammaScaleFactors/mu_iso_sf.coffea')
    self.mu_iso_err = util.load(f'{cwd}/ScaleFactors/MuEGammaScaleFactors/mu_iso_err.coffea')
    self.mu_trig_sf = util.load(f'{cwd}/ScaleFactors/MuEGammaScaleFactors/mu_trig_sf.coffea')
    self.mu_trig_err = util.load(f'{cwd}/ScaleFactors/MuEGammaScaleFactors/mu_trig_err.coffea')

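# A sketch of how the commented-out histograms above could be booked, reusing the axes
# defined in this __init__. This is an assumption, not the exercise's official solution;
# in particular, whether systematic_axis belongs on every histogram is a guess here
# (the analogous processor below books the same histograms without it).
example_histograms = {
    'photon_pt': hist.Hist("Counts", dataset_axis, pt_axis, phoCategory_axis, lep_axis, systematic_axis),
    'photon_eta': hist.Hist("Counts", dataset_axis, eta_axis, phoCategory_axis, lep_axis, systematic_axis),
    'photon_chIso': hist.Hist("Counts", dataset_axis, chIso_axis, phoCategory_axis, lep_axis, systematic_axis),
    'photon_lepton_mass_3j0t': hist.Hist("Counts", dataset_axis, mass_axis, phoCategory_axis, lep_axis, systematic_axis),
    'M3': hist.Hist("Counts", dataset_axis, m3_axis, phoCategory_axis, lep_axis, systematic_axis),
}
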
def __init__(self, runNum=-1, eventNum=-1, mcEventYields=None):
    self.mcEventYields = mcEventYields

    dataset_axis = hist.Cat("dataset", "Dataset")
    lep_axis = hist.Bin("lepFlavor", r"ElectronOrMuon", 2, -1, 1)
    lep_axis.identifiers()[0].label = 'Electron'
    lep_axis.identifiers()[1].label = 'Muon'

    m3_axis = hist.Bin("M3", r"$M_3$ [GeV]", 200, 0., 1000)
    mass_axis = hist.Bin("mass", r"$m_{\ell\gamma}$ [GeV]", 400, 0., 400)
    pt_axis = hist.Bin("pt", r"$p_{T}$ [GeV]", 200, 0., 1000)
    eta_axis = hist.Bin("eta", r"$\eta_{\gamma}$", 300, -1.5, 1.5)
    chIso_axis = hist.Bin("chIso", r"Charged Hadron Isolation", np.arange(-0.1, 20.001, .05))

    ## Define axis to keep track of photon category
    phoCategory_axis = hist.Bin("category", r"Photon Category", [1, 2, 3, 4, 5])
    phoCategory_axis.identifiers()[0].label = "Genuine Photon"
    phoCategory_axis.identifiers()[1].label = "Misidentified Electron"
    phoCategory_axis.identifiers()[2].label = "Hadronic Photon"
    phoCategory_axis.identifiers()[3].label = "Hadronic Fake"

    ### Accumulator for holding histograms
    self._accumulator = processor.dict_accumulator({
        ## photon histograms
        'photon_pt': hist.Hist("Counts", dataset_axis, pt_axis, phoCategory_axis, lep_axis),
        'photon_eta': hist.Hist("Counts", dataset_axis, eta_axis, phoCategory_axis, lep_axis),
        'photon_chIso': hist.Hist("Counts", dataset_axis, chIso_axis, phoCategory_axis, lep_axis),
        'photon_chIsoSideband': hist.Hist("Counts", dataset_axis, chIso_axis, phoCategory_axis, lep_axis),
        'photon_lepton_mass': hist.Hist("Counts", dataset_axis, mass_axis, phoCategory_axis, lep_axis),
        'photon_lepton_mass_3j0t': hist.Hist("Counts", dataset_axis, mass_axis, phoCategory_axis, lep_axis),
        'M3': hist.Hist("Counts", dataset_axis, m3_axis, phoCategory_axis, lep_axis),
        'M3Presel': hist.Hist("Counts", dataset_axis, m3_axis, lep_axis),
        'EventCount': processor.value_accumulator(int)
    })

    self.eventNum = eventNum
    self.runNum = runNum

    ext = extractor()
    ext.add_weight_sets([f"btag2016 * {cwd}/ScaleFactors/Btag/DeepCSV_2016LegacySF_V1.btag.csv"])
    ext.finalize()
    self.evaluator = ext.make_evaluator()

    ele_id_file = uproot.open(f'{cwd}/ScaleFactors/MuEGammaScaleFactors/ele2016/2016LegacyReReco_ElectronTight_Fall17V2.root')
    self.ele_id_sf = dense_lookup.dense_lookup(ele_id_file["EGamma_SF2D"].values, ele_id_file["EGamma_SF2D"].edges)
    self.ele_id_err = dense_lookup.dense_lookup(ele_id_file["EGamma_SF2D"].variances**0.5, ele_id_file["EGamma_SF2D"].edges)

    ele_reco_file = uproot.open(f'{cwd}/ScaleFactors/MuEGammaScaleFactors/ele2016/egammaEffi.txt_EGM2D_runBCDEF_passingRECO.root')
    self.ele_reco_sf = dense_lookup.dense_lookup(ele_reco_file["EGamma_SF2D"].values, ele_reco_file["EGamma_SF2D"].edges)
    self.ele_reco_err = dense_lookup.dense_lookup(ele_reco_file["EGamma_SF2D"].variances**0.5, ele_reco_file["EGamma_SF2D"].edges)

    mu_id_vals = 0
    mu_id_err = 0
    mu_iso_vals = 0
    mu_iso_err = 0
    mu_trig_vals = 0
    mu_trig_err = 0

    for scaleFactors in muSFFileList:
        id_file = uproot.open(scaleFactors['id'][0])
        iso_file = uproot.open(scaleFactors['iso'][0])
        trig_file = uproot.open(scaleFactors['trig'][0])

        mu_id_vals += id_file[scaleFactors['id'][1]].values * scaleFactors['scale']
        mu_id_err += id_file[scaleFactors['id'][1]].variances**0.5 * scaleFactors['scale']
        mu_id_edges = id_file[scaleFactors['id'][1]].edges

        mu_iso_vals += iso_file[scaleFactors['iso'][1]].values * scaleFactors['scale']
        mu_iso_err += iso_file[scaleFactors['iso'][1]].variances**0.5 * scaleFactors['scale']
        mu_iso_edges = iso_file[scaleFactors['iso'][1]].edges

        mu_trig_vals += trig_file[scaleFactors['trig'][1]].values * scaleFactors['scale']
        mu_trig_err += trig_file[scaleFactors['trig'][1]].variances**0.5 * scaleFactors['scale']
        mu_trig_edges = trig_file[scaleFactors['trig'][1]].edges

    self.mu_id_sf = dense_lookup.dense_lookup(mu_id_vals, mu_id_edges)
    self.mu_id_err = dense_lookup.dense_lookup(mu_id_err, mu_id_edges)
    self.mu_iso_sf = dense_lookup.dense_lookup(mu_iso_vals, mu_iso_edges)
    self.mu_iso_err = dense_lookup.dense_lookup(mu_iso_err, mu_iso_edges)
    self.mu_trig_sf = dense_lookup.dense_lookup(mu_trig_vals, mu_trig_edges)
    self.mu_trig_err = dense_lookup.dense_lookup(mu_trig_err, mu_trig_edges)

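# A short usage sketch (an assumption, not in the original code): the dense_lookup
# objects built above are callable on arrays of the binned quantities and return
# per-object scale factors. The argument order (eta, pt) and the muons collection
# are purely illustrative.
muon_id_sf = self.mu_id_sf(muons.eta, muons.pt)
muon_id_sf_up = muon_id_sf + self.mu_id_err(muons.eta, muons.pt)    # hypothetical up-variation
muon_id_sf_down = muon_id_sf - self.mu_id_err(muons.eta, muons.pt)  # hypothetical down-variation
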
if args.validate:
    for ds, fn in tqdm(filelist, desc='Validating files'):
        try:
            validate(ds, fn)
        except OSError:
            print("File open error for %s, %s" % (ds, fn))
    exit(0)

processor_instance = load(args.processor)

combined_accumulator = processor.dict_accumulator({
    'stats': processor.dict_accumulator({
        'nentries': processor.value_accumulator(int),
        'bytesread': processor.value_accumulator(int),
        'sumworktime': processor.value_accumulator(float),
        'columns_accessed': processor.set_accumulator(),
    }),
    'job': processor_instance.accumulator.identity(),
})

def work_function(item):
    dataset, file = item
    out, stats = process_file(dataset, file, processor_instance, combined_accumulator['stats'], preload_items,