def _kExtra(self, kpt, eta, nl, u, s=0, m=0):
    """Return the extra smearing factor ``1 / (1 + x)`` for each muon.

    ``x`` is non-zero only where the data resolution term exceeds the MC
    one; there it is ``sqrt(kData**2 - kMC**2) * sigma * invcdf`` with
    ``invcdf`` taken from the double crystal-ball inverse CDF evaluated at
    the random numbers ``u``.

    Args:
        kpt: transverse momenta (JaggedArray or flat array).
        eta: pseudorapidities, same structure as ``kpt``.
        nl: layer counts forwarded to the lookups, same structure.
        u: random numbers fed to the inverse CDF, same structure.
        s, m: indices selecting the parameter set in the lookup tables.

    Returns:
        The factor per muon; re-wrapped with the input offsets when ``kpt``
        was a JaggedArray, a flat array otherwise.
    """
    # Remember the jagged structure (if any) and work on flat arrays so
    # that the boolean-mask assignments below are plain numpy operations.
    jagged_offsets = None
    if isinstance(kpt, JaggedArray):
        jagged_offsets = kpt.offsets
        kpt, eta, nl, u = kpt.flatten(), eta.flatten(), nl.flatten(), u.flatten()

    abs_eta = abs(eta)
    resolution = self._kRes[s][m]
    k_data = resolution[1](abs_eta)  # index 1 is data
    k_mc = resolution[0](abs_eta)  # index 0 is MC
    data_wider = k_data > k_mc

    extra = np.zeros_like(kpt)
    sigma = self._sigma(kpt, eta, nl, s, m)

    # Rochester cbA = beta, cbN = m, cbS = scale; loc (cbM) is fixed to 0.
    # ppf transforms with y = (x - loc) / scale.
    alpha = self._cbA[s][m](abs_eta, nl)
    power = self._cbN[s][m](abs_eta, nl)
    loc = np.zeros_like(u)
    scale = self._cbS[s][m](abs_eta, nl)
    inv_cdf = doublecrystalball.ppf(u, alpha, alpha, power, power, loc, scale)

    kd = k_data[data_wider]
    km = k_mc[data_wider]
    extra[data_wider] = np.sqrt(kd * kd - km * km) * sigma[data_wider] * inv_cdf[data_wider]

    # Entries with x <= -1 keep the neutral factor 1.
    factor = np.ones_like(kpt)
    usable = extra > -1
    factor[usable] = 1.0 / (1.0 + extra[usable])

    if jagged_offsets is None:
        return factor
    return JaggedArray.fromoffsets(jagged_offsets, factor)
def __call__(self, *args):
    """Evaluate the lookup on flat or jagged inputs.

    JaggedArray arguments are replaced by their flat ``content`` before
    ``self._evaluate`` is called, and the result is re-wrapped with the
    shared offsets afterwards. Plain numpy arrays are passed through and
    the result is returned flat.

    Args:
        *args: positional inputs forwarded to ``self._evaluate``.

    Returns:
        ``self._evaluate(*inputs)``, wrapped in a JaggedArray when the
        inputs were jagged.

    Raises:
        Exception: if JaggedArrays and numpy arrays are mixed, or if two
            JaggedArrays are backed by different offsets buffers.
    """
    inputs = list(args)
    # None        -> no array argument seen yet
    # -1 (int)    -> plain numpy arrays seen
    # np.ndarray  -> offsets of the jagged arguments seen so far
    offsets = None
    # TODO: check can use offsets (this should always be true for striped)
    # Alternatively we can just use starts and stops
    for i, arg in enumerate(inputs):
        if isinstance(arg, JaggedArray):
            # The int marker must be tested before touching ``offsets.base``:
            # a Python int has no ``base`` attribute, so the old ordering
            # raised AttributeError instead of the intended message.
            if isinstance(offsets, int):
                raise Exception(
                    'Do not mix JaggedArrays and numpy arrays when calling derived class of lookup_base'
                )
            if type(offsets) is np.ndarray and offsets.base is not arg.offsets.base:
                raise Exception(
                    'All input jagged arrays must have a common structure (offsets)!'
                )
            offsets = arg.offsets
            inputs[i] = arg.content
        elif isinstance(arg, np.ndarray):
            if type(offsets) is np.ndarray:
                raise Exception(
                    'do not mix JaggedArrays and numpy arrays when calling a derived class of lookup_base'
                )
            offsets = -1

    retval = self._evaluate(*inputs)
    if offsets is not None and not isinstance(offsets, int):
        retval = JaggedArray.fromoffsets(offsets, retval)
    return retval
def passLooseJetSel(jet):
    """Return a jagged boolean mask of jets passing the loose selection.

    Cuts are applied on the flat ``content`` arrays in |eta| regions:

    * |eta| <= 2.7: neuHadFrac < 0.99, neuEmFrac < 0.99, nParticles > 1
    * |eta| <= 2.4 (in addition): chHadFrac > 0, nCharged > 0,
      chEmFrac < 0.99
    * 2.7 < |eta| <= 3.0: neuEmFrac > 0.01, neuHadFrac < 0.98,
      nNeutrals > 2
    * |eta| > 3.0: neuEmFrac > 0.90, nNeutrals > 10

    Args:
        jet: jagged jet collection exposing ``pt``, ``eta`` and the
            fraction/multiplicity attributes used above as jagged arrays.

    Returns:
        JaggedArray of bool sharing the offsets of ``jet.pt``.
    """
    # ``np.bool`` was removed in NumPy 1.24; the builtin ``bool`` is the
    # same dtype and works on every NumPy version.
    outs = np.ones_like(jet.pt.content, dtype=bool)
    absEta = np.abs(jet.eta.content)

    etaVFor = absEta <= 3.0
    etaFor = absEta <= 2.7
    etaCen = absEta <= 2.4

    # forward jets
    outs[etaFor] &= (
        (jet.neuHadFrac.content[etaFor] < 0.99)
        & (jet.neuEmFrac.content[etaFor] < 0.99)
        & (jet.nParticles.content[etaFor] > 1)
    )

    # central jets
    outs[etaCen] &= (
        (jet.chHadFrac.content[etaCen] > 0.0)
        & (jet.nCharged.content[etaCen] > 0)
        & (jet.chEmFrac.content[etaCen] < 0.99)
    )

    # 2.7-3.0
    etaHE = etaVFor & ~etaFor
    outs[etaHE] &= (
        (jet.neuEmFrac.content[etaHE] > 0.01)
        & (jet.neuHadFrac.content[etaHE] < 0.98)
        & (jet.nNeutrals.content[etaHE] > 2)
    )

    # > 3.0
    etaHF = ~etaVFor
    outs[etaHF] &= (
        (jet.neuEmFrac.content[etaHF] > 0.90)
        & (jet.nNeutrals.content[etaHF] > 10)
    )

    return JaggedArray.fromoffsets(jet.pt.offsets, outs)
def getvar(events, name, default=None, parents="run"):
    """Fetch ``events[name]``, or build a default-filled stand-in.

    Args:
        events: mapping-like container supporting ``in`` and ``[]``.
        name: key to look up.
        default: fill value used when ``name`` is missing.
        parents: key of an existing entry whose structure the stand-in
            copies.

    Returns:
        ``events[name]`` when present. Otherwise an array shaped like
        ``events[parents]`` and filled with ``default`` (numpy array for a
        numpy parent, JaggedArray with the parent's offsets for a jagged
        parent). ``None`` when ``parents`` is missing too or has another
        type.
    """
    if name in events:
        return events[name]
    if parents not in events:
        return None

    template = events[parents]
    if isinstance(template, np.ndarray):
        return np.full_like(template, default)
    if isinstance(template, JaggedArray):
        n_items = template.flatten().shape[0]
        return JaggedArray.fromoffsets(template.offsets, [default] * n_items)
    return None
def get_lepton_values(zl, key):
    """Collect one quantity of the lepton picked by ``zl`` per Z candidate.

    ``zl`` holds, per candidate, the index (0 or 1) of the leg to read.
    The legs come from ``z_cands[passZCand]``, which this function takes
    from the enclosing scope.

    Args:
        zl: JaggedArray of leg indices.
        key: ``'pt'``, ``'eta'``, ``'phi'`` or ``'mass'`` to read from the
            leg's ``p4``; any other value is used as a column name.

    Returns:
        JaggedArray of floats with the offsets of ``zl``.
    """
    p4_keys = ('pt', 'eta', 'phi', 'mass')
    values = np.zeros_like(zl.flatten(), dtype=float)
    if not len(values):
        return JaggedArray.fromoffsets(zl.offsets, values)

    for leg_index in (0, 1):
        picked = (leg_index == zl.flatten())
        leg = z_cands[passZCand][str(leg_index)].flatten()[picked]
        if key in p4_keys:
            values[picked] = getattr(leg['p4'], key)
        else:
            values[picked] = leg[key]
    return JaggedArray.fromoffsets(zl.offsets, values)
def NestNestObjArrayToJagged(objarr):
    """Convert a doubly nested object array into a jagged-of-jagged array.

    uproot reads a ``vector<vector<number>>`` TBranch as an object array;
    this rebuilds it as a JaggedArray whose content is itself a
    JaggedArray.
    """
    # Outer level: one entry per event, each entry still a list.
    outer = JaggedArray.fromiter(objarr)
    # Inner level: turn those lists into a JaggedArray of numbers.
    inner = JaggedArray.fromiter(outer.content)
    return JaggedArray.fromoffsets(outer.offsets, inner)
def passJetTightLepVetoSel(jet):
    """Return a jagged boolean mask of jets passing tight + lepton veto.

    Cuts are applied on the flat ``content`` arrays:

    * |eta| <= 2.7: neuHadFrac < 0.90, neuEmFrac < 0.90, nParticles > 1,
      muonFrac < 0.8
    * |eta| <= 2.4 (in addition): chHadFrac > 0, nCharged > 0,
      chEmFrac < 0.9

    Jets with |eta| > 2.7 are not cut on and stay ``True``.

    Args:
        jet: jagged jet collection exposing ``pt``, ``eta`` and the
            fraction/multiplicity attributes used above as jagged arrays.

    Returns:
        JaggedArray of bool sharing the offsets of ``jet.pt``.
    """
    # ``np.bool`` was removed in NumPy 1.24; the builtin ``bool`` is the
    # same dtype and works on every NumPy version.
    outs = np.ones_like(jet.pt.content, dtype=bool)
    absEta = np.abs(jet.eta.content)

    etaFor = absEta <= 2.7
    etaCen = absEta <= 2.4

    # forward jets
    outs[etaFor] &= (
        (jet.neuHadFrac.content[etaFor] < 0.90)
        & (jet.neuEmFrac.content[etaFor] < 0.90)
        & (jet.nParticles.content[etaFor] > 1)
        & (jet.muonFrac.content[etaFor] < 0.8)
    )

    # central jets
    outs[etaCen] &= (
        (jet.chHadFrac.content[etaCen] > 0.0)
        & (jet.nCharged.content[etaCen] > 0)
        & (jet.chEmFrac.content[etaCen] < 0.9)
    )

    return JaggedArray.fromoffsets(jet.pt.offsets, outs)
def test_rochester():
    """Compare the Rochester lookup against pre-computed reference values.

    Matching the official Rochester code one-to-one would require loading
    its C++ sources, so the reference scales are stored in the sample
    directory instead (produced by tests/samples/rochester/build_rochester.py).
    """
    rochester = lookup_tools.rochester_lookup.rochester_lookup(
        lookup_tools.txt_converters.convert_rochester_file(
            'tests/samples/RoccoR2018.txt.gz', loaduncs=True
        )
    )

    # reference values
    official_data_k = np.load('tests/samples/nano_dimuon_rochester.npy')
    official_data_err = np.load('tests/samples/nano_dimuon_rochester_err.npy')
    official_mc_k = np.load('tests/samples/nano_dy_rochester.npy')
    official_mc_err = np.load('tests/samples/nano_dy_rochester_err.npy')
    flat_rand = np.load('tests/samples/nano_dy_rochester_rand.npy')

    # ---- data ----
    events = NanoEvents.from_file(os.path.abspath('tests/samples/nano_dimuon.root'))

    def data_args():
        mu = events.Muon
        return mu.charge, mu.pt, mu.eta, mu.phi

    data_k = rochester.kScaleDT(*data_args())
    assert all(np.isclose(data_k.flatten(), official_data_k))

    data_err = rochester.kScaleDTerror(*data_args())
    data_err = np.array(data_err.flatten(), dtype=float)
    assert all(np.isclose(data_err, official_data_err, atol=1e-8))

    # ---- MC ----
    events = NanoEvents.from_file(os.path.abspath('tests/samples/nano_dy.root'))
    hasgen = ~np.isnan(events.Muon.matched_gen.pt.fillna(np.nan))
    mc_rand = JaggedArray.fromoffsets(hasgen.offsets, flat_rand)

    def spread_args():
        # muons with a generator match
        mu = events.Muon
        return (mu.charge[hasgen], mu.pt[hasgen], mu.eta[hasgen],
                mu.phi[hasgen], mu.matched_gen.pt[hasgen])

    def smear_args():
        # muons without a generator match
        mu = events.Muon
        return (mu.charge[~hasgen], mu.pt[~hasgen], mu.eta[~hasgen],
                mu.phi[~hasgen], mu.nTrackerLayers[~hasgen], mc_rand[~hasgen])

    mc_kspread = rochester.kSpreadMC(*spread_args())
    mc_ksmear = rochester.kSmearMC(*smear_args())
    mc_k = np.ones_like(events.Muon.pt.flatten())
    mc_k[hasgen.flatten()] = mc_kspread.flatten()
    mc_k[~hasgen.flatten()] = mc_ksmear.flatten()
    assert all(np.isclose(mc_k, official_mc_k))

    mc_errspread = rochester.kSpreadMCerror(*spread_args())
    mc_errsmear = rochester.kSmearMCerror(*smear_args())
    mc_err = np.ones_like(events.Muon.pt.flatten())
    mc_err[hasgen.flatten()] = mc_errspread.flatten()
    mc_err[~hasgen.flatten()] = mc_errsmear.flatten()
    assert all(np.isclose(mc_err, official_mc_err, atol=1e-8))
def getSubCorrections(self, **kwargs):
    """Return the cumulative correction after each level, for all jets.

    Use like::

        jecs = corrector.getSubCorrections(JetProperty1=jet.property1, ...)

    ``jecs`` is formatted like ``[[jec_jet1 jec_jet2 ...] ...]``: one entry
    per function in ``self._funcs``, each holding the product of all
    corrections up to and including that level.

    The levels are applied in sequence: after each one, ``JetPt`` and/or
    ``JetE`` are scaled by that level's correction before the next level
    is evaluated. The scaling is done on private copies, so the arrays
    passed in by the caller are left untouched.

    Args:
        **kwargs: one array per variable named in the correctors'
            signatures. If the argument named by ``self._signature[0]`` is
            a JaggedArray, every argument is read through ``.content`` and
            the results are re-wrapped with its offsets.

    Returns:
        list with one array (or JaggedArray) per correction level.

    Raises:
        Exception: if neither ``JetPt`` nor ``JetE`` is supplied.
    """
    localargs = kwargs
    firstarg = localargs[self._signature[0]]
    offsets = None
    if isinstance(firstarg, JaggedArray):
        offsets = firstarg.offsets
        cumulativeCorrection = firstarg.ones_like().content
        for key in localargs:
            localargs[key] = localargs[key].content
    else:
        cumulativeCorrection = np.ones_like(firstarg)

    corrVars = [var for var in ('JetPt', 'JetE') if var in localargs]
    if not corrVars:
        raise Exception(
            'No variable to correct, need JetPt or JetE in inputs!')

    # Scale private copies: the in-place ``*=`` below would otherwise
    # modify the caller's arrays, so a second call on the same jets would
    # start from already-corrected values.
    for var in corrVars:
        localargs[var] = np.array(localargs[var], copy=True)

    corrections = []
    for func in self._funcs:
        args = [localargs[name] for name in func.signature]
        corr = func(*args)
        for var in corrVars:
            localargs[var] *= corr
        cumulativeCorrection *= corr
        # Snapshot this level. Appending the running array itself would
        # make every entry alias the same buffer and end up equal to the
        # final total.
        corrections.append(cumulativeCorrection.copy())

    if offsets is not None:
        corrections = [
            JaggedArray.fromoffsets(offsets, level) for level in corrections
        ]
    return corrections
treename, branches=branches, namedecode='utf-8', entrysteps=200000)): charge = arrays['Muon_charge'] pt = arrays['Muon_pt'] eta = arrays['Muon_eta'] phi = arrays['Muon_phi'] if not isData: # for default if gen present gid = arrays['Muon_genPartIdx'] gpt = arrays['GenPart_pt'] # for backup w/o gen nl = arrays['Muon_nTrackerLayers'] u = np.random.rand(*pt.flatten().shape) u = JaggedArray.fromoffsets(pt.offsets, u) fullu += [u] for ie in range(len(pt)): subres = [] suberr = [] for im in range(len(pt[ie])): if isData: subres += [ roccor.kScaleDT(int(charge[ie][im]), float(pt[ie][im]), float(eta[ie][im]), float(phi[ie][im])) ] suberr += [ roccor.kScaleDTerror(int(charge[ie][im]), float(pt[ie][im]), float(eta[ie][im]), float(phi[ie][im]))
def process(self, events):
    """Run the Z -> ll selection on one chunk and fill the histograms.

    Steps, in order: lumi mask, trigger, good vertex, Rochester muon
    momentum correction, lepton ID, Z candidate building (ee and mm
    pairs, opposite charge, first candidate kept), mass window and pt
    threshold selections, event weights (MC only) and histogram filling.

    Args:
        events: event chunk; ``events.metadata['dataset']`` names the
            dataset and decides whether the chunk is treated as data.

    Returns:
        The filled accumulator. Returned early, with only the cutflow
        updated, when no event in the chunk passes the trigger.
    """
    logging.debug('starting process')
    output = self.accumulator.identity()

    dataset = events.metadata['dataset']
    self._isData = dataset in [
        'SingleMuon', 'DoubleMuon', 'SingleElectron', 'DoubleEG', 'EGamma',
        'MuonEG'
    ]

    selection = processor.PackedSelection()

    # TODO: instead of cutflow, use processor.PackedSelection
    output['cutflow']['all events'] += events.size

    logging.debug('applying lumi mask')
    if self._isData:
        lumiMask = lumi_tools.LumiMask(self._corrections['golden'])
        events['passLumiMask'] = lumiMask(np.array(events.run),
                                          np.array(events.luminosityBlock))
    else:
        events['passLumiMask'] = np.ones_like(events.run, dtype=bool)
    passLumiMask = events.passLumiMask
    selection.add('lumiMask', passLumiMask)

    logging.debug('adding trigger')
    self._add_trigger(events)

    passHLT = events.passHLT
    selection.add('trigger', passHLT)
    output['cutflow']['pass trigger'] += passHLT.sum()
    # if no trigger: fast return
    if passHLT.sum() == 0:
        return output

    # require one good vertex
    logging.debug('checking vertices')
    passGoodVertex = (events.PV.npvsGood > 0)
    output['cutflow']['good vertex'] += passGoodVertex.sum()
    selection.add('goodVertex', passGoodVertex)

    # run rochester
    rochester = self._rochester
    _muon_offsets = events.Muon.pt.offsets
    _charge = events.Muon.charge
    _pt = events.Muon.pt
    _eta = events.Muon.eta
    _phi = events.Muon.phi
    if self._isData:
        _k = rochester.kScaleDT(_charge, _pt, _eta, _phi)
        # The matching uncertainty (kScaleDTerror) is not evaluated here.
    else:
        # for default if gen present
        _gpt = events.Muon.matched_gen.pt
        # for backup w/o gen
        _nl = events.Muon.nTrackerLayers
        _u = JaggedArray.fromoffsets(_muon_offsets,
                                     np.random.rand(*_pt.flatten().shape))
        _hasgen = (_gpt.fillna(-1) > 0)
        _kspread = rochester.kSpreadMC(_charge[_hasgen], _pt[_hasgen],
                                       _eta[_hasgen], _phi[_hasgen],
                                       _gpt[_hasgen])
        _ksmear = rochester.kSmearMC(_charge[~_hasgen], _pt[~_hasgen],
                                     _eta[~_hasgen], _phi[~_hasgen],
                                     _nl[~_hasgen], _u[~_hasgen])
        # Merge the two disjoint subsets back into one flat array.
        _k = np.ones_like(_pt.flatten())
        _k[_hasgen.flatten()] = _kspread.flatten()
        _k[~_hasgen.flatten()] = _ksmear.flatten()
        _k = JaggedArray.fromoffsets(_muon_offsets, _k)
        # The matching uncertainties (kSpreadMCerror / kSmearMCerror,
        # merged the same way) are not evaluated here.

    # Only muons below 200 (in the units of Muon.pt) get the correction.
    mask = _pt.flatten() < 200
    rochester_pt = _pt.flatten()
    rochester_pt[mask] = (_k * _pt).flatten()[mask]
    events.Muon['pt'] = JaggedArray.fromoffsets(_muon_offsets, rochester_pt)

    logging.debug('adding muon id')
    self._add_muon_id(events.Muon)

    logging.debug('adding electron id')
    self._add_electron_id(events.Electron)

    logging.debug('selecting muons')
    muonId = (events.Muon.passId > 0)
    muons = events.Muon[muonId]

    logging.debug('selecting electrons')
    electronId = (events.Electron.passId > 0)
    electrons = events.Electron[electronId]

    passTwoLeptons = (muons.counts >= 2) | (electrons.counts >= 2)
    output['cutflow']['two leptons'] += passTwoLeptons.sum()
    selection.add('twoLeptons', passTwoLeptons)

    # build cands
    # remake z to have same columns
    # pt eta phi mass charge pdgId
    logging.debug('rebuilding leptons')

    def rebuild(leptons):
        """Rebuild a lepton collection with a common set of columns."""
        return JaggedCandidateArray.candidatesfromoffsets(
            leptons.offsets,
            pt=leptons.pt.flatten(),
            eta=leptons.eta.flatten(),
            phi=leptons.phi.flatten(),
            mass=leptons.mass.flatten(),
            charge=leptons.charge.flatten(),
            pdgId=leptons.pdgId.flatten(),
            # needed for electron SF
            etaSC=leptons.etaSC.flatten()
            if hasattr(leptons, 'etaSC') else leptons.eta.flatten(),
        )

    newMuons = rebuild(muons)
    newElectrons = rebuild(electrons)

    logging.debug('building 2 leptons')
    ee_cands = newElectrons.choose(2)
    mm_cands = newMuons.choose(2)

    # combine them
    z_cands = JaggedArray.concatenate([ee_cands, mm_cands], axis=1)

    def bestcombination(zcands):
        """Keep the first opposite-charge candidate of each event."""
        good_charge = sum(zcands[str(i)]['charge'] for i in range(2)) == 0
        # this keeps the first z cand in each event
        # should instead sort the best first
        # TODO: select best
        zcands = zcands[good_charge][:, :1]
        return zcands

    logging.debug('selecting best combinations')
    z_cands = bestcombination(z_cands)

    # z1 / z2: index (0 or 1) of the higher / lower pt leg per candidate.
    z1 = np.zeros_like(z_cands['p4'].pt.flatten(), dtype='i')
    z2 = np.ones_like(z_cands['p4'].pt.flatten(), dtype='i')
    swapped = (z_cands['0']['p4'].pt.flatten() <
               z_cands['1']['p4'].pt.flatten())
    z1[swapped] = 1
    z2[swapped] = 0
    z1 = JaggedArray.fromoffsets(z_cands.offsets, z1)
    z2 = JaggedArray.fromoffsets(z_cands.offsets, z2)

    passZCand = (z_cands.counts > 0)
    output['cutflow']['z cand'] += passZCand.sum()
    selection.add('zCand', passZCand)

    # ``&`` binds tighter than ``>``, so the comparison is parenthesised
    # explicitly; the unparenthesised form only worked because at most one
    # candidate per event is kept.
    inWindow = (z_cands.p4.mass > 60) & (z_cands.p4.mass < 120)
    passMassWindow = passZCand & (z_cands[inWindow].counts > 0)
    output['cutflow']['mass window'] += passMassWindow.sum()
    selection.add('massWindow', passMassWindow)

    # im sure there is a better way, but for now just do this
    def get_lepton_values(zl, key):
        """Read ``key`` from the leg selected by ``zl`` per candidate."""
        val = np.zeros_like(zl.flatten(), dtype=float)
        if len(val) == 0:
            return JaggedArray.fromoffsets(zl.offsets, val)
        for i in range(2):
            mask = (i == zl.flatten())
            leg = z_cands[passZCand][str(i)].flatten()[mask]
            if key in ('pt', 'eta', 'phi', 'mass'):
                val[mask] = getattr(leg['p4'], key)
            else:
                val[mask] = leg[key]
        return JaggedArray.fromoffsets(zl.offsets, val)

    z1pt = get_lepton_values(z1, 'pt')
    z2pt = get_lepton_values(z2, 'pt')
    # ``.counts`` would count entries regardless of their truth value and
    # so pass every event that has a candidate; ``.any()`` asks whether a
    # candidate actually satisfies the thresholds.
    passPt = ((z1pt > 30) & (z2pt > 20)).any()
    output['cutflow']['pt threshold'] += passPt.sum()
    selection.add('ptThreshold', passPt)

    chanSels = {}
    z1pdg = get_lepton_values(z1, 'pdgId')
    z2pdg = get_lepton_values(z2, 'pdgId')
    for chan in ['ee', 'mm']:
        if chan == 'ee':
            pdgIds = (11, 11)
        if chan == 'mm':
            pdgIds = (13, 13)
        chanSels[chan] = ((abs(z1pdg) == pdgIds[0]) &
                          (abs(z2pdg) == pdgIds[1]))

    weights = processor.Weights(events.run.size)
    if self._isData:
        output['sumw'][dataset] = 0  # always set to 0 for data
    else:
        # NOTE(review): the scale factors below are applied in the MC
        # branch only -- confirm against the original layout.
        output['sumw'][dataset] += events.genWeight.sum()
        weights.add('genWeight', events.genWeight)
        weights.add(
            'pileupWeight',
            self._corrections['pileupWeight'](events.Pileup.nPU),
            self._corrections['pileupWeightUp'](events.Pileup.nPU),
            self._corrections['pileupWeightDown'](events.Pileup.nPU),
        )

        zls = [z1, z2]

        # electron sf
        for ei, zl in enumerate(zls):
            ei = str(ei)
            eta = get_lepton_values(zl, 'etaSC')
            pt = get_lepton_values(zl, 'pt')
            electronRecoSF = self._corrections['electron_reco'](eta, pt)
            electronIdSF = self._corrections['electron_id_MVA90'](eta, pt)
            electronSF = np.ones_like(electronRecoSF.prod())
            if ei in ['0', '1']:
                chans = ['ee']
            else:
                chans = []
            for chan in chans:
                # turns empty arrays into 0's, nonempty int 1's
                # NOTE(review): ones_like() discards the channel decision,
                # so this selects every event with a candidate -- confirm
                # that is intended.
                chanSel = (chanSels[chan].ones_like().sum() > 0)
                electronSF[chanSel] *= electronRecoSF[chanSel].prod()
                electronSF[chanSel] *= electronIdSF[chanSel].prod()
            weights.add('electronSF' + ei, electronSF)

        # muon SF
        for mi, zl in enumerate(zls):
            mi = str(mi)
            eta = get_lepton_values(zl, 'eta')
            pt = get_lepton_values(zl, 'pt')
            if self._year == '2016':
                idSF = self._corrections['muon_id_MediumID'](eta, pt)
                isoSF = self._corrections['muon_iso_TightRelIso_MediumID'](
                    eta, pt)
            else:
                idSF = self._corrections['muon_id_MediumPromptID'](
                    pt, abs(eta))
                isoSF = self._corrections['muon_iso_TightRelIso_MediumID'](
                    pt, abs(eta))
            muonSF = np.ones_like(idSF.prod())
            if mi in ['0', '1']:
                chans = ['mm']
            else:
                chans = []
            for chan in chans:
                # turns empty arrays into 0's, nonempty int 1's
                chanSel = (chanSels[chan].ones_like().sum() > 0)
                muonSF[chanSel] *= idSF[chanSel].prod()
                muonSF[chanSel] *= isoSF[chanSel].prod()
            weights.add('muonSF' + mi, muonSF)

    logging.debug('filling')
    for sel in self._selections:
        # Only 'massWindow' defines a cut; other selection names are not
        # handled here.
        if sel == 'massWindow':
            cut = selection.all('lumiMask', 'trigger', 'goodVertex',
                                'twoLeptons', 'zCand', 'massWindow',
                                'ptThreshold')
        for chan in ['ee', 'mm']:
            chanSel = chanSels[chan]
            # Per-candidate weight: event weight where the candidate is in
            # this channel, 0 otherwise.
            weight = chanSel.astype(float) * weights.weight()

            output[sel + '_zmass'].fill(
                dataset=dataset,
                channel=chan,
                mass=z_cands[cut].p4.mass.flatten(),
                weight=weight[cut].flatten(),
            )
            output[sel + '_met'].fill(
                dataset=dataset,
                channel=chan,
                met=events.MET.pt[cut],
                weight=weight[cut].flatten(),
            )
            output[sel + '_pileup'].fill(
                dataset=dataset,
                channel=chan,
                npvs=events.PV.npvs[cut],
                weight=weight[cut].flatten(),
            )

    return output