def getFlavourClassificationData(self,filename,TupleMeanStd, weighter):
    """Read one root ntuple and return (weights, features, truth, notremoves).

    Features are the flat, zero-padded MeanNormZeroPad arrays built from
    self.branches; truth is the reduced flavour classification. Depending on
    the instance flags, either weighter-based removal (self.remove) or
    per-jet weighting (self.weight) is applied; otherwise unit weights.
    """
    from DeepJetCore.stopwatch import stopwatch
    sw=stopwatch()
    swall=stopwatch()  # total-conversion timer; only referenced by the commented-out print below
    import ROOT
    fileTimeOut(filename,120) #give eos a minute to recover
    rfile = ROOT.TFile(filename)
    tree = rfile.Get(self.treename)
    self.nsamples=tree.GetEntries()
    #print('took ', sw.getAndReset(), ' seconds for getting tree entries')
    Tuple = self.readTreeFromRootToTuple(filename)
    # zero-padded, mean/std-normalised feature array (C implementation)
    x_all = MeanNormZeroPad(filename,TupleMeanStd,self.branches,self.branchcutoffs,self.nsamples)
    #print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')
    notremoves=numpy.array([])
    weights=numpy.array([])
    if self.remove:
        # in remove mode the 0/1 keep-mask doubles as the weight array
        notremoves=weighter.createNotRemoveIndices(Tuple)
        weights=notremoves
        #print('took ', sw.getAndReset(), ' to create remove indices')
    elif self.weight:
        #print('creating weights')
        weights= weighter.getJetWeights(Tuple)
    else:
        print('neither remove nor weight')
        weights=numpy.empty(self.nsamples)
        weights.fill(1.)
    truthtuple = Tuple[self.truthclasses]
    #print(self.truthclasses)
    alltruth=self.reduceTruth(truthtuple)
    #print(alltruth.shape)
    if self.remove:
        #print('remove')
        # drop the jets flagged by the weighter from all aligned arrays
        weights=weights[notremoves > 0]
        x_all=x_all[notremoves > 0]
        alltruth=alltruth[notremoves > 0]
        newnsamp=x_all.shape[0]
        #print('reduced content to ', int(float(newnsamp)/float(self.nsamples)*100),'%')
        self.nsamples = newnsamp
    #print('took in total ', swall.getAndReset(),' seconds for conversion')
    return weights,x_all,alltruth, notremoves
def convertFromSourceFile(self, filename, weighterobjects, istraining):
    """Build the convolutional-network input from one root ntuple.

    Returns ([x_global, x_cpf, x_npf, x_sv], [alltruth], []): one flat
    per-jet feature block plus three zero-padded per-particle blocks, and
    the reduced truth classes. No normalisation (second argument None).
    """
    from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
    import numpy
    from DeepJetCore.stopwatch import stopwatch
    import ROOT

    timer = stopwatch()
    total_timer = stopwatch()

    fileTimeOut(filename, 120)  # give eos a minute to recover
    root_file = ROOT.TFile(filename)
    tree = root_file.Get("deepntuplizer/tree")
    self.nsamples = tree.GetEntries()
    print('took ', timer.getAndReset(), ' seconds for getting tree entries')

    # split for convolutional network: flat globals first, then the
    # per-particle branch groups 1..3 (cpf, npf, sv)
    x_global = MeanNormZeroPad(filename, None,
                               [self.branches[0]], [self.branchcutoffs[0]],
                               self.nsamples)
    x_cpf, x_npf, x_sv = (
        MeanNormZeroPadParticles(filename, None,
                                 self.branches[idx], self.branchcutoffs[idx],
                                 self.nsamples)
        for idx in (1, 2, 3))
    print('took ', timer.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    full_tuple = self.readTreeFromRootToTuple(filename)
    alltruth = self.reduceTruth(full_tuple[self.truthclasses])
    print(x_global.shape, self.nsamples)

    return [x_global, x_cpf, x_npf, x_sv], [alltruth], []
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Fill self.x / self.y from a root ntuple for combined class+pt training.

    Features: global jet block plus charged-PF, eta-rel and secondary-vertex
    particle blocks (zero-padded by the C preprocessing module) and the
    reconstructed jet pt. Truth: reduced flavour classes plus the per-jet
    correction factor gen_pt_WithNu / jet_corr_pt used as regression target.
    Ends by delegating weighting/removal to self._normalize_input_.
    """
    from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
    import numpy
    from DeepJetCore.stopwatch import stopwatch
    sw = stopwatch()
    swall = stopwatch()
    import ROOT

    fileTimeOut(filename, 120)  #give eos a minute to recover
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("deepntuplizer/tree")
    self.nsamples = tree.GetEntries()
    print('took ', sw.getAndReset(), ' seconds for getting tree entries')

    # split for convolutional network
    x_global = MeanNormZeroPad(filename, None, [self.branches[0]],
                               [self.branchcutoffs[0]], self.nsamples)
    x_cpf = MeanNormZeroPadParticles(filename, None, self.branches[1],
                                     self.branchcutoffs[1], self.nsamples)
    x_etarel = MeanNormZeroPadParticles(filename, None, self.branches[2],
                                        self.branchcutoffs[2], self.nsamples)
    x_sv = MeanNormZeroPadParticles(filename, None, self.branches[3],
                                    self.branchcutoffs[3], self.nsamples)
    print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    npy_array = self.readTreeFromRootToTuple(filename)
    reg_truth = npy_array['gen_pt_WithNu'].view(numpy.ndarray)
    reco_pt = npy_array['jet_corr_pt'].view(numpy.ndarray)
    # vectorised element-wise ratio (replaces an O(n) Python loop filling a
    # numpy.zeros buffer); cast keeps the former float64 result dtype
    correctionfactor = (reg_truth / reco_pt).astype(numpy.float64)

    truthtuple = npy_array[self.truthclasses]
    alltruth = self.reduceTruth(truthtuple)

    self.x = [x_global, x_cpf, x_etarel, x_sv, reco_pt]
    self.y = [alltruth, correctionfactor]
    self._normalize_input_(weighter, npy_array)
def convertFromSourceFile(self, filename, weighterobjects, istraining):
    """Convert the simple example ntuple into DJC feature/truth arrays.

    Uses the single flat branch 'x' as feature (no normalisation) and the
    configured truth classes; returns ([features], [truth], []) wrapped in
    SimpleArray containers.
    """
    from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
    import numpy
    from DeepJetCore.stopwatch import stopwatch
    import ROOT

    timer = stopwatch()
    total_timer = stopwatch()

    fileTimeOut(filename, 120)  # give eos a minute to recover
    root_file = ROOT.TFile(filename)
    tree = root_file.Get("deepntuplizer/tree")
    self.nsamples = tree.GetEntries()
    print('took ', timer.getAndReset(), ' seconds for getting tree entries')

    # single flat feature branch; second argument None means no normalisation
    x_global = MeanNormZeroPad(filename, None, ['x'], [1], self.nsamples)
    print('took ', timer.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    data = self.readTreeFromRootToTuple(filename,
                                        branches=['class1', 'class2', 'x'])
    alltruth = self.reduceTruth(data[self.truthclasses])

    # keep the sample counter consistent with the produced feature array
    self.nsamples = x_global.shape[0]
    print(x_global.shape, alltruth.shape, self.nsamples)

    feat = SimpleArray(x_global, name="features0")
    truth = SimpleArray(alltruth, name="truth")
    return [feat], [truth], []
def writeData_async(index, woq, wrlck):
    """Convert one source sample (self.sourceList[index]) to .djctd in a worker.

    Always reports (index, [success, out_samplename, out_sampleentries]) back
    through the output queue `woq`, on success and on failure alike.
    Closure variables from the enclosing scope: self, logger, stopwatch,
    tempstoragepath, outputDir.
    """
    import shutil
    logger.info('async started')
    sw = stopwatch()
    td = self.dataclass()
    sample = self.sourceList[index]

    if self.batch_mode or self.no_copy_on_convert:
        # read directly from the source location; nothing to clean up
        tmpinput = sample

        def removefile():
            pass
    else:
        # stage the input on local/ram storage to avoid slow remote reads
        tmpinput = tempstoragepath + '/' + str(
            os.getpid()) + '_tmp_' + os.path.basename(sample)

        def removefile():
            # best-effort removal, equivalent to the former 'rm -f'
            try:
                os.remove(tmpinput)
            except OSError:
                pass

        import atexit
        atexit.register(removefile)
        logger.info('start cp')
        try:
            # shutil.copy is robust against spaces/shell metacharacters and
            # raises a real exception, unlike the former os.system('cp ...')
            shutil.copy(sample, tmpinput)
        except OSError as copy_error:
            raise Exception("copy to ramdisk not successful for " +
                            sample) from copy_error

    success = False
    out_samplename = ''
    out_sampleentries = 0
    sbasename = os.path.basename(sample)
    newname = sbasename[:sbasename.rfind('.')] + '.djctd'
    newpath = os.path.abspath(outputDir + newname)
    try:
        logger.info('convertFromSourceFile')
        td.writeFromSourceFile(tmpinput,
                               self.weighterobjects,
                               istraining=not self.istestdata,
                               outname=newpath)
        print('converted and written ' + newname + ' in ', sw.getAndReset(),
              ' sec -', index)
        out_samplename = newname
        out_sampleentries = 1  # NOTE(review): hard-coded; entry count is not read back
        success = True
        td.clear()
        removefile()
        woq.put((index, [success, out_samplename, out_sampleentries]))
    except BaseException:
        # catch everything (incl. KeyboardInterrupt) so the parent always
        # receives a failure report before the exception propagates
        print('problem in ' + newname)
        removefile()
        woq.put((index, [False, out_samplename, out_sampleentries]))
        raise
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Fill self.w/self.x/self.y from a root ntuple (global features only)."""
    from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
    import numpy
    from DeepJetCore.stopwatch import stopwatch
    import ROOT

    timer = stopwatch()
    total_timer = stopwatch()

    fileTimeOut(filename, 120)  # give eos a minute to recover
    root_file = ROOT.TFile(filename)
    tree = root_file.Get("deepntuplizer/tree")
    self.nsamples = tree.GetEntries()
    print('took ', timer.getAndReset(), ' seconds for getting tree entries')

    # split for convolutional network: only the flat per-jet block here,
    # without normalisation (second argument None)
    x_global = MeanNormZeroPad(filename, None,
                               [self.branches[0]], [self.branchcutoffs[0]],
                               self.nsamples)
    print('took ', timer.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    data = self.readTreeFromRootToTuple(filename)
    alltruth = self.reduceTruth(data[self.truthclasses])

    surviving = x_global.shape[0]
    print('reduced content to ', int(float(surviving) / float(self.nsamples) * 100), '%')
    self.nsamples = surviving
    print(x_global.shape, self.nsamples)

    self.w = []
    self.x = [x_global]
    self.y = [alltruth]
def __writeData(self, sample, outputDir):
    """Convert a single source sample to .djctd and register it.

    Writes the converted file next to outputDir, appends its name to
    self.samples and, outside batch mode, snapshots the collection state.
    """
    timer = stopwatch()
    converter = self.dataclass()
    fileTimeOut(sample, 120)  # wait for the (possibly remote) file
    #once available copy to ram

    base = os.path.basename(sample)
    newname = base[:base.rfind('.')] + '.djctd'
    newpath = os.path.abspath(outputDir + newname)

    converter.writeFromSourceFile(sample,
                                  self.weighterobjects,
                                  istraining=not self.istestdata,
                                  outname=newpath)
    print('converted and written ' + newname + ' in ', timer.getAndReset(), ' sec')

    self.samples.append(newname)
    converter.clear()
    if not self.batch_mode:
        self.writeToFile(outputDir + '/snapshot.djcdc')
def convertFromSourceFile(self, filename, weighterobjects, istraining):
    # Function to produce the numpy training arrays from root files
    """Build lepton-ID training arrays from one root file.

    Returns ([x_global, x_pfCand_neutral, x_pfCand_charged, x_pfCand_photon,
    x_pfCand_electron, x_pfCand_muon, x_pfCand_SV], [truth], []).
    Truth columns are the three lep_is*Id_Training branches stacked to
    (n, 3). In remove mode (training only) jets flagged by the weighter are
    dropped; non-finite feature values are zeroed at the end.
    """
    from DeepJetCore.Weighter import Weighter
    from DeepJetCore.stopwatch import stopwatch
    sw = stopwatch()
    swall = stopwatch()  # unused apart from symmetry with the other converters
    if not istraining:
        self.remove = False

    #def reduceTruth(uproot_arrays):
    #    #import numpy as np
    #    prompt = uproot_arrays[b'lep_isPromptId_Training']
    #    nonPrompt = uproot_arrays[b'lep_isNonPromptId_Training']
    #    fake = uproot_arrays[b'lep_isFakeId_Training']
    #    print (prompt, nonPrompt, fake)
    #    return np.vstack((prompt, nonPrompt, fake)).transpose()
    #    #return np.concatenate( [ prompt, nonPrompt, fake] )

    print('reading '+filename)

    import ROOT
    from root_numpy import tree2array, root2array
    fileTimeOut(filename,120) #give eos a minute to recover
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("tree")
    self.nsamples = tree.GetEntries()

    # user code, example works with the example 2D images in root format generated by make_example_data
    from DeepJetCore.preprocessing import MeanNormZeroPad,MeanNormZeroPadParticles
    # flat per-lepton block plus zero-padded per-candidate blocks
    # (no normalisation: second argument None)
    x_global = MeanNormZeroPad(filename,None, [self.global_branches], [1],self.nsamples)
    x_pfCand_neutral = MeanNormZeroPadParticles(filename,None, self.pfCand_neutral_branches, self.npfCand_neutral,self.nsamples)
    x_pfCand_charged = MeanNormZeroPadParticles(filename,None, self.pfCand_charged_branches, self.npfCand_charged,self.nsamples)
    x_pfCand_photon = MeanNormZeroPadParticles(filename,None, self.pfCand_photon_branches, self.npfCand_photon,self.nsamples)
    x_pfCand_electron = MeanNormZeroPadParticles(filename,None, self.pfCand_electron_branches, self.npfCand_electron,self.nsamples)
    x_pfCand_muon = MeanNormZeroPadParticles(filename,None, self.pfCand_muon_branches, self.npfCand_muon,self.nsamples)
    x_pfCand_SV = MeanNormZeroPadParticles(filename,None, self.SV_branches, self.nSV,self.nsamples)

    #import uproot3 as uproot
    #urfile = uproot.open(filename)["tree"]
    #truth_arrays = urfile.arrays(self.truth_branches)
    #truth = reduceTruth(truth_arrays)
    #truth = truth.astype(dtype='float32', order='C') #important, float32 and C-type!

    # truth: stack the three one-hot id columns into shape (n, 3)
    import uproot3 as uproot
    urfile = uproot.open(filename)["tree"]
    truth = np.concatenate([np.expand_dims(urfile.array("lep_isPromptId_Training"), axis=1) ,
                            np.expand_dims(urfile.array("lep_isNonPromptId_Training"), axis=1),
                            np.expand_dims(urfile.array("lep_isFakeId_Training"), axis=1)],axis=1)
    truth = truth.astype(dtype='float32', order='C') #important, float32 and C-type!
    x_global = x_global.astype(dtype='float32', order='C')
    x_pfCand_neutral = x_pfCand_neutral.astype(dtype='float32', order='C')
    x_pfCand_charged = x_pfCand_charged.astype(dtype='float32', order='C')
    x_pfCand_photon = x_pfCand_photon.astype(dtype='float32', order='C')
    x_pfCand_electron = x_pfCand_electron.astype(dtype='float32', order='C')
    x_pfCand_muon = x_pfCand_muon.astype(dtype='float32', order='C')
    x_pfCand_SV = x_pfCand_SV.astype(dtype='float32', order='C')

    if self.remove:
        # re-read only the branches the weighter needs to build the keep-mask
        b = [self.weightbranchX,self.weightbranchY]
        b.extend(self.truth_branches)
        b.extend(self.undefTruth)
        fileTimeOut(filename, 120)
        for_remove = root2array(
            filename,
            treename = "tree",
            stop = None,
            branches = b
        )
        # NOTE: 'weigther' is the (misspelled) key used when the dict is built
        notremoves=weighterobjects['weigther'].createNotRemoveIndices(for_remove)
        #undef=for_remove['isUndefined']
        #notremoves-=undef
        print('took ', sw.getAndReset(), ' to create remove indices')
        #if counter_all == 0:
        #    notremoves = list(np.ones(np.shape(notremoves)))

    if self.remove:
        #print('remove')
        print ("notremoves", notremoves, "<- notremoves")
        # drop flagged entries from every aligned array
        x_global = x_global[notremoves > 0]
        x_pfCand_neutral = x_pfCand_neutral[notremoves > 0]
        x_pfCand_charged = x_pfCand_charged[notremoves > 0]
        x_pfCand_photon = x_pfCand_photon[notremoves > 0]
        x_pfCand_electron = x_pfCand_electron[notremoves > 0]
        x_pfCand_muon = x_pfCand_muon[notremoves > 0]
        x_pfCand_SV = x_pfCand_SV[notremoves > 0]
        truth = truth[notremoves > 0]
        newnsamp=x_global.shape[0]
        print('reduced content to ', int(float(newnsamp)/float(self.nsamples)*100),'%')

    #print(x_global)
    #print(x_pfCand_neutral)
    #print(x_pfCand_charged)
    #print(x_pfCand_photon)
    #print(x_pfCand_electron)
    #print(x_pfCand_muon)
    #print(x_pfCand_SV)

    # replace any non-finite feature values by zero
    print('remove nans')
    x_global = np.where(np.isfinite(x_global) , x_global, 0)
    x_pfCand_neutral = np.where(np.isfinite(x_pfCand_neutral), x_pfCand_neutral, 0)
    x_pfCand_charged = np.where(np.isfinite(x_pfCand_charged), x_pfCand_charged, 0)
    x_pfCand_photon = np.where(np.isfinite(x_pfCand_photon), x_pfCand_photon, 0)
    x_pfCand_electron = np.where(np.isfinite(x_pfCand_electron), x_pfCand_electron, 0)
    x_pfCand_muon = np.where(np.isfinite(x_pfCand_muon), x_pfCand_muon, 0)
    x_pfCand_SV = np.where(np.isfinite(x_pfCand_SV), x_pfCand_SV, 0)

    return [x_global, x_pfCand_neutral, x_pfCand_charged, x_pfCand_photon, x_pfCand_electron, x_pfCand_muon, x_pfCand_SV], [truth], []
def convertFromSourceFile(self, filename, weighterobjects, istraining):
    # Function to produce the numpy training arrays from root files
    """Build the (global-features only) DeepJet training arrays from one file.

    Returns ([x_global], [truth], []). Truth is reduced to four classes:
    b(+leptonic b), bb(+gluon bb), c(+cc+gluon cc), light(uds+g). In remove
    mode (training only) jets flagged by the weighter or marked isUndefined
    are dropped; out-of-range / non-finite features are zeroed at the end.
    """
    from DeepJetCore.Weighter import Weighter
    from DeepJetCore.stopwatch import stopwatch
    sw = stopwatch()
    swall = stopwatch()  # unused apart from symmetry with the other converters
    if not istraining:
        self.remove = False

    def reduceTruth(uproot_arrays):
        # collapse the fine-grained flavour flags into four truth columns
        b = uproot_arrays[b'isB']
        bb = uproot_arrays[b'isBB']
        gbb = uproot_arrays[b'isGBB']
        bl = uproot_arrays[b'isLeptonicB']
        blc = uproot_arrays[b'isLeptonicB_C']
        lepb = bl + blc
        c = uproot_arrays[b'isC']
        cc = uproot_arrays[b'isCC']
        gcc = uproot_arrays[b'isGCC']
        ud = uproot_arrays[b'isUD']
        s = uproot_arrays[b'isS']
        uds = ud + s
        g = uproot_arrays[b'isG']
        return np.vstack(
            (b + lepb, bb + gbb, c + cc + gcc, uds + g)).transpose()

    print('reading ' + filename)

    import ROOT
    from root_numpy import tree2array, root2array
    fileTimeOut(filename, 120) #give eos a minute to recover
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("deepntuplizer/tree")
    self.nsamples = tree.GetEntries()

    # user code, example works with the example 2D images in root format generated by make_example_data
    from DeepJetCore.preprocessing import MeanNormZeroPad, MeanNormZeroPadParticles
    # all branch groups flattened into a single normalised feature block
    x_global = MeanNormZeroPad(filename, weighterobjects['means'], [
        self.global_branches, self.track_branches, self.eta_rel_branches,
        self.vtx_branches
    ], [1, self.n_track, self.n_eta_rel, self.n_vtx], self.nsamples)

    import uproot3 as uproot
    urfile = uproot.open(filename)["deepntuplizer/tree"]
    truth_arrays = urfile.arrays(self.truth_branches)
    truth = reduceTruth(truth_arrays)
    truth = truth.astype(dtype='float32', order='C') #important, float32 and C-type!

    x_global = x_global.astype(dtype='float32', order='C')

    if self.remove:
        # re-read only the branches the weighter needs for the keep-mask
        b = [self.weightbranchX, self.weightbranchY]
        b.extend(self.truth_branches)
        b.extend(self.undefTruth)
        fileTimeOut(filename, 120)
        for_remove = root2array(filename,
                                treename="deepntuplizer/tree",
                                stop=None,
                                branches=b)
        # NOTE: 'weigther' is the (misspelled) key used when the dict is built
        notremoves = weighterobjects['weigther'].createNotRemoveIndices(
            for_remove)
        # also drop jets with undefined truth
        undef = for_remove['isUndefined']
        notremoves -= undef
        print('took ', sw.getAndReset(), ' to create remove indices')

    if self.remove:
        print('remove')
        x_global = x_global[notremoves > 0]
        truth = truth[notremoves > 0]
        newnsamp = x_global.shape[0]
        print('reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')

    # zero out non-finite or absurdly large feature values
    print('remove nans')
    x_global = np.where(
        np.logical_and(np.isfinite(x_global), (np.abs(x_global) < 100000.0)),
        x_global, 0)
    return [x_global], [truth], []
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Fill self.w/self.x/self.y from a root ntuple for the 4-block network.

    Features: normalised global block plus zero-padded cpf/npf/sv particle
    blocks. Weights come from the weighter (self.weight), from the keep-mask
    (self.remove), or default to 1. In remove mode, entries flagged by the
    weighter or marked isUndefined are dropped from all aligned arrays.
    """
    from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
    import numpy
    from DeepJetCore.stopwatch import stopwatch
    sw = stopwatch()
    swall = stopwatch()  # unused apart from symmetry with the other readers
    import ROOT
    fileTimeOut(filename, 120) #give eos a minute to recover
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("deepntuplizer/tree")
    self.nsamples = tree.GetEntries()
    print('took ', sw.getAndReset(), ' seconds for getting tree entries')

    # split for convolutional network
    x_global = MeanNormZeroPad(filename, TupleMeanStd, [self.branches[0]],
                               [self.branchcutoffs[0]], self.nsamples)
    x_cpf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                     self.branches[1], self.branchcutoffs[1],
                                     self.nsamples)
    x_npf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                     self.branches[2], self.branchcutoffs[2],
                                     self.nsamples)
    x_sv = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                    self.branches[3], self.branchcutoffs[3],
                                    self.nsamples)
    print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    Tuple = self.readTreeFromRootToTuple(filename)
    if self.remove:
        # keep-mask from the weighter; jets with undefined truth also dropped
        notremoves = weighter.createNotRemoveIndices(Tuple)
        undef = Tuple['isUndefined']
        notremoves -= undef
        print('took ', sw.getAndReset(), ' to create remove indices')

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        # in remove mode the 0/1 keep-mask doubles as the weight array
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.empty(self.nsamples)
        weights.fill(1.)

    truthtuple = Tuple[self.truthclasses]
    #print(self.truthclasses)
    alltruth = self.reduceTruth(truthtuple)

    #print(alltruth.shape)
    if self.remove:
        print('remove')
        # drop flagged entries from every aligned array
        weights = weights[notremoves > 0]
        x_global = x_global[notremoves > 0]
        x_cpf = x_cpf[notremoves > 0]
        x_npf = x_npf[notremoves > 0]
        x_sv = x_sv[notremoves > 0]
        alltruth = alltruth[notremoves > 0]

    newnsamp = x_global.shape[0]
    print('reduced content to ',
          int(float(newnsamp) / float(self.nsamples) * 100), '%')
    self.nsamples = newnsamp
    print(x_global.shape, self.nsamples)

    self.w = [weights]
    self.x = [x_global, x_cpf, x_npf, x_sv]
    self.y = [alltruth]
def convertFromSourceFile(self, filename, weighterobjects, istraining):
    # Function to produce the numpy training arrays from root files
    """Build global-feature training arrays from an 'ttree'-style ntuple.

    Same flow as the deepntuplizer converter, but reads with uproot3/awkward
    (uproot_tree_to_numpy) instead of the C MeanNormZeroPad module, uses
    'Jet_'-prefixed truth branches remapped through map_prefix, and carries
    extensive DEBUGGING prints. Returns ([x_global], [truth], []).
    """
    from DeepJetCore.Weighter import Weighter
    from DeepJetCore.stopwatch import stopwatch
    sw = stopwatch()
    swall = stopwatch()  # unused apart from symmetry with the other converters
    if not istraining:
        self.remove = False

    def reduceTruth(uproot_arrays):
        # collapse the fine-grained flavour flags into four truth columns;
        # branch names go through map_prefix and back to bytes keys
        b = uproot_arrays[str.encode(map_prefix(b'Jet_isB'))]
        bb = uproot_arrays[str.encode(map_prefix(b'Jet_isBB'))]
        gbb = uproot_arrays[str.encode(map_prefix(b'Jet_isGBB'))]
        bl = uproot_arrays[str.encode(map_prefix(b'Jet_isLeptonicB'))]
        blc = uproot_arrays[str.encode(map_prefix(b'Jet_isLeptonicB_C'))]
        lepb = bl + blc
        c = uproot_arrays[str.encode(map_prefix(b'Jet_isC'))]
        cc = uproot_arrays[str.encode(map_prefix(b'Jet_isCC'))]
        gcc = uproot_arrays[str.encode(map_prefix(b'Jet_isGCC'))]
        ud = uproot_arrays[str.encode(map_prefix(b'Jet_isUD'))]
        s = uproot_arrays[str.encode(map_prefix(b'Jet_isS'))]
        uds = ud + s
        g = uproot_arrays[str.encode(map_prefix(b'Jet_isG'))]
        return np.vstack(
            (b + lepb, bb + gbb, c + cc + gcc, uds + g)).transpose()

    print('reading ' + filename)

    import ROOT
    from root_numpy import tree2array, root2array
    fileTimeOut(filename, 600) #give eos a minute to recover
    rfile = ROOT.TFile(filename)
    # tree = rfile.Get("ttree")
    # self.nsamples = tree.GetEntries()
    # from IPython import embed;embed()
    # entry count comes from uproot3 here, not from the ROOT file object
    tree = u3.open(filename)["ttree"]
    self.nsamples = tree.numentries
    print("Nsamples: {}".format(self.nsamples))

    # user code, example works with the example 2D images in root format generated by make_example_data
    from DeepJetCore.preprocessing import MeanNormZeroPad, MeanNormZeroPadParticles
    for obj in [
            filename, weighterobjects['means'],
            [
                self.global_branches, self.track_branches,
                self.eta_rel_branches, self.vtx_branches
            ], [1, self.n_track, self.n_eta_rel, self.n_vtx], self.nsamples
    ]:
        print("DEBUGGING:\t{}".format(type(obj)))
    print("DEBUGGING:\n\tPrinting MeanNormZeroPad arguments:")
    print("\t{}\n\t{}\n\t{}".format(filename, weighterobjects['means'],
                                    self.nsamples))

    print("reading in with new uproot+awkward function")
    nparr = uproot_tree_to_numpy(
        filename,
        weighterobjects['means'], [
            self.global_branches, self.track_branches, self.eta_rel_branches,
            self.vtx_branches
        ], [1, self.n_track, self.n_eta_rel, self.n_vtx],
        self.nsamples,
        treename="ttree")
    print("succesfully created numpy array")
    x_global = nparr
    # x_global = MeanNormZeroPad(filename,weighterobjects['means'],
    #                            [self.global_branches,self.track_branches,self.eta_rel_branches,self.vtx_branches],
    #                            [1,self.n_track,self.n_eta_rel,self.n_vtx],self.nsamples)

    print("opening file with uproot")
    import uproot3 as uproot
    urfile = uproot.open(filename)["ttree"]
    truth_arrays = urfile.arrays(self.truth_branches)
    print("truth_branches:")
    print(self.truth_branches)
    print("truth_arrays:")
    print(truth_arrays)
    truth = reduceTruth(truth_arrays)
    truth = truth.astype(dtype='float32', order='C') #important, float32 and C-type!

    x_global = x_global.astype(dtype='float32', order='C')

    if self.remove:
        # re-read only the branches the weighter needs for the keep-mask
        b = [self.weightbranchX, self.weightbranchY]
        b.extend(self.truth_branches)
        b.extend(self.undefTruth)
        fileTimeOut(filename, 120)
        for_remove = uproot_root2array(filename,
                                       treename="ttree",
                                       stop=None,
                                       branches=b)
        # NOTE: 'weigther' is the (misspelled) key used when the dict is built
        notremoves = weighterobjects['weigther'].createNotRemoveIndices(
            for_remove)
        # also drop jets with undefined truth
        undef = for_remove['Jet_isUndefined']
        print("\nundef:")
        print(undef)
        print("undef dtype: ", undef.dtype)
        print()
        print(notremoves)
        notremoves -= np.array(undef, dtype=np.float32)
        print('took ', sw.getAndReset(), ' to create remove indices')

    if self.remove:
        print('remove')
        x_global = x_global[notremoves > 0]
        truth = truth[notremoves > 0]
        newnsamp = x_global.shape[0]
        print('reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')

    # zero out non-finite or absurdly large feature values
    print('remove nans')
    x_global = np.where(
        np.logical_and(np.isfinite(x_global), (np.abs(x_global) < 100000.0)),
        x_global, 0)
    return [x_global], [truth], []
def convertFromSourceFile(self, filename, weighterobjects, istraining):
    # Function to produce the numpy training arrays from root files
    """Build lepton training arrays from one root file.

    Returns ([x_global, x_pfCand_neutral, x_pfCand_charged, x_pfCand_photon,
    x_pfCand_electron, x_pfCand_muon, x_pfCand_SV], [truth], []): a flat
    global block plus zero-padded per-candidate blocks (no normalisation),
    with the truth branches stacked column-wise. In remove mode (training
    only) entries flagged by the weighter are dropped, and non-finite
    feature values are zeroed.
    """
    from DeepJetCore.Weighter import Weighter
    from DeepJetCore.stopwatch import stopwatch
    sw = stopwatch()
    if not istraining:
        self.remove = False

    print('reading ' + filename)

    import ROOT
    from root_numpy import tree2array, root2array
    fileTimeOut(filename, 120)  # give eos a minute to recover
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("tree")
    self.nsamples = tree.GetEntries()

    # user code, example works with the example 2D images in root format generated by make_example_data
    from DeepJetCore.preprocessing import MeanNormZeroPad, MeanNormZeroPadParticles
    print('padding ' + filename)
    x_global = MeanNormZeroPad(
        filename,
        None,  # 2nd argument None: means no normalisation
        [self.global_branches],
        [1],
        self.nsamples)
    x_pfCand_neutral = MeanNormZeroPadParticles(filename, None,
                                                self.pfCand_neutral_branches,
                                                self.npfCand_neutral,
                                                self.nsamples)
    x_pfCand_charged = MeanNormZeroPadParticles(filename, None,
                                                self.pfCand_charged_branches,
                                                self.npfCand_charged,
                                                self.nsamples)
    x_pfCand_photon = MeanNormZeroPadParticles(filename, None,
                                               self.pfCand_photon_branches,
                                               self.npfCand_photon,
                                               self.nsamples)
    x_pfCand_electron = MeanNormZeroPadParticles(
        filename, None, self.pfCand_electron_branches, self.npfCand_electron,
        self.nsamples)
    x_pfCand_muon = MeanNormZeroPadParticles(filename, None,
                                             self.pfCand_muon_branches,
                                             self.npfCand_muon, self.nsamples)
    x_pfCand_SV = MeanNormZeroPadParticles(filename, None, self.SV_branches,
                                           self.nSV, self.nsamples)

    import uproot3 as uproot
    urfile = uproot.open(filename)["tree"]
    # one (n, 1) column per truth branch, concatenated to (n, n_classes);
    # comprehension replaces the previous manual append loop
    mytruth = [
        np.expand_dims(urfile.array(arr), axis=1)
        for arr in self.truth_branches
    ]
    truth = np.concatenate(mytruth, axis=1)

    # important, float32 and C-type!
    truth = truth.astype(dtype='float32', order='C')
    x_global = x_global.astype(dtype='float32', order='C')
    x_pfCand_neutral = x_pfCand_neutral.astype(dtype='float32', order='C')
    x_pfCand_charged = x_pfCand_charged.astype(dtype='float32', order='C')
    x_pfCand_photon = x_pfCand_photon.astype(dtype='float32', order='C')
    x_pfCand_electron = x_pfCand_electron.astype(dtype='float32', order='C')
    x_pfCand_muon = x_pfCand_muon.astype(dtype='float32', order='C')
    x_pfCand_SV = x_pfCand_SV.astype(dtype='float32', order='C')

    if self.remove:
        # re-read only the branches the weighter needs for the keep-mask
        b = [self.weightbranchX, self.weightbranchY]
        b.extend(self.truth_branches)
        b.extend(self.undefTruth)
        fileTimeOut(filename, 120)
        for_remove = root2array(  # returns a structured np array
            filename,
            treename="tree",
            stop=None,
            branches=b)
        # NOTE: 'weigther' is the (misspelled) key used when the dict is built
        notremoves = weighterobjects['weigther'].createNotRemoveIndices(
            for_remove)
        print('took ', sw.getAndReset(), ' to create remove indices')

    if self.remove:
        # drop flagged entries from every aligned array
        x_global = x_global[notremoves > 0]
        x_pfCand_neutral = x_pfCand_neutral[notremoves > 0]
        x_pfCand_charged = x_pfCand_charged[notremoves > 0]
        x_pfCand_photon = x_pfCand_photon[notremoves > 0]
        x_pfCand_electron = x_pfCand_electron[notremoves > 0]
        x_pfCand_muon = x_pfCand_muon[notremoves > 0]
        x_pfCand_SV = x_pfCand_SV[notremoves > 0]
        truth = truth[notremoves > 0]
        newnsamp = x_global.shape[0]
        print('Weighter reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')

    # replace any non-finite feature values by zero
    print('removing nans')
    x_global = np.where(np.isfinite(x_global), x_global, 0)
    x_pfCand_neutral = np.where(np.isfinite(x_pfCand_neutral),
                                x_pfCand_neutral, 0)
    x_pfCand_charged = np.where(np.isfinite(x_pfCand_charged),
                                x_pfCand_charged, 0)
    x_pfCand_photon = np.where(np.isfinite(x_pfCand_photon), x_pfCand_photon,
                               0)
    x_pfCand_electron = np.where(np.isfinite(x_pfCand_electron),
                                 x_pfCand_electron, 0)
    x_pfCand_muon = np.where(np.isfinite(x_pfCand_muon), x_pfCand_muon, 0)
    x_pfCand_SV = np.where(np.isfinite(x_pfCand_SV), x_pfCand_SV, 0)

    return [
        x_global, x_pfCand_neutral, x_pfCand_charged, x_pfCand_photon,
        x_pfCand_electron, x_pfCand_muon, x_pfCand_SV
    ], [truth], []
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Fill self.w/self.x/self.y for the image-based class+pt-regression net.

    Features: normalised global block plus a 20x20 eta-phi image built from
    charged/neutral candidate density and count maps (concatenated along the
    channel axis) and the reconstructed pt. Truth: reduced classes plus the
    generator pt regression target.
    """
    from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, createDensityMap, createCountMap, MeanNormZeroPadParticles
    import numpy
    from DeepJetCore.stopwatch import stopwatch
    sw = stopwatch()
    swall = stopwatch()  # unused apart from symmetry with the other readers
    import ROOT
    fileTimeOut(filename, 120) #give eos a minute to recover
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("deepntuplizer/tree")
    self.nsamples = tree.GetEntries()
    print('took ', sw.getAndReset(), ' seconds for getting tree entries')

    # split for convolutional network
    x_global = MeanNormZeroPad(filename, TupleMeanStd, [self.branches[0]],
                               [self.branchcutoffs[0]], self.nsamples)

    #here the difference starts
    # 20x20 maps in (eta, phi) around the jet axis, half-width 0.5:
    # puppi-weighted ptrel density and candidate counts, charged and neutral
    x_chmap = createDensityMap(filename, TupleMeanStd, 'Cpfcan_ptrel',
                               self.nsamples,
                               ['Cpfcan_eta', 'jet_eta', 20, 0.5],
                               ['Cpfcan_phi', 'jet_phi', 20, 0.5],
                               'nCpfcand', -1,
                               weightbranch='Cpfcan_puppiw')
    x_chcount = createCountMap(filename, TupleMeanStd, self.nsamples,
                               ['Cpfcan_eta', 'jet_eta', 20, 0.5],
                               ['Cpfcan_phi', 'jet_phi', 20, 0.5],
                               'nCpfcand')
    x_neumap = createDensityMap(filename, TupleMeanStd, 'Npfcan_ptrel',
                                self.nsamples,
                                ['Npfcan_eta', 'jet_eta', 20, 0.5],
                                ['Npfcan_phi', 'jet_phi', 20, 0.5],
                                'nNpfcand', -1,
                                weightbranch='Npfcan_puppiw')
    x_neucount = createCountMap(filename, TupleMeanStd, self.nsamples,
                                ['Npfcan_eta', 'jet_eta', 20, 0.5],
                                ['Npfcan_phi', 'jet_phi', 20, 0.5],
                                'nNpfcand')
    print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    Tuple = self.readTreeFromRootToTuple(filename)
    if self.remove:
        # keep-mask from the weighter; jets with undefined truth also dropped
        notremoves = weighter.createNotRemoveIndices(Tuple)
        undef = Tuple['isUndefined']
        notremoves -= undef
        print('took ', sw.getAndReset(), ' to create remove indices')

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        # in remove mode the 0/1 keep-mask doubles as the weight array
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.ones(self.nsamples)

    # regression target/reference (branch names configured on the class)
    pttruth = Tuple[self.regtruth]
    ptreco = Tuple[self.regreco]
    truthtuple = Tuple[self.truthclasses]
    #print(self.truthclasses)
    alltruth = self.reduceTruth(truthtuple)
    # stack the four maps as image channels (last axis)
    x_map = numpy.concatenate((x_chmap, x_chcount, x_neumap, x_neucount),
                              axis=3)

    #print(alltruth.shape)
    if self.remove:
        print('remove')
        # drop flagged entries from every aligned array
        weights = weights[notremoves > 0]
        x_global = x_global[notremoves > 0]
        x_map = x_map[notremoves > 0]
        alltruth = alltruth[notremoves > 0]
        pttruth = pttruth[notremoves > 0]
        ptreco = ptreco[notremoves > 0]

    newnsamp = x_global.shape[0]
    print('reduced content to ',
          int(float(newnsamp) / float(self.nsamples) * 100), '%')
    self.nsamples = newnsamp
    print(x_global.shape, self.nsamples)

    self.w = [weights]
    self.x = [x_global, x_map, ptreco]
    self.y = [alltruth, pttruth]
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Fill self.w/self.x/self.y for the particle+image class+pt network.

    Features: normalised global/cpf/npf/sv blocks plus an 8x8 eta-phi image
    of charged/neutral density and count maps, and the reconstructed pt.
    Truth: reduced classes plus gen_pt_WithNu as regression target.
    """
    from DeepJetCore.preprocessing import MeanNormApply, createCountMap, createDensity, MeanNormZeroPad, createDensityMap, MeanNormZeroPadParticles
    import numpy
    from DeepJetCore.stopwatch import stopwatch
    sw = stopwatch()
    swall = stopwatch()
    import ROOT
    fileTimeOut(filename, 120)  #give eos a minute to recover
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("deepntuplizer/tree")
    self.nsamples = tree.GetEntries()
    print('took ', sw.getAndReset(), ' seconds for getting tree entries')

    # split for convolutional network
    x_global = MeanNormZeroPad(filename, TupleMeanStd, [self.branches[0]],
                               [self.branchcutoffs[0]], self.nsamples)
    x_cpf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                     self.branches[1], self.branchcutoffs[1],
                                     self.nsamples)
    x_npf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                     self.branches[2], self.branchcutoffs[2],
                                     self.nsamples)
    x_sv = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                    self.branches[3], self.branchcutoffs[3],
                                    self.nsamples)

    #here the difference starts
    nbins = 8

    x_chmap = createDensity(
        filename,
        inbranches=['Cpfcan_ptrel', 'Cpfcan_etarel', 'Cpfcan_phirel'],
        modes=['sum', 'average', 'average'],
        nevents=self.nsamples,
        dimension1=['Cpfcan_eta', 'jet_eta', nbins, 0.45],
        dimension2=['Cpfcan_phi', 'jet_phi', nbins, 0.45],
        counterbranch='nCpfcand',
        offsets=[-1, -0.5, -0.5])

    x_neumap = createDensity(
        filename,
        inbranches=['Npfcan_ptrel', 'Npfcan_etarel', 'Npfcan_phirel'],
        modes=['sum', 'average', 'average'],
        nevents=self.nsamples,
        dimension1=['Npfcan_eta', 'jet_eta', nbins, 0.45],
        dimension2=['Npfcan_phi', 'jet_phi', nbins, 0.45],
        # BUGFIX: was 'nCpfcand' (copy-paste from the charged map above);
        # the neutral-candidate density must use the neutral counter, as the
        # neutral count map below already does
        counterbranch='nNpfcand',
        offsets=[-1, -0.5, -0.5])

    x_chcount = createCountMap(filename, TupleMeanStd, self.nsamples,
                               ['Cpfcan_eta', 'jet_eta', nbins, 0.45],
                               ['Cpfcan_phi', 'jet_phi', nbins, 0.45],
                               'nCpfcand')
    x_neucount = createCountMap(filename, TupleMeanStd, self.nsamples,
                                ['Npfcan_eta', 'jet_eta', nbins, 0.45],
                                ['Npfcan_phi', 'jet_phi', nbins, 0.45],
                                'nNpfcand')
    print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    Tuple = self.readTreeFromRootToTuple(filename)
    if self.remove:
        # keep-mask from the weighter; jets with undefined truth also dropped
        notremoves = weighter.createNotRemoveIndices(Tuple)
        undef = Tuple['isUndefined']
        notremoves -= undef
        print('took ', sw.getAndReset(), ' to create remove indices')

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        # in remove mode the 0/1 keep-mask doubles as the weight array
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.empty(self.nsamples)
        weights.fill(1.)

    truthtuple = Tuple[self.truthclasses]
    #print(self.truthclasses)
    alltruth = self.reduceTruth(truthtuple)
    regtruth = Tuple['gen_pt_WithNu']
    regreco = Tuple['jet_corr_pt']

    #print(alltruth.shape)
    if self.remove:
        print('remove')
        # drop flagged entries from every aligned array
        weights = weights[notremoves > 0]
        x_global = x_global[notremoves > 0]
        x_cpf = x_cpf[notremoves > 0]
        x_npf = x_npf[notremoves > 0]
        x_sv = x_sv[notremoves > 0]
        x_chmap = x_chmap[notremoves > 0]
        x_neumap = x_neumap[notremoves > 0]
        x_chcount = x_chcount[notremoves > 0]
        x_neucount = x_neucount[notremoves > 0]
        alltruth = alltruth[notremoves > 0]
        regreco = regreco[notremoves > 0]
        regtruth = regtruth[notremoves > 0]

    newnsamp = x_global.shape[0]
    print('reduced content to ',
          int(float(newnsamp) / float(self.nsamples) * 100), '%')
    self.nsamples = newnsamp

    # stack the four maps as image channels (last axis)
    x_map = numpy.concatenate((x_chmap, x_neumap, x_chcount, x_neucount),
                              axis=3)

    self.w = [weights, weights]
    self.x = [x_global, x_cpf, x_npf, x_sv, x_map, regreco]
    self.y = [alltruth, regtruth]