示例#1
0
 def getFlavourClassificationData(self,filename,TupleMeanStd, weighter):
     """Read one root file and build (weights, features, truth) arrays.

     Parameters
     ----------
     filename : str
         Path to the input root file.
     TupleMeanStd :
         Mean/std normalisation object forwarded to MeanNormZeroPad.
     weighter :
         Weighter used either to build remove indices (self.remove) or
         per-jet weights (self.weight).

     Returns
     -------
     tuple
         (weights, x_all, alltruth, notremoves); notremoves stays an empty
         array unless self.remove is set.
     """
     # local numpy import for consistency with the sibling converters,
     # which all import it inside the function
     import numpy
     from DeepJetCore.stopwatch import stopwatch

     sw=stopwatch()
     swall=stopwatch()

     import ROOT

     fileTimeOut(filename,120) #give eos a minute to recover
     rfile = ROOT.TFile(filename)
     tree = rfile.Get(self.treename)
     self.nsamples=tree.GetEntries()

     Tuple = self.readTreeFromRootToTuple(filename)

     # mean-normalised, zero-padded feature block (C module)
     x_all = MeanNormZeroPad(filename,TupleMeanStd,self.branches,self.branchcutoffs,self.nsamples)

     notremoves=numpy.array([])
     weights=numpy.array([])
     if self.remove:
         # the 0/1 keep-flags double as weights until filtering below
         notremoves=weighter.createNotRemoveIndices(Tuple)
         weights=notremoves
     elif self.weight:
         weights= weighter.getJetWeights(Tuple)
     else:
         print('neither remove nor weight')
         weights=numpy.empty(self.nsamples)
         weights.fill(1.)

     truthtuple =  Tuple[self.truthclasses]
     alltruth=self.reduceTruth(truthtuple)

     if self.remove:
         # drop every entry whose keep-flag is 0
         weights=weights[notremoves > 0]
         x_all=x_all[notremoves > 0]
         alltruth=alltruth[notremoves > 0]

     newnsamp=x_all.shape[0]
     self.nsamples = newnsamp

     return weights,x_all,alltruth, notremoves
示例#2
0
    def convertFromSourceFile(self, filename, weighterobjects, istraining):
        """Convert one root source file into training arrays.

        Returns ([x_global, x_cpf, x_npf, x_sv], [alltruth], []): a flat
        global feature block plus charged-PF, neutral-PF and secondary-vertex
        particle blocks, and the reduced truth labels.
        """
        from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from DeepJetCore.stopwatch import stopwatch

        step_clock = stopwatch()
        total_clock = stopwatch()

        import ROOT

        fileTimeOut(filename, 120)  #give eos a minute to recover
        root_file = ROOT.TFile(filename)
        ntuple_tree = root_file.Get("deepntuplizer/tree")
        self.nsamples = ntuple_tree.GetEntries()

        print('took ', step_clock.getAndReset(), ' seconds for getting tree entries')

        # split for convolutional network: the flat block goes through
        # MeanNormZeroPad, the per-particle blocks through
        # MeanNormZeroPadParticles (branch index 1..3, in order)
        x_global = MeanNormZeroPad(filename, None,
                                   [self.branches[0]],
                                   [self.branchcutoffs[0]], self.nsamples)

        x_cpf, x_npf, x_sv = (
            MeanNormZeroPadParticles(filename, None,
                                     self.branches[idx],
                                     self.branchcutoffs[idx], self.nsamples)
            for idx in (1, 2, 3)
        )

        print('took ', step_clock.getAndReset(), ' seconds for mean norm and zero padding (C module)')

        full_tuple = self.readTreeFromRootToTuple(filename)

        truth_columns = full_tuple[self.truthclasses]
        alltruth = self.reduceTruth(truth_columns)

        print(x_global.shape, self.nsamples)

        return [x_global, x_cpf, x_npf, x_sv], [alltruth], []
示例#3
0
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Fill self.x / self.y in place from one root file.

        Features: global block plus charged-PF, eta-rel and SV particle
        blocks (mean-normalised, zero-padded) and the reconstructed jet pt.
        Truth: reduced flavour classes plus the per-jet regression target
        gen_pt_WithNu / jet_corr_pt.
        """
        from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from DeepJetCore.stopwatch import stopwatch

        sw = stopwatch()
        swall = stopwatch()

        import ROOT

        fileTimeOut(filename, 120)  #give eos a minute to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = tree.GetEntries()

        print('took ', sw.getAndReset(), ' seconds for getting tree entries')

        # split for convolutional network

        x_global = MeanNormZeroPad(filename, None, [self.branches[0]],
                                   [self.branchcutoffs[0]], self.nsamples)

        x_cpf = MeanNormZeroPadParticles(filename, None, self.branches[1],
                                         self.branchcutoffs[1], self.nsamples)

        x_etarel = MeanNormZeroPadParticles(filename, None, self.branches[2],
                                            self.branchcutoffs[2],
                                            self.nsamples)

        x_sv = MeanNormZeroPadParticles(filename, None, self.branches[3],
                                        self.branchcutoffs[3], self.nsamples)

        print('took ', sw.getAndReset(),
              ' seconds for mean norm and zero padding (C module)')

        npy_array = self.readTreeFromRootToTuple(filename)

        reg_truth = npy_array['gen_pt_WithNu'].view(numpy.ndarray)
        reco_pt = npy_array['jet_corr_pt'].view(numpy.ndarray)

        # vectorised replacement of the former per-entry Python loop:
        # divide elementwise in the native dtype first, then cast to
        # float64 to match the numpy.zeros(...) buffer the loop filled
        correctionfactor = (reg_truth / reco_pt).astype(numpy.float64)

        truthtuple = npy_array[self.truthclasses]
        alltruth = self.reduceTruth(truthtuple)

        self.x = [x_global, x_cpf, x_etarel, x_sv, reco_pt]
        self.y = [alltruth, correctionfactor]
        self._normalize_input_(weighter, npy_array)
示例#4
0
    def convertFromSourceFile(self, filename, weighterobjects, istraining):
        """Convert the example source file into ([features], [truth], [])."""
        from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from DeepJetCore.stopwatch import stopwatch

        clock = stopwatch()
        clock_all = stopwatch()

        import ROOT

        fileTimeOut(filename, 120)  #give eos a minute to recover
        infile = ROOT.TFile(filename)
        in_tree = infile.Get("deepntuplizer/tree")
        self.nsamples = in_tree.GetEntries()

        print('took ', clock.getAndReset(), ' seconds for getting tree entries')

        # split for convolutional network: single flat branch 'x'
        x_global = MeanNormZeroPad(filename, None, ['x'], [1], self.nsamples)

        print('took ', clock.getAndReset(),
              ' seconds for mean norm and zero padding (C module)')

        data = self.readTreeFromRootToTuple(
            filename, branches=['class1', 'class2', 'x'])

        alltruth = self.reduceTruth(data[self.truthclasses])

        # the padded feature array defines the final sample count
        self.nsamples = x_global.shape[0]

        print(x_global.shape, alltruth.shape, self.nsamples)

        feat = SimpleArray(x_global, name="features0")
        truth = SimpleArray(alltruth, name="truth")

        return [feat], [truth], []
示例#5
0
        def writeData_async(index, woq, wrlck):
            """Convert one source sample in a worker and report the outcome.

            index: position in self.sourceList of the sample to convert.
            woq: writer output queue; receives (index, [success, name,
            entries]) on both success and failure.
            wrlck: write lock handed in by the caller (not used in this body).

            Closes over logger, tempstoragepath, outputDir and self from the
            enclosing scope.
            """

            logger.info('async started')

            sw = stopwatch()
            td = self.dataclass()
            sample = self.sourceList[index]

            if self.batch_mode or self.no_copy_on_convert:
                # read directly from the source location; nothing to clean up
                tmpinput = sample

                def removefile():
                    pass
            else:
                # stage the input into temp storage (presumably a ramdisk —
                # see the error message below); pid-prefixed to avoid
                # collisions between worker processes
                tmpinput = tempstoragepath + '/' + str(
                    os.getpid()) + '_tmp_' + os.path.basename(sample)

                def removefile():
                    os.system('rm -f ' + tmpinput)

                # also remove the staged copy if the process dies
                import atexit
                atexit.register(removefile)

                logger.info('start cp')
                os_ret = os.system('cp ' + sample + ' ' + tmpinput)
                if os_ret:
                    raise Exception("copy to ramdisk not successful for " +
                                    sample)

            success = False
            out_samplename = ''
            out_sampleentries = 0
            # output name: input basename with its extension swapped for .djctd
            sbasename = os.path.basename(sample)
            newname = sbasename[:sbasename.rfind('.')] + '.djctd'
            newpath = os.path.abspath(outputDir + newname)

            try:
                # NOTE(review): log message says 'convertFromSourceFile' but
                # the call below is writeFromSourceFile
                logger.info('convertFromSourceFile')
                td.writeFromSourceFile(tmpinput,
                                       self.weighterobjects,
                                       istraining=not self.istestdata,
                                       outname=newpath)
                print('converted and written ' + newname + ' in ',
                      sw.getAndReset(), ' sec -', index)

                out_samplename = newname
                out_sampleentries = 1
                success = True
                td.clear()
                removefile()
                woq.put((index, [success, out_samplename, out_sampleentries]))

            except:
                # report failure to the queue, clean up, then re-raise so the
                # worker still surfaces the original error
                print('problem in ' + newname)
                removefile()
                woq.put((index, [False, out_samplename, out_sampleentries]))
                raise
示例#6
0
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Read one root file and fill self.w / self.x / self.y in place."""
        from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from DeepJetCore.stopwatch import stopwatch

        step_timer = stopwatch()
        total_timer = stopwatch()

        import ROOT

        fileTimeOut(filename, 120)  #give eos a minute to recover
        root_file = ROOT.TFile(filename)
        ntuple = root_file.Get("deepntuplizer/tree")
        self.nsamples = ntuple.GetEntries()

        print('took ', step_timer.getAndReset(), ' seconds for getting tree entries')

        # split for convolutional network: only the flat global block here
        x_global = MeanNormZeroPad(filename, None, [self.branches[0]],
                                   [self.branchcutoffs[0]], self.nsamples)

        print('took ', step_timer.getAndReset(),
              ' seconds for mean norm and zero padding (C module)')

        full_tuple = self.readTreeFromRootToTuple(filename)

        alltruth = self.reduceTruth(full_tuple[self.truthclasses])

        # the padded feature array defines the final sample count
        newnsamp = x_global.shape[0]
        print('reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')
        self.nsamples = newnsamp

        print(x_global.shape, self.nsamples)

        self.w = []
        self.x = [x_global]
        self.y = [alltruth]
示例#7
0
    def __writeData(self, sample, outputDir):
        """Convert one source sample, append it to self.samples and (outside
        batch mode) refresh the snapshot file."""
        timer = stopwatch()
        converter = self.dataclass()

        fileTimeOut(sample,120) #once available copy to ram

        base = os.path.basename(sample)
        stem = base[:base.rfind('.')]
        newname = stem + '.djctd'
        newpath = os.path.abspath(outputDir + newname)

        converter.writeFromSourceFile(sample,
                                      self.weighterobjects,
                                      istraining=not self.istestdata,
                                      outname=newpath)

        print('converted and written '+newname+' in ',timer.getAndReset(),' sec')
        self.samples.append(newname)
        converter.clear()

        # batch mode leaves snapshot writing to the caller
        if not self.batch_mode:
            self.writeToFile(outputDir+'/snapshot.djcdc')
示例#8
0
    def convertFromSourceFile(self, filename, weighterobjects, istraining):
        """Produce the numpy training arrays from a root file.

        Reads the flat "tree", zero-pads the global block and the six
        per-candidate blocks, builds a 3-column lepton-id truth
        (prompt / non-prompt / fake), optionally drops entries flagged by
        the weighter (training only), and zeroes non-finite values.

        Returns ([x_global, x_pfCand_neutral, x_pfCand_charged,
        x_pfCand_photon, x_pfCand_electron, x_pfCand_muon, x_pfCand_SV],
        [truth], []).
        """
        from DeepJetCore.Weighter import Weighter
        from DeepJetCore.stopwatch import stopwatch
        sw    = stopwatch()
        swall = stopwatch()
        # removal is only ever applied to training data
        if not istraining:
            self.remove = False

        print('reading '+filename)

        import ROOT
        from root_numpy import root2array
        fileTimeOut(filename,120) #give eos a minute to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("tree")
        self.nsamples = tree.GetEntries()

        # user code, example works with the example 2D images in root format generated by make_example_data
        from DeepJetCore.preprocessing import MeanNormZeroPad,MeanNormZeroPadParticles

        # second argument None: no mean/std normalisation, zero padding only
        x_global = MeanNormZeroPad(filename,None,
                                   [self.global_branches],
                                   [1],self.nsamples)

        x_pfCand_neutral = MeanNormZeroPadParticles(filename,None,
                                   self.pfCand_neutral_branches,
                                   self.npfCand_neutral,self.nsamples)

        x_pfCand_charged = MeanNormZeroPadParticles(filename,None,
                                   self.pfCand_charged_branches,
                                   self.npfCand_charged,self.nsamples)

        x_pfCand_photon = MeanNormZeroPadParticles(filename,None,
                                   self.pfCand_photon_branches,
                                   self.npfCand_photon,self.nsamples)

        x_pfCand_electron = MeanNormZeroPadParticles(filename,None,
                                   self.pfCand_electron_branches,
                                   self.npfCand_electron,self.nsamples)

        x_pfCand_muon = MeanNormZeroPadParticles(filename,None,
                                   self.pfCand_muon_branches,
                                   self.npfCand_muon,self.nsamples)

        x_pfCand_SV = MeanNormZeroPadParticles(filename,None,
                                   self.SV_branches,
                                   self.nSV,self.nsamples)

        # truth: stack the three lepton-id training flags column-wise
        import uproot3 as uproot
        urfile = uproot.open(filename)["tree"]
        truth = np.concatenate([np.expand_dims(urfile.array("lep_isPromptId_Training"), axis=1) ,
                                np.expand_dims(urfile.array("lep_isNonPromptId_Training"), axis=1),
                                np.expand_dims(urfile.array("lep_isFakeId_Training"), axis=1)],axis=1)
        truth = truth.astype(dtype='float32', order='C') #important, float32 and C-type!

        x_global            = x_global.astype(dtype='float32', order='C')
        x_pfCand_neutral    = x_pfCand_neutral.astype(dtype='float32', order='C')
        x_pfCand_charged    = x_pfCand_charged.astype(dtype='float32', order='C')
        x_pfCand_photon     = x_pfCand_photon.astype(dtype='float32', order='C')
        x_pfCand_electron   = x_pfCand_electron.astype(dtype='float32', order='C')
        x_pfCand_muon       = x_pfCand_muon.astype(dtype='float32', order='C')
        x_pfCand_SV         = x_pfCand_SV.astype(dtype='float32', order='C')

        # merged: the mask construction and the filtering used to live in two
        # consecutive 'if self.remove:' blocks
        if self.remove:
            # re-read the weighting/truth branches and ask the weighter which
            # entries to keep (flag > 0)
            b = [self.weightbranchX,self.weightbranchY]
            b.extend(self.truth_branches)
            b.extend(self.undefTruth)
            fileTimeOut(filename, 120)
            for_remove = root2array(
                filename,
                treename = "tree",
                stop = None,
                branches = b
            )
            notremoves=weighterobjects['weigther'].createNotRemoveIndices(for_remove)
            print('took ', sw.getAndReset(), ' to create remove indices')
            print ("notremoves", notremoves, "<- notremoves")
            x_global            =   x_global[notremoves > 0]
            x_pfCand_neutral    =   x_pfCand_neutral[notremoves > 0]
            x_pfCand_charged    =   x_pfCand_charged[notremoves > 0]
            x_pfCand_photon     =   x_pfCand_photon[notremoves > 0]
            x_pfCand_electron   =   x_pfCand_electron[notremoves > 0]
            x_pfCand_muon       =   x_pfCand_muon[notremoves > 0]
            x_pfCand_SV         =   x_pfCand_SV[notremoves > 0]
            truth               =   truth[notremoves > 0]

        newnsamp=x_global.shape[0]
        print('reduced content to ', int(float(newnsamp)/float(self.nsamples)*100),'%')

        print('remove nans')
        # zero out NaN/inf entries so downstream training never sees them
        x_global          = np.where(np.isfinite(x_global) , x_global, 0)
        x_pfCand_neutral  = np.where(np.isfinite(x_pfCand_neutral), x_pfCand_neutral, 0)
        x_pfCand_charged  = np.where(np.isfinite(x_pfCand_charged), x_pfCand_charged, 0)
        x_pfCand_photon   = np.where(np.isfinite(x_pfCand_photon), x_pfCand_photon, 0)
        x_pfCand_electron = np.where(np.isfinite(x_pfCand_electron), x_pfCand_electron, 0)
        x_pfCand_muon     = np.where(np.isfinite(x_pfCand_muon), x_pfCand_muon, 0)
        x_pfCand_SV       = np.where(np.isfinite(x_pfCand_SV), x_pfCand_SV, 0)

        return [x_global, x_pfCand_neutral, x_pfCand_charged, x_pfCand_photon, x_pfCand_electron, x_pfCand_muon, x_pfCand_SV], [truth], []
示例#9
0
    def convertFromSourceFile(self, filename, weighterobjects, istraining):

        # Function to produce the numpy training arrays from root files.
        # Builds the mean-normalised global feature block and a 4-class
        # flavour truth; on training data, entries flagged by the weighter
        # (or marked isUndefined) are removed. Returns ([x_global], [truth], []).

        from DeepJetCore.Weighter import Weighter
        from DeepJetCore.stopwatch import stopwatch
        sw = stopwatch()
        swall = stopwatch()
        # removal is only applied to training data
        if not istraining:
            self.remove = False

        def reduceTruth(uproot_arrays):
            """Collapse the per-flavour flags into four columns:
            (b + leptonic b, bb, c, light/gluon). Keys are byte strings as
            delivered by uproot3."""

            b = uproot_arrays[b'isB']

            bb = uproot_arrays[b'isBB']
            gbb = uproot_arrays[b'isGBB']

            bl = uproot_arrays[b'isLeptonicB']
            blc = uproot_arrays[b'isLeptonicB_C']
            lepb = bl + blc

            c = uproot_arrays[b'isC']
            cc = uproot_arrays[b'isCC']
            gcc = uproot_arrays[b'isGCC']

            ud = uproot_arrays[b'isUD']
            s = uproot_arrays[b'isS']
            uds = ud + s

            g = uproot_arrays[b'isG']

            # shape (nsamples, 4), one column per merged class
            return np.vstack(
                (b + lepb, bb + gbb, c + cc + gcc, uds + g)).transpose()

        print('reading ' + filename)

        import ROOT
        from root_numpy import tree2array, root2array
        fileTimeOut(filename, 120)  #give eos a minute to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = tree.GetEntries()
        # user code, example works with the example 2D images in root format generated by make_example_data
        from DeepJetCore.preprocessing import MeanNormZeroPad, MeanNormZeroPadParticles
        # one call pads all four feature groups; cutoffs 1/n_track/n_eta_rel/n_vtx
        x_global = MeanNormZeroPad(filename, weighterobjects['means'], [
            self.global_branches, self.track_branches, self.eta_rel_branches,
            self.vtx_branches
        ], [1, self.n_track, self.n_eta_rel, self.n_vtx], self.nsamples)

        import uproot3 as uproot
        urfile = uproot.open(filename)["deepntuplizer/tree"]
        truth_arrays = urfile.arrays(self.truth_branches)
        truth = reduceTruth(truth_arrays)
        truth = truth.astype(dtype='float32',
                             order='C')  #important, float32 and C-type!

        x_global = x_global.astype(dtype='float32', order='C')

        if self.remove:
            # re-read the weighting + truth branches and build the keep-mask
            b = [self.weightbranchX, self.weightbranchY]
            b.extend(self.truth_branches)
            b.extend(self.undefTruth)
            fileTimeOut(filename, 120)
            for_remove = root2array(filename,
                                    treename="deepntuplizer/tree",
                                    stop=None,
                                    branches=b)
            notremoves = weighterobjects['weigther'].createNotRemoveIndices(
                for_remove)
            # subtracting the isUndefined flag forces undefined-flavour
            # entries to be removed as well (their flag drops to <= 0)
            undef = for_remove['isUndefined']
            notremoves -= undef
            print('took ', sw.getAndReset(), ' to create remove indices')

        if self.remove:
            print('remove')
            x_global = x_global[notremoves > 0]
            truth = truth[notremoves > 0]

        newnsamp = x_global.shape[0]
        print('reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')

        print('remove nans')
        # zero out non-finite or implausibly large (|x| >= 1e5) values
        x_global = np.where(
            np.logical_and(np.isfinite(x_global),
                           (np.abs(x_global) < 100000.0)), x_global, 0)
        return [x_global], [truth], []
示例#10
0
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Fill self.w / self.x / self.y in place from one root file.

        Builds the global + charged-PF + neutral-PF + SV feature blocks,
        derives weights from the weighter, and (with self.remove) filters
        out entries whose keep-flag is <= 0, including isUndefined jets.
        """
        from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from DeepJetCore.stopwatch import stopwatch

        sw = stopwatch()
        swall = stopwatch()

        import ROOT

        fileTimeOut(filename, 120)  #give eos a minute to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = tree.GetEntries()

        print('took ', sw.getAndReset(), ' seconds for getting tree entries')

        # split for convolutional network

        x_global = MeanNormZeroPad(filename, TupleMeanStd, [self.branches[0]],
                                   [self.branchcutoffs[0]], self.nsamples)

        x_cpf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                         self.branches[1],
                                         self.branchcutoffs[1], self.nsamples)

        x_npf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                         self.branches[2],
                                         self.branchcutoffs[2], self.nsamples)

        x_sv = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                        self.branches[3],
                                        self.branchcutoffs[3], self.nsamples)

        print('took ', sw.getAndReset(),
              ' seconds for mean norm and zero padding (C module)')

        Tuple = self.readTreeFromRootToTuple(filename)

        if self.remove:
            # keep-flags from the weighter; subtracting isUndefined forces
            # undefined jets out as well (flag drops to <= 0)
            notremoves = weighter.createNotRemoveIndices(Tuple)
            undef = Tuple['isUndefined']
            notremoves -= undef
            print('took ', sw.getAndReset(), ' to create remove indices')

        # branch order matters: explicit jet weights win over the keep-flags
        # when both self.weight and self.remove are set
        if self.weight:
            weights = weighter.getJetWeights(Tuple)
        elif self.remove:
            weights = notremoves
        else:
            print('neither remove nor weight')
            weights = numpy.empty(self.nsamples)
            weights.fill(1.)

        truthtuple = Tuple[self.truthclasses]
        #print(self.truthclasses)
        alltruth = self.reduceTruth(truthtuple)

        #print(alltruth.shape)
        if self.remove:
            print('remove')
            # filter weights, all feature blocks and the truth consistently
            weights = weights[notremoves > 0]
            x_global = x_global[notremoves > 0]
            x_cpf = x_cpf[notremoves > 0]
            x_npf = x_npf[notremoves > 0]
            x_sv = x_sv[notremoves > 0]
            alltruth = alltruth[notremoves > 0]

        newnsamp = x_global.shape[0]
        print('reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')
        self.nsamples = newnsamp

        print(x_global.shape, self.nsamples)

        self.w = [weights]
        self.x = [x_global, x_cpf, x_npf, x_sv]
        self.y = [alltruth]
示例#11
0
    def convertFromSourceFile(self, filename, weighterobjects, istraining):

        # Function to produce the numpy training arrays from root files.
        # Variant that reads the "ttree" via uproot/awkward helpers instead
        # of the ROOT C module. Returns ([x_global], [truth], []).
        # NOTE(review): relies on helpers defined elsewhere in this module —
        # map_prefix (presumably strips/maps the 'Jet_' branch prefix),
        # u3 (presumably uproot3), uproot_tree_to_numpy and uproot_root2array;
        # confirm their contracts before modifying this function.

        from DeepJetCore.Weighter import Weighter
        from DeepJetCore.stopwatch import stopwatch
        sw = stopwatch()
        swall = stopwatch()
        # removal is only applied to training data
        if not istraining:
            self.remove = False

        def reduceTruth(uproot_arrays):
            """Collapse the per-flavour flags into four columns:
            (b + leptonic b, bb, c, light/gluon). Branch names are passed
            through map_prefix and re-encoded to byte-string keys."""

            b = uproot_arrays[str.encode(map_prefix(b'Jet_isB'))]

            bb = uproot_arrays[str.encode(map_prefix(b'Jet_isBB'))]
            gbb = uproot_arrays[str.encode(map_prefix(b'Jet_isGBB'))]

            bl = uproot_arrays[str.encode(map_prefix(b'Jet_isLeptonicB'))]
            blc = uproot_arrays[str.encode(map_prefix(b'Jet_isLeptonicB_C'))]
            lepb = bl + blc

            c = uproot_arrays[str.encode(map_prefix(b'Jet_isC'))]
            cc = uproot_arrays[str.encode(map_prefix(b'Jet_isCC'))]
            gcc = uproot_arrays[str.encode(map_prefix(b'Jet_isGCC'))]

            ud = uproot_arrays[str.encode(map_prefix(b'Jet_isUD'))]
            s = uproot_arrays[str.encode(map_prefix(b'Jet_isS'))]
            uds = ud + s

            g = uproot_arrays[str.encode(map_prefix(b'Jet_isG'))]

            # shape (nsamples, 4), one column per merged class
            return np.vstack(
                (b + lepb, bb + gbb, c + cc + gcc, uds + g)).transpose()

        print('reading ' + filename)

        import ROOT
        from root_numpy import tree2array, root2array
        fileTimeOut(filename, 600)  #give eos ten minutes to recover
        rfile = ROOT.TFile(filename)
        # tree = rfile.Get("ttree")
        # self.nsamples = tree.GetEntries()
        # from IPython import embed;embed()
        # entry count taken from uproot3 rather than the ROOT handle above
        tree = u3.open(filename)["ttree"]
        self.nsamples = tree.numentries
        print("Nsamples: {}".format(self.nsamples))

        # user code, example works with the example 2D images in root format generated by make_example_data
        from DeepJetCore.preprocessing import MeanNormZeroPad, MeanNormZeroPadParticles
        # debug output of the would-be MeanNormZeroPad arguments
        for obj in [
                filename, weighterobjects['means'],
            [
                self.global_branches, self.track_branches,
                self.eta_rel_branches, self.vtx_branches
            ], [1, self.n_track, self.n_eta_rel, self.n_vtx], self.nsamples
        ]:
            print("DEBUGGING:\t{}".format(type(obj)))
        print("DEBUGGING:\n\tPrinting MeanNormZeroPad arguments:")
        print("\t{}\n\t{}\n\t{}".format(filename, weighterobjects['means'],
                                        self.nsamples))
        print("reading in with new uproot+awkward function")
        # uproot-based replacement for MeanNormZeroPad (see commented call below)
        nparr = uproot_tree_to_numpy(
            filename,
            weighterobjects['means'], [
                self.global_branches, self.track_branches,
                self.eta_rel_branches, self.vtx_branches
            ], [1, self.n_track, self.n_eta_rel, self.n_vtx],
            self.nsamples,
            treename="ttree")
        print("succesfully created numpy array")
        x_global = nparr

        # x_global = MeanNormZeroPad(filename,weighterobjects['means'],
        # [self.global_branches,self.track_branches,self.eta_rel_branches,self.vtx_branches],
        # [1,self.n_track,self.n_eta_rel,self.n_vtx],self.nsamples)

        print("opening file with uproot")
        import uproot3 as uproot
        urfile = uproot.open(filename)["ttree"]
        truth_arrays = urfile.arrays(self.truth_branches)
        print("truth_branches:")
        print(self.truth_branches)
        print("truth_arrays:")
        print(truth_arrays)
        truth = reduceTruth(truth_arrays)
        truth = truth.astype(dtype='float32',
                             order='C')  #important, float32 and C-type!

        x_global = x_global.astype(dtype='float32', order='C')

        if self.remove:
            # re-read the weighting + truth branches and build the keep-mask
            b = [self.weightbranchX, self.weightbranchY]
            b.extend(self.truth_branches)
            b.extend(self.undefTruth)
            fileTimeOut(filename, 120)
            for_remove = uproot_root2array(filename,
                                           treename="ttree",
                                           stop=None,
                                           branches=b)
            notremoves = weighterobjects['weigther'].createNotRemoveIndices(
                for_remove)
            undef = for_remove['Jet_isUndefined']
            print("\nundef:")
            print(undef)
            print("undef dtype: ", undef.dtype)
            print()
            print(notremoves)
            # subtracting the (cast) isUndefined flag forces undefined jets
            # out as well (their keep-flag drops to <= 0)
            notremoves -= np.array(undef, dtype=np.float32)
            print('took ', sw.getAndReset(), ' to create remove indices')

        if self.remove:
            print('remove')
            x_global = x_global[notremoves > 0]
            truth = truth[notremoves > 0]

        newnsamp = x_global.shape[0]
        print('reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')

        print('remove nans')
        # zero out non-finite or implausibly large (|x| >= 1e5) values
        x_global = np.where(
            np.logical_and(np.isfinite(x_global),
                           (np.abs(x_global) < 100000.0)), x_global, 0)
        return [x_global], [truth], []
    def convertFromSourceFile(self, filename, weighterobjects, istraining):

        # Function to produce the numpy training arrays from root files

        from DeepJetCore.Weighter import Weighter
        from DeepJetCore.stopwatch import stopwatch
        sw = stopwatch()
        swall = stopwatch()
        if not istraining:
            self.remove = False

        print('reading ' + filename)
        import ROOT
        from root_numpy import tree2array, root2array
        fileTimeOut(filename, 120)  # give eos a minute to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("tree")
        self.nsamples = tree.GetEntries()

        # user code, example works with the example 2D images in root format generated by make_example_data
        from DeepJetCore.preprocessing import MeanNormZeroPad, MeanNormZeroPadParticles

        print('padding ' + filename)

        x_global = MeanNormZeroPad(
            filename,
            None,  # 2nd argument None: means no normalisation 
            [self.global_branches],
            [1],
            self.nsamples)

        x_pfCand_neutral = MeanNormZeroPadParticles(
            filename, None, self.pfCand_neutral_branches, self.npfCand_neutral,
            self.nsamples)

        x_pfCand_charged = MeanNormZeroPadParticles(
            filename, None, self.pfCand_charged_branches, self.npfCand_charged,
            self.nsamples)

        x_pfCand_photon = MeanNormZeroPadParticles(filename, None,
                                                   self.pfCand_photon_branches,
                                                   self.npfCand_photon,
                                                   self.nsamples)

        x_pfCand_electron = MeanNormZeroPadParticles(
            filename, None, self.pfCand_electron_branches,
            self.npfCand_electron, self.nsamples)

        x_pfCand_muon = MeanNormZeroPadParticles(filename, None,
                                                 self.pfCand_muon_branches,
                                                 self.npfCand_muon,
                                                 self.nsamples)

        x_pfCand_SV = MeanNormZeroPadParticles(filename, None,
                                               self.SV_branches, self.nSV,
                                               self.nsamples)

        import uproot3 as uproot
        urfile = uproot.open(filename)["tree"]

        mytruth = []
        for arr in self.truth_branches:
            mytruth.append(np.expand_dims(urfile.array(arr), axis=1))
        truth = np.concatenate(mytruth, axis=1)

        # important, float32 and C-type!
        truth = truth.astype(dtype='float32', order='C')

        x_global = x_global.astype(dtype='float32', order='C')
        x_pfCand_neutral = x_pfCand_neutral.astype(dtype='float32', order='C')
        x_pfCand_charged = x_pfCand_charged.astype(dtype='float32', order='C')
        x_pfCand_photon = x_pfCand_photon.astype(dtype='float32', order='C')
        x_pfCand_electron = x_pfCand_electron.astype(dtype='float32',
                                                     order='C')
        x_pfCand_muon = x_pfCand_muon.astype(dtype='float32', order='C')
        x_pfCand_SV = x_pfCand_SV.astype(dtype='float32', order='C')

        if self.remove:
            b = [self.weightbranchX, self.weightbranchY]
            b.extend(self.truth_branches)
            b.extend(self.undefTruth)
            fileTimeOut(filename, 120)
            for_remove = root2array(  # returns a structured np array
                filename,
                treename="tree",
                stop=None,
                branches=b)
            notremoves = weighterobjects['weigther'].createNotRemoveIndices(
                for_remove)
            print('took ', sw.getAndReset(), ' to create remove indices')

        if self.remove:

            x_global = x_global[notremoves > 0]
            x_pfCand_neutral = x_pfCand_neutral[notremoves > 0]
            x_pfCand_charged = x_pfCand_charged[notremoves > 0]
            x_pfCand_photon = x_pfCand_photon[notremoves > 0]
            x_pfCand_electron = x_pfCand_electron[notremoves > 0]
            x_pfCand_muon = x_pfCand_muon[notremoves > 0]
            x_pfCand_SV = x_pfCand_SV[notremoves > 0]
            truth = truth[notremoves > 0]

        newnsamp = x_global.shape[0]
        print('Weighter reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')

        print('removing nans')
        x_global = np.where(np.isfinite(x_global), x_global, 0)
        x_pfCand_neutral = np.where(np.isfinite(x_pfCand_neutral),
                                    x_pfCand_neutral, 0)
        x_pfCand_charged = np.where(np.isfinite(x_pfCand_charged),
                                    x_pfCand_charged, 0)
        x_pfCand_photon = np.where(np.isfinite(x_pfCand_photon),
                                   x_pfCand_photon, 0)
        x_pfCand_electron = np.where(np.isfinite(x_pfCand_electron),
                                     x_pfCand_electron, 0)
        x_pfCand_muon = np.where(np.isfinite(x_pfCand_muon), x_pfCand_muon, 0)
        x_pfCand_SV = np.where(np.isfinite(x_pfCand_SV), x_pfCand_SV, 0)

        return [
            x_global, x_pfCand_neutral, x_pfCand_charged, x_pfCand_photon,
            x_pfCand_electron, x_pfCand_muon, x_pfCand_SV
        ], [truth], []
示例#13
0
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Fill self.w / self.x / self.y from one ROOT ntuple file.

        Builds the flat global-feature array plus 20x20 eta-phi density
        and count maps for charged and neutral PF candidates, then
        attaches the classification truth and pt-regression targets.
        """
        from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, createDensityMap, createCountMap, MeanNormZeroPadParticles
        import numpy
        from DeepJetCore.stopwatch import stopwatch

        timer = stopwatch()
        timer_all = stopwatch()  # overall timer (kept for parity, unused)

        import ROOT

        fileTimeOut(filename, 120)  #give eos a minute to recover
        root_file = ROOT.TFile(filename)
        ntuple_tree = root_file.Get("deepntuplizer/tree")
        self.nsamples = ntuple_tree.GetEntries()

        print('took ', timer.getAndReset(), ' seconds for getting tree entries')

        # flat per-jet features feeding the dense part of the network
        x_global = MeanNormZeroPad(filename, TupleMeanStd, [self.branches[0]],
                                   [self.branchcutoffs[0]], self.nsamples)

        # charged candidates: puppi-weighted ptrel density on a 20x20 grid
        x_chmap = createDensityMap(filename, TupleMeanStd, 'Cpfcan_ptrel',
                                   self.nsamples,
                                   ['Cpfcan_eta', 'jet_eta', 20, 0.5],
                                   ['Cpfcan_phi', 'jet_phi', 20, 0.5],
                                   'nCpfcand', -1,
                                   weightbranch='Cpfcan_puppiw')

        # ... and the matching candidate-count map
        x_chcount = createCountMap(filename, TupleMeanStd, self.nsamples,
                                   ['Cpfcan_eta', 'jet_eta', 20, 0.5],
                                   ['Cpfcan_phi', 'jet_phi', 20, 0.5],
                                   'nCpfcand')

        # same two maps for the neutral candidates
        x_neumap = createDensityMap(filename, TupleMeanStd, 'Npfcan_ptrel',
                                    self.nsamples,
                                    ['Npfcan_eta', 'jet_eta', 20, 0.5],
                                    ['Npfcan_phi', 'jet_phi', 20, 0.5],
                                    'nNpfcand', -1,
                                    weightbranch='Npfcan_puppiw')

        x_neucount = createCountMap(filename, TupleMeanStd, self.nsamples,
                                    ['Npfcan_eta', 'jet_eta', 20, 0.5],
                                    ['Npfcan_phi', 'jet_phi', 20, 0.5],
                                    'nNpfcand')

        print('took ', timer.getAndReset(),
              ' seconds for mean norm and zero padding (C module)')

        intuple = self.readTreeFromRootToTuple(filename)

        keep_mask = None
        if self.remove:
            # jets to keep after spectrum flattening; drop undefined truth
            keep_mask = weighter.createNotRemoveIndices(intuple)
            keep_mask -= intuple['isUndefined']
            print('took ', timer.getAndReset(), ' to create remove indices')

        if self.weight:
            weights = weighter.getJetWeights(intuple)
        elif self.remove:
            weights = keep_mask
        else:
            print('neither remove nor weight')
            weights = numpy.ones(self.nsamples)

        # regression target (generated pt) and its reconstructed reference
        pttruth = intuple[self.regtruth]
        ptreco = intuple[self.regreco]

        truthtuple = intuple[self.truthclasses]
        alltruth = self.reduceTruth(truthtuple)

        # stack the four maps along the channel axis
        x_map = numpy.concatenate((x_chmap, x_chcount, x_neumap, x_neucount),
                                  axis=3)

        if self.remove:
            print('remove')
            selected = keep_mask > 0
            weights = weights[selected]
            x_global = x_global[selected]
            x_map = x_map[selected]
            alltruth = alltruth[selected]
            pttruth = pttruth[selected]
            ptreco = ptreco[selected]

        newnsamp = x_global.shape[0]
        print('reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')
        self.nsamples = newnsamp
        print(x_global.shape, self.nsamples)

        self.w = [weights]
        self.x = [x_global, x_map, ptreco]
        self.y = [alltruth, pttruth]
示例#14
0
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Fill self.w / self.x / self.y from one ROOT ntuple file.

        Builds flat global features, per-particle sequences (charged,
        neutral, secondary vertices), and 8x8 eta-phi density/count maps,
        plus classification truth and pt-regression targets.
        """
        from DeepJetCore.preprocessing import MeanNormApply, createCountMap, createDensity, MeanNormZeroPad, createDensityMap, MeanNormZeroPadParticles
        import numpy
        from DeepJetCore.stopwatch import stopwatch

        sw = stopwatch()
        # (removed unused second stopwatch 'swall')

        import ROOT

        fileTimeOut(filename, 120)  #give eos a minute to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = tree.GetEntries()

        print('took ', sw.getAndReset(), ' seconds for getting tree entries')

        # flat per-jet features for the dense part of the network
        x_global = MeanNormZeroPad(filename, TupleMeanStd, [self.branches[0]],
                                   [self.branchcutoffs[0]], self.nsamples)

        # per-particle sequences, zero-padded to fixed length
        x_cpf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                         self.branches[1],
                                         self.branchcutoffs[1], self.nsamples)

        x_npf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                         self.branches[2],
                                         self.branchcutoffs[2], self.nsamples)

        x_sv = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                        self.branches[3],
                                        self.branchcutoffs[3], self.nsamples)

        # eta-phi image inputs
        nbins = 8

        # charged candidates: summed ptrel plus average etarel/phirel
        x_chmap = createDensity(
            filename,
            inbranches=['Cpfcan_ptrel', 'Cpfcan_etarel', 'Cpfcan_phirel'],
            modes=['sum', 'average', 'average'],
            nevents=self.nsamples,
            dimension1=['Cpfcan_eta', 'jet_eta', nbins, 0.45],
            dimension2=['Cpfcan_phi', 'jet_phi', nbins, 0.45],
            counterbranch='nCpfcand',
            offsets=[-1, -0.5, -0.5])

        # neutral candidates: same maps.
        # BUGFIX: counterbranch was 'nCpfcand' (charged counter), a
        # copy-paste error — the neutral branches must be counted with
        # 'nNpfcand', consistent with x_neucount below and with the
        # density-map variant of this converter.
        x_neumap = createDensity(
            filename,
            inbranches=['Npfcan_ptrel', 'Npfcan_etarel', 'Npfcan_phirel'],
            modes=['sum', 'average', 'average'],
            nevents=self.nsamples,
            dimension1=['Npfcan_eta', 'jet_eta', nbins, 0.45],
            dimension2=['Npfcan_phi', 'jet_phi', nbins, 0.45],
            counterbranch='nNpfcand',
            offsets=[-1, -0.5, -0.5])

        x_chcount = createCountMap(filename, TupleMeanStd, self.nsamples,
                                   ['Cpfcan_eta', 'jet_eta', nbins, 0.45],
                                   ['Cpfcan_phi', 'jet_phi', nbins, 0.45],
                                   'nCpfcand')

        x_neucount = createCountMap(filename, TupleMeanStd, self.nsamples,
                                    ['Npfcan_eta', 'jet_eta', nbins, 0.45],
                                    ['Npfcan_phi', 'jet_phi', nbins, 0.45],
                                    'nNpfcand')

        print('took ', sw.getAndReset(),
              ' seconds for mean norm and zero padding (C module)')

        Tuple = self.readTreeFromRootToTuple(filename)

        if self.remove:
            # jets to keep after spectrum flattening; drop undefined truth
            notremoves = weighter.createNotRemoveIndices(Tuple)
            undef = Tuple['isUndefined']
            notremoves -= undef
            print('took ', sw.getAndReset(), ' to create remove indices')

        if self.weight:
            weights = weighter.getJetWeights(Tuple)
        elif self.remove:
            weights = notremoves
        else:
            print('neither remove nor weight')
            weights = numpy.empty(self.nsamples)
            weights.fill(1.)

        truthtuple = Tuple[self.truthclasses]
        alltruth = self.reduceTruth(truthtuple)

        # regression target (generated pt incl. neutrinos) and reference
        regtruth = Tuple['gen_pt_WithNu']
        regreco = Tuple['jet_corr_pt']

        if self.remove:
            print('remove')
            # keep only entries flagged notremoves > 0
            weights = weights[notremoves > 0]
            x_global = x_global[notremoves > 0]
            x_cpf = x_cpf[notremoves > 0]
            x_npf = x_npf[notremoves > 0]
            x_sv = x_sv[notremoves > 0]

            x_chmap = x_chmap[notremoves > 0]
            x_neumap = x_neumap[notremoves > 0]

            x_chcount = x_chcount[notremoves > 0]
            x_neucount = x_neucount[notremoves > 0]

            alltruth = alltruth[notremoves > 0]

            regreco = regreco[notremoves > 0]
            regtruth = regtruth[notremoves > 0]

        newnsamp = x_global.shape[0]
        print('reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')
        self.nsamples = newnsamp

        # stack density and count maps along the channel axis
        x_map = numpy.concatenate((x_chmap, x_neumap, x_chcount, x_neucount),
                                  axis=3)

        # one weight vector per output head (classification + regression)
        self.w = [weights, weights]
        self.x = [x_global, x_cpf, x_npf, x_sv, x_map, regreco]
        self.y = [alltruth, regtruth]