Example #1
 def discriminantLinearCorrection( self, referenceObj, subset, **kw ):
   from ROOT import TH2F
   subset = CuratedSubset.tobinary( CuratedSubset.topattern( subset ) )
   if self.subset is subset:
     self._debug("Already retrieved parameters for subset %s.", CuratedSubset.tostring( subset ) )
     return
   self._info('Running linear correction...')
   # Reset raw perf:
   self.rawPerf = kw.pop('rawPerf', None)
   self.subset = subset
   self._verbose('Getting correction data')
   self.sgnCorrData  = self._getCorrectionData( referenceObj, **kw )
   self._verbose('Getting background correction data')
   self.bkgCorrDataList = [LHThresholdCorrectionData( self.bkgHist, self.rawPerf.pf * mult, self.rawThres.thres, self.limits, 1. ) 
                           for mult in self.frMargin]
   # Set final parameters:
   self._verbose('Getting linear correction threshold')
   self.thres = PileupLinearCorrectionThreshold( intercept = self.sgnCorrData.intercept, slope = self.sgnCorrData.slope
                                               , rawThres = self.rawThres.thres
                                               , reach = self.getReach( self.sgnCorrData, self.bkgCorrDataList )
                                               , margins = self.frMargin, limits = self.limits, maxCorr = self.maxCorr
                                               , pileupStr = self._pileupShortLabel
                                               , etBinIdx = self.etBinIdx, etaBinIdx = self.etaBinIdx
                                               , etBin = self.etBin, etaBin = self.etaBin )
   self._verbose('Getting performance')
   self.perf = self.getEffPoint( referenceObj.name, makeCorr = True )
   # Set performance results for monitoring purposes:
   self._debug("Retrieved following parameters and performance values using %s dataset", Dataset.tostring(Dataset.Train)  )
   self._debug("Raw: %s", self.rawPerf.asstr( perc = True, addthres = False ) )
   self._debug("Raw Threshold: %s", self.rawPerf.thresstr() )
   self._debug("<pile-up> limits: %r.", self.limits )
   self._debug("Linear correction: %s", self.perf.asstr( perc = True, addthres = False ) ) 
   self._debug("Linear correction Threshold: %s", self.perf.thresstr() ) 
   self._debug("Reach: %r", tuple(zip( self.frMargin[1:], self.thres.reach[1:])))
Example #2
 def getPileup(self, subset): 
   baseinfo = self._dataCurator.getBaseInfo(subset, BaseInfo.PileUp)
   if CuratedSubset.isbinary( subset ):
     if CuratedSubset.isoperation( subset ):
       ret = [np.concatenate(b, axis = npCurrent.odim).squeeze().astype(dtype='float64') for b in baseinfo]
     else:
       ret = [b.squeeze().astype(dtype='float64') for b in baseinfo]
   else:
     if CuratedSubset.isoperation( subset ):
       ret = np.concatenate(baseinfo, axis = npCurrent.odim).squeeze().astype(dtype='float64')
     else:
       ret = baseinfo.squeeze().astype(dtype='float64')
   # ret may be a single array or, for binary subsets, a list of arrays:
   if all( np.all( r == 0. ) for r in (ret if isinstance(ret, list) else (ret,)) ):
     self._fatal("All pile-up data is zero!")
   return ret
Example #3
 def _calcEff( self, subset, output = None, pileup = None, thres = None, makeCorr = True ):
   self._verbose('Calculating efficiency for %s', CuratedSubset.tostring( subset ) )
   pileup = self.getPileup(subset) if pileup is None else pileup
   if output is None: output = self.getOutput(subset)
   if thres is None: thres = self.thres if makeCorr else self.rawThres
   args = (output, pileup) if makeCorr else (output,)
   return thres.getPerf( *args )
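
Conceptually, the efficiency returned by thres.getPerf is just the fraction of outputs that pass the (possibly pile-up dependent) threshold. A minimal, self-contained sketch of that reduction, with illustrative names only:

import numpy as np

def efficiency(outputs, thres):
  # Fraction of events whose discriminant output passes the cut; the real
  # getPerf additionally receives the pile-up array when the threshold is corrected.
  outputs = np.asarray(outputs)
  return float(np.count_nonzero(outputs > thres)) / outputs.size

# e.g. efficiency(np.array([0.2, 0.9, -0.4]), 0.0) -> 0.666...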
Example #4
 def getEffPoint( self, name, subset = [None, None], outputs = [None, None], pileup = [None,None], thres = None, makeCorr = True ):
   from TuningTools.Neural import PerformancePoint
   auc = self.rawPerf.auc if self.rawPerf else -1
   if not isinstance(subset, (tuple,list)): 
     if subset is None: 
       if not(any([o is None for o in outputs])): self._fatal("Subset must be specified when outputs is used.")
       subset = self.subset
     subset = [CuratedSubset.tosgn(subset),CuratedSubset.tobkg(subset)]
   else:
     if len(subset) == 1:
       if subset[0] is None: 
         if not(any([o is None for o in outputs])): self._fatal("Subset must be specified when outputs is used.")
         subset = [self.subset]
       subset = [CuratedSubset.tosgn(subset[0]),CuratedSubset.tobkg(subset[0])]
     else:
       if any([s is None for s in subset]): 
         if not(any([o is None for o in outputs])): self._fatal("Subset must be specified when outputs is used.")
         subset = [self.subset, self.subset]
       subset = [CuratedSubset.tosgn(subset[0]),CuratedSubset.tobkg(subset[1])]
   self._effSubset = subset
   if any([o is None for o in outputs]):
     #if outputs[0] is None:
     if isinstance(subset, (list,tuple)):
       # FIXME This assumes that sgnOut is cached:
       outputs = [(self.getOutput(CuratedSubset.topattern(s)) if CuratedSubset.tobinary(s) is not self.subset else o )
                  for o, s in zip([self.sgnOut,self.bkgOut], subset)]
     else:
       if CuratedSubset.tobinary(subset) is not self.subset:
         outputs = self.getOutput(CuratedSubset.topattern(subset))
       else:
         outputs = [self.sgnOut,self.bkgOut]
     # NOTE: This can be commented out to improve speed
     try:
       from libTuningToolsLib import genRoc
     except ImportError:
       from libTuningTools import genRoc
     o = genRoc(outputs[0], outputs[1], +self._ol, -self._ol, 0.001 )
     auc = Roc( o ).auc
   self._effOutput = outputs
   if thres is None: thres = self.thres if makeCorr else self.rawThres
   pd = self._calcEff( subset[0], output = outputs[0], pileup = pileup[0], thres = thres, makeCorr = makeCorr )
   pf = self._calcEff( subset[1], output = outputs[1], pileup = pileup[1], thres = thres, makeCorr = makeCorr )
   sp = calcSP(pd, 1. - pf)
   return PerformancePoint( name, sp, pd, pf, thres, perc = False, auc = auc
                          , etBinIdx = self.etBinIdx, etaBinIdx = self.etaBinIdx
                          , etBin = self.etBin, etaBin = self.etaBin )
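
calcSP combines the detection probability pd and the background rejection 1 - pf into the SP index. Assuming the usual definition used in the ringer tuning tools (a hedged reconstruction, not taken from this snippet), it reads:

import numpy as np

def calc_sp(pd, bkg_rej):
  # SP index: square root of (geometric mean) * (arithmetic mean) of the
  # signal efficiency and the background rejection (bkg_rej = 1 - pf).
  return np.sqrt(np.sqrt(pd * bkg_rej) * 0.5 * (pd + bkg_rej))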
Example #5
 def getOutput(self, subset): 
   self._verbose('Propagating output for subset: %s', CuratedSubset.tostring(subset))
   data = self._dataCurator[CuratedSubset.topattern( subset )]
   inputDim = self._dataCurator.nInputs
   if inputDim != self._discr.getNumNodes(0):
     self._fatal( "Data number of patterns (%d) do not match with discriminator input dimension (%d)!"
                ,  inputDim, self._discr.getNumNodes(0))
   if CuratedSubset.isbinary( subset ):
     if CuratedSubset.isoperation( subset ):
       output = [np.concatenate([self._discr.propagate_np(sd) for sd in d], axis  = npCurrent.odim) for d in data]
     else:
       output = [self._discr.propagate_np(d) for d in data]
   else:
     if CuratedSubset.isoperation( subset ):
       output = np.concatenate([self._discr.propagate_np(d) for d in data], axis = npCurrent.odim)
     else:
       output = self._discr.propagate_np(data)
   return output
Example #6
 def _getCorrectionData( self, referenceObj, **kw ):
   neuron = kw.get('neuron', None )
   sort   = kw.get('sort', None )
   init   = kw.get('init', None )
   self._baseLabel = "ref%s_etBin%d_etaBin%d%s%s%s" % ( ReferenceBenchmark.tostring( referenceObj.reference )
       , self._dataCurator.etBinIdx
       , self._dataCurator.etaBinIdx
       , ( '_neuron%d' % neuron ) if neuron is not None else ''
       , ( '_sort%d' % sort ) if sort is not None else ''
       , ( '_init%d' % init ) if init is not None else '' )
   self.sgnHist, self.sgnOut = self.get2DPerfHist( CuratedSubset.tosgn(self.subset), 'signal_' + self._baseLabel,     getOutputs = True )
   self.bkgHist, self.bkgOut = self.get2DPerfHist( CuratedSubset.tobkg(self.subset), 'background_' + self._baseLabel, getOutputs = True )
   if not self.rawPerf:
     try:
       from libTuningToolsLib import genRoc
     except ImportError:
       from libTuningTools import genRoc
     # Get raw threshold:
     if referenceObj.reference is ReferenceBenchmark.Pd:
       raw_thres = RawThreshold( - np.percentile( -self.sgnOut, referenceObj.refVal * 100. ) )
     elif referenceObj.reference is ReferenceBenchmark.Pf:
       raw_thres = RawThreshold( - np.percentile( -self.bkgOut, referenceObj.refVal * 100. ) )
     else:
       o = genRoc(self.sgnOut, self.bkgOut, self._ol, -self._ol, 0.001 )
       roc = Roc( o
                , etBinIdx = self.etBinIdx, etaBinIdx = self.etaBinIdx
                , etBin = self.etBin, etaBin = self.etaBin )
       self.rawPerf = roc.retrieve( referenceObj )
     if referenceObj.reference in ( ReferenceBenchmark.Pd, ReferenceBenchmark.Pf ):
       self.rawPerf = self.getEffPoint( referenceObj.name
                                      , thres = raw_thres
                                      , makeCorr = False )
       # Here we protect against choosing suboptimal Pd/Pf points:
       o = genRoc(self.sgnOut, self.bkgOut, self._ol, -self._ol, 0.001 )
       roc = Roc( o
                , etBinIdx = self.etBinIdx, etaBinIdx = self.etaBinIdx
                , etBin = self.etBin, etaBin = self.etaBin )
       # Check whether we could be performing better:
       if referenceObj.reference is ReferenceBenchmark.Pd:
         mask = roc.pds >= referenceObj.refVal
       elif referenceObj.reference is ReferenceBenchmark.Pf:
         mask = roc.pfs <= referenceObj.refVal
       pds = roc.pds[mask]
       pfs = roc.pfs[mask]
       sps = roc.sps[mask]
       if referenceObj.reference is ReferenceBenchmark.Pd:
         mask = pfs <= ( 1.001 * self.rawPerf.pf )
         sps = sps[mask]
         pfs = pfs[mask]
         if len(sps):
           idx = np.argmax(sps)
           if pfs[idx] < 0.98 * self.rawPerf.pf:
             self._warning('Model is sub-optimal when performing at requested Pd.')
             self._info('Using highest SP operation point with virtually same Pf.')
             raw_thres = RawThreshold( - np.percentile( -self.bkgOut, pfs[idx] * 100. ) )
              self._debug('Previous performance was: %s', self.rawPerf.asstr( addname = False, perc = True, addthres = False ))
             self.rawPerf = self.getEffPoint( referenceObj.name
                                            , thres = raw_thres
                                            , makeCorr = False )
              self._debug('New performance is: %s', self.rawPerf.asstr( addname = False, perc = True, addthres = False ))
       elif referenceObj.reference is ReferenceBenchmark.Pf:
         mask = pds >= ( 0.999 * self.rawPerf.pd )
         sps = sps[mask]
         pds = pds[mask]
         if len(sps):
           idx = np.argmax(sps)
           if pds[idx] > 1.005 * self.rawPerf.pd:
             self._warning('Model is sub-optimal when performing at requested Pf.')
             self._info('Using highest SP operation point with virtually same Pd.')
             raw_thres = RawThreshold( - np.percentile( -self.sgnOut, pds[idx] * 100. ) )
              self._debug('Previous performance was: %s', self.rawPerf.asstr( addname = False, perc = True, addthres = False ))
             self.rawPerf = self.getEffPoint( referenceObj.name
                                            , thres = raw_thres
                                            , makeCorr = False )
              self._debug('New performance is: %s', self.rawPerf.asstr( addname = False, perc = True, addthres = False ))
   else:
     self._debug('Skipped calculating raw performance since we already have it calculated')
   self.rawThres = self.rawPerf.thres
   # use standard lh method using signal data:
   sgnCorrData = LHThresholdCorrectionData( self.sgnHist, self.rawPerf.pd, self.rawThres.thres, self.limits, 1. )
   return sgnCorrData
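
The RawThreshold choices above pick the cut from a percentile of the outputs: for a Pd reference, so that the requested fraction of signal passes; for a Pf reference, so that the requested fraction of background passes. The double sign flip is only a trick to take an upper-tail percentile with np.percentile, as sketched below (the helper name is illustrative):

import numpy as np

def threshold_for_pass_fraction(outputs, target_fraction):
  # Returns thr such that roughly `target_fraction` of `outputs` satisfy
  # output >= thr, mirroring RawThreshold(-np.percentile(-out, frac * 100.)).
  return -np.percentile(-np.asarray(outputs), target_fraction * 100.)

# e.g. thr = threshold_for_pass_fraction(sgnOut, 0.97)  # ~97% signal efficiency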
Example #7
 def saveGraphs( self ):
   from ROOT import TH1F
   sStr = CuratedSubset.tostring( self.subset )
   self.sgnCorrData.save( 'signalCorr_' + self._baseLabel + '_' + sStr)
   for i, bkgCorrData in enumerate(self.bkgCorrDataList):
     bkgCorrData.save( 'backgroundCorr_' + str(i) + '_' + self._baseLabel  + '_' + sStr)
   
   sgnPileup = self.getPileup( CuratedSubset.tosgn( self.subset ) )
   sgnPassPileup = sgnPileup[ self.rawThres.getMask( self.sgnOut ) ]
   sgnEff = PileupEffHist( sgnPassPileup, sgnPileup, defaultNvtxBins, 'signalUncorr_' + self._baseLabel  + '_' + sStr )
   sgnEff.Write()
   bkgPileup = self.getPileup( CuratedSubset.tobkg( self.subset ) )
   bkgPassPileup = bkgPileup[ self.rawThres.getMask( self.bkgOut ) ]
   bkgEff = PileupEffHist( bkgPassPileup, bkgPileup, defaultNvtxBins, 'backgroundUncorr_' + self._baseLabel  + '_' + sStr )
   bkgEff.Write()
   # Write 2D histograms:
   self.sgnHist.Write( 'signal2DCorr_' + self._baseLabel  + '_' + sStr)
   self.bkgHist.Write( 'background2DCorr_' + self._baseLabel  + '_' + sStr)
   # Write output histograms:
   title = CuratedSubset.tostring( CuratedSubset.tosgn( self.subset ) ) + ' NN Output'
   sgnTH1 = TH1F( title, title, 100, -self._ol, self._ol ) 
   for p in self.sgnOut: sgnTH1.Fill( p )
   sgnTH1.Write( 'signalOutputs_' + self._baseLabel + '_' + sStr )
   title = CuratedSubset.tostring( CuratedSubset.tobkg( self.subset ) ) + ' NN Output'
   bkgTH1 = TH1F( title, title, 100, -self._ol, self._ol ) 
   for p in self.bkgOut: bkgTH1.Fill( p )
   bkgTH1.Write( 'backgroundOutputs_' + self._baseLabel + '_' + sStr )
   if self._effSubset is not None:
     esubset = CuratedSubset.tobinary( CuratedSubset.topattern( self._effSubset[0] ) ) 
     if esubset is not self.subset:
       sStr = CuratedSubset.tostring( esubset )
       # Also plot the subset on which the efficiency was calculated:
       sgnPileup = self.getPileup( CuratedSubset.tosgn( self._effSubset[0] ) )
       sgnPassPileup = sgnPileup[ self.rawThres.getMask( self._effOutput[0] ) ]
       sgnEff = PileupEffHist( sgnPassPileup, sgnPileup, defaultNvtxBins, 'signalUncorr_' + self._baseLabel  + '_' + sStr )
       sgnEff.Write()
       sgnPassPileup = sgnPileup[ self.thres.getMask( self._effOutput[0], sgnPileup ) ]
       sgnEff = PileupEffHist( sgnPassPileup, sgnPileup, defaultNvtxBins, 'signalCorr_' + self._baseLabel  + '_' + sStr )
       sgnEff.Write()
       bkgPileup = self.getPileup( CuratedSubset.tobkg( self._effSubset[1] ) )
       bkgPassPileup = bkgPileup[ self.rawThres.getMask( self._effOutput[1] ) ]
       bkgEff = PileupEffHist( bkgPassPileup, bkgPileup, defaultNvtxBins, 'backgroundUncorr_' + self._baseLabel  + '_' + sStr )
       bkgEff.Write()
       bkgPassPileup = bkgPileup[ self.thres.getMask( self._effOutput[1], bkgPileup ) ]
       bkgEff = PileupEffHist( bkgPassPileup, bkgPileup, defaultNvtxBins, 'backgroundCorr_' + self._baseLabel  + '_' + sStr )
       bkgEff.Write()
       sgnHist = self.get2DPerfHist( self._effSubset[0], 'signal2DCorr_' + self._baseLabel + '_' + sStr,     outputs = self._effOutput[0] )
       sgnHist.Write('signal2DCorr_' + self._baseLabel + '_' + sStr)
       bkgHist = self.get2DPerfHist( self._effSubset[1], 'background2DCorr_' + self._baseLabel + '_' + sStr, outputs = self._effOutput[1] )
       bkgHist.Write('background2DCorr_' + self._baseLabel + '_' + sStr)
       # 1D output plots
       # Fill the 1D output plots with the outputs evaluated on this subset (self._effOutput):
       title = CuratedSubset.tostring( self._effSubset[0] ) + ' NN Output'
       sgnTH1 = TH1F( title, title, 100, -self._ol, self._ol )
       for p in self._effOutput[0]: sgnTH1.Fill( p )
       sgnTH1.Write( 'signalOutputs_' + self._baseLabel + '_' + sStr )
       title = CuratedSubset.tostring( self._effSubset[1] ) + ' NN Output'
       bkgTH1 = TH1F( title, title, 100, -self._ol, self._ol )
       for p in self._effOutput[1]: bkgTH1.Fill( p )
       bkgTH1.Write( 'backgroundOutputs_' + self._baseLabel + '_' + sStr )
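
The PileupEffHist objects written above are efficiency-versus-pile-up profiles: events passing the cut, binned in pile-up, divided bin by bin by all events. PileupEffHist itself is a ROOT-based helper; the numpy sketch below only illustrates the ratio it encodes:

import numpy as np

def pileup_efficiency_profile(pass_pileup, all_pileup, bins):
  # Per-bin efficiency: passing events / all events, as a function of pile-up.
  passed, _ = np.histogram(pass_pileup, bins=bins)
  total,  _ = np.histogram(all_pileup,  bins=bins)
  with np.errstate(divide='ignore', invalid='ignore'):
    eff = np.true_divide(passed, total)
  eff[~np.isfinite(eff)] = 0.   # empty pile-up bins -> zero efficiency
  return eff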