def discriminantLinearCorrection( self, referenceObj, subset, **kw ):
  from ROOT import TH2F
  subset = CuratedSubset.tobinary( CuratedSubset.topattern( subset ) )
  if self.subset is subset:
    self._debug("Already retrieved parameters for subset %s.", CuratedSubset.tostring( subset ) )
    return
  self._info('Running linear correction...')
  # Reset raw perf:
  self.rawPerf = kw.pop('rawPerf', None)
  self.subset = subset
  self._verbose('Getting correction data')
  self.sgnCorrData = self._getCorrectionData( referenceObj, **kw )
  self._verbose('Getting background correction data')
  self.bkgCorrDataList = [ LHThresholdCorrectionData( self.bkgHist, self.rawPerf.pf * mult
                                                    , self.rawThres.thres, self.limits, 1. )
                           for mult in self.frMargin ]
  # Set final parameters:
  self._verbose('Getting linear correction threshold')
  self.thres = PileupLinearCorrectionThreshold( intercept = self.sgnCorrData.intercept, slope = self.sgnCorrData.slope
                                              , rawThres = self.rawThres.thres
                                              , reach = self.getReach( self.sgnCorrData, self.bkgCorrDataList )
                                              , margins = self.frMargin, limits = self.limits, maxCorr = self.maxCorr
                                              , pileupStr = self._pileupShortLabel
                                              , etBinIdx = self.etBinIdx, etaBinIdx = self.etaBinIdx
                                              , etBin = self.etBin, etaBin = self.etaBin )
  self._verbose('Getting performance')
  self.perf = self.getEffPoint( referenceObj.name, makeCorr = True )
  # Set performance results for monitoring purposes:
  self._debug("Retrieved the following parameters and performance values using the %s dataset", Dataset.tostring(Dataset.Train) )
  self._debug("Raw: %s", self.rawPerf.asstr( perc = True, addthres = False ) )
  self._debug("Raw Threshold: %s", self.rawPerf.thresstr() )
  self._debug("<pile-up> limits: %r.", self.limits )
  self._debug("Linear correction: %s", self.perf.asstr( perc = True, addthres = False ) )
  self._debug("Linear correction Threshold: %s", self.perf.thresstr() )
  self._debug("Reach: %r", tuple(zip( self.frMargin[1:], self.thres.reach[1:] )) )
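# The helper below is a standalone, numpy-only sketch of the idea implemented above through
# LHThresholdCorrectionData / PileupLinearCorrectionThreshold: per pile-up bin, find the
# discriminant threshold that keeps the target signal efficiency, fit a straight line
# thr(mu) = intercept + slope * mu through those per-bin thresholds, and clip the excursion
# around the raw (pile-up independent) threshold. It does not use the TuningTools API;
# every name in it is hypothetical and the default binning and limits are illustrative
# assumptions only.
def _example_linear_pileup_correction( outputs, pileup, target_eff = 0.95, mu_edges = None, max_corr = 0.2 ):
  """Illustrative only: fit thr(mu) = intercept + slope * mu at a fixed signal efficiency."""
  import numpy as np
  outputs = np.asarray( outputs, dtype = 'float64' )
  pileup  = np.asarray( pileup,  dtype = 'float64' )
  if mu_edges is None: mu_edges = np.arange( 0., 65., 5. )
  bin_centers, bin_thres = [], []
  for lo, hi in zip( mu_edges[:-1], mu_edges[1:] ):
    mask = (pileup >= lo) & (pileup < hi)
    if mask.sum() < 10: continue  # skip sparsely populated pile-up bins
    # Threshold that keeps a `target_eff` fraction of the signal outputs in this bin:
    bin_thres.append( np.percentile( outputs[mask], (1. - target_eff) * 100. ) )
    bin_centers.append( 0.5 * (lo + hi) )
  # Straight-line fit of the per-bin thresholds as a function of pile-up:
  slope, intercept = np.polyfit( bin_centers, bin_thres, 1 )
  raw_thres = np.percentile( outputs, (1. - target_eff) * 100. )
  def corrected_threshold( mu ):
    # Clip the excursion so the correction never moves more than `max_corr` from raw_thres:
    return np.clip( intercept + slope * np.asarray( mu ), raw_thres - max_corr, raw_thres + max_corr )
  return slope, intercept, corrected_threshold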
def getPileup(self, subset):
  baseinfo = self._dataCurator.getBaseInfo(subset, BaseInfo.PileUp)
  if CuratedSubset.isbinary( subset ):
    if CuratedSubset.isoperation( subset ):
      ret = [ np.concatenate(b, axis = npCurrent.odim).squeeze().astype(dtype='float64') for b in baseinfo ]
    else:
      ret = [ b.squeeze().astype(dtype='float64') for b in baseinfo ]
  else:
    if CuratedSubset.isoperation( subset ):
      ret = np.concatenate(baseinfo, axis = npCurrent.odim).squeeze().astype(dtype='float64')
    else:
      ret = baseinfo.squeeze().astype(dtype='float64')
  # Guard against degenerate pile-up information (handle both the list and the array case):
  allZero = all( (r == 0.).all() for r in ret ) if isinstance(ret, list) else (ret == 0.).all()
  if allZero:
    self._fatal("All pile-up data is zero!")
  return ret
def _calcEff( self, subset, output = None, pileup = None, thres = None, makeCorr = True ):
  self._verbose('Calculating efficiency for %s', CuratedSubset.tostring( subset ) )
  pileup = self.getPileup(subset) if pileup is None else pileup
  if output is None: output = self.getOutput(subset)
  if thres is None: thres = self.thres if makeCorr else self.rawThres
  args = (output, pileup) if makeCorr else (output,)
  return thres.getPerf( *args )
def getEffPoint( self, name, subset = [None, None], outputs = [None, None], pileup = [None, None]
               , thres = None, makeCorr = True ):
  from TuningTools.Neural import PerformancePoint
  auc = self.rawPerf.auc if self.rawPerf else -1
  if not isinstance(subset, (tuple, list)):
    if subset is None:
      if not any([o is None for o in outputs]):
        self._fatal("Subset must be specified when outputs is used.")
      subset = self.subset
    subset = [ CuratedSubset.tosgn(subset), CuratedSubset.tobkg(subset) ]
  else:
    if len(subset) == 1:
      if subset[0] is None:
        if not any([o is None for o in outputs]):
          self._fatal("Subset must be specified when outputs is used.")
        subset = [self.subset]
      subset = [ CuratedSubset.tosgn(subset[0]), CuratedSubset.tobkg(subset[0]) ]
    else:
      if any([s is None for s in subset]):
        if not any([o is None for o in outputs]):
          self._fatal("Subset must be specified when outputs is used.")
        subset = [self.subset, self.subset]
      subset = [ CuratedSubset.tosgn(subset[0]), CuratedSubset.tobkg(subset[1]) ]
  self._effSubset = subset
  if any([o is None for o in outputs]):
    #if outputs[0] is None:
    if isinstance(subset, (list, tuple)):
      # FIXME This assumes that sgnOut is cached:
      outputs = [ ( self.getOutput( CuratedSubset.topattern(s) )
                    if CuratedSubset.tobinary(s) is not self.subset else o )
                  for o, s in zip( [self.sgnOut, self.bkgOut], subset ) ]
    else:
      if CuratedSubset.tobinary(subset) is not self.subset:
        outputs = self.getOutput( CuratedSubset.topattern(subset) )
      else:
        outputs = [ self.sgnOut, self.bkgOut ]
  # NOTE: This can be commented out to improve speed
  try:
    from libTuningToolsLib import genRoc
  except ImportError:
    from libTuningTools import genRoc
  o = genRoc( outputs[0], outputs[1], +self._ol, -self._ol, 0.001 )
  auc = Roc( o ).auc
  self._effOutput = outputs
  if thres is None: thres = self.thres if makeCorr else self.rawThres
  pd = self._calcEff( subset[0], output = outputs[0], pileup = pileup[0], thres = thres, makeCorr = makeCorr )
  pf = self._calcEff( subset[1], output = outputs[1], pileup = pileup[1], thres = thres, makeCorr = makeCorr )
  sp = calcSP( pd, 1. - pf )
  return PerformancePoint( name, sp, pd, pf, thres, perc = False, auc = auc
                         , etBinIdx = self.etBinIdx, etaBinIdx = self.etaBinIdx
                         , etBin = self.etBin, etaBin = self.etaBin )
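# For reference, a minimal standalone sketch of the SP index combined above through
# calcSP(pd, 1. - pf). It assumes the usual definition used in this context (the geometric
# mean of the geometric and arithmetic means of the signal efficiency and the background
# rejection); the helper name is hypothetical and this is not the TuningTools calcSP itself.
def _example_sp_index( pd, bkg_rejection ):
  """Illustrative SP index: sqrt( sqrt(pd * rej) * 0.5 * (pd + rej) )."""
  import numpy as np
  return np.sqrt( np.sqrt( pd * bkg_rejection ) * 0.5 * ( pd + bkg_rejection ) )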
def getOutput(self, subset):
  self._verbose('Propagating output for subset: %s', CuratedSubset.tostring(subset))
  data = self._dataCurator[ CuratedSubset.topattern( subset ) ]
  inputDim = self._dataCurator.nInputs
  if inputDim != self._discr.getNumNodes(0):
    self._fatal( "Data input dimension (%d) does not match the discriminator input dimension (%d)!"
               , inputDim, self._discr.getNumNodes(0) )
  if CuratedSubset.isbinary( subset ):
    if CuratedSubset.isoperation( subset ):
      output = [ np.concatenate( [self._discr.propagate_np(sd) for sd in d], axis = npCurrent.odim ) for d in data ]
    else:
      output = [ self._discr.propagate_np(d) for d in data ]
  else:
    if CuratedSubset.isoperation( subset ):
      output = np.concatenate( [self._discr.propagate_np(d) for d in data], axis = npCurrent.odim )
    else:
      output = self._discr.propagate_np(data)
  return output
def _getCorrectionData( self, referenceObj, **kw ):
  neuron = kw.get('neuron', None )
  sort   = kw.get('sort',   None )
  init   = kw.get('init',   None )
  self._baseLabel = "ref%s_etBin%d_etaBin%d%s%s%s" % ( ReferenceBenchmark.tostring( referenceObj.reference )
                                                     , self._dataCurator.etBinIdx
                                                     , self._dataCurator.etaBinIdx
                                                     , ( '_neuron%d' % neuron ) if neuron is not None else ''
                                                     , ( '_sort%d' % sort ) if sort is not None else ''
                                                     , ( '_init%d' % init ) if init is not None else '' )
  self.sgnHist, self.sgnOut = self.get2DPerfHist( CuratedSubset.tosgn(self.subset), 'signal_' + self._baseLabel, getOutputs = True )
  self.bkgHist, self.bkgOut = self.get2DPerfHist( CuratedSubset.tobkg(self.subset), 'background_' + self._baseLabel, getOutputs = True )
  if not self.rawPerf:
    try:
      from libTuningToolsLib import genRoc
    except ImportError:
      from libTuningTools import genRoc
    # Get raw threshold:
    if referenceObj.reference is ReferenceBenchmark.Pd:
      raw_thres = RawThreshold( - np.percentile( -self.sgnOut, referenceObj.refVal * 100. ) )
    elif referenceObj.reference is ReferenceBenchmark.Pf:
      raw_thres = RawThreshold( - np.percentile( -self.bkgOut, referenceObj.refVal * 100. ) )
    else:
      o = genRoc( self.sgnOut, self.bkgOut, self._ol, -self._ol, 0.001 )
      roc = Roc( o
               , etBinIdx = self.etBinIdx, etaBinIdx = self.etaBinIdx
               , etBin = self.etBin, etaBin = self.etaBin )
      self.rawPerf = roc.retrieve( referenceObj )
    if referenceObj.reference in ( ReferenceBenchmark.Pd, ReferenceBenchmark.Pf ):
      self.rawPerf = self.getEffPoint( referenceObj.name
                                     , thres = raw_thres
                                     , makeCorr = False )
      # Here we protect against choosing suboptimal Pd/Pf points:
      o = genRoc( self.sgnOut, self.bkgOut, self._ol, -self._ol, 0.001 )
      roc = Roc( o
               , etBinIdx = self.etBinIdx, etaBinIdx = self.etaBinIdx
               , etBin = self.etBin, etaBin = self.etaBin )
      # Check whether we could be performing better:
      if referenceObj.reference is ReferenceBenchmark.Pd:
        mask = roc.pds >= referenceObj.refVal
      elif referenceObj.reference is ReferenceBenchmark.Pf:
        mask = roc.pfs <= referenceObj.refVal
      pds = roc.pds[mask]
      pfs = roc.pfs[mask]
      sps = roc.sps[mask]
      if referenceObj.reference is ReferenceBenchmark.Pd:
        mask = pfs <= ( 1.001 * self.rawPerf.pf )
        sps = sps[mask]
        pfs = pfs[mask]
        if len(sps):
          idx = np.argmax(sps)
          if pfs[idx] < 0.98 * self.rawPerf.pf:
            self._warning('Model is sub-optimal when operating at the requested Pd.')
            self._info('Using the highest-SP operation point with virtually the same Pf.')
            raw_thres = RawThreshold( - np.percentile( -self.bkgOut, pfs[idx] * 100. ) )
            self._debug('Previous performance was: %s', self.rawPerf.asstr( addname = False, perc = True, addthres = False ))
            self.rawPerf = self.getEffPoint( referenceObj.name
                                           , thres = raw_thres
                                           , makeCorr = False )
            self._debug('New performance is: %s', self.rawPerf.asstr( addname = False, perc = True, addthres = False ))
      elif referenceObj.reference is ReferenceBenchmark.Pf:
        mask = pds >= ( 0.999 * self.rawPerf.pd )
        sps = sps[mask]
        pds = pds[mask]
        if len(sps):
          idx = np.argmax(sps)
          if pds[idx] > 1.005 * self.rawPerf.pd:
            self._warning('Model is sub-optimal when operating at the requested Pf.')
            self._info('Using the highest-SP operation point with virtually the same Pd.')
            raw_thres = RawThreshold( - np.percentile( -self.sgnOut, pds[idx] * 100. ) )
            self._debug('Previous performance was: %s', self.rawPerf.asstr( addname = False, perc = True, addthres = False ))
            self.rawPerf = self.getEffPoint( referenceObj.name
                                           , thres = raw_thres
                                           , makeCorr = False )
            self._debug('New performance is: %s', self.rawPerf.asstr( addname = False, perc = True, addthres = False ))
  else:
    self._debug('Skipped calculating the raw performance since it is already available.')
  self.rawThres = self.rawPerf.thres
  # Use the standard LH threshold-correction method on the signal data:
  sgnCorrData = LHThresholdCorrectionData( self.sgnHist, self.rawPerf.pd, self.rawThres.thres, self.limits, 1. )
  return sgnCorrData
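# Standalone illustration of how the raw (pile-up independent) threshold is obtained above
# for a Pd reference: the threshold is the output value exceeded by the requested fraction
# of the signal outputs (for a Pf reference the same is done with the background outputs).
# The helper name is hypothetical; only numpy is used.
def _example_raw_threshold( outputs, target_eff ):
  """Threshold t such that roughly a `target_eff` fraction of `outputs` satisfies out >= t."""
  import numpy as np
  # Equivalent to the -np.percentile(-outputs, target_eff * 100.) expression used above:
  return -np.percentile( -np.asarray( outputs, dtype = 'float64' ), target_eff * 100. )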
def saveGraphs( self ):
  from ROOT import TH1F
  sStr = CuratedSubset.tostring( self.subset )
  self.sgnCorrData.save( 'signalCorr_' + self._baseLabel + '_' + sStr )
  for i, bkgCorrData in enumerate(self.bkgCorrDataList):
    bkgCorrData.save( 'backgroundCorr_' + str(i) + '_' + self._baseLabel + '_' + sStr )
  sgnPileup = self.getPileup( CuratedSubset.tosgn( self.subset ) )
  sgnPassPileup = sgnPileup[ self.rawThres.getMask( self.sgnOut ) ]
  sgnEff = PileupEffHist( sgnPassPileup, sgnPileup, defaultNvtxBins, 'signalUncorr_' + self._baseLabel + '_' + sStr )
  sgnEff.Write()
  bkgPileup = self.getPileup( CuratedSubset.tobkg( self.subset ) )
  bkgPassPileup = bkgPileup[ self.rawThres.getMask( self.bkgOut ) ]
  bkgEff = PileupEffHist( bkgPassPileup, bkgPileup, defaultNvtxBins, 'backgroundUncorr_' + self._baseLabel + '_' + sStr )
  bkgEff.Write()
  # Write 2D histograms:
  self.sgnHist.Write( 'signal2DCorr_' + self._baseLabel + '_' + sStr )
  self.bkgHist.Write( 'background2DCorr_' + self._baseLabel + '_' + sStr )
  # Write output histograms:
  title = CuratedSubset.tostring( CuratedSubset.tosgn( self.subset ) ) + ' NN Output'
  sgnTH1 = TH1F( title, title, 100, -self._ol, self._ol )
  for p in self.sgnOut: sgnTH1.Fill( p )
  sgnTH1.Write( 'signalOutputs_' + self._baseLabel + '_' + sStr )
  title = CuratedSubset.tostring( CuratedSubset.tobkg( self.subset ) ) + ' NN Output'
  bkgTH1 = TH1F( title, title, 100, -self._ol, self._ol )
  for p in self.bkgOut: bkgTH1.Fill( p )
  bkgTH1.Write( 'backgroundOutputs_' + self._baseLabel + '_' + sStr )
  if self._effSubset is not None:
    esubset = CuratedSubset.tobinary( CuratedSubset.topattern( self._effSubset[0] ) )
    if esubset is not self.subset:
      sStr = CuratedSubset.tostring( esubset )
      # Also plot the subset on which the efficiency was calculated:
      sgnPileup = self.getPileup( CuratedSubset.tosgn( self._effSubset[0] ) )
      sgnPassPileup = sgnPileup[ self.rawThres.getMask( self._effOutput[0] ) ]
      sgnEff = PileupEffHist( sgnPassPileup, sgnPileup, defaultNvtxBins, 'signalUncorr_' + self._baseLabel + '_' + sStr )
      sgnEff.Write()
      sgnPassPileup = sgnPileup[ self.thres.getMask( self._effOutput[0], sgnPileup ) ]
      sgnEff = PileupEffHist( sgnPassPileup, sgnPileup, defaultNvtxBins, 'signalCorr_' + self._baseLabel + '_' + sStr )
      sgnEff.Write()
      bkgPileup = self.getPileup( CuratedSubset.tobkg( self._effSubset[1] ) )
      bkgPassPileup = bkgPileup[ self.rawThres.getMask( self._effOutput[1] ) ]
      bkgEff = PileupEffHist( bkgPassPileup, bkgPileup, defaultNvtxBins, 'backgroundUncorr_' + self._baseLabel + '_' + sStr )
      bkgEff.Write()
      bkgPassPileup = bkgPileup[ self.thres.getMask( self._effOutput[1], bkgPileup ) ]
      bkgEff = PileupEffHist( bkgPassPileup, bkgPileup, defaultNvtxBins, 'backgroundCorr_' + self._baseLabel + '_' + sStr )
      bkgEff.Write()
      sgnHist = self.get2DPerfHist( self._effSubset[0], 'signal2DCorr_' + self._baseLabel + '_' + sStr, outputs = self._effOutput[0] )
      sgnHist.Write( 'signal2DCorr_' + self._baseLabel + '_' + sStr )
      bkgHist = self.get2DPerfHist( self._effSubset[1], 'background2DCorr_' + self._baseLabel + '_' + sStr, outputs = self._effOutput[1] )
      bkgHist.Write( 'background2DCorr_' + self._baseLabel + '_' + sStr )
      # 1D output plots (fill with the outputs of the efficiency subset):
      title = CuratedSubset.tostring( self._effSubset[0] ) + ' NN Output'
      sgnTH1 = TH1F( title, title, 100, -self._ol, self._ol )
      for p in self._effOutput[0]: sgnTH1.Fill( p )
      sgnTH1.Write( 'signalOutputs_' + self._baseLabel + '_' + sStr )
      title = CuratedSubset.tostring( self._effSubset[1] ) + ' NN Output'
      bkgTH1 = TH1F( title, title, 100, -self._ol, self._ol )
      for p in self._effOutput[1]: bkgTH1.Fill( p )
      bkgTH1.Write( 'backgroundOutputs_' + self._baseLabel + '_' + sStr )
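# The PileupEffHist objects written above represent efficiency-versus-pile-up profiles
# (entries passing the threshold divided by all entries, per pile-up bin). Below is a
# minimal numpy-only sketch of that quantity, with a hypothetical name and no ROOT
# dependency, to make explicit what is being plotted.
def _example_efficiency_vs_pileup( pass_pileup, all_pileup, bin_edges ):
  """Per-bin efficiency: passing entries divided by all entries in each pile-up bin."""
  import numpy as np
  passed, _ = np.histogram( pass_pileup, bins = bin_edges )
  total, _  = np.histogram( all_pileup,  bins = bin_edges )
  with np.errstate( divide = 'ignore', invalid = 'ignore' ):
    eff = np.where( total > 0, passed.astype('float64') / total, np.nan )
  return eff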