Exemplo n.º 1
0
def divideGraph(num, denom, errorY=True, invRatio=False):
    '''
    Divide two TGraphs point by point.

    @param num       Numerator graph object (its getRootGraph() is used)
    @param denom     Denominator graph object (its getRootGraph() is used)
    @param errorY    If False, the propagated y-errors are overwritten with
                     small placeholder values
    @param invRatio  If True, store the inverse of the ratio for every point
                     whose ratio is non-zero

    @return new TGraph (deep copy of the numerator graph) holding the ratio
    '''
    grNum = num.getRootGraph()
    gr = copy.deepcopy(grNum)

    # Numerator and Denominator graphs are the same (i.e. reference histo)
    if num == denom:
        for i in xrange(gr.GetN()):
            gr.SetPoint(i, gr.GetX()[i], 1.0)

            # NOTE(review): y is read back after SetPoint has overwritten it,
            # so "a" is the value just written (1.0) and not the original y
            # of the point. Behaviour kept unchanged - confirm whether the
            # original y was meant to enter the error propagation.
            a = gr.GetY()[i]
            sigmaLow = gr.GetEYlow()[i]
            sigmaHigh = gr.GetEYhigh()[i]
            gr.SetPointEYhigh(
                i, err.errorPropagationForDivision(a, sigmaHigh, a, sigmaHigh))
            gr.SetPointEYlow(
                i, err.errorPropagationForDivision(a, sigmaLow, a, sigmaLow))

            # Disable error bars?
            if not errorY:
                gr.SetPointEYhigh(i, 1e-4)
                gr.SetPointEYlow(i, 1e-4)
        return gr

    grDenom = denom.getRootGraph()

    # For-loop: All points
    for i in xrange(gr.GetN()):
        yDiv = 0.0
        yVal = grDenom.GetY()[i]

        # Sanity check: only divide by a positive denominator
        if yVal > 0:
            yDiv = gr.GetY()[i] / yVal

            # A zero ratio cannot be inverted; leave it at zero instead of
            # raising ZeroDivisionError
            if invRatio and yDiv != 0:
                yDiv = 1.0 / yDiv

        # Set new point x-y coords
        gr.SetPoint(i, gr.GetX()[i], yDiv)

        # Propagate the high and low errors separately
        a = grNum.GetY()[i]
        b = grDenom.GetY()[i]
        errHigh = err.errorPropagationForDivision(
            a, grNum.GetEYhigh()[i], b, grDenom.GetEYhigh()[i])
        errLow = err.errorPropagationForDivision(
            a, grNum.GetEYlow()[i], b, grDenom.GetEYlow()[i])
        gr.SetPointEYhigh(i, errHigh)
        gr.SetPointEYlow(i, errLow)

        # Disable error bars? (placeholder scales with the denominator value)
        if not errorY:
            gr.SetPointEYlow(i, yVal * 1e-4)
            gr.SetPointEYhigh(i, yVal * 1e-4)
    return gr
    def _calculate(self, numerator, denominator):
        '''
        Compute one efficiency (numerator / denominator) per phase-space
        split bin and store the results in self._efficiencies.

        For each split bin the data-driven QCD, data and EWK histograms of
        both inputs are fetched and renamed, their sums and integrated
        uncertainties are evaluated with the bin range 1..NbinsX+2, and an
        ExtendedCount carrying the "statData" and "statEWK" uncertainties is
        appended. The efficiency stays 0.0 unless both sums exceed 1e-6 in
        absolute value.
        '''
        uncertaintyLabels = ["statData", "statEWK"]
        numNames = ("hNum", "hNumData", "hNumEwk")
        denomNames = ("hDenom", "hDenomData", "hDenomEwk")

        def fetchNamed(source, binIndex, names):
            # QCD, data and EWK histograms, each renamed right after the fetch
            hQCD = source.getDataDrivenQCDHistoForSplittedBin(binIndex)
            hQCD.SetName(names[0])
            hData = source.getDataHistoForSplittedBin(binIndex)
            hData.SetName(names[1])
            hEwk = source.getEwkHistoForSplittedBin(binIndex)
            hEwk.SetName(names[2])
            return hQCD, hData, hEwk

        self._efficiencies = []
        for binIndex in range(numerator.getNumberOfPhaseSpaceSplitBins()):
            hNum, hNumData, hNumEwk = fetchNamed(numerator, binIndex,
                                                 numNames)
            hDenom, hDenomData, hDenomEwk = fetchNamed(denominator, binIndex,
                                                       denomNames)

            # Sum over basic shape and leg2 shape to obtain normalisation factor
            sumNum = hNum.Integral(1, hNum.GetNbinsX() + 2)
            sumNumDataUncert = integratedUncertaintyForHistogram(
                1, hNumData.GetNbinsX() + 2, hNumData)
            sumNumEwkUncert = integratedUncertaintyForHistogram(
                1, hNumEwk.GetNbinsX() + 2, hNumEwk)
            sumDenom = hDenom.Integral(1, hDenom.GetNbinsX() + 2)
            sumDenomDataUncert = integratedUncertaintyForHistogram(
                1, hDenomData.GetNbinsX() + 2, hDenomData)
            sumDenomEwkUncert = integratedUncertaintyForHistogram(
                1, hDenomEwk.GetNbinsX() + 2, hDenomEwk)

            # Uncertainties are propagated regardless of the zero check below
            uncertData = errorPropagation.errorPropagationForDivision(
                sumNum, sumNumDataUncert, sumDenom, sumDenomDataUncert)
            uncertEwk = errorPropagation.errorPropagationForDivision(
                sumNum, sumNumEwkUncert, sumDenom, sumDenomEwkUncert)

            # Efficiency is only defined for non-vanishing sums
            bothNonZero = abs(sumNum) > 0.000001 and abs(sumDenom) > 0.000001
            efficiency = sumNum / sumDenom if bothNonZero else 0.0

            self._efficiencies.append(
                ExtendedCount(efficiency, [uncertData, uncertEwk],
                              uncertaintyLabels))

            # Remove the temporary histograms from the current ROOT directory
            for histoName in numNames + denomNames:
                ROOT.gDirectory.Delete(histoName)
        # FIXME: add histogram for efficiency
        return
def createSystHistograms(hRate,
                         hSystUp,
                         hSystDown,
                         hNumerator,
                         hDenominator,
                         quietMode=True):
    '''
    Fill hSystUp and hSystDown with hRate scaled bin by bin by (1 + sigma)
    and (1 - sigma).

    sigma is the value returned by errorPropagationForDivision for the bin
    contents and bin errors of hNumerator and hDenominator, capped at 1.0.
    If either bin content is not above 1e-5 in absolute value, sigma keeps
    its default of 0.2. With quietMode=False an estimate based on the
    histogram integrals is printed as well.
    '''
    minContent = 0.00001
    for iBin in range(1, hRate.GetNbinsX() + 1):
        numContent = hNumerator.GetBinContent(iBin)
        denomContent = hDenominator.GetBinContent(iBin)

        sigma = 0.2  # Relative uncertainty default value
        if abs(numContent) > minContent and abs(denomContent) > minContent:
            # The sign of the ratio is deliberately not taken into account:
            # merging bins could then lead to cancellations and an
            # underestimated syst. uncertainty
            sigma = errorPropagationForDivision(
                numContent, hNumerator.GetBinError(iBin),
                denomContent, hDenominator.GetBinError(iBin))
            if sigma > 1.0:
                sigma = 1.0

        hSystUp.SetBinContent(iBin, (1.0 + sigma) * hRate.GetBinContent(iBin))
        hSystDown.SetBinContent(iBin,
                                (1.0 - sigma) * hRate.GetBinContent(iBin))

    if quietMode:
        return

    # Calculate total uncertainty
    signalIntegral = hNumerator.Integral()
    ctrlIntegral = hDenominator.Integral()
    allBins = range(1, hSystUp.GetNbinsX() + 1)
    signalVariance = sum((hNumerator.GetBinError(b)**2 for b in allBins), 0.0)
    ctrlVariance = sum((hDenominator.GetBinError(b)**2 for b in allBins), 0.0)

    if signalIntegral > 0.0 and ctrlIntegral > 0.0:
        ratio = signalIntegral / ctrlIntegral
        ratioSigma = errorPropagationForDivision(signalIntegral,
                                                 sqrt(signalVariance),
                                                 ctrlIntegral,
                                                 sqrt(ctrlVariance))
    else:
        ratio = 1.0
        ratioSigma = 0.0

    sigmaUp = ratio + ratioSigma - 1.0
    sigmaDown = ratio - ratioSigma - 1.0
    print("Estimate for syst. uncertainty of non-isol.->isol. shape difference: up: %.1f %% down: %.1f %%" % (
        sigmaUp * 100.0, sigmaDown * 100.0))
    def _calculate(self, numerator, denominator):
        '''
        Build self._efficiencies: one ExtendedCount per phase-space split
        bin holding numerator / denominator and its two stat. uncertainties.

        The efficiency value is left at 0.0 whenever the numerator or the
        denominator sum is not above 1e-6 in absolute value; the
        uncertainties are propagated in every case.
        '''
        def renamed(histo, name):
            # Give the histogram a fixed name so it can be deleted by name
            histo.SetName(name)
            return histo

        def upperBin(histo):
            # Upper bin index passed to the integration calls
            return histo.GetNbinsX()+2

        self._efficiencies = []
        labels = ["statData", "statEWK"]
        nSplitBins = numerator.getNumberOfPhaseSpaceSplitBins()
        for iSplit in range(0, nSplitBins):
            hNum = renamed(numerator.getDataDrivenQCDHistoForSplittedBin(iSplit), "hNum")
            hNumData = renamed(numerator.getDataHistoForSplittedBin(iSplit), "hNumData")
            hNumEwk = renamed(numerator.getEwkHistoForSplittedBin(iSplit), "hNumEwk")
            hDenom = renamed(denominator.getDataDrivenQCDHistoForSplittedBin(iSplit), "hDenom")
            hDenomData = renamed(denominator.getDataHistoForSplittedBin(iSplit), "hDenomData")
            hDenomEwk = renamed(denominator.getEwkHistoForSplittedBin(iSplit), "hDenomEwk")

            # Sum over basic shape and leg2 shape to obtain normalisation factor
            numSum = hNum.Integral(1, upperBin(hNum))
            numDataUncert = integratedUncertaintyForHistogram(1, upperBin(hNumData), hNumData)
            numEwkUncert = integratedUncertaintyForHistogram(1, upperBin(hNumEwk), hNumEwk)
            denomSum = hDenom.Integral(1, upperBin(hDenom))
            denomDataUncert = integratedUncertaintyForHistogram(1, upperBin(hDenomData), hDenomData)
            denomEwkUncert = integratedUncertaintyForHistogram(1, upperBin(hDenomEwk), hDenomEwk)

            # Calculate efficiency and its data / EWK stat. uncertainties
            effUncertData = errorPropagation.errorPropagationForDivision(numSum, numDataUncert, denomSum, denomDataUncert)
            effUncertEwk = errorPropagation.errorPropagationForDivision(numSum, numEwkUncert, denomSum, denomEwkUncert)
            if abs(numSum) > 0.000001 and abs(denomSum) > 0.000001:
                eff = numSum / denomSum
            else:
                eff = 0.0
            self._efficiencies.append(ExtendedCount(eff, [effUncertData, effUncertEwk], labels))

            # Clean up the temporary histograms by name
            for tmpName in ("hNum", "hNumData", "hNumEwk", "hDenom", "hDenomData", "hDenomEwk"):
                ROOT.gDirectory.Delete(tmpName)
        # FIXME: add histogram for efficiency
        return
Exemplo n.º 5
0
    def CalculatePurity(self, nBins, shape, shapeDataSum, shapeDataSumUncert, shapeEwkSum, shapeEwkSumUncert, verbose=False):
        '''
        Fill self._resultShapeEWK and self._resultShapePurity for bins
        1..nBins and optionally print a summary table.

        Purity is 1 - EWK/data for bins whose data sum exceeds 1e-6 in
        absolute value, otherwise 0.0 (with 0.0 uncertainty). The square
        root of each entry of the two *Uncert lists is used as the
        uncertainty of the corresponding sum.
        '''
        # Construct info table (debugging)
        rowFormat = "{:>6} {:^20} {:>10} {:^3} {:<10}"
        separator = "="*70
        table = [
            "{:^70}".format(shape.getHistoName()),
            separator,
            rowFormat.format("Bin", "Range", "Purity", "+/-", "Uncertainty"),
            separator,
        ]

        # For-loop: All shape bins
        for iBin in range(1, nBins+1):
            idx = iBin - 1
            ewkSum = shapeEwkSum[idx]
            ewkSumUncert = math.sqrt(shapeEwkSumUncert[idx])
            dataSum = shapeDataSum[idx]
            dataSumUncert = math.sqrt(shapeDataSumUncert[idx])

            # Ignore zero bins
            if abs(dataSum) > 0.000001:
                purity = 1.0 - ewkSum / dataSum
                purityUncert = errorPropagation.errorPropagationForDivision(ewkSum, ewkSumUncert, dataSum, dataSumUncert)
            else:
                purity = 0.0
                purityUncert = 0.0

            # Store MC EWK content
            self._resultShapeEWK.SetBinContent(iBin, ewkSum)
            self._resultShapeEWK.SetBinError(iBin, ewkSumUncert)

            # Store Purity content
            self._resultShapePurity.SetBinContent(iBin, purity)
            self._resultShapePurity.SetBinError(iBin, purityUncert)

            # The last bin (and anything beyond) is labelled as open-ended
            xAxis = self._resultShape.GetXaxis()
            if iBin >= self._resultShape.GetNbinsX():
                binRange = "> %.1f"   % (xAxis.GetBinLowEdge(iBin) )
            else:
                binRange = "%.1f -> %.1f" % (xAxis.GetBinLowEdge(iBin), xAxis.GetBinUpEdge(iBin) )
            table.append(rowFormat.format(iBin, binRange, "%.3f" % purity, "+/-", "%.3f" % purityUncert))
        table.append(separator)

        #FIXME: shape.getDataDrivenQCDHistoForSplittedBin(0).GetBinWidth(1) =  Njets_Data_0dataDriven
        # something is wrong here? is that why the binwidth, binLowEdge, etc.. of purity are all 0?
        if not verbose:
            return

        # Print purity as function of final shape bins
        self.Print("Printing Shape Purity bin-by-bin.", True)
        for lineNumber, line in enumerate(table):
            self.Print(line, lineNumber==0)
        return
Exemplo n.º 6
0
    def CalculatePurity(self, nBins, shape, shapeDataSum, shapeDataSumUncert, shapeEwkSum, shapeEwkSumUncert, verbose=False):
        '''
        Store the MC EWK content and the purity (1 - EWK/data) of the shape
        bins 1..nBins in self._resultShapeEWK / self._resultShapePurity.

        Bins whose data sum is not above 1e-6 in absolute value get purity
        0.0 +/- 0.0. When verbose is True the collected table is printed
        through self.Print.
        '''
        def describeRange(binNumber):
            # Bin-range or overflow bin?
            axis = self._resultShape.GetXaxis()
            if binNumber < self._resultShape.GetNbinsX():
                return "%.1f -> %.1f" % (axis.GetBinLowEdge(binNumber), axis.GetBinUpEdge(binNumber) )
            return "> %.1f"   % (axis.GetBinLowEdge(binNumber) )

        # Construct info table (debugging)
        align = "{:>6} {:^20} {:>10} {:^3} {:<10}"
        hLine = "="*70
        table = ["{:^70}".format(shape.getHistoName()), hLine]
        table += [align.format("Bin", "Range", "Purity", "+/-", "Uncertainty"), hLine]

        # For-loop: All shape bins
        for binNumber in range(1, nBins+1):
            pos = binNumber - 1
            nEwk = shapeEwkSum[pos]
            nEwkUncert = math.sqrt(shapeEwkSumUncert[pos])
            nData = shapeDataSum[pos]
            nDataUncert = math.sqrt(shapeDataSumUncert[pos])

            # Zero bins keep the default purity of 0.0 +/- 0.0
            purityValue, purityError = 0.0, 0.0
            if abs(nData) > 0.000001:
                purityValue = 1.0 - nEwk / nData
                purityError = errorPropagation.errorPropagationForDivision(nEwk, nEwkUncert, nData, nDataUncert)

            # Store MC EWK content
            self._resultShapeEWK.SetBinContent(binNumber, nEwk)
            self._resultShapeEWK.SetBinError(binNumber, nEwkUncert)

            # Store Purity content
            self._resultShapePurity.SetBinContent(binNumber, purityValue)
            self._resultShapePurity.SetBinError(binNumber, purityError)

            table.append(align.format(binNumber, describeRange(binNumber), "%.3f" % purityValue, "+/-", "%.3f" % purityError))
        table.append(hLine)

        #FIXME: shape.getDataDrivenQCDHistoForSplittedBin(0).GetBinWidth(1) =  Njets_Data_0dataDriven
        # something is wrong here? is that why the binwidth, binLowEdge, etc.. of purity are all 0?
        # Print purity as function of final shape bins
        if verbose:
            self.Print("Printing Shape Purity bin-by-bin.", True)
            isFirst = True
            for row in table:
                self.Print(row, isFirst)
                isFirst = False
        return
    def GetTotalUncertainyTable(self, hRate, hSystUp, hSystDown, hLine, align):
        '''
        Return the table lines summarising the total rate and its up/down
        systematic variations over all bins of hRate.

        The returned list is: hLine, one row formatted with "align", a
        highlighted event-yield line, hLine. The up/down percentages come
        from the ratio of the hSystUp and hSystDown integrals and its
        propagated error; ratio 1.0 and error 0.0 are used unless both
        integrals are positive.
        '''
        # Calculate total uncertainty
        nominalSum = hRate.Integral()
        upSum = hSystUp.Integral()
        downSum = hSystDown.Integral()
        nBinsX = hRate.GetNbinsX()

        # Sum the squared bin errors of the up and down histograms
        allBins = range(1, nBinsX + 1)
        upErrorSq = sum((hSystUp.GetBinError(b)**2 for b in allBins), 0.0)
        downErrorSq = sum((hSystDown.GetBinError(b)**2 for b in allBins), 0.0)

        # Sanity check
        if upSum > 0.0 and downSum > 0.0:
            # Calculate ratio and its error with error propagation
            ratio = upSum / downSum
            ratioSigma = errorPropagationForDivision(upSum,
                                                     sqrt(upErrorSq),
                                                     downSum,
                                                     sqrt(downErrorSq))
        else:
            ratio = 1.0
            ratioSigma = 0.0

        # Calculate % errors up/down
        sigmaUp = (ratio + ratioSigma - 1.0) * 100
        sigmaDown = (ratio - ratioSigma - 1.0) * 100

        rangeBins = "1 to %d" % (nBinsX)
        rangeX = "%s to %s" % (hRate.GetBinCenter(1),
                               hRate.GetBinCenter(nBinsX))
        summaryRow = align.format(rangeBins, rangeX, "%.1f" % nominalSum,
                                  "%.1f" % upSum, "%.1f" % downSum,
                                  "%.1f" % (sigmaUp), "%.1f" % (sigmaDown))

        yieldText = "Events +/- stat. +/- syst. = %.1f +/- %.1f +/- %.1f" % (
            nominalSum, abs(nominalSum - upSum), abs(nominalSum - downSum))
        yieldRow = (ShellStyles.HighlightAltStyle() +
                    "{:^85}".format(yieldText) + ShellStyles.NormalStyle())

        return [hLine, summaryRow, yieldRow, hLine]
    def GetTotalUncertainyTable(self, hRate, hSystUp, hSystDown, hLine, align):
        '''
        Build the "total" rows of the uncertainty table.

        Integrates hRate, hSystUp and hSystDown, derives the up/down
        percentages from the ratio of the two systematic integrals
        (ratio 1.0 and error 0.0 unless both integrals are positive) and
        returns [hLine, summary row, highlighted yield line, hLine].
        '''
        def sumSquaredErrors(histo, lastBin):
            # Sum of squared bin errors over bins 1..lastBin
            total = 0.0
            for iBin in range(1, lastBin+1):
                total += histo.GetBinError(iBin)**2
            return total

        # Calculate total uncertainty
        rateNominal = hRate.Integral()
        rateUp = hSystUp.Integral()
        rateDown = hSystDown.Integral()
        nBins = hRate.GetNbinsX()
        upSquared = sumSquaredErrors(hSystUp, nBins)
        downSquared = sumSquaredErrors(hSystDown, nBins)

        # Defaults used when the sanity check fails
        ratio, ratioSigma = 1.0, 0.0
        if rateUp > 0.0 and rateDown > 0.0:
            # Calculate ratio and its error with error propagation
            ratio = rateUp / rateDown
            ratioSigma = errorPropagationForDivision(rateUp, sqrt(upSquared), rateDown, sqrt(downSquared) )

        table = [hLine]

        # Calculate % errors up/down
        percentUp = (ratio + ratioSigma - 1.0)*100
        percentDown = (ratio - ratioSigma - 1.0)*100
        xRange = "%s to %s" % (hRate.GetBinCenter(1), hRate.GetBinCenter(nBins))
        binRange = "1 to %d" % (nBins)
        columns = (binRange, xRange, "%.1f" % rateNominal, "%.1f" % rateUp, "%.1f" % rateDown, "%.1f" % (percentUp), "%.1f" % (percentDown))
        table.append( align.format(*columns) )

        # Highlighted event-yield line
        yieldValues = (rateNominal, abs(rateNominal-rateUp), abs(rateNominal-rateDown))
        evtYield = "{:^85}".format("Events +/- stat. +/- syst. = %.1f +/- %.1f +/- %.1f" % yieldValues)
        table.append( ShellStyles.HighlightAltStyle() + evtYield + ShellStyles.NormalStyle() )
        table.append(hLine)
        return table
Exemplo n.º 9
0
    def CalculateTransferFactor(self,
                                binLabel,
                                hFakeB_Baseline,
                                hFakeB_Inverted,
                                verbose=False):
        '''
        Calculates the transfer factor TF = nSR / nCR for one bin and stores
        it together with its error and its up/down variations.

        nSR and nCR are obtained with IntegralAndError over bins 1..NbinsX+1
        of hFakeB_Baseline and hFakeB_Inverted. TF_Up = TF + error is capped
        at 1.0 and TF_Down = TF - error is floored at 0.0. The results are
        stored under binLabel in self._TF, self._TF_Error, self._TF_Up and
        self._TF_Down; self._BinLabelMap maps binLabel to the title of
        hFakeB_Inverted, and a summary line is added to self._commentLines.

        TF = Transfer Factor
        SR = Signal Region
        CR = Control Region
        VR = Verification Region
        '''
        self.verbose = verbose

        # NOTES: Add EWKGenuineB TF, Add Data TF, add QCD TF, Add EWK TF, add MCONLY TFs
        nSR_Error = ROOT.Double(0.0)
        nCR_Error = ROOT.Double(0.0)
        nSR = hFakeB_Baseline.IntegralAndError(1,
                                               hFakeB_Baseline.GetNbinsX() + 1,
                                               nSR_Error)
        nCR = hFakeB_Inverted.IntegralAndError(1,
                                               hFakeB_Inverted.GetNbinsX() + 1,
                                               nCR_Error)

        # Calculate Transfer Factor (TF) from Control Region (CR) to Signal Region (SR)
        # NOTE(review): there is no guard against nCR == 0, so an empty
        # control region raises ZeroDivisionError here (as before) - confirm
        # how such a bin should be treated.
        TF = nSR / nCR
        TF_Error = errorPropagation.errorPropagationForDivision(
            nSR, nSR_Error, nCR, nCR_Error)

        # Variations by one error, kept inside [0, 1]
        TF_Up = TF + TF_Error
        if TF_Up > 1.0:
            TF_Up = 1.0
        TF_Down = TF - TF_Error
        if TF_Down < 0.0:
            TF_Down = 0.0

        lines = ["TF (bin=%s) = N_CR1 / N_CR2 = %f / %f =  %f +- %f" %
                 (binLabel, nSR, nCR, TF, TF_Error)]

        # Replace bin label with histo title (has exact binning info)
        self._BinLabelMap[binLabel] = hFakeB_Inverted.GetTitle()
        self._TF[binLabel] = TF
        self._TF_Error[binLabel] = TF_Error
        self._TF_Up[binLabel] = TF_Up
        self._TF_Down[binLabel] = TF_Down

        # Store all information for later used (write to file)
        self._commentLines.extend(lines)
        return
    def CalculateTransferFactor(self, binLabel, hFakeB_CR1, hFakeB_CR2, hFakeB_CR3, hFakeB_CR4, verbose=False):
        '''
        Calculates the transfer factor TF = N_CR1 / N_CR2 for one bin and
        stores it together with its error and its 1x/2x/3x up and down
        variations.

        The event counts and errors of all four control regions are obtained
        with IntegralAndError over bins 1..NbinsX+1 and stored under
        binLabel; only CR1 and CR2 enter the transfer factor. Up variations
        (TF + n*error) are capped at 1.0 and down variations (TF - n*error)
        are floored at 0.0; a message is printed whenever a value is forced.

        TF = Transfer Factor
        SR = Signal Region
        CR = Control Region
        VR = Verification Region
        '''
        def capAtOne(value, message):
            # Up variations must not exceed 1
            if value > 1.0:
                Print(message % (value), True)
                return 1.0
            return value

        def floorAtZero(value, message):
            # Down variations must not become negative
            if value < 0.0:
                Print(message % (value), True)
                return 0.0
            return value

        self.verbose = verbose

        # NOTES: Add EWKGenuineB TF, Add Data TF, add QCD TF, Add EWK TF, add MCONLY TFs
        nCR1_Error = ROOT.Double(0.0)
        nCR2_Error = ROOT.Double(0.0)
        nCR3_Error = ROOT.Double(0.0)
        nCR4_Error = ROOT.Double(0.0)

        # Get Events in all CRs and their associated errors
        nCR1 = hFakeB_CR1.IntegralAndError(1, hFakeB_CR1.GetNbinsX()+1, nCR1_Error)
        nCR2 = hFakeB_CR2.IntegralAndError(1, hFakeB_CR2.GetNbinsX()+1, nCR2_Error)
        nCR3 = hFakeB_CR3.IntegralAndError(1, hFakeB_CR3.GetNbinsX()+1, nCR3_Error)
        nCR4 = hFakeB_CR4.IntegralAndError(1, hFakeB_CR4.GetNbinsX()+1, nCR4_Error)

        # Calculate Transfer Factor (TF): R = N_CR1 / N_CR2
        # NOTE(review): there is no guard against nCR2 == 0, so an empty CR2
        # raises ZeroDivisionError here (as before) - confirm how such a bin
        # should be treated.
        TF       = (nCR1 / nCR2)
        TF_Error = errorPropagation.errorPropagationForDivision(nCR1, nCR1_Error, nCR2, nCR2_Error)

        # Up variations (messages are printed in this order)
        TF_Up   = capAtOne(TF + TF_Error,   "Forcing TF_Up (=%.3f) to be equal to 1!")
        TF_Up2x = capAtOne(TF + 2*TF_Error, "Forcing TF_Up2x (=%.3f) to be equal to 1!")
        TF_Up3x = capAtOne(TF + 3*TF_Error, "Forcing TF_Up3x (=%.3f) to be equal to 1!")

        # Down variations
        TF_Down   = floorAtZero(TF - TF_Error,   "Forcing TF_Down   (=%.3f) to be equal to 0")
        TF_Down2x = floorAtZero(TF - 2*TF_Error, "Forcing TF_Down2x (=%.3f) to be equal to 0")
        TF_Down3x = floorAtZero(TF - 3*TF_Error, "Forcing TF_Down3x (=%.3f) to be equal to 0")

        lines = ["TF (bin=%s) = N_CR1 / N_CR2 = %f / %f =  %f +- %f" % (binLabel, nCR1, nCR2, TF, TF_Error)]

        # Store the transfer factors (R_{i}) where i is index of bin the Fake-b measurement is made in
        # Replace bin label with histo title (has exact binning info)
        self._BinLabelMap[binLabel]    = self.getNiceBinLabel(hFakeB_CR2.GetTitle())
        self._NEvtsCR1[binLabel]       = nCR1
        self._NEvtsCR1_Error[binLabel] = nCR1_Error
        self._NEvtsCR2[binLabel]       = nCR2
        self._NEvtsCR2_Error[binLabel] = nCR2_Error
        self._NEvtsCR3[binLabel]       = nCR3
        self._NEvtsCR3_Error[binLabel] = nCR3_Error
        self._NEvtsCR4[binLabel]       = nCR4
        self._NEvtsCR4_Error[binLabel] = nCR4_Error
        self._TF[binLabel]             = TF
        self._TF_Error[binLabel]       = TF_Error
        self._TF_Up[binLabel]          = TF_Up
        self._TF_Up2x[binLabel]        = TF_Up2x
        self._TF_Up3x[binLabel]        = TF_Up3x
        self._TF_Down[binLabel]        = TF_Down
        self._TF_Down2x[binLabel]      = TF_Down2x
        self._TF_Down3x[binLabel]      = TF_Down3x
        self._FakeBNormalization[binLabel]       = TF         # TF
        self._FakeBNormalizationError[binLabel]  = TF_Error   # Error(TF)
        self._FakeBNormalizationUp[binLabel]     = TF_Up      # TF + Error
        self._FakeBNormalizationUp2x[binLabel]   = TF_Up2x    # TF + 2*Error
        self._FakeBNormalizationUp3x[binLabel]   = TF_Up3x    # TF + 3*Error
        self._FakeBNormalizationDown[binLabel]   = TF_Down    # TF - Error
        self._FakeBNormalizationDown2x[binLabel] = TF_Down2x  # TF - 2*Error
        self._FakeBNormalizationDown3x[binLabel] = TF_Down3x  # TF - 3*Error

        # Store all information for later used (write to file)
        self._commentLines.extend(lines)
        return
Exemplo n.º 11
0
    def _doCalculate2D(self, nSplitBins, shape, normFactors, optionPrintPurityByBins, optionDoNQCDByBinHistograms, myUncertaintyLabels):
        '''
        Calculates the result for 2D histograms

        For every phase-space split bin the data-driven QCD shape is scaled
        by its normalization factor and added to self._resultShape (bin
        errors are summed in quadrature) and to self._resultCountObject.
        The MC EWK content and the purity (1 - EWK/data) of every final
        shape bin are stored in self._resultShapeEWK and
        self._resultShapePurity.

        @param nSplitBins                   number of phase-space split bins to loop over
        @param shape                        provides the QCD, data and EWK histograms of each split bin
        @param normFactors                  mapping: normalization label -> normalization factor
        @param optionPrintPurityByBins      if True, print the purity of every final shape bin
        @param optionDoNQCDByBinHistograms  if True, also fill self._histogramsList[i] for each split bin
        @param myUncertaintyLabels          labels passed to ExtendedCount for the data / EWK stat. uncertainties
        '''
        nBinsX = self._resultShape.GetNbinsX()
        nBinsY = self._resultShape.GetNbinsY()

        def zeroGrid():
            # Independent [x][y] grid of zeros in the final shape binning
            return [[0.0] * nBinsY for _ in range(nBinsX)]

        # Intialize counters for purity calculation in final shape binning
        myShapeDataSum = zeroGrid()
        myShapeDataSumUncert = zeroGrid()
        myShapeEwkSum = zeroGrid()
        myShapeEwkSumUncert = zeroGrid()

        # Calculate results separately for each phase-space bin, and then combine them to get inclusive result
        for i in range(0, nSplitBins):
            # Get data-driven QCD, data, and MC EWK shape histogram for the phase space bin
            h     = shape.getDataDrivenQCDHistoForSplittedBin(i)
            hData = shape.getDataHistoForSplittedBin(i)
            hEwk  = shape.getEwkHistoForSplittedBin(i)

            # Get normalization factor (a missing factor leaves the weight at zero)
            wQCDLabel = shape.getPhaseSpaceBinFileFriendlyTitle(i)
            if self._optionUseInclusiveNorm:
                wQCDLabel = "Inclusive"
            wQCD = 0.0
            if wQCDLabel not in normFactors:
                msg = "No normalization factors available for bin '%s' when accessing histogram %s! Ignoring this bin..." % (wQCDLabel, shape.getHistoName())
                print(ShellStyles.WarningLabel() + msg)
            else:
                wQCD = normFactors[wQCDLabel]

            # Loop over bins in the shape histogram
            for j in range(1,h.GetNbinsX()+1):
                for k in range(1,h.GetNbinsY()+1):
                    myResult = 0.0
                    myStatDataUncert = 0.0
                    myStatEwkUncert = 0.0
                    if abs(h.GetBinContent(j,k)) > 0.00001: # Ignore zero bins
                        # Calculate result
                        myResult = h.GetBinContent(j,k) * wQCD
                        # Calculate abs. stat. uncert. for data and for MC EWK
                        myStatDataUncert = hData.GetBinError(j,k) * wQCD
                        myStatEwkUncert = hEwk.GetBinError(j,k) * wQCD
                        # Do not calculate here MC EWK syst.
                    myCountObject = extendedCount.ExtendedCount(myResult, [myStatDataUncert, myStatEwkUncert], myUncertaintyLabels)
                    self._resultCountObject.add(myCountObject)
                    if optionDoNQCDByBinHistograms:
                        self._histogramsList[i].SetBinContent(j, k, myCountObject.value())
                        self._histogramsList[i].SetBinError(j, k, myCountObject.statUncertainty())
                    self._resultShape.SetBinContent(j, k, self._resultShape.GetBinContent(j, k) + myCountObject.value())
                    # The bin error temporarily holds the sum of squares; the root is taken after the loop
                    self._resultShape.SetBinError(j, k, self._resultShape.GetBinError(j, k) + myCountObject.statUncertainty()**2)
                    # Sum items for purity calculation
                    myShapeDataSum[j-1][k-1] += hData.GetBinContent(j,k)*wQCD
                    myShapeDataSumUncert[j-1][k-1] += (hData.GetBinError(j,k)*wQCD)**2
                    myShapeEwkSum[j-1][k-1] += hEwk.GetBinContent(j,k)*wQCD
                    myShapeEwkSumUncert[j-1][k-1] += (hEwk.GetBinError(j,k)*wQCD)**2
            h.Delete()
            hData.Delete()
            hEwk.Delete()

        # Take square root of uncertainties
        for j in range(1,nBinsX+1):
            for k in range(1,nBinsY+1):
                self._resultShape.SetBinError(j, k, math.sqrt(self._resultShape.GetBinError(j, k)))

        # Print result
        print("NQCD Integral(%s) = %s "%(shape.getHistoName(), self._resultCountObject.getResultStringFull("%.1f")))

        # Print purity as function of final shape bins
        if optionPrintPurityByBins:
            print("Purity of shape %s"%shape.getHistoName())
            print("shapeBin purity purityUncert")

        # The EWK and purity histograms are filled regardless of the print option
        xAxis = self._resultShape.GetXaxis()
        yAxis = self._resultShape.GetYaxis()
        for j in range (1,nBinsX+1):
            for k in range(1,nBinsY+1):
                ewkSum = myShapeEwkSum[j-1][k-1]
                ewkSumUncert = math.sqrt(myShapeEwkSumUncert[j-1][k-1])
                dataSum = myShapeDataSum[j-1][k-1]
                myPurity = 0.0
                myPurityUncert = 0.0
                if abs(dataSum) > 0.000001:
                    myPurity = 1.0 - ewkSum / dataSum
                    myPurityUncert = errorPropagation.errorPropagationForDivision(ewkSum, ewkSumUncert, dataSum, math.sqrt(myShapeDataSumUncert[j-1][k-1]))
                # Store MC EWK content
                self._resultShapeEWK.SetBinContent(j, k, ewkSum)
                self._resultShapeEWK.SetBinError(j, k, ewkSumUncert)
                self._resultShapePurity.SetBinContent(j, k, myPurity)
                self._resultShapePurity.SetBinError(j, k, myPurityUncert)
                # Print purity info of final shape
                if optionPrintPurityByBins:
                    # x-range first ...
                    if j < nBinsX:
                        myString = "%d..%d, "%(xAxis.GetBinLowEdge(j),xAxis.GetBinUpEdge(j))
                    else:
                        myString = ">%d, "%(xAxis.GetBinLowEdge(j))
                    # ... then the y-range is appended (it used to overwrite the x-range)
                    if k < nBinsY:
                        myString += "%d..%d"%(yAxis.GetBinLowEdge(k),yAxis.GetBinUpEdge(k))
                    else:
                        myString += ">%d"%(yAxis.GetBinLowEdge(k))
                    myString += " %.3f %.3f"%(myPurity, myPurityUncert)
                    print(myString)
        return
Exemplo n.º 12
0
 def _doCalculate(self, shape, moduleInfoString, normFactors, optionPrintPurityByBins, optionDoNQCDByBinHistograms):
     '''
     Builds the final QCD shape: the data-driven QCD histogram of every
     phase-space split bin is multiplied by its normalization factor
     (shape * w_QCD) and the weighted histograms are summed.

     The results are stored on the instance:
       self._resultShape        summed shape; bin errors are accumulated as a sum
                                of squares and square-rooted at the end
       self._resultShapeEWK     summed, weighted MC EWK content
       self._resultShapePurity  1 - EWK/data for each bin of the final shape
       self._resultCountObject  ExtendedCount summed over all bins
       self._histogramsList     per-split-bin results (filled only when
                                optionDoNQCDByBinHistograms is true)

     Shapes that are instances of ROOT.TH2 are handed over to _doCalculate2D().

      \param shape                        Provider of the per-split-bin QCD, data and EWK histograms
      \param moduleInfoString             Suffix used in the names and titles of the result histograms
      \param normFactors                  Normalization factors, keyed by phase-space bin label
      \param optionPrintPurityByBins      If true, print the purity for every bin of the final shape
      \param optionDoNQCDByBinHistograms  If true, book and fill one histogram per split bin
     '''
     def _bookEmptyClone(nameTemplate):
         # Emptied copy of the first split-bin QCD histogram; name == title
         hEmpty = aux.Clone(shape.getDataDrivenQCDHistoForSplittedBin(0))
         hEmpty.Reset()
         hLabel = nameTemplate % moduleInfoString
         hEmpty.SetTitle(hLabel)
         hEmpty.SetName(hLabel)
         return hEmpty

     nSplitBins = shape.getNumberOfPhaseSpaceSplitBins()

     # Result containers
     self._resultShape = _bookEmptyClone("NQCDFinal_Total_%s")
     self._resultShapeEWK = _bookEmptyClone("NQCDFinal_EWK_%s")
     self._resultShapePurity = _bookEmptyClone("NQCDFinal_Purity_%s")
     self._histogramsList = []
     myUncertaintyLabels = ["statData", "statEWK"]
     self._resultCountObject = extendedCount.ExtendedCount(0.0, [0.0, 0.0], myUncertaintyLabels)

     # Optional: one result histogram per phase-space split bin
     if optionDoNQCDByBinHistograms:
         for i in range(0, nSplitBins):
             hBin = aux.Clone(self._resultShape)
             binTag = shape.getPhaseSpaceBinFileFriendlyTitle(i).replace(" ", "")
             hBinLabel = "NQCDFinal_%s_%s" % (binTag, moduleInfoString)
             hBin.SetTitle(hBinLabel)
             hBin.SetName(hBinLabel)
             self._histogramsList.append(hBin)

     # 2D shapes need a double bin loop and are treated separately
     if isinstance(self._resultShape, ROOT.TH2):
         self._doCalculate2D(nSplitBins, shape, normFactors, optionPrintPurityByBins,
                             optionDoNQCDByBinHistograms, myUncertaintyLabels)
         return

     # Accumulators for the purity calculation, one entry per bin of the final shape
     nBinsX = self._resultShape.GetNbinsX()
     myShapeDataSum = [0.0] * nBinsX
     myShapeDataSumUncert = [0.0] * nBinsX  # sum of squared uncertainties
     myShapeEwkSum = [0.0] * nBinsX
     myShapeEwkSumUncert = [0.0] * nBinsX  # sum of squared uncertainties

     # Evaluate each phase-space bin on its own and add it to the combined result
     for i in range(0, nSplitBins):
         hQCD = shape.getDataDrivenQCDHistoForSplittedBin(i)
         hData = shape.getDataHistoForSplittedBin(i)
         hEwk = shape.getEwkHistoForSplittedBin(i)

         # Normalization factor of this phase-space bin
         wQCDLabel = shape.getPhaseSpaceBinFileFriendlyTitle(i)
         if self._optionUseInclusiveNorm:
             wQCDLabel = "Inclusive"
         if wQCDLabel in normFactors.keys():
             wQCD = normFactors[wQCDLabel]
         else:
             # No factor available: a weight of zero makes this bin contribute nothing
             wQCD = 0.0
             print(ShellStyles.WarningLabel()+"No normalization factors available for bin '%s' when accessing histogram %s! Ignoring this bin..."%(wQCDLabel,shape.getHistoName()))

         for j in range(1, hQCD.GetNbinsX()+1):
             qcdContent = hQCD.GetBinContent(j)
             if abs(qcdContent) > 0.00001:
                 # Weighted result and its absolute stat. uncertainties (data, MC EWK);
                 # the MC EWK syst. uncertainty is not evaluated here
                 myResult = qcdContent * wQCD
                 myStatDataUncert = hData.GetBinError(j) * wQCD
                 myStatEwkUncert = hEwk.GetBinError(j) * wQCD
             else:
                 # Zero bins are ignored
                 myResult = 0.0
                 myStatDataUncert = 0.0
                 myStatEwkUncert = 0.0

             myCountObject = extendedCount.ExtendedCount(myResult, [myStatDataUncert, myStatEwkUncert], myUncertaintyLabels)
             self._resultCountObject.add(myCountObject)
             binValue = myCountObject.value()
             binStatUncert = myCountObject.statUncertainty()
             if optionDoNQCDByBinHistograms:
                 self._histogramsList[i].SetBinContent(j, binValue)
                 self._histogramsList[i].SetBinError(j, binStatUncert)
             self._resultShape.SetBinContent(j, self._resultShape.GetBinContent(j) + binValue)
             # The bin error temporarily holds the sum of squares
             self._resultShape.SetBinError(j, self._resultShape.GetBinError(j) + binStatUncert**2)

             # Weighted data and MC EWK sums for the purity
             idx = j - 1
             myShapeDataSum[idx] += hData.GetBinContent(j)*wQCD
             myShapeDataSumUncert[idx] += (hData.GetBinError(j)*wQCD)**2
             myShapeEwkSum[idx] += hEwk.GetBinContent(j)*wQCD
             myShapeEwkSumUncert[idx] += (hEwk.GetBinError(j)*wQCD)**2

         hQCD.Delete()
         hData.Delete()
         hEwk.Delete()

     # Turn the accumulated sums of squares into uncertainties
     for j in range(1, nBinsX+1):
         self._resultShape.SetBinError(j, math.sqrt(self._resultShape.GetBinError(j)))

     # Print result
     print("NQCD Integral(%s) = %s "%(shape.getHistoName(), self._resultCountObject.getResultStringFull("%.1f")))

     # Purity in the binning of the final shape (always stored, printed on request)
     if optionPrintPurityByBins:
         print("Purity of shape %s"%shape.getHistoName())
         print("shapeBin purity purityUncert")
     for j in range(1, nBinsX+1):
         idx = j - 1
         ewkSum = myShapeEwkSum[idx]
         dataSum = myShapeDataSum[idx]
         ewkSumUncert = math.sqrt(myShapeEwkSumUncert[idx])
         if abs(dataSum) > 0.000001:
             myPurity = 1.0 - ewkSum / dataSum
             myPurityUncert = errorPropagation.errorPropagationForDivision(ewkSum, ewkSumUncert, dataSum, math.sqrt(myShapeDataSumUncert[idx]))
         else:
             # No data in this bin: purity and its uncertainty stay at zero
             myPurity = 0.0
             myPurityUncert = 0.0

         # Store the MC EWK content and the purity
         self._resultShapeEWK.SetBinContent(j, ewkSum)
         self._resultShapeEWK.SetBinError(j, ewkSumUncert)
         self._resultShapePurity.SetBinContent(j, myPurity)
         self._resultShapePurity.SetBinError(j, myPurityUncert)

         if optionPrintPurityByBins:
             xAxis = self._resultShape.GetXaxis()
             if j < nBinsX:
                 myString = "%d..%d"%(xAxis.GetBinLowEdge(j), xAxis.GetBinUpEdge(j))
             else:
                 # The last bin is printed as an open-ended range
                 myString = ">%d"%(xAxis.GetBinLowEdge(j))
             myString += " %.3f %.3f"%(myPurity, myPurityUncert)
             print(myString)
Exemplo n.º 13
0
def GetPurityHisto(hData, hOther, kwargs, subtractFromOne=True, printValues=False, hideZeros=True):
    '''
    Returns a histogram holding, bin by bin, the purity built from the ratio hOther / hData:

    if subtractFromOne:
    P = 1.0 - (Other / Data)

    if not subtractFromOne:
    P = (Other / Data)

    Bins with |Data| <= 0.000001 keep P = 0.0 +/- 0.0. Negative contents of hOther
    are treated as 0.0, and P > 1.0 is forced to 1.0 (uncertainty unchanged).

     \param hData            Denominator histogram; the returned histogram is a reset clone of it
     \param hOther           Numerator histogram (its name is used as a column title of the table)
     \param kwargs           Dictionary; only the "histoName" entry is read (for the table title)
     \param subtractFromOne  Selects the purity definition (see above)
     \param printValues      If True, the per-bin summary table is printed with Print()
     \param hideZeros        If True, bins with P == 0.0 are stored as -0.1 +/- 0.0001
     \return new histogram with the purity as bin content and its uncertainty as bin error

    Uses the module-level "opts.folder" and GetControlRegionLabel() to build the table title.
    '''
    # Empty histogram with the same binning as the data histogram
    hPurity = hData.Clone()
    hPurity.Reset("ICESM")
    ROOT.SetOwnership(hPurity, True)

    # Table title: second "_"-separated token of the folder-stripped name, plus the control region label
    histoName = kwargs["histoName"]
    shortName = histoName.replace(opts.folder+"/", "").rsplit("_")[1]
    title = "%s (%s)" % (shortName, GetControlRegionLabel(histoName))

    # Info table (debugging)
    align  = "{:>6} {:^20} {:>10} {:>10} {:>10} {:^3} {:<10}"
    hLine  = "="*70
    header = align.format("Bin", "Range", "%s" % hOther.GetName(), "Data", "Purity", "+/-", "Error")
    nBinsX = hData.GetNbinsX()
    table  = ["{:^70}".format(title), hLine, header, hLine]
    xAxis  = hData.GetXaxis()

    # For-loop: All histogram bins
    for j in range(1, nBinsX+1):
        otherSum       = hOther.GetBinContent(j)
        otherSumUncert = hOther.GetBinError(j)
        dataSum        = hData.GetBinContent(j)
        dataSumUncert  = hData.GetBinError(j)

        # Negative sums are possible when negative weights are applied
        if otherSum < 0.0:
            Verbose("Sum is below 0 (Sum=%.3f +/- %.3f). Forcing value to 0.0." % (otherSum,  otherSumUncert))
            otherSum = 0.0

        if abs(dataSum) > 0.000001:
            ratio = otherSum / dataSum
            myPurity = (1.0 - ratio) if subtractFromOne else ratio
            myPurityUncert = errorPropagation.errorPropagationForDivision(otherSum, otherSumUncert, dataSum, dataSumUncert)
        else:
            # Zero bins are ignored
            myPurity       = 0.0
            myPurityUncert = 0.0

        # The last bin is labelled as an open-ended range
        if j < nBinsX:
            binRange = "%.1f -> %.1f" % (xAxis.GetBinLowEdge(j), xAxis.GetBinUpEdge(j) )
        else:
            binRange = "> %.1f"   % (xAxis.GetBinLowEdge(j) )

        # Trick: push exact zeros below the axis so that they are not visible on the canvas
        if hideZeros and myPurity == 0.0:
            myPurity       = -0.1
            myPurityUncert = +0.0001

        # Sanity check: the purity is capped at 1.0, its uncertainty is kept
        if myPurity > 1.0:
            Verbose("Bin %d) %.3f/%.3f = %.3f" % (j, otherSum, dataSum, myPurity), True)
            Print("Purity exceeds 1.0 (P=%.3f +/- %.3f). Forcing value to P=%.3f +/- %.3f" % (myPurity,  myPurityUncert, 1.0, myPurityUncert), False)
            myPurity = 1.0

        # Fill histogram and book-keep the table row
        hPurity.SetBinContent(j, myPurity)
        hPurity.SetBinError(j, myPurityUncert)
        table.append(align.format(j, binRange, "%.1f" % otherSum, "%.1f" % dataSum, "%.3f" % myPurity, "+/-", "%.3f" % myPurityUncert))

    # Finalise table
    table.append(hLine)

    # Print the table (the second argument of Print() is True for the first row only)
    if printValues:
        for rowIndex, line in enumerate(table):
            Print(line, rowIndex==0)

    return hPurity
Exemplo n.º 14
0
def GetPurityHisto(hData, hEWK, kwargs, printValues=False, hideZeros=True):
    '''
    Returns a histogram holding, bin by bin, the purity P = 1.0 - (EWK / Data).

    Bins with |Data| <= 0.000001 keep P = 0.0 +/- 0.0 and negative EWK contents
    are treated as 0.0. A purity in the range 1.0 < P < 1.1 is forced to 1.0
    (uncertainty unchanged); P >= 1.1 raises an Exception.

     \param hData        Data histogram; the returned histogram is a reset clone of it
     \param hEWK         EWK histogram
     \param kwargs       Not used by this function
     \param printValues  If True, the per-bin summary table is printed with Print()
     \param hideZeros    If True, bins with P == 0.0 are stored as -0.1 +/- 0.0001
     \return new histogram with the purity as bin content and its uncertainty as bin error
    '''
    # Empty histogram with the same binning as the data histogram
    hPurity = hData.Clone()
    hPurity.Reset("ICESM")
    ROOT.SetOwnership(hPurity, True)

    # Info table (debugging)
    align = "{:>6} {:^20} {:>10} {:>10} {:>10} {:^3} {:<10}"
    hLine = "=" * 70
    header = align.format("Bin", "Range", "EWK", "Data", "Purity", "+/-", "Error")
    nBinsX = hData.GetNbinsX()
    table = ["{:^70}".format("Histogram"), hLine, header, hLine]
    xAxis = hData.GetXaxis()

    # For-loop: All histogram bins
    for j in range(1, nBinsX + 1):
        ewkSum = hEWK.GetBinContent(j)
        ewkSumUncert = hEWK.GetBinError(j)
        dataSum = hData.GetBinContent(j)
        dataSumUncert = hData.GetBinError(j)

        # Negative EWK sums are possible when negative weights are applied
        if ewkSum < 0.0:
            Verbose("Sum is below 0 (Sum=%.3f +/- %.3f). Forcing value to 0.0." % (ewkSum, ewkSumUncert))
            ewkSum = 0.0

        if abs(dataSum) > 0.000001:
            myPurity = 1.0 - ewkSum / dataSum
            myPurityUncert = errorPropagation.errorPropagationForDivision(ewkSum, ewkSumUncert, dataSum, dataSumUncert)
        else:
            # Zero bins are ignored
            myPurity = 0.0
            myPurityUncert = 0.0

        # The last bin is labelled as an open-ended range
        if j < nBinsX:
            binRange = "%.1f -> %.1f" % (xAxis.GetBinLowEdge(j), xAxis.GetBinUpEdge(j))
        else:
            binRange = "> %.1f" % (xAxis.GetBinLowEdge(j))

        # Trick: push exact zeros below the axis so that they are not visible on the canvas
        if hideZeros and myPurity == 0.0:
            myPurity = -0.1
            myPurityUncert = +0.0001

        # Sanity check: an excess below 1.1 is tolerated and capped, anything larger is an error
        if myPurity >= 1.1:
            raise Exception("Purity cannot exceed 100%% (=%s +/- %s)" % (myPurity * 100, myPurityUncert * 100))
        if myPurity > 1.0:
            Print("Purity exceeds 1.0 (P=%.3f +/- %.3f). Forcing value to P=%.3f +/- %.3f" % (myPurity, myPurityUncert, 1.0, myPurityUncert), False)
            myPurity = 1.0

        # Fill histogram and book-keep the table row
        hPurity.SetBinContent(j, myPurity)
        hPurity.SetBinError(j, myPurityUncert)
        table.append(align.format(j, binRange, "%.1f" % ewkSum, "%.1f" % dataSum, "%.3f" % myPurity, "+/-", "%.3f" % myPurityUncert))

    # Finalise table
    table.append(hLine)

    # Print the table (the second argument of Print() is True for the first row only)
    if printValues:
        for rowIndex, line in enumerate(table):
            Print(line, rowIndex == 0)

    return hPurity
Exemplo n.º 15
0
    def CalculateTransferFactor(self,
                                binLabel,
                                hFakeB_CR1,
                                hFakeB_CR2,
                                hFakeB_CR3,
                                hFakeB_CR4,
                                verbose=False):
        '''
        Calculates the transfer factor TF = N_CR1 / N_CR2 for the given bin and
        stores it, together with the event counts of all four control regions,
        in the per-bin dictionaries of this object.

        TF +/- Error is also stored: the "Up" value is limited to at most 1.0
        and the "Down" value to at least 0.0. The event counts of CR3 and CR4
        are only stored; they do not enter the transfer factor.

        TF = Transfer Factor
        SR = Signal Region
        CR = Control Region
        VR = Verification Region

         \param binLabel    Key under which all results of this bin are stored
         \param hFakeB_CR1  Histogram providing N_CR1 (numerator of TF)
         \param hFakeB_CR2  Histogram providing N_CR2 (denominator of TF); its title gives the nice bin label
         \param hFakeB_CR3  Histogram providing N_CR3
         \param hFakeB_CR4  Histogram providing N_CR4
         \param verbose     Stored as self.verbose
        '''
        self.verbose = verbose

        def _countEvents(histo):
            # Integral over bins 1 .. GetNbinsX()+1 and the error on that integral
            error = ROOT.Double(0.0)
            nEvents = histo.IntegralAndError(1, histo.GetNbinsX() + 1, error)
            return nEvents, error

        # Events in all CRs and their associated errors
        nCR1, nCR1_Error = _countEvents(hFakeB_CR1)
        nCR2, nCR2_Error = _countEvents(hFakeB_CR2)
        nCR3, nCR3_Error = _countEvents(hFakeB_CR3)
        nCR4, nCR4_Error = _countEvents(hFakeB_CR4)

        # Transfer factor and its variations
        TF = (nCR1 / nCR2)
        TF_Error = errorPropagation.errorPropagationForDivision(nCR1, nCR1_Error, nCR2, nCR2_Error)
        TF_Up = min(TF + TF_Error, 1.0)
        TF_Down = max(TF - TF_Error, 0.0)
        lines = ["TF (bin=%s) = N_CR1 / N_CR2 = %f / %f =  %f +- %f" % (binLabel, nCR1, nCR2, TF, TF_Error)]

        # Replace bin label with histo title (has exact binning info)
        self._BinLabelMap[binLabel] = self.getNiceBinLabel(hFakeB_CR2.GetTitle())

        # Book-keeping of all numbers for this bin
        for store, value in (
                (self._NEvtsCR1, nCR1),
                (self._NEvtsCR1_Error, nCR1_Error),
                (self._NEvtsCR2, nCR2),
                (self._NEvtsCR2_Error, nCR2_Error),
                (self._NEvtsCR3, nCR3),
                (self._NEvtsCR3_Error, nCR3_Error),
                (self._NEvtsCR4, nCR4),
                (self._NEvtsCR4_Error, nCR4_Error),
                (self._TF, TF),
                (self._TF_Error, TF_Error),
                (self._TF_Up, TF_Up),
                (self._TF_Down, TF_Down),
                (self._FakeBNormalization, TF),             # TF
                (self._FakeBNormalizationError, TF_Error),  # Error(TF)
                (self._FakeBNormalizationUp, TF_Up),        # TF + Error
                (self._FakeBNormalizationDown, TF_Down),    # TF - Error
        ):
            store[binLabel] = value

        # Keep the summary line for later use (written to file elsewhere)
        self._commentLines.extend(lines)
        return
Exemplo n.º 16
0
    def _doCalculate2D(self, nSplitBins, shape, normFactors, optionPrintPurityByBins, optionDoNQCDByBinHistograms, myUncertaintyLabels):
        '''
        Calculates the result for 2D histograms

        The data-driven QCD histogram of every phase-space split bin is multiplied
        by its normalization factor and added to self._resultShape. The weighted
        MC EWK content goes to self._resultShapeEWK and the purity (1 - EWK/data)
        to self._resultShapePurity. These histograms, self._resultCountObject and
        (if optionDoNQCDByBinHistograms is true) self._histogramsList are expected
        to exist already when this method is called.

         \param nSplitBins                   Number of phase-space split bins to loop over
         \param shape                        Provider of the per-split-bin QCD, data and EWK histograms
         \param normFactors                  Normalization factors, keyed by phase-space bin label
         \param optionPrintPurityByBins      If true, print the purity for every (x,y) bin of the final shape
         \param optionDoNQCDByBinHistograms  If true, fill the per-split-bin histograms
         \param myUncertaintyLabels          Uncertainty labels handed to ExtendedCount
        '''
        nBinsX = self._resultShape.GetNbinsX()
        nBinsY = self._resultShape.GetNbinsY()

        # Counters [x-bin][y-bin] for the purity calculation in the final shape binning;
        # the "Uncert" counters hold sums of squared uncertainties
        myShapeDataSum = [[0.0] * nBinsY for _ in range(nBinsX)]
        myShapeDataSumUncert = [[0.0] * nBinsY for _ in range(nBinsX)]
        myShapeEwkSum = [[0.0] * nBinsY for _ in range(nBinsX)]
        myShapeEwkSumUncert = [[0.0] * nBinsY for _ in range(nBinsX)]

        # Calculate results separately for each phase-space bin, and then combine them to get inclusive result
        for i in range(0, nSplitBins):
            # Get data-driven QCD, data, and MC EWK shape histogram for the phase space bin
            h     = shape.getDataDrivenQCDHistoForSplittedBin(i)
            hData = shape.getDataHistoForSplittedBin(i)
            hEwk  = shape.getEwkHistoForSplittedBin(i)

            # Get normalization factor
            wQCDLabel = shape.getPhaseSpaceBinFileFriendlyTitle(i)
            if self._optionUseInclusiveNorm:
                wQCDLabel = "Inclusive"
            wQCD = 0.0
            if wQCDLabel not in normFactors.keys():
                # A weight of zero makes this phase-space bin contribute nothing
                msg = "No normalization factors available for bin '%s' when accessing histogram %s! Ignoring this bin..." % (wQCDLabel, shape.getHistoName())
                print(ShellStyles.WarningLabel() + msg)
            else:
                wQCD = normFactors[wQCDLabel]

            # Loop over bins in the shape histogram
            for j in range(1, h.GetNbinsX()+1):
                for k in range(1, h.GetNbinsY()+1):
                    myResult = 0.0
                    myStatDataUncert = 0.0
                    myStatEwkUncert = 0.0
                    if abs(h.GetBinContent(j,k)) > 0.00001: # Ignore zero bins
                        # Calculate result
                        myResult = h.GetBinContent(j,k) * wQCD
                        # Calculate abs. stat. uncert. for data and for MC EWK
                        # (the MC EWK syst. uncertainty is not calculated here)
                        myStatDataUncert = hData.GetBinError(j,k) * wQCD
                        myStatEwkUncert = hEwk.GetBinError(j,k) * wQCD
                    myCountObject = extendedCount.ExtendedCount(myResult, [myStatDataUncert, myStatEwkUncert], myUncertaintyLabels)
                    self._resultCountObject.add(myCountObject)
                    if optionDoNQCDByBinHistograms:
                        self._histogramsList[i].SetBinContent(j, k, myCountObject.value())
                        self._histogramsList[i].SetBinError(j, k, myCountObject.statUncertainty())
                    self._resultShape.SetBinContent(j, k, self._resultShape.GetBinContent(j, k) + myCountObject.value())
                    # The bin error temporarily holds the sum of squares; the square root is taken below
                    self._resultShape.SetBinError(j, k, self._resultShape.GetBinError(j, k) + myCountObject.statUncertainty()**2)
                    # Sum items for purity calculation
                    myShapeDataSum[j-1][k-1] += hData.GetBinContent(j,k)*wQCD
                    myShapeDataSumUncert[j-1][k-1] += (hData.GetBinError(j,k)*wQCD)**2
                    myShapeEwkSum[j-1][k-1] += hEwk.GetBinContent(j,k)*wQCD
                    myShapeEwkSumUncert[j-1][k-1] += (hEwk.GetBinError(j,k)*wQCD)**2
            h.Delete()
            hData.Delete()
            hEwk.Delete()

        # Take square root of uncertainties
        for j in range(1, nBinsX+1):
            for k in range(1, nBinsY+1):
                self._resultShape.SetBinError(j, k, math.sqrt(self._resultShape.GetBinError(j, k)))

        # Print result
        print("NQCD Integral(%s) = %s "%(shape.getHistoName(), self._resultCountObject.getResultStringFull("%.1f")))

        # Purity as function of final shape bins (always stored, printed on request)
        if optionPrintPurityByBins:
            print("Purity of shape %s"%shape.getHistoName())
            print("shapeBin purity purityUncert")
        for j in range(1, nBinsX+1):
            for k in range(1, nBinsY+1):
                myPurity = 0.0
                myPurityUncert = 0.0
                if abs(myShapeDataSum[j-1][k-1]) > 0.000001:
                    myPurity = 1.0 - myShapeEwkSum[j-1][k-1] / myShapeDataSum[j-1][k-1]
                    myPurityUncert = errorPropagation.errorPropagationForDivision(myShapeEwkSum[j-1][k-1], math.sqrt(myShapeEwkSumUncert[j-1][k-1]), myShapeDataSum[j-1][k-1], math.sqrt(myShapeDataSumUncert[j-1][k-1]))
                # Store MC EWK content
                self._resultShapeEWK.SetBinContent(j, k, myShapeEwkSum[j-1][k-1])
                self._resultShapeEWK.SetBinError(j, k, math.sqrt(myShapeEwkSumUncert[j-1][k-1]))
                self._resultShapePurity.SetBinContent(j, k, myPurity)
                self._resultShapePurity.SetBinError(j, k, myPurityUncert)
                # Print purity info of final shape
                if optionPrintPurityByBins:
                    # x-range first; the last bin of an axis is printed as an open-ended range
                    if j < nBinsX:
                        myString = "%d..%d, "%(self._resultShape.GetXaxis().GetBinLowEdge(j),self._resultShape.GetXaxis().GetBinUpEdge(j))
                    else:
                        myString = ">%d, "%(self._resultShape.GetXaxis().GetBinLowEdge(j))
                    # Append (do not overwrite) the y-range, so that the row identifies the (x,y) bin
                    if k < nBinsY:
                        myString += "%d..%d"%(self._resultShape.GetYaxis().GetBinLowEdge(k),self._resultShape.GetYaxis().GetBinUpEdge(k))
                    else:
                        myString += ">%d"%(self._resultShape.GetYaxis().GetBinLowEdge(k))
                    myString += " %.3f %.3f"%(myPurity, myPurityUncert)
                    print(myString)
        return
 def _doCalculate(self, shape, moduleInfoString, normFactors,
                  optionPrintPurityByBins, optionDoNQCDByBinHistograms):
     '''
     Builds the final QCD shape: the data-driven QCD histogram of every
     phase-space split bin is multiplied by its normalization factor
     (shape * w_QCD) and the weighted histograms are summed.

     The results are stored on the instance:
       self._resultShape        summed shape; bin errors are accumulated as a sum
                                of squares and square-rooted at the end
       self._resultShapeEWK     summed, weighted MC EWK content
       self._resultShapePurity  1 - EWK/data for each bin of the final shape
       self._resultCountObject  ExtendedCount summed over all bins
       self._histogramsList     per-split-bin results (filled only when
                                optionDoNQCDByBinHistograms is true)

     Shapes that are instances of ROOT.TH2 are handed over to _doCalculate2D().

      \param shape                        Provider of the per-split-bin QCD, data and EWK histograms
      \param moduleInfoString             Suffix used in the names and titles of the result histograms
      \param normFactors                  Normalization factors, keyed by phase-space bin label
      \param optionPrintPurityByBins      If true, print the purity for every bin of the final shape
      \param optionDoNQCDByBinHistograms  If true, book and fill one histogram per split bin
     '''
     def _bookEmptyClone(nameTemplate):
         # Emptied copy of the first split-bin QCD histogram; name == title
         hEmpty = aux.Clone(shape.getDataDrivenQCDHistoForSplittedBin(0))
         hEmpty.Reset()
         hLabel = nameTemplate % moduleInfoString
         hEmpty.SetTitle(hLabel)
         hEmpty.SetName(hLabel)
         return hEmpty

     nSplitBins = shape.getNumberOfPhaseSpaceSplitBins()

     # Result containers
     self._resultShape = _bookEmptyClone("NQCDFinal_Total_%s")
     self._resultShapeEWK = _bookEmptyClone("NQCDFinal_EWK_%s")
     self._resultShapePurity = _bookEmptyClone("NQCDFinal_Purity_%s")
     self._histogramsList = []
     myUncertaintyLabels = ["statData", "statEWK"]
     self._resultCountObject = extendedCount.ExtendedCount(
         0.0, [0.0, 0.0], myUncertaintyLabels)

     # Optional: one result histogram per phase-space split bin
     if optionDoNQCDByBinHistograms:
         for i in range(0, nSplitBins):
             hBin = aux.Clone(self._resultShape)
             binTag = shape.getPhaseSpaceBinFileFriendlyTitle(i).replace(
                 " ", "")
             hBinLabel = "NQCDFinal_%s_%s" % (binTag, moduleInfoString)
             hBin.SetTitle(hBinLabel)
             hBin.SetName(hBinLabel)
             self._histogramsList.append(hBin)

     # 2D shapes need a double bin loop and are treated separately
     if isinstance(self._resultShape, ROOT.TH2):
         self._doCalculate2D(nSplitBins, shape, normFactors,
                             optionPrintPurityByBins,
                             optionDoNQCDByBinHistograms,
                             myUncertaintyLabels)
         return

     # Accumulators for the purity calculation, one entry per bin of the final shape
     nBinsX = self._resultShape.GetNbinsX()
     myShapeDataSum = [0.0] * nBinsX
     myShapeDataSumUncert = [0.0] * nBinsX  # sum of squared uncertainties
     myShapeEwkSum = [0.0] * nBinsX
     myShapeEwkSumUncert = [0.0] * nBinsX  # sum of squared uncertainties

     # Evaluate each phase-space bin on its own and add it to the combined result
     for i in range(0, nSplitBins):
         hQCD = shape.getDataDrivenQCDHistoForSplittedBin(i)
         hData = shape.getDataHistoForSplittedBin(i)
         hEwk = shape.getEwkHistoForSplittedBin(i)

         # Normalization factor of this phase-space bin
         wQCDLabel = shape.getPhaseSpaceBinFileFriendlyTitle(i)
         if self._optionUseInclusiveNorm:
             wQCDLabel = "Inclusive"
         if wQCDLabel in normFactors.keys():
             wQCD = normFactors[wQCDLabel]
         else:
             # No factor available: a weight of zero makes this bin contribute nothing
             wQCD = 0.0
             print(ShellStyles.WarningLabel() +
                   "No normalization factors available for bin '%s' when accessing histogram %s! Ignoring this bin..." %
                   (wQCDLabel, shape.getHistoName()))

         for j in range(1, hQCD.GetNbinsX() + 1):
             qcdContent = hQCD.GetBinContent(j)
             if abs(qcdContent) > 0.00001:
                 # Weighted result and its absolute stat. uncertainties (data, MC EWK);
                 # the MC EWK syst. uncertainty is not evaluated here
                 myResult = qcdContent * wQCD
                 myStatDataUncert = hData.GetBinError(j) * wQCD
                 myStatEwkUncert = hEwk.GetBinError(j) * wQCD
             else:
                 # Zero bins are ignored
                 myResult = 0.0
                 myStatDataUncert = 0.0
                 myStatEwkUncert = 0.0

             myCountObject = extendedCount.ExtendedCount(
                 myResult, [myStatDataUncert, myStatEwkUncert],
                 myUncertaintyLabels)
             self._resultCountObject.add(myCountObject)
             binValue = myCountObject.value()
             binStatUncert = myCountObject.statUncertainty()
             if optionDoNQCDByBinHistograms:
                 self._histogramsList[i].SetBinContent(j, binValue)
                 self._histogramsList[i].SetBinError(j, binStatUncert)
             self._resultShape.SetBinContent(
                 j, self._resultShape.GetBinContent(j) + binValue)
             # The bin error temporarily holds the sum of squares
             self._resultShape.SetBinError(
                 j, self._resultShape.GetBinError(j) + binStatUncert**2)

             # Weighted data and MC EWK sums for the purity
             idx = j - 1
             myShapeDataSum[idx] += hData.GetBinContent(j) * wQCD
             myShapeDataSumUncert[idx] += (hData.GetBinError(j) * wQCD)**2
             myShapeEwkSum[idx] += hEwk.GetBinContent(j) * wQCD
             myShapeEwkSumUncert[idx] += (hEwk.GetBinError(j) * wQCD)**2

         hQCD.Delete()
         hData.Delete()
         hEwk.Delete()

     # Turn the accumulated sums of squares into uncertainties
     for j in range(1, nBinsX + 1):
         self._resultShape.SetBinError(
             j, math.sqrt(self._resultShape.GetBinError(j)))

     # Print result
     print("NQCD Integral(%s) = %s " % (
         shape.getHistoName(),
         self._resultCountObject.getResultStringFull("%.1f")))

     # Purity in the binning of the final shape (always stored, printed on request)
     if optionPrintPurityByBins:
         print("Purity of shape %s" % shape.getHistoName())
         print("shapeBin purity purityUncert")
     for j in range(1, nBinsX + 1):
         idx = j - 1
         ewkSum = myShapeEwkSum[idx]
         dataSum = myShapeDataSum[idx]
         ewkSumUncert = math.sqrt(myShapeEwkSumUncert[idx])
         if abs(dataSum) > 0.000001:
             myPurity = 1.0 - ewkSum / dataSum
             myPurityUncert = errorPropagation.errorPropagationForDivision(
                 ewkSum, ewkSumUncert, dataSum,
                 math.sqrt(myShapeDataSumUncert[idx]))
         else:
             # No data in this bin: purity and its uncertainty stay at zero
             myPurity = 0.0
             myPurityUncert = 0.0

         # Store the MC EWK content and the purity
         self._resultShapeEWK.SetBinContent(j, ewkSum)
         self._resultShapeEWK.SetBinError(j, ewkSumUncert)
         self._resultShapePurity.SetBinContent(j, myPurity)
         self._resultShapePurity.SetBinError(j, myPurityUncert)

         if optionPrintPurityByBins:
             xAxis = self._resultShape.GetXaxis()
             if j < nBinsX:
                 myString = "%d..%d" % (xAxis.GetBinLowEdge(j),
                                        xAxis.GetBinUpEdge(j))
             else:
                 # The last bin is printed as an open-ended range
                 myString = ">%d" % (xAxis.GetBinLowEdge(j))
             myString += " %.3f %.3f" % (myPurity, myPurityUncert)
             print(myString)
    def CalculateTransferFactor(self, binLabel, hFakeB_Baseline, hFakeB_Inverted, verbose=False):
        '''
        Calculate the transfer factor (TF) for one bin and store the result.

        The TF is the integral of hFakeB_Baseline (N_SR) divided by the
        integral of hFakeB_Inverted (N_CR). Both integrals are taken over
        bins 1 .. GetNbinsX()+1. The uncertainty of the ratio is obtained from
        errorPropagation.errorPropagationForDivision().

        Results are stored under the key binLabel in:
          self._TF        the transfer factor
          self._TF_Error  its uncertainty
          self._TF_Up     TF + uncertainty, limited to at most 1.0
          self._TF_Down   TF - uncertainty, limited to at least 0.0
        In addition self._BinLabelMap[binLabel] is set to the title of
        hFakeB_Inverted, and a summary line is appended to
        self._commentLines and printed.

        TF = Transfer Factor
        SR = Signal Region
        CR = Control Region
        VR = Verification Region

        Parameters:
          binLabel         key under which the results are stored
          hFakeB_Baseline  histogram providing the numerator (N_SR)
          hFakeB_Inverted  histogram providing the denominator (N_CR)
          verbose          stored in self.verbose; not otherwise used here

        Returns None.

        Raises ZeroDivisionError if the integral of hFakeB_Inverted is zero
        (same exception type as the plain division, but with a message
        identifying the bin and the histogram).
        '''
        self.verbose = verbose

        # Parenthesised form prints identically under Python 2 and 3
        print("======= Calculate TransferFactor ")

        # TODO: Add EWKGenuineB TF, Add Data TF, add QCD TF, Add EWK TF, add MCONLY TFs

        # Passed by reference; IntegralAndError fills them with the
        # uncertainty of the corresponding integral
        nSR_Error = ROOT.Double(0.0)
        nCR_Error = ROOT.Double(0.0)

        # Upper bin index GetNbinsX()+1 is one past the last regular bin
        # (the overflow bin in ROOT's TH1 numbering)
        nSR = hFakeB_Baseline.IntegralAndError(1, hFakeB_Baseline.GetNbinsX()+1, nSR_Error)
        nCR = hFakeB_Inverted.IntegralAndError(1, hFakeB_Inverted.GetNbinsX()+1, nCR_Error)

        # An empty control region makes the ratio undefined. Fail with an
        # informative message instead of a bare "float division by zero".
        if nCR == 0.0:
            msg = "Cannot calculate TF for bin=%s: integral of histogram \"%s\" is zero" % (binLabel, hFakeB_Inverted.GetTitle())
            raise ZeroDivisionError(msg)

        # Transfer factor from the Control Region (CR) to the Signal Region (SR)
        TF = nSR / nCR
        TF_Error = errorPropagation.errorPropagationForDivision(nSR, nSR_Error, nCR, nCR_Error)

        # NOTE(review): the upper limit of 1.0 means TF_Up < TF whenever
        # TF > 1.0 - confirm that this clamp is intended for a SR/CR ratio.
        TF_Up = min(TF + TF_Error, 1.0)
        TF_Down = max(TF - TF_Error, 0.0)

        lines = []
        lines.append("TF (bin=%s) = N_CR1 / N_CR2 = %f / %f =  %f +- %f" % (binLabel, nSR, nCR, TF, TF_Error) )

        # Replace bin label with histo title (has exact binning info)
        self._BinLabelMap[binLabel] = hFakeB_Inverted.GetTitle()
        self._TF[binLabel]          = TF
        self._TF_Error[binLabel]    = TF_Error
        self._TF_Up[binLabel]       = TF_Up
        self._TF_Down[binLabel]     = TF_Down

        # Store all information for later use (write to file)
        self._commentLines.extend(lines)

        # Print output; the second argument to Print is True only for the first line
        for i, line in enumerate(lines, 1):
            Print(line, i==1)