def _sanityChecks(self, dsetMgr, dirName, plotName):
        '''
        Check existence of histograms
        '''
        # Definitions
        myStatus      = True
        myFoundStatus = True
        
        # For-loop: All EWK datasets
        for d in dsetMgr.getDataset("EWK").datasets:
            if not d.hasRootHisto("%s/%s" % (dirName,plotName) ):
                myFoundStatus = False

        # If something is wrong
        if not myFoundStatus:
            myStatus = False
            msg = "Skipping '%s', because it does not exist for all EWK datasets (you probably forgot to set histo level to Vital when producing the multicrab)!" % (plotName)
            Print(ShellStyles.WarningLabel() + msg + ShellStyles.NormalStyle(), True)
        else:
            (myRootObject, myRootObjectName) = dsetMgr.getDataset("EWK").getFirstRootHisto("%s/%s" % (dirName,plotName) )
            # Note: ROOT's TH2 inherits from TH1, so check explicitly for the 2D case
            if isinstance(myRootObject, ROOT.TH2):
                msg = "Skipping '%s', because it is not a TH1 object" % (plotName)
                Print(ShellStyles.WarningLabel() + msg + ShellStyles.NormalStyle(), True)
                myStatus = False
            myRootObject.Delete()
        return myStatus
Example No. 2
 def _sanityChecks(self, dsetMgr, dirName, plotName):
     myStatus = True
     myFoundStatus = True
     for d in dsetMgr.getDataset("EWK").datasets:
         if not d.hasRootHisto("%s/%s"%(dirName,plotName)):
             myFoundStatus = False
     if not myFoundStatus:
         myStatus = False
         print ShellStyles.WarningLabel()+"Skipping '%s', because it does not exist for all EWK datasets (you probably forgot to set histo level to Vital when producing the multicrab)!"%(plotName)+ShellStyles.NormalStyle()
     else:
         (myRootObject, myRootObjectName) = dsetMgr.getDataset("EWK").getFirstRootHisto("%s/%s"%(dirName,plotName))
         if isinstance(myRootObject, ROOT.TH2):
             print ShellStyles.WarningLabel()+"Skipping '%s', because it is not a TH1 object!"%(plotName)+ShellStyles.NormalStyle()
             myStatus = False
         myRootObject.Delete()
     return myStatus
Example No. 3
    def __init__(
            self,
            dataPath,
            ewkPath,
            dsetMgr,
            luminosity,
            moduleInfoString,
            normFactors,
            #dataDrivenFakeTaus=False,
            #shapeOnly=False,
            #displayPurityBreakdown=False,
            #optionUseInclusiveNorm=False,
            optionCalculateQCDNormalizationSyst=True,
            normDataSrc=None,
            normEWKSrc=None,
            optionUseInclusiveNorm=False):
        self._shapePlots = []
        self._shapePlotLabels = []
        self._QCDNormalizationSystPlots = []
        self._QCDNormalizationSystPlotLabels = []
        self._moduleInfoString = moduleInfoString

        self._useInclusiveNorm = optionUseInclusiveNorm
        if len(normFactors.keys()) == 1 and normFactors.keys(
        )[0] == "Inclusive":
            self._useInclusiveNorm = True

        print ShellStyles.HighlightStyle(
        ) + "...Obtaining final shape" + ShellStyles.NormalStyle()
        # Determine list of plots to consider
        myObjects = dsetMgr.getDataset("Data").getDirectoryContent(dataPath)
        # Loop over plots to consider
        i = 0
        for plotName in myObjects:
            i += 1
            print ShellStyles.HighlightStyle(
            ) + "...Obtaining ctrl plot %d/%d: %s%s" % (
                i, len(myObjects), plotName, ShellStyles.NormalStyle())
            # Check that histograms exist
            mySkipStatus = self._sanityChecks(dsetMgr, dataPath,
                                              plotName) and self._sanityChecks(
                                                  dsetMgr, ewkPath, plotName)
            if not mySkipStatus:
                continue
            # Obtain shape plots (the returned object is not owned)

#	    print "DEBUG: ewkPath: ", ewkPath

            myShapeHisto = self._obtainShapeHistograms(i, dataPath, ewkPath,
                                                       dsetMgr, plotName,
                                                       luminosity, normFactors)
            # Obtain plots for systematics coming from met shape difference for control plots
            if optionCalculateQCDNormalizationSyst:
                if isinstance(myShapeHisto, ROOT.TH2):
                    print ShellStyles.WarningLabel(
                    ) + "Skipping met shape uncertainty because histogram has more than 1 dimensions!"
                else:
                    self._obtainQCDNormalizationSystHistograms(
                        myShapeHisto, dsetMgr, plotName, luminosity,
                        normDataSrc, normEWKSrc)
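Example No. 4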
 def _obtainFinalShapeHistogram(self, histoName):
     if histoName == None:
         raise Exception(ShellStyles.ErrorLabel()+"You forgot to give final shape histo name or to cache the final shape histogram!")
     print ShellStyles.WarningLabel()+"Final shape histo was not cached to QCDInvertedSystematics. Obtaining final shape from '%s'."%histoName
     # Obtain final result
     myFinalShape = DataDrivenQCDShape(self._dsetMgr, "Data", "EWK", histoName, self._luminosity, rebinList=self._myRebinList)
     myFinalShapeResult = QCDInvertedShape(myFinalShape, self._moduleInfoString, self._normFactors, optionPrintPurityByBins=False)
     self._hFinalShape = myFinalShapeResult.getResultShape().Clone()
Example No. 5
    def __init__(self,
                 dsetMgr,
                 dsetLabelData,
                 dsetLabelEwk,
                 histoName,
                 dataPath,
                 ewkPath,
                 luminosity,
                 optionUseInclusiveNorm,
                 verbose=False):
        self._verbose = verbose
        self._uniqueN = 0
        self._splittedHistoReader = splittedHistoReader.SplittedHistoReader(
            dsetMgr, dsetLabelData)
        self._histoName = histoName
        self._optionUseInclusiveNorm = optionUseInclusiveNorm  #ALEX-NEW
        dataFullName = os.path.join(dataPath, histoName)
        ewkFullName = os.path.join(ewkPath, histoName)

        # ALEX-NEW
        if (self._optionUseInclusiveNorm):
            msg = "Disabled call for getting splitted histograms. Getting \"Inclusive\" histogram only instead."
            self.Verbose(ShellStyles.WarningLabel() + msg, self._verbose)
            self._dataList = list(
                self._getInclusiveHistogramsFromSingleSource(
                    dsetMgr, dsetLabelData, dataFullName,
                    luminosity))  # was called by default
            self._ewkList = list(
                self._getInclusiveHistogramsFromSingleSource(
                    dsetMgr, dsetLabelEwk, ewkFullName,
                    luminosity))  # was called by default
        else:
            msg = "This splitted histograms method is not validated! Use \"Inclusive\" histogram only instead."
            self.Print(ShellStyles.WarningLabel() + msg, False)
            self._dataList = list(
                self._splittedHistoReader.getSplittedBinHistograms(
                    dsetMgr, dsetLabelData, dataFullName,
                    luminosity))  #FIXME: Does this work for Inclusive?
            self._ewkList = list(
                self._splittedHistoReader.getSplittedBinHistograms(
                    dsetMgr, dsetLabelEwk, ewkFullName,
                    luminosity))  #FIXME: Does this work for Inclusive?
        return
Example No. 6
    def __init__(self, dataPath, ewkPath, dsetMgr, luminosity, moduleInfoString, normFactors,
                 optionDoFakeBNormalisationSyst=True, normDataSrc=None, normEWKSrc=None,
                 optionUseInclusiveNorm=False, keyList=[], verbose=False):
        self._verbose = verbose
        self._shapePlots = []
        self._shapePlotLabels = []
        self._QCDNormalizationSystPlots = []
        self._QCDNormalizationSystPlotLabels = []
        self._moduleInfoString = moduleInfoString
        self._useInclusiveNorm = optionUseInclusiveNorm
        if len(normFactors.keys()) == 1 and normFactors.keys()[0] == "Inclusive":
            self._useInclusiveNorm = True
        self._histoPathsData = self._GetHistoPaths(dsetMgr, "Data", dataPath, keyList)
        if ewkPath == dataPath:
            self._histoPathsEWK = self._histoPathsData
        else:
            self._histoPathsEWK  = self._GetHistoPaths(dsetMgr, "EWK" , ewkPath , keyList)
        
        # Sanity check
        if len(self._histoPathsEWK) != len(self._histoPathsData):
            msg = "List of histograms for EWK does not match in size that of Data"
            raise Exception(ShellStyles.ErrorLabel() + msg + ShellStyles.NormalStyle())
            
        # For-Loop: All plots to consider
        for i, plotName in enumerate(self._histoPathsData, 1):

            # Inform user of progress
            msg = "{:<9} {:>3} {:<1} {:<3} {:<80}".format("Histogram", "%i" % i, "/", "%s:" % (len(self._histoPathsData)), os.path.join(dataPath, plotName) )
            self.PrintFlushed(ShellStyles.SuccessStyle() + msg + ShellStyles.NormalStyle(), False)

            if "JetEtaPhi_AfterAllSelections" in plotName:
                continue 

            # Ensure that histograms exist && pass other sanity checks
            dataOk = self._sanityChecks(dsetMgr, dataPath, plotName) 
            ewkOk  = self._sanityChecks(dsetMgr, ewkPath , plotName)

            if not (dataOk and ewkOk):
                self.Print(ShellStyles.ErrorStyle() + msg + ShellStyles.NormalStyle(), i==1)
                continue
            
            self.Verbose("Obtaining shape plots (the returned object is not owned)", True)
            myShapeHisto = self._obtainShapeHistograms(i, dataPath, ewkPath, dsetMgr, plotName, luminosity, normFactors)

            # Obtain plots for systematics coming from invariant mass shape difference
            if optionDoFakeBNormalisationSyst:
                if isinstance(myShapeHisto, ROOT.TH2):
                    msg = "Skipping invariant mass shape uncertainty because histogram has more than 1 dimensions!"
                    self.Print(ShellStyles.WarningLabel() + msg, True)
                else:
                    self._obtainQCDNormalizationSystHistograms(myShapeHisto, dsetMgr, plotName, luminosity, normDataSrc, normEWKSrc) #iro: fixme (missing plots)
            
        msg = "Obtaining final shape from data path %s" % (ShellStyles.NoteStyle() + dataPath + ShellStyles.NormalStyle())
        self.Verbose(msg, True)
        return
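For orientation, a minimal sketch of the two normFactors layouts that the constructors above distinguish; the bin labels and numerical values below are invented for illustration only:

# Inclusive normalisation: a single factor applied to every bin of the shape
normFactorsInclusive = {"Inclusive": 0.42}

# Splitted (binned) normalisation: one factor per phase-space bin (labels are hypothetical)
normFactorsSplitted = {"taupTlt60": 0.38, "taupT60to80": 0.45, "taupTgt80": 0.51}

# The constructors switch to the inclusive mode when the dictionary has exactly
# one key and that key is "Inclusive"
useInclusiveNorm = (len(normFactorsInclusive.keys()) == 1 and
                    list(normFactorsInclusive.keys())[0] == "Inclusive")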
Example No. 7
 def _findCommonAvailableModules(self, itemLabel, primaryLabel, primaryList,
                                 otherLabel, otherList):
     availableList = []
     # Loop over first list to find common items
     for item in primaryList:
         if item in otherList:
             availableList.append(item)
          else:
              # The item is missing from the other list; warn the user
              print ShellStyles.WarningLabel(
              ) + " %s selection: item '%s' is available in '%s', but missing from '%s'!" % (
                  itemLabel, item, primaryLabel, otherLabel)
     # Return list of items available in both multicrab directories
     return availableList
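The intersection logic above can be exercised standalone; the era lists below are hypothetical:

# Hypothetical era lists from two multicrab directories
primaryList = ["Run2016B", "Run2016C", "Run2016D"]
otherList   = ["Run2016B", "Run2016D"]
commonList  = [item for item in primaryList if item in otherList]
# commonList == ["Run2016B", "Run2016D"]; "Run2016C" would trigger the warning above

Example No. 8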
    def getIntegratedPurityForShapeHisto(self):
        '''
        Return the QCD purity in bins of the final shape
        '''
        hData = self.getIntegratedDataHisto()
        hEwk = self.getIntegratedEwkHisto()
        # newName  =  ("_".join(hData.GetName().split("_", 2)[:2]) + "_IntegratedPurity_" + str(self._uniqueN) )
        newName = hData.GetName() + "_Purity"
        h = aux.Clone(hData, newName)
        nameList = self._dataList[0].GetName().split("_")
        newTitle = "PurityByFinalShapeBin_%s" % nameList[0][:len(nameList[0]) -
                                                            1]
        h.SetTitle(newTitle)
        self._uniqueN += 1

        # For-loop: All bins
        for i in range(1, h.GetNbinsX() + 1):
            myPurity = 0.0
            myUncert = 0.0
            nData = hData.GetBinContent(i)
            nEWK = hEwk.GetBinContent(i)

            # Calculate the purity
            if (nData > 0.0):

                myPurity = (nData - nEWK) / nData

                # Sanity check
                if myPurity < 0.0:
                    myPurity = 0.0
                    myUncert = 0.0
                else:
                    # Assume binomial error
                    myUncertSq = myPurity * (1.0 - myPurity) / nData
                    if myUncertSq >= 0.0:
                        myUncert = sqrt(myUncertSq)
                    else:
                        msg = "Purity is greater than 1 (%.4f) in bin %i of histogram %s" % (
                            myPurity, i, h.GetName())
                        self.Verbose(ShellStyles.WarningLabel() + msg, True)
                        myUncert = 0.0

            # Set the purity value for the given bin
            h.SetBinContent(i, myPurity)
            h.SetBinError(i, myUncert)
        return h
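The per-bin purity and its binomial uncertainty used above can be written as a standalone helper; the name computePurity is illustrative and not part of the class:

from math import sqrt

def computePurity(nData, nEwk):
    '''
    QCD purity p = (nData - nEwk) / nData with binomial uncertainty
    sqrt(p*(1-p)/nData); returns (0, 0) for empty or negative-purity bins,
    mirroring the per-bin logic above. Expects float counts.
    '''
    if nData <= 0.0:
        return 0.0, 0.0
    purity = (nData - nEwk) / nData
    if purity < 0.0:
        return 0.0, 0.0
    uncertSq = purity * (1.0 - purity) / nData
    return purity, sqrt(uncertSq) if uncertSq >= 0.0 else 0.0

# Example: computePurity(100.0, 12.0) gives (0.88, ~0.032)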
Example No. 9
    def getIntegratedPurityForShapeHisto(self):
        '''
        Return the QCD purity in bins of the final shape
        '''
        hData = self.getIntegratedDataHisto()
        hEwk = self.getIntegratedEwkHisto()
        #cloneName =  "%s_purity_%d" % (hData, self._uniqueN) # original code
        cloneName = ("_".join(hData.GetName().split("_", 2)[:2]) +
                     "_IntegratedPurity_" + str(self._uniqueN))
        h = aux.Clone(hData, cloneName)
        nameList = self._dataList[0].GetName().split("_")
        h.SetTitle("PurityByFinalShapeBin_%s" %
                   nameList[0][:len(nameList[0]) - 1])
        self._uniqueN += 1

        # For-loop: All bins
        for i in range(1, h.GetNbinsX() + 1):
            myPurity = 0.0
            myUncert = 0.0
            if (hData.GetBinContent(i) > 0.0):
                myPurity = (hData.GetBinContent(i) -
                            hEwk.GetBinContent(i)) / hData.GetBinContent(i)
                if myPurity < 0.0:
                    myPurity = 0.0
                    myUncert = 0.0
                else:
                    # Assume binomial error
                    myUncertSq = myPurity * (1.0 -
                                             myPurity) / hData.GetBinContent(i)
                    if myUncertSq >= 0.0:
                        myUncert = sqrt(myUncertSq)
                    else:
                        msg = "Purity is greater than 1 (%.4f) in bin %i of histogram %s" % (
                            myPurity, i, h.GetName())
                        self.Verbose(ShellStyles.WarningLabel() + msg, True)
                        myUncert = 0.0
            h.SetBinContent(i, myPurity)
            h.SetBinError(i, myUncert)
        return h
Example No. 10
def main():

    # Object for selecting data eras, search modes, and optimization modes
    myModuleSelector = analysisModuleSelector.AnalysisModuleSelector()

    # Obtain multicrab directory
    myMulticrabDir = "."
    if opts.mcrab != None:
        myMulticrabDir = opts.mcrab
    if not os.path.exists("%s/multicrab.cfg" % myMulticrabDir):
        msg = "No multicrab directory found at path '%s'! Please check path or specify it with --mcrab!" % (
            myMulticrabDir)
        raise Exception(ShellStyles.ErrorLabel() + msg +
                        ShellStyles.NormalStyle())
    if len(opts.shape) == 0:
        raise Exception(
            ShellStyles.ErrorLabel() +
            "Provide a shape identifierwith --shape (for example MT)!" +
            ShellStyles.NormalStyle())

    # Obtain dsetMgrCreator and register it to module selector
    dsetMgrCreator = dataset.readFromMulticrabCfg(directory=myMulticrabDir)

    # Obtain systematics names
    mySystematicsNamesRaw = dsetMgrCreator.getSystematicVariationSources()
    mySystematicsNames = []
    for item in mySystematicsNamesRaw:
        mySystematicsNames.append("%sPlus" % item)
        mySystematicsNames.append("%sMinus" % item)
    if opts.test:
        mySystematicsNames = []  #[mySystematicsNames[0]] #FIXME

    # Set the primary source
    myModuleSelector.setPrimarySource(label=opts.analysisName,
                                      dsetMgrCreator=dsetMgrCreator)

    # Select modules
    myModuleSelector.doSelect(opts=None)  #FIXME: (opts=opts)

    # Loop over era/searchMode/optimizationMode combos
    myDisplayStatus = True
    myTotalModules = myModuleSelector.getSelectedCombinationCount() * (
        len(mySystematicsNames) + 1) * len(opts.shape)
    Verbose("Found %s modules in total" % (myTotalModules), True)

    count, nEras, nSearchModes, nOptModes, nSysVars = myModuleSelector.getSelectedCombinationCountIndividually(
    )
    if nSysVars > 0:
        msg = "Will run over %d modules (%d eras x %d searchModes x %d optimizationModes x %d systematic variations)" % (
            count, nEras, nSearchModes, nOptModes, nSysVars)
    else:
        msg = "Will run over %d modules (%d eras x %d searchModes x %d optimizationModes)" % (
            count, nEras, nSearchModes, nOptModes)
    Print(msg, True)

    # Create pseudo-multicrab creator
    myOutputCreator = pseudoMultiCrabCreator.PseudoMultiCrabCreator(
        opts.analysisName, myMulticrabDir)

    # Make time stamp for start time
    myGlobalStartTime = time.time()

    iModule = 0
    # For-loop: All Shapes
    for shapeType in opts.shape:

        # Initialize
        myOutputCreator.initialize(shapeType, prefix="")

        msg = "Creating dataset for shape \"%s\"%s" % (
            shapeType, ShellStyles.NormalStyle())
        Verbose(ShellStyles.HighlightStyle() + msg, True)

        # Get lists of settings
        erasList = myModuleSelector.getSelectedEras()
        modesList = myModuleSelector.getSelectedSearchModes()
        optList = myModuleSelector.getSelectedOptimizationModes()
        optList.append("")  #append the default opt mode!

        # For-Loop over era, searchMode, and optimizationMode options
        for era in erasList:
            for searchMode in modesList:
                for optimizationMode in optList:

                    Verbose(
                        "era = %s, searchMode = %s, optMode = %s" %
                        (era, searchMode, optimizationMode), True)
                    # If an optimization mode is defined in options skip the rest
                    if opts.optMode != None:
                        if optimizationMode != opts.optMode:
                            continue

                    # Obtain normalization factors
                    myNormFactors = importNormFactors(era, searchMode,
                                                      optimizationMode,
                                                      opts.mcrab)

                    # Nominal module
                    myModuleInfoString = getModuleInfoString(
                        era, searchMode, optimizationMode)
                    iModule += 1

                    # Inform user of what is being processed
                    msg = "Module %d/%d:%s %s/%s" % (
                        iModule, myTotalModules, ShellStyles.NormalStyle(),
                        myModuleInfoString, shapeType)
                    Print(ShellStyles.CaptionStyle() + msg, True)

                    # Keep time
                    myStartTime = time.time()

                    Verbose("Create dataset manager with given settings", True)
                    nominalModule = ModuleBuilder(opts, myOutputCreator)
                    nominalModule.createDsetMgr(myMulticrabDir, era,
                                                searchMode, optimizationMode)

                    if (iModule == 1):
                        if opts.verbose:
                            nominalModule.debug()

                    doQCDNormalizationSyst = False  #FIXME
                    if not doQCDNormalizationSyst:
                        msg = "Disabling systematics"
                        Print(ShellStyles.WarningLabel() + msg, True)
                    nominalModule.buildModule(opts.dataSrc, opts.ewkSrc,
                                              myNormFactors["nominal"],
                                              doQCDNormalizationSyst,
                                              opts.normDataSrc,
                                              opts.normEwkSrc)

                    if len(mySystematicsNames) > 0:
                        Print(
                            "Adding QCD normalization systematics (iff also other systematics  present) ",
                            True)
                        nominalModule.buildQCDNormalizationSystModule(
                            opts.dataSrc, opts.ewkSrc)

                    # FIXME: add quark gluon weighting systematics!
                    if 0:
                        Print("Adding Quark/Gluon weighting systematics", True)
                        nominalModule.buildQCDQuarkGluonWeightingSystModule(
                            opts.dataSrc, opts.ewkSrc,
                            myNormFactors["FakeWeightingUp"],
                            myNormFactors["FakeWeightingDown"], False,
                            opts.normDataSrc, opts.normEwkSrc)

                    Verbose("Deleting nominal module", True)
                    nominalModule.delete()

                    Verbose("Printing time estimate", True)
                    printTimeEstimate(myGlobalStartTime, myStartTime, iModule,
                                      myTotalModules)

                    Verbose("Now do the rest of systematics variations", True)
                    for syst in mySystematicsNames:
                        iModule += 1
                        msg = "Analyzing systematics variations %d/%d: %s/%s/%s" % (
                            iModule, myTotalModules, myModuleInfoString, syst,
                            shapeType)
                        Print(
                            ShellStyles.CaptionStyle() + msg +
                            ShellStyles.NormalStyle(), True)
                        myStartTime = time.time()
                        systModule = ModuleBuilder(opts, myOutputCreator)
                        # Create dataset manager with given settings
                        systModule.createDsetMgr(myMulticrabDir,
                                                 era,
                                                 searchMode,
                                                 optimizationMode,
                                                 systematicVariation=syst)

                        # Build a systematics module
                        systModule.buildModule(opts.dataSrc, opts.ewkSrc,
                                               myNormFactors["nominal"], False,
                                               opts.normDataSrc,
                                               opts.normEwkSrc)
                        printTimeEstimate(myGlobalStartTime, myStartTime,
                                          iModule, myTotalModules)
                        systModule.delete()

        Verbose("Pseudo-multicrab ready for %s" % shapeType, True)

    # Create rest of pseudo multicrab directory
    myOutputCreator.silentFinalize()

    # Print some timing statistics
    Print(
        "Average processing time per module was %.1f s" %
        getAvgProcessTimeForOneModule(myGlobalStartTime, myTotalModules), True)
    Print(
        "Total elapsed time was %.1f s" %
        getTotalElapsedTime(myGlobalStartTime), False)

    msg = "Created pseudo-multicrab %s for shape type \"%s\"" % (
        myOutputCreator.getDirName(), shapeType)
    Print(ShellStyles.SuccessLabel() + msg, True)
    return
Example No. 11
def importNormFactors(era, searchMode, optimizationMode, multicrabDirName):
    '''
    Imports the auto-generated QCDInvertedNormalizationFactors.py file, which is
    created by the plotting/fitting templates script (plotQCD_Fit.py).

    This contains the results of fitting the m_{jjb} template shapes from QCD
    (Inverted Data) and EWK (Baseline MC) to the Baseline Data.

    Results include the fit details for each shape and the QCD NormFactor for moving
    from the Control Region (CR) to the Signal Region (SR).

    The script produces the aforementioned python file and a folder with the histogram
    ROOT files and the individual fits. The folder name will be normalisationPlots/<OptsMode>
    and will be placed inside the <pseudomulticrab_dir>. The auto-generated python file will
    be placed in the cwd (i.e. work/).
    '''
    # Find candidates for normalisation scripts
    scriptList = getNormFactorFileList(dirName=multicrabDirName,
                                       fileBaseName=opts.normFactorsSrc)

    # Create a string with the module information used
    moduleInfoString = getModuleInfoString(era, searchMode, optimizationMode)

    # Construct source file name
    src = getGetNormFactorsSrcFilename(multicrabDirName,
                                       opts.normFactorsSrc % moduleInfoString)

    # Check if normalization coefficients are suitable for the chosen era
    Verbose("Reading normalisation factors from:\n\t%s" % src, True)

    # Split the path to get just the file name of src
    pathList = src.replace(".py", "").split("/")

    # Insert the directory where the normFactor files reside into the path so that they are found
    if len(pathList) > 1:
        cwd = os.getenv("PWD")
        # Get directories to src in a list [i.e. remove the last entry (file-name) from the pathList]
        dirList = map(str, pathList[:(len(pathList) - 1)])
        srcDir = "/".join(dirList)
        sys.path.insert(0, os.path.join(cwd, srcDir))

    # Import the (normFactor) src file
    normFactorsImport = __import__(os.path.basename("/".join(pathList)))

    # Get the function definition
    myNormFactorsSafetyCheck = getattr(normFactorsImport,
                                       "QCDInvertedNormalizationSafetyCheck")

    Verbose(
        "Check that the era=%s, searchMode=%s, optimizationMode=%s info matches!"
        % (era, searchMode, optimizationMode))
    myNormFactorsSafetyCheck(era, searchMode, optimizationMode)

    # Obtain normalization factors
    myNormFactorsImport = getattr(normFactorsImport, "QCDNormalization")
    msg = "Disabled NormFactors Syst Var Fake Weighting Up/Down"
    Print(ShellStyles.WarningLabel() + msg, True)
    # myNormFactorsImportSystVarFakeWeightingDown = getattr(normFactorsImport, "QCDPlusEWKFakeTausNormalizationSystFakeWeightingVarDown") #FIXME
    # myNormFactorsImportSystVarFakeWeightingUp   = getattr(normFactorsImport, "QCDPlusEWKFakeTausNormalizationSystFakeWeightingVarUp")   #FIXME

    myNormFactors = {}
    myNormFactors["nominal"] = myNormFactorsImport
    msg = "Obtained \"nominal\" QCD normalisation factors dictionary. The values are:\n"
    for k in myNormFactors["nominal"]:
        msg += "\t" + k + " = " + str(myNormFactors["nominal"][k])
    Print(ShellStyles.NoteLabel() + msg, True)

    msg = "Disabled NormFactors Weighting Up/Down"
    Print(ShellStyles.WarningLabel() + msg, True)
    # myNormFactors["FakeWeightingDown"] = myNormFactorsImportSystVarFakeWeightingDown # FIXME
    # myNormFactors["FakeWeightingUp"]   = myNormFactorsImportSystVarFakeWeightingUp   # FIXME
    return myNormFactors
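A hedged usage sketch of importNormFactors follows; the era and searchMode strings are placeholders, and the "Inclusive" key is only present if the factors were produced with the inclusive normalisation:

# Placeholder era/searchMode/optimizationMode values; in the main() functions
# these come from the analysis module selector loop
myNormFactors = importNormFactors("Run2016", "350to3000", "", opts.mcrab)
wQCD = myNormFactors["nominal"].get("Inclusive", None)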
Example No. 12
def main(opts):

    # Apply TDR style
    style = tdrstyle.TDRStyle()
    style.setOptStat(True)
    style.setGridX(True)
    style.setGridY(True)

    # Obtain dsetMgrCreator and register it to module selector
    dsetMgrCreator = dataset.readFromMulticrabCfg(directory=opts.mcrab)

    # Get list of eras, modes, and optimisation modes
    erasList = dsetMgrCreator.getDataEras()
    modesList = dsetMgrCreator.getSearchModes()
    optList = dsetMgrCreator.getOptimizationModes()
    sysVarList = dsetMgrCreator.getSystematicVariations()
    sysVarSrcList = dsetMgrCreator.getSystematicVariationSources()

    # If user does not define optimisation mode do all of them
    if opts.optMode == None:
        if len(optList) < 1:
            optList.append("")
        optModes = optList
    else:
        optModes = [opts.optMode]

    # For-loop: All opt Mode
    for opt in optModes:
        opts.optMode = opt

        # Setup & configure the dataset manager
        datasetsMgr = GetDatasetsFromDir(opts)
        datasetsMgr.updateNAllEventsToPUWeighted()
        datasetsMgr.loadLuminosities()  # from lumi.json
        if opts.verbose:
            datasetsMgr.PrintCrossSections()
            datasetsMgr.PrintLuminosities()

        # Get the PSets:
        if 0:
            datasetsMgr.printSelections()
            #PrintPSet("BJetSelection", datasetsMgr, depth=150)

        # ZJets and DYJets overlap!
        if "ZJetsToQQ_HT600toInf" in datasetsMgr.getAllDatasetNames(
        ) and "DYJetsToQQ_HT180" in datasetsMgr.getAllDatasetNames():
            Print(
                "Cannot use both ZJetsToQQ and DYJetsToQQ due to duplicate events? Investigate. Removing ZJetsToQQ datasets for now ..",
                True)
            datasetsMgr.remove(
                filter(lambda name: "ZJetsToQQ" in name,
                       datasetsMgr.getAllDatasetNames()))

        # Merge histograms (see NtupleAnalysis/python/tools/plots.py)
        plots.mergeRenameReorderForDataMC(datasetsMgr)

        # Get luminosity if a value is not specified
        if opts.intLumi < 0:
            opts.intLumi = datasetsMgr.getDataset("Data").getLuminosity()

        # Remove datasets
        removeList = ["QCD-b"]  #, "Charged"]
        if not opts.useMC:
            removeList.append("QCD")
        for i, d in enumerate(removeList, 0):
            msg = "Removing dataset %s" % d
            Verbose(
                ShellStyles.WarningLabel() + msg + ShellStyles.NormalStyle(),
                i == 0)
            datasetsMgr.remove(
                filter(lambda name: d in name,
                       datasetsMgr.getAllDatasetNames()))

        # Print summary of datasets to be used
        if 0:
            datasetsMgr.PrintInfo()

        # Merge EWK samples
        datasetsMgr.merge("EWK", aux.GetListOfEwkDatasets())

        # Print dataset information
        datasetsMgr.PrintInfo()

        # Do the fit on the histo after ALL selections (incl. topology cuts)
        folderList = datasetsMgr.getDataset(
            datasetsMgr.getAllDatasetNames()[0]).getDirectoryContent(
                opts.folder)
        #folderList1 = [h for h in folderList if "TetrajetPt" in h]
        #folderList1 = [h for h in folderList if "TetrajetMass" in h]
        #folderList1 = [h for h in folderList if "MET" in h]
        #folderList1 = [h for h in folderList if "TetrajetBJetPt" in h]
        folderList1 = [h for h in folderList if "QGLR" in h]
        folderList2 = [
            h for h in folderList1
            if "CRtwo" in h or "VR" in h or "SR" in h or "CRone" in h
        ]

        # For-loop: All folders
        histoPaths = []
        for f in folderList2:
            folderPath = os.path.join(opts.folder, f)
            histoList = datasetsMgr.getDataset(
                datasetsMgr.getAllDatasetNames()[0]).getDirectoryContent(
                    folderPath)
            pathList = [os.path.join(folderPath, h) for h in histoList]
            histoPaths.extend(pathList)

        binLabels = GetBinLabels("CRone", histoPaths)
        PlotHistosAndCalculateTF(datasetsMgr, histoPaths, binLabels, opts)
    return
Example No. 13
def main(opts):

    # Object for selecting data eras, search modes, and optimization modes
    myModuleSelector = analysisModuleSelector.AnalysisModuleSelector()

    # Obtain dsetMgrCreator and register it to module selector
    dsetMgrCreator = dataset.readFromMulticrabCfg(directory=opts.mcrab)

    # Obtain systematics names
    mySystematicsNamesRaw = dsetMgrCreator.getSystematicVariationSources()
    mySystematicsNames = []
    for i, item in enumerate(mySystematicsNamesRaw, 0):
        Print(
            "Using systematic %s" %
            (ShellStyles.NoteStyle() + item + ShellStyles.NormalStyle()),
            i == 0)
        mySystematicsNames.append("%sPlus" % item)
        mySystematicsNames.append("%sMinus" % item)
    if opts.test:
        mySystematicsNames = []

    # Set the primary source
    Verbose(
        "Setting the primary source (label=%s)" %
        (ShellStyles.NoteStyle() + opts.analysisName +
         ShellStyles.NormalStyle()), True)
    myModuleSelector.setPrimarySource(
        label=opts.analysisName,
        dsetMgrCreator=dsetMgrCreator)  #fixme: what is label for?

    # Select modules
    myModuleSelector.doSelect(opts=None)  #fixme: (opts=opts)

    # Loop over era/searchMode/optimizationMode combos
    myTotalModules = myModuleSelector.getSelectedCombinationCount() * (
        len(mySystematicsNames) + 1) * len(opts.shape)
    Verbose("Found %s modules in total" % (myTotalModules), True)

    count, nEras, nSearchModes, nOptModes, nSysVars = myModuleSelector.getSelectedCombinationCountIndividually(
    )
    if nSysVars > 0:
        msg = "Running  over %d modules (%d eras x %d searchModes x %d optimizationModes x %d systematic variations)" % (
            count, nEras, nSearchModes, nOptModes, nSysVars)
    else:
        msg = "Running over %d modules (%d eras x %d searchModes x %d optimizationModes)" % (
            count, nEras, nSearchModes, nOptModes)
    Verbose(msg, True)

    # Create pseudo-multicrab creator
    msg = "Will create pseudo-dataset %s inside the pseudo-multicrab directory" % (
        ShellStyles.NoteStyle() + opts.analysisName +
        ShellStyles.NormalStyle())
    Verbose(msg, True)
    myOutputCreator = pseudoMultiCrabCreator.PseudoMultiCrabCreator(
        opts.analysisName, opts.mcrab, verbose=opts.verbose)

    # Make time stamp for start time
    myGlobalStartTime = time.time()

    iModule = 0
    # For-loop: All Shapes
    for iShape, shapeType in enumerate(opts.shape, 1):

        msg = "Shape %d/%d:%s %s" % (iShape, len(
            opts.shape), ShellStyles.NormalStyle(), shapeType)
        Print(ShellStyles.CaptionStyle() + msg, True)

        # Initialize
        myOutputCreator.initialize(
            subTitle=shapeType,
            prefix="")  #fixeme: remove shapeType from sub-directory name?

        # Get lists of settings
        erasList = myModuleSelector.getSelectedEras()
        modesList = myModuleSelector.getSelectedSearchModes()
        optList = myModuleSelector.getSelectedOptimizationModes()
        if 0:
            optList.append(
                ""
            )  #append the default opt mode iff more optimization modes exist

        # For-Loop over era, searchMode, and optimizationMode options
        for era in erasList:
            for searchMode in modesList:
                for optimizationMode in optList:

                    Verbose(
                        "era = %s, searchMode = %s, optMode = %s" %
                        (era, searchMode, optimizationMode), True)
                    # If an optimization mode is defined in options skip the rest
                    if opts.optMode != None:
                        if optimizationMode != opts.optMode:
                            continue

                    # Obtain normalization factors
                    myNormFactors = importNormFactors(era, searchMode,
                                                      optimizationMode,
                                                      opts.mcrab)

                    # Nominal module
                    myModuleInfoString = getModuleInfoString(
                        era, searchMode, optimizationMode)
                    iModule += 1

                    # Inform user of what is being processed
                    msg = "Module %d/%d:%s %s/%s" % (
                        iModule, myTotalModules, ShellStyles.NormalStyle(),
                        myModuleInfoString, shapeType)
                    Print(ShellStyles.CaptionStyle() + msg, True)

                    # Keep time
                    myStartTime = time.time()

                    Verbose("Create dataset manager with given settings", True)
                    nominalModule = ModuleBuilder(opts, myOutputCreator,
                                                  opts.verbose)
                    nominalModule.createDsetMgr(opts.mcrab, era, searchMode,
                                                optimizationMode)

                    if (iModule == 1):
                        if opts.verbose:
                            nominalModule.debug()

                    doQCDNormalizationSyst = False  #FIXME
                    if not doQCDNormalizationSyst:
                        msg = "Disabling systematics"
                        Verbose(ShellStyles.WarningLabel() + msg, True)  #fixme

                    # Build the module
                    nominalModule.buildModule(
                        opts.dataSrc, opts.ewkSrc,
                        myNormFactors[opts.normFactorKey],
                        doQCDNormalizationSyst, opts.normDataSrc,
                        opts.normEwkSrc)

                    if len(mySystematicsNames) > 0:
                        Print(
                            "Adding QCD normalization systematics (iff also other systematics  present) ",
                            True)
                        nominalModule.buildQCDNormalizationSystModule(
                            opts.dataSrc, opts.ewkSrc)

                    # FIXME: add quark gluon weighting systematics!
                    if 0:
                        Print("Adding Quark/Gluon weighting systematics", True)
                        nominalModule.buildQCDQuarkGluonWeightingSystModule(
                            opts.dataSrc, opts.ewkSrc,
                            myNormFactors["FakeWeightingUp"],
                            myNormFactors["FakeWeightingDown"], False,
                            opts.normDataSrc, opts.normEwkSrc)

                    Verbose("Deleting nominal module", True)
                    nominalModule.delete()

                    Verbose("Printing time estimate", True)
                    printTimeEstimate(myGlobalStartTime, myStartTime, iModule,
                                      myTotalModules)

                    Verbose("Now do the rest of systematics variations", True)
                    for syst in mySystematicsNames:
                        iModule += 1
                        msg = "Analyzing systematics variations %d/%d: %s/%s/%s" % (
                            iModule, myTotalModules, myModuleInfoString, syst,
                            shapeType)
                        Print(
                            ShellStyles.CaptionStyle() + msg +
                            ShellStyles.NormalStyle(), True)
                        myStartTime = time.time()
                        systModule = ModuleBuilder(opts, myOutputCreator)
                        # Create dataset manager with given settings
                        systModule.createDsetMgr(opts.mcrab,
                                                 era,
                                                 searchMode,
                                                 optimizationMode,
                                                 systematicVariation=syst)

                        # Build a systematics module
                        systModule.buildModule(
                            opts.dataSrc, opts.ewkSrc,
                            myNormFactors[opts.normFactorKey], False,
                            opts.normDataSrc, opts.normEwkSrc)
                        printTimeEstimate(myGlobalStartTime, myStartTime,
                                          iModule, myTotalModules)
                        systModule.delete()

        Verbose("Pseudo-multicrab ready for %s" % shapeType, True)

    # Print some timing statistics
    Print(
        "Average processing time per module was %.1f seconds" %
        getAvgProcessTimeForOneModule(myGlobalStartTime, myTotalModules), True)
    Print(
        "Total elapsed time was %.1f seconds" %
        getTotalElapsedTime(myGlobalStartTime), False)

    # Create rest of pseudo multicrab directory
    myOutputCreator.finalize(silent=False)

    return
Example No. 14
def main(opts):

    # Apply TDR style
    style = tdrstyle.TDRStyle()
    style.setGridX(False)
    style.setGridY(False)
    style.setOptStat(False)

    # Obtain dsetMgrCreator and register it to module selector
    dsetMgrCreator = dataset.readFromMulticrabCfg(directory=opts.mcrab)

    # Get list of eras, modes, and optimisation modes
    erasList = dsetMgrCreator.getDataEras()
    modesList = dsetMgrCreator.getSearchModes()
    optList = dsetMgrCreator.getOptimizationModes()
    sysVarList = dsetMgrCreator.getSystematicVariations()
    sysVarSrcList = dsetMgrCreator.getSystematicVariationSources()

    # If user does not define optimisation mode do all of them
    if opts.optMode == None:
        if len(optList) < 1:
            optList.append("")
        optModes = optList
    else:
        optModes = [opts.optMode]

    # For-loop: All opt Mode
    for opt in optModes:
        opts.optMode = opt

        # Setup & configure the dataset manager
        datasetsMgr = GetDatasetsFromDir(opts)
        datasetsMgr.updateNAllEventsToPUWeighted()
        datasetsMgr.loadLuminosities()  # from lumi.json
        if opts.verbose:
            datasetsMgr.PrintCrossSections()
            datasetsMgr.PrintLuminosities()

        # Get the PSets:
        if 0:
            datasetsMgr.printSelections()
            #PrintPSet("BJetSelection", datasetsMgr, depth=150)
            #PrintPSet("fakeBMeasurement", datasetsMgr, depth=150)
            sys.exit()

        # ZJets and DYJets overlap!
        if "ZJetsToQQ_HT600toInf" in datasetsMgr.getAllDatasetNames(
        ) and "DYJetsToQQ_HT180" in datasetsMgr.getAllDatasetNames():
            Print(
                "Cannot use both ZJetsToQQ and DYJetsToQQ due to duplicate events? Investigate. Removing ZJetsToQQ datasets for now ..",
                True)
            datasetsMgr.remove(
                filter(lambda name: "ZJetsToQQ" in name,
                       datasetsMgr.getAllDatasetNames()))

        # Merge histograms (see NtupleAnalysis/python/tools/plots.py)
        plots.mergeRenameReorderForDataMC(datasetsMgr)

        # Get luminosity if a value is not specified
        if opts.intLumi < 0:
            opts.intLumi = datasetsMgr.getDataset("Data").getLuminosity()

        # Remove datasets
        removeList = ["QCD-b", "Charged"]
        if not opts.useMC:
            removeList.append("QCD")
        for i, d in enumerate(removeList, 0):
            msg = "Removing dataset %s" % d
            Verbose(
                ShellStyles.WarningLabel() + msg + ShellStyles.NormalStyle(),
                i == 0)
            datasetsMgr.remove(
                filter(lambda name: d in name,
                       datasetsMgr.getAllDatasetNames()))

        # Print summary of datasets to be used
        if 0:
            datasetsMgr.PrintInfo()

        # Merge EWK samples
        datasetsMgr.merge("EWK", aux.GetListOfEwkDatasets())

        # Print dataset information
        datasetsMgr.PrintInfo()

        # List of TDirectoryFile (_CRone, _CRtwo, _VR, _SR)
        tdirs = [
            "LdgTrijetPt_", "LdgTrijetMass_", "TetrajetBJetPt_",
            "TetrajetBJetEta_", "LdgTetrajetPt_", "LdgTetrajetMass_"
        ]
        region = ["CRone", "CRtwo"]
        hList = []
        for d in tdirs:
            for r in region:
                hList.append(d + r)

        # Get the folders with the binned histograms
        folderList_ = datasetsMgr.getDataset(
            datasetsMgr.getAllDatasetNames()[0]).getDirectoryContent(
                opts.folder)
        folderList = [h for h in folderList_ if h in hList]

        # For-loop: All folders
        histoPaths = []
        for f in folderList:
            folderPath = os.path.join(opts.folder, f)
            histoList = datasetsMgr.getDataset(
                datasetsMgr.getAllDatasetNames()[0]).getDirectoryContent(
                    folderPath)
            pathList = [os.path.join(folderPath, h) for h in histoList]
            histoPaths.extend(pathList)

        # Get all the bin labels
        binLabels = GetBinLabels("CRone", histoPaths)

        for i, t in enumerate(tdirs, 1):
            myList = []
            for p in histoPaths:
                if t in p:
                    myList.append(p)
            msg = "{:<9} {:>3} {:<1} {:<3} {:<50}".format(
                "Histogram", "%i" % i, "/", "%s:" % (len(tdirs)),
                t.replace("_", ""))
            Print(ShellStyles.SuccessStyle() + msg + ShellStyles.NormalStyle(),
                  i == 1)

            PlotHistograms(datasetsMgr, myList, binLabels, opts)

    # Save the plots
    Print(
        "All plots saved under directory %s" %
        (ShellStyles.NoteStyle() + aux.convertToURL(opts.saveDir, opts.url) +
         ShellStyles.NormalStyle()), True)
    return
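Example No. 15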
def main(opts):

    # Apply TDR style
    style = tdrstyle.TDRStyle()
    style.setGridX(False)
    style.setGridY(False)

    optModes = [""]
    # For-loop: All opt Mode
    for opt in optModes:
        opts.optMode = opt

        # Numerator & Denominator dataset manager
        noSF_datasetsMgr      = GetDatasetsFromDir(opts, opts.noSFcrab)
        withCR2SF_datasetsMgr = GetDatasetsFromDir(opts, opts.withCR2SFcrab) 
        
        # Update all events to PU weighting
        noSF_datasetsMgr.updateNAllEventsToPUWeighted()
        withCR2SF_datasetsMgr.updateNAllEventsToPUWeighted()
        
        # Load Luminosities
        noSF_datasetsMgr.loadLuminosities()
        withCR2SF_datasetsMgr.loadLuminosities()
        
        
        if 0:
            noSF_datasetsMgr.PrintCrossSections()
            noSF_datasetsMgr.PrintLuminosities()
 
        # Merge histograms (see NtupleAnalysis/python/tools/plots.py) 
        plots.mergeRenameReorderForDataMC(noSF_datasetsMgr) 
        plots.mergeRenameReorderForDataMC(withCR2SF_datasetsMgr) 
        
        # Get luminosity if a value is not specified
        if opts.intLumi < 0:
            opts.intLumi = noSF_datasetsMgr.getDataset("Data").getLuminosity()
            
        # Remove datasets
        removeList = []
        #removeList = ["TTWJetsToLNu_", "TTWJetsToQQ"]
        for i, d in enumerate(removeList, 0):
            msg = "Removing dataset %s" % d
            Print(ShellStyles.WarningLabel() + msg + ShellStyles.NormalStyle(), i==0)
            noSF_datasetsMgr.remove(filter(lambda name: d in name, noSF_datasetsMgr.getAllDatasetNames()))
            
        
        # Print summary of datasets to be used
        if 0:
            noSF_datasetsMgr.PrintInfo()
            withCR2SF_datasetsMgr.PrintInfo()
            
        # Merge EWK samples
        EwkDatasets = ["Diboson", "DYJetsToLL", "WJetsHT"]
        noSF_datasetsMgr.merge("EWK", EwkDatasets)
        withCR2SF_datasetsMgr.merge("EWK", EwkDatasets)
        
        # Get histogram names
        folderListIncl = withCR2SF_datasetsMgr.getDataset(withCR2SF_datasetsMgr.getAllDatasetNames()[0]).getDirectoryContent(opts.folder)
        folderList = [h for h in folderListIncl if "AfterAllSelections_LeadingTrijet_Pt" in h ]

        # Drop low-MET histograms (filter instead of removing elements while iterating)
        folderList = [h for h in folderList if "lowMET" not in h]
        
        folderPath    = os.path.join(opts.folder, "")
        folderPathGen = os.path.join(opts.folder + "Genuine")
        folderPathFake = os.path.join(opts.folder + "Fake")
        
        histoList = folderList
        num_pathList = [os.path.join(folderPath, h) for h in histoList]
        num_pathList.extend([os.path.join(folderPathGen, h) for h in histoList])
        num_pathList.extend([os.path.join(folderPathFake, h) for h in histoList])
        
        # Denominator Histogram (To be used in the estimation of QCD Data-Driven)
        histoList = [h for h in folderListIncl if "AfterStandardSelections_LeadingTrijet_Pt" in h]
        den_pathList = [os.path.join(folderPath, h) for h in histoList]
        den_pathList.extend([os.path.join(folderPathGen, h) for h in histoList])
        den_pathList.extend([os.path.join(folderPathFake, h) for h in histoList])

        # Drop low-MET histograms (filter instead of removing elements while iterating)
        den_pathList = [h for h in den_pathList if "lowMET" not in h]
        
        # Do the histograms
        PlotHistos(noSF_datasetsMgr, withCR2SF_datasetsMgr, num_pathList, den_pathList,  opts)
        
    return
Example No. 16
def main(opts):

    optModes = [""]
    if opts.optMode != None:
        optModes = [opts.optMode]

    # For-loop: All opt Mode
    for opt in optModes:
        opts.optMode = opt

        # Setup & configure the dataset manager
        datasetsMgr = GetDatasetsFromDir(opts)
        datasetsMgr.updateNAllEventsToPUWeighted()
        datasetsMgr.loadLuminosities(fname="lumi.json")

        # Get Luminosity
        if opts.intLumi < 0:
            if "Data" in datasetsMgr.getAllDatasetNames():
                opts.intLumi = datasetsMgr.getDataset("Data").getLuminosity()
            else:
                opts.intLumi = datasetsMgr.loadLumi()

        # Set/Overwrite cross-sections
        datasetsToRemove = []
        for d in datasetsMgr.getAllDatasets():
            if "M_%s" % (opts.signalMass) in d.getName():
                datasetsMgr.getDataset(d.getName()).setCrossSection(1.0)
            else:
                datasetsToRemove.append(d.getName())

        if opts.verbose:
            datasetsMgr.PrintCrossSections()
            datasetsMgr.PrintLuminosities()

        # Merge histograms (see NtupleAnalysis/python/tools/plots.py)
        plots.mergeRenameReorderForDataMC(datasetsMgr)

        # Custom Filtering of datasets
        for i, d in enumerate(datasetsToRemove, 0):
            msg = "Removing dataset %s" % d
            Verbose(
                ShellStyles.WarningLabel() + msg + ShellStyles.NormalStyle(),
                i == 0)
            datasetsMgr.remove(
                filter(lambda name: d == name,
                       datasetsMgr.getAllDatasetNames()))

        if opts.verbose:
            datasetsMgr.PrintInfo()

        # Merge EWK samples
        if opts.mergeEWK:
            datasetsMgr.merge("EWK", aux.GetListOfEwkDatasets())
            plots._plotStyles["EWK"] = styles.getAltEWKStyle()

        # Print dataset information
        datasetsMgr.PrintInfo()

        # Apply TDR style
        style = tdrstyle.TDRStyle()
        style.setOptStat(True)
        style.setGridX(opts.gridX)
        style.setGridY(opts.gridY)

        # Do Data-MC histograms with DataDriven QCD
        folder = opts.folder
        histoList = datasetsMgr.getDataset(
            datasetsMgr.getAllDatasetNames()[0]).getDirectoryContent(folder)
        histoPaths = [os.path.join(folder, h) for h in histoList]
        keepList = ["LdgTetrajetMass_AfterAllSelections"]
        #keepList   = ["LdgTetrajetMass_AfterStandardSelections"]
        myHistos = []
        for h in histoPaths:
            if h.split("/")[-1] not in keepList:
                continue
            else:
                myHistos.append(h)

        for i, h in enumerate(myHistos, 1):
            PlotHistograms(datasetsMgr, h)

    Print(
        "All plots saved under directory %s" %
        (ShellStyles.NoteStyle() + aux.convertToURL(opts.saveDir, opts.url) +
         ShellStyles.NormalStyle()), True)
    return
Example No. 17
    def _doCalculate2D(self, nSplitBins, shape, normFactors, optionPrintPurityByBins, optionDoNQCDByBinHistograms, myUncertaintyLabels):
        '''
        Calculates the result for 2D histograms
        '''
        # Initialize counters for purity calculation in final shape binning
        myShapeDataSum = []
        myShapeDataSumUncert = []
        myShapeEwkSum = []
        myShapeEwkSumUncert = []
        myList = []
        for k in range(1,self._resultShape.GetNbinsY()+1):
            myList.append(0.0)
        for j in range(1,self._resultShape.GetNbinsX()+1):
            myShapeDataSum.append(myList[:])
            myShapeDataSumUncert.append(myList[:])
            myShapeEwkSum.append(myList[:])
            myShapeEwkSumUncert.append(myList[:])

        # Calculate results separately for each phase-space bin, and then combine them to get inclusive result
        for i in range(0, nSplitBins):
            # Get data-driven QCD, data, and MC EWK shape histogram for the phase space bin
            h     = shape.getDataDrivenQCDHistoForSplittedBin(i)
            hData = shape.getDataHistoForSplittedBin(i)
            hEwk  = shape.getEwkHistoForSplittedBin(i)

            # Get normalization factor
            wQCDLabel = shape.getPhaseSpaceBinFileFriendlyTitle(i)
            if self._optionUseInclusiveNorm:
                wQCDLabel = "Inclusive"
            wQCD = 0.0
            if not wQCDLabel in normFactors.keys():
                msg = "No normalization factors available for bin '%s' when accessing histogram %s! Ignoring this bin..." % (wQCDLabel, shape.getHistoName())
                print ShellStyles.WarningLabel() + msg
            else:
                wQCD = normFactors[wQCDLabel]
            # Loop over bins in the shape histogram
            for j in range(1,h.GetNbinsX()+1):
                for k in range(1,h.GetNbinsY()+1):
                    myResult = 0.0
                    myStatDataUncert = 0.0
                    myStatEwkUncert = 0.0
                    if abs(h.GetBinContent(j,k)) > 0.00001: # Ignore zero bins
                        # Calculate result
                        myResult = h.GetBinContent(j,k) * wQCD
                        # Calculate abs. stat. uncert. for data and for MC EWK
                        myStatDataUncert = hData.GetBinError(j,k) * wQCD
                        myStatEwkUncert = hEwk.GetBinError(j,k) * wQCD
                        #errorPropagation.errorPropagationForProduct(hLeg1.GetBinContent(j), hLeg1Data.GetBinError(j), myEffObject.value(), myEffObject.uncertainty("statData"))
                        # Do not calculate here MC EWK syst.
                    myCountObject = extendedCount.ExtendedCount(myResult, [myStatDataUncert, myStatEwkUncert], myUncertaintyLabels)
                    self._resultCountObject.add(myCountObject)
                    if optionDoNQCDByBinHistograms:
                        self._histogramsList[i].SetBinContent(j, k, myCountObject.value())
                        self._histogramsList[i].SetBinError(j, k, myCountObject.statUncertainty())
                    self._resultShape.SetBinContent(j, k, self._resultShape.GetBinContent(j, k) + myCountObject.value())
                    self._resultShape.SetBinError(j, k, self._resultShape.GetBinError(j, k) + myCountObject.statUncertainty()**2) # Sum squared
                    # Sum items for purity calculation
                    myShapeDataSum[j-1][k-1] += hData.GetBinContent(j,k)*wQCD
                    myShapeDataSumUncert[j-1][k-1] += (hData.GetBinError(j,k)*wQCD)**2
                    myShapeEwkSum[j-1][k-1] += hEwk.GetBinContent(j,k)*wQCD
                    myShapeEwkSumUncert[j-1][k-1] += (hEwk.GetBinError(j,k)*wQCD)**2
            h.Delete()
            hData.Delete()
            hEwk.Delete()
        # Take square root of uncertainties
        for j in range(1,self._resultShape.GetNbinsX()+1):
            for k in range(1,self._resultShape.GetNbinsY()+1):
                self._resultShape.SetBinError(j, k, math.sqrt(self._resultShape.GetBinError(j, k)))

        # Print result
        print "NQCD Integral(%s) = %s "%(shape.getHistoName(), self._resultCountObject.getResultStringFull("%.1f"))

        # Print purity as function of final shape bins
        if optionPrintPurityByBins:
            print "Purity of shape %s"%shape.getHistoName()
            print "shapeBin purity purityUncert"
        for j in range (1,self._resultShape.GetNbinsX()+1):
            for k in range(1,self._resultShape.GetNbinsY()+1):
                myPurity = 0.0
                myPurityUncert = 0.0
                if abs(myShapeDataSum[j-1][k-1]) > 0.000001:
                    myPurity = 1.0 - myShapeEwkSum[j-1][k-1] / myShapeDataSum[j-1][k-1]
                    myPurityUncert = errorPropagation.errorPropagationForDivision(myShapeEwkSum[j-1][k-1], math.sqrt(myShapeEwkSumUncert[j-1][k-1]), myShapeDataSum[j-1][k-1], math.sqrt(myShapeDataSumUncert[j-1][k-1]))
                # Store MC EWK content
                self._resultShapeEWK.SetBinContent(j, k, myShapeEwkSum[j-1][k-1])
                self._resultShapeEWK.SetBinError(j, k, math.sqrt(myShapeEwkSumUncert[j-1][k-1]))
                self._resultShapePurity.SetBinContent(j, k, myPurity)
                self._resultShapePurity.SetBinError(j, k, myPurityUncert)
                # Print purity info of final shape
                if optionPrintPurityByBins:
                    myString = ""
                    if j < self._resultShape.GetNbinsX():
                        myString = "%d..%d, "%(self._resultShape.GetXaxis().GetBinLowEdge(j),self._resultShape.GetXaxis().GetBinUpEdge(j))
                    else:
                        myString = ">%d, "%(self._resultShape.GetXaxis().GetBinLowEdge(j))
                    if k < self._resultShape.GetNbinsY():
                        myString += "%d..%d"%(self._resultShape.GetYaxis().GetBinLowEdge(k),self._resultShape.GetYaxis().GetBinUpEdge(k))
                    else:
                        myString += ">%d"%(self._resultShape.GetYaxis().GetBinLowEdge(k))
                    myString += " %.3f %.3f"%(myPurity, myPurityUncert)
                    print myString
        return
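The per-bin arithmetic above follows a single pattern throughout: scale each control-region bin by the normalization factor wQCD, accumulate the squared statistical uncertainties, and take the square root once at the end. A minimal, ROOT-free sketch of that pattern (illustrative only, not framework code):

# Minimal sketch (illustrative only, not framework code) of the pattern used
# above: scale bins by wQCD, sum squared uncertainties, take sqrt at the end.
import math

def weight_and_sum(binContents, binErrors, wQCD):
    total = 0.0
    totalUncertSq = 0.0
    for content, error in zip(binContents, binErrors):
        total += content * wQCD
        totalUncertSq += (error * wQCD) ** 2   # sum squared
    return total, math.sqrt(totalUncertSq)     # sqrt taken once, outside the loop

# Example: three control-region bins and a transfer factor of 0.5
print(weight_and_sum([10.0, 20.0, 5.0], [3.2, 4.5, 2.2], 0.5))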
Exemplo n.º 18
0
def main(opts):

    #optModes = ["", "OptChiSqrCutValue50", "OptChiSqrCutValue100"]
    optModes = [""]

    if opts.optMode != None:
        optModes = [opts.optMode]
        
    # For-loop: All opt Mode
    for opt in optModes:
        opts.optMode = opt

        # Setup & configure the dataset manager 
        datasetsMgr = GetDatasetsFromDir(opts)
        datasetsMgr.updateNAllEventsToPUWeighted()
        datasetsMgr.loadLuminosities() # from lumi.json

        if 0:
            datasetsMgr.printSelections()
            sys.exit()

        # Define datasets to remove by default
        QCD_list = ["QCD_HT700to1000", "QCD_HT50to100", "QCD_HT500to700", "QCD_HT300to500", 
                    "QCD_HT200to300", "QCD_HT2000toInf", "QCD_HT1500to2000", "QCD_HT100to200", "QCD_HT1000to1500"]
        QCDExt_list = [x+"_ext1" for x in QCD_list]
        datasetsToRemove = ["QCD-b"]
        # datasetsToRemove.extend(QCD_list)
        # datasetsToRemove.extend(QCDExt_list)

        # ZJets and DYJets overlap
        if "ZJetsToQQ_HT600toInf" in datasetsMgr.getAllDatasetNames() and "DYJetsToQQ_HT180" in datasetsMgr.getAllDatasetNames():
            Print("Cannot use both ZJetsToQQ and DYJetsToQQ due to duplicate events? Investigate. Removing ZJetsToQQ datasets for now ..", True)
            datasetsMgr.remove(filter(lambda name: "ZJetsToQQ" in name, datasetsMgr.getAllDatasetNames()))
            #datasetsMgr.remove(filter(lambda name: "DYJetsToQQ" in name, datasetsMgr.getAllDatasetNames()))
        
        # Set/Overwrite cross-sections
        for d in datasetsMgr.getAllDatasets():
            if "ChargedHiggs" in d.getName():
                datasetsMgr.getDataset(d.getName()).setCrossSection(1.0) # ATLAS 13 TeV H->tb exclusion limits
                if d.getName() != opts.signal:
                    datasetsToRemove.append(d.getName())

        if opts.verbose:
            datasetsMgr.PrintCrossSections()
            datasetsMgr.PrintLuminosities()

        # Merge histograms (see NtupleAnalysis/python/tools/plots.py) 
        plots.mergeRenameReorderForDataMC(datasetsMgr) 

        # Custom Filtering of datasets 
        for i, d in enumerate(datasetsToRemove, 0):
            msg = "Removing dataset %s" % d
            Verbose(ShellStyles.WarningLabel() + msg + ShellStyles.NormalStyle(), i==0)
            datasetsMgr.remove(filter(lambda name: d == name, datasetsMgr.getAllDatasetNames()))

        if opts.verbose:
            datasetsMgr.PrintInfo()
  
        # Re-order datasets (different for inverted than default=baseline)
        newOrder = ["Data"]
        for i, d in enumerate(datasetsMgr.getAllDatasets(), 0):
            if d.isData():
                continue
            else:
                newOrder.append(d.getName())

        # Re-arrange dataset order?
        if 0:
            s = newOrder.pop( newOrder.index("noTop") )
            newOrder.insert(len(newOrder), s) #after "Data"

        # Move signal to top
        if opts.signal in newOrder:
            s = newOrder.pop( newOrder.index(opts.signal) )
            newOrder.insert(1, s)
        datasetsMgr.selectAndReorder(newOrder)
        
        # Merge EWK samples
        if opts.mergeEWK:
            datasetsMgr.merge("EWK", aux.GetListOfEwkDatasets())
            plots._plotStyles["EWK"] = styles.getAltEWKStyle()

        # Print dataset information
        datasetsMgr.PrintInfo()

        # Apply TDR style
        style = tdrstyle.TDRStyle()
        style.setOptStat(True)
        style.setGridX(opts.gridX)
        style.setGridY(opts.gridY)

        # Do Data-MC histograms with DataDriven QCD
        folder     = opts.folder
        histoList  = datasetsMgr.getDataset(datasetsMgr.getAllDatasetNames()[0]).getDirectoryContent(folder)        
        histoPaths = [os.path.join(folder, h) for h in histoList]
        ignoreList = ["Aplanarity", "Planarity", "Sphericity", "FoxWolframMoment", "Circularity", "ThirdJetResolution", "Centrality", "_Vs_"]
        myHistos   = []
        for h in histoPaths:
            skip = False

            # Skip unwanted histos
            for i in ignoreList:
                if i in h:
                    skip = True

            if skip:
                continue
            else:
                myHistos.append(h)

        for i, h in enumerate(myHistos, 1):
            # Plot the histograms!
            msg   = "{:<9} {:>3} {:<1} {:<3} {:<50}".format("Histogram", "%i" % i, "/", "%s:" % (len(myHistos)), h)
            Print(ShellStyles.SuccessStyle() + msg + ShellStyles.NormalStyle(), i==1)

            DataMCHistograms(datasetsMgr, h)
        
    Print("All plots saved under directory %s" % (ShellStyles.NoteStyle() + aux.convertToURL(opts.saveDir, opts.url) + ShellStyles.NormalStyle()), True)    
    return
Exemplo n.º 19
0
def main(opts):
    
    optModes = [""]
    
    if opts.optMode != None:
        optModes = [opts.optMode]
        
    # For-loop: All opt Mode
    for opt in optModes:
        opts.optMode = opt

        # Setup & configure the dataset manager 
        datasetsMgr = GetDatasetsFromDir(opts)
        datasetsMgr.updateNAllEventsToPUWeighted()
        datasetsMgr.loadLuminosities() # from lumi.json
        
        # Set/Overwrite cross-sections
        datasetsToRemove = ["QCD-b", "TTTT"]#, "QCD_HT50to100", "QCD_HT100to200"]#, "QCD_HT200to300"]#, "QCD_HT300to500"]
        for d in datasetsMgr.getAllDatasets():
            if "ChargedHiggs" in d.getName():
                datasetsMgr.getDataset(d.getName()).setCrossSection(1.0) # ATLAS 13 TeV H->tb exclusion limits
                    
        # Re-order datasets 
        datasetOrder = []
        for d in datasetsMgr.getAllDatasets():
            if "M_" in d.getName():
                if d not in signalMass:
                    continue
            datasetOrder.append(d.getName())    
        for m in signalMass:
            datasetOrder.insert(0, m)
        datasetsMgr.selectAndReorder(datasetOrder)
        

        datasetsMgr.PrintCrossSections()
        datasetsMgr.PrintLuminosities()

        # Custom Filtering of datasets 
        for i, d in enumerate(datasetsToRemove, 0):
            msg = "Removing dataset %s" % d
            Print(ShellStyles.WarningLabel() + msg + ShellStyles.NormalStyle(), i==0)
            datasetsMgr.remove(filter(lambda name: d in name, datasetsMgr.getAllDatasetNames()))
        if opts.verbose:
            datasetsMgr.PrintInfo()

        # ZJets and DYJets overlap
        if "ZJetsToQQ_HT600toInf" in datasetsMgr.getAllDatasetNames() and "DYJetsToQQ_HT180" in datasetsMgr.getAllDatasetNames():
            Print("Cannot use both ZJetsToQQ and DYJetsToQQ due to duplicate events? Investigate. Removing ZJetsToQQ datasets for now ..", True)
            datasetsMgr.remove(filter(lambda name: "ZJetsToQQ" in name, datasetsMgr.getAllDatasetNames()))
            
        # Merge histograms (see NtupleAnalysis/python/tools/plots.py) 
        plots.mergeRenameReorderForDataMC(datasetsMgr) 
        
        # Get Luminosity
        intLumi = datasetsMgr.getDataset("Data").getLuminosity()

        # Merge EWK samples
        if opts.mergeEWK:
            datasetsMgr.merge("EWK", GetListOfEwkDatasets(datasetsMgr))
            plots._plotStyles["EWK"] = styles.getAltEWKStyle()

        # Print dataset information
        datasetsMgr.PrintInfo()
        
        # Apply TDR style
        style = tdrstyle.TDRStyle()
        style.setOptStat(True)
        style.setGridX(opts.gridX)
        style.setGridY(opts.gridY)

        # Get histogram list
        folder     = opts.folder
        histoList  = datasetsMgr.getDataset(datasetsMgr.getAllDatasetNames()[0]).getDirectoryContent(folder)
        histoPaths1 = [os.path.join(folder, h) for h in histoList]
        histoPaths2 = [h for h in histoPaths1 if "jet" not in h.lower()]
        nHistos     = len(histoPaths2)
        
        # Calculate Signal Significance for all histograms
        for h in histoList:
            PlotCutFlowEfficiency(h, datasetsMgr, intLumi) 
    return
Exemplo n.º 20
0
def PlotAndFitTemplates(datasetsMgr,
                        histoName,
                        folderName,
                        opts,
                        doFakeB=False):
    Verbose("PlotAndFitTemplates()")

    # Definitions
    inclusiveFolder = folderName
    genuineBFolder = folderName + "EWKGenuineB"
    fakeBFolder = folderName + "EWKFakeB"
    if doFakeB:
        ewkFolder = genuineBFolder
        bkgName = "FakeB"
    else:
        ewkFolder = inclusiveFolder
        bkgName = "QCD"

    # Create the plotters
    p1 = plots.DataMCPlot(datasetsMgr, "%s/%s" % (inclusiveFolder, histoName))
    p2 = plots.DataMCPlot(datasetsMgr, "%s/%s" % (ewkFolder, histoName))
    p3 = plots.DataMCPlot(datasetsMgr, "%s/%s" % (inclusiveFolder, histoName))
    p4 = plots.DataMCPlot(datasetsMgr, "%s/%s" % (ewkFolder, histoName))

    if 0:
        p1.histoMgr.forEachHisto(lambda h: h.getRootHisto().RebinX(2))
        p2.histoMgr.forEachHisto(lambda h: h.getRootHisto().RebinX(2))
        p3.histoMgr.forEachHisto(lambda h: h.getRootHisto().RebinX(2))
        p4.histoMgr.forEachHisto(lambda h: h.getRootHisto().RebinX(2))

    # Get the histograms
    Data_baseline = p1.histoMgr.getHisto("Data").getRootHisto().Clone(
        "Baseline Data")  #also legend entry name
    FakeB_baseline = p1.histoMgr.getHisto("Data").getRootHisto().Clone(
        "Baseline " + bkgName)
    EWK_baseline = p2.histoMgr.getHisto("EWK").getRootHisto().Clone(
        "Baseline EWK")
    Data_inverted = p3.histoMgr.getHisto("Data").getRootHisto().Clone(
        "Inverted Data")
    FakeB_inverted = p3.histoMgr.getHisto("Data").getRootHisto().Clone(
        "Inverted " + bkgName)
    EWK_inverted = p4.histoMgr.getHisto("EWK").getRootHisto().Clone(
        "Inverted EWK")

    # Create FakeB histos: FakeB = (Data - EWK)
    msg = "Disabled EWK subtraction (Use Case: Control Triggers)"
    Print(ShellStyles.WarningLabel() + msg, True)
    #FakeB_baseline.Add(EWK_baseline, -1)
    #FakeB_inverted.Add(EWK_inverted, -1)

    # Create the final plot object
    compareHistos = [EWK_baseline]
    p = plots.ComparisonManyPlot(FakeB_inverted, compareHistos, saveFormats=[])
    p.setLuminosity(GetLumi(datasetsMgr))

    # Apply styles
    p.histoMgr.forHisto("Inverted " + bkgName, styles.getFakeBStyle())
    p.histoMgr.forHisto("Baseline EWK", styles.getAltEWKStyle())

    # Set draw style
    p.histoMgr.setHistoDrawStyle("Inverted " + bkgName, "P")
    p.histoMgr.setHistoDrawStyle("Baseline EWK", "AP")

    # Set legend style
    p.histoMgr.setHistoLegendStyle("Inverted " + bkgName, "P")
    p.histoMgr.setHistoLegendStyle("Baseline EWK", "LP")
    # p.histoMgr.setHistoLegendStyleAll("LP")

    # Set legend labels
    if doFakeB:
        p.histoMgr.setHistoLegendLabelMany({
            "Baseline EWKGenuineB": "EWK (GenuineB)",
            "Inverted FakeB": "Fake-b",
        })
    else:
        p.histoMgr.setHistoLegendLabelMany({
            "Baseline EWK": "EWK",
            "Inverted " + bkgName: "QCD",
        })

    #=========================================================================================
    # Set Minimizer Options
    #=========================================================================================
    '''
    https://root.cern.ch/root/htmldoc/guides/users-guide/FittingHistograms.html#the-th1fit-method
    https://root.cern.ch/root/html/src/ROOT__Math__MinimizerOptions.h.html#a14deB
    '''
    if 0:
        ROOT.Math.MinimizerOptions.SetDefaultMinimizer("Minuit", "Migrad")
        ROOT.Math.MinimizerOptions.SetDefaultStrategy(
            2)  # Speed = 0, Balance = 1, Robustness = 2
        ROOT.Math.MinimizerOptions.SetDefaultMaxFunctionCalls(
            5000)  # set maximum of function calls
        ROOT.Math.MinimizerOptions.SetDefaultMaxIterations(
            5000
        )  # set maximum iterations (one iteration can have many function calls)
    if 0:
        ROOT.Math.MinimizerOptions.SetDefaultMinimizer("Minuit", "Simplex")
        ROOT.Math.MinimizerOptions.SetDefaultMinimizer("Minuit", "Minimize")
        ROOT.Math.MinimizerOptions.SetDefaultMinimizer("Minuit",
                                                       "MigradImproved")
        ROOT.Math.MinimizerOptions.SetDefaultMinimizer("Minuit", "Scan")
        ROOT.Math.MinimizerOptions.SetDefaultMinimizer("Minuit", "Seek")
        ROOT.Math.MinimizerOptions.SetDefaultErrorDef(
            1
        )  # error definition (=1. for getting 1 sigma error for chi2 fits)
        ROOT.Math.MinimizerOptions.SetDefaultMaxFunctionCalls(
            1000000)  # set maximum of function calls
        ROOT.Math.MinimizerOptions.SetDefaultMaxIterations(
            1000000
        )  # set maximum iterations (one iteration can have many function calls)
        ROOT.Math.MinimizerOptions.SetDefaultPrecision(
            -1
        )  # precision in the objective function calculation (value <= 0 means left to default)
        ROOT.Math.MinimizerOptions.SetDefaultPrintLevel(
            1
        )  # None = -1, Reduced = 0, Normal = 1, ExtraForProblem = 2, Maximum = 3
        ROOT.Math.MinimizerOptions.SetDefaultTolerance(
            1e-03
        )  # Minuit/Minuit2 converge when the EDM is less a given tolerance. (default 1e-03)
    if 1:
        hLine = "=" * 45
        title = "{:^45}".format("Minimzer Options")
        print "\t", hLine
        print "\t", title
        print "\t", hLine
        minOpt = ROOT.Math.MinimizerOptions()
        minOpt.Print()
        print "\t", hLine, "\n"

    #=========================================================================================
    # Start fit process
    #=========================================================================================
    binLabels = ["Inclusive"]
    FITMIN = 80
    FITMAX = 1000
    #moduleInfoString = opts.dataEra + "_" + opts.searchMode + "_" + opts.optMode
    moduleInfoString = opts.optMode

    #=========================================================================================
    # Create templates (EWK fakes, EWK genuine, QCD; data template is created by manager)
    #=========================================================================================
    manager = QCDNormalization.QCDNormalizationManagerDefault(
        binLabels, opts.mcrab, moduleInfoString)
    template_EWKFakeB_Baseline = manager.createTemplate("EWKFakeB_Baseline")
    template_EWKFakeB_Inverted = manager.createTemplate("EWKFakeB_Inverted")
    template_EWKInclusive_Baseline = manager.createTemplate(
        "EWKInclusive_Baseline")
    template_EWKInclusive_Inverted = manager.createTemplate(
        "EWKInclusive_Inverted")
    template_FakeB_Baseline = manager.createTemplate("QCD_Baseline")
    template_FakeB_Inverted = manager.createTemplate("QCD_Inverted")

    #=========================================================================================
    # EWK
    #=========================================================================================
    par0 = [+7.1817e-01, 0.0, 1.0]  # cb_norm
    par1 = [+1.7684e+02, 150.0, 200.0]  # cb_mean
    par2 = [+2.7287e+01, 20.0, 40.0]  # cb_sigma (fixed for chiSq=2)
    par3 = [-3.9174e-01, -0.5, 0.0]  # cb_alpha (fixed for chiSq=2)
    par4 = [+2.5104e+01, 0.0, 50.0]  # cb_n
    par5 = [+7.4724e-05, 0.0, 1.0]  # expo_norm
    par6 = [-4.6848e-02, -1.0, 0.0]  # expo_a
    par7 = [+2.1672e+02, 200.0, 250.0]  # gaus_mean (fixed for chiSq=2)
    par8 = [+6.3201e+01, 20.0, 80.0]  # gaus_sigma

    template_EWKInclusive_Baseline.setFitter(
        QCDNormalization.FitFunction("EWKFunction",
                                     boundary=0,
                                     norm=1,
                                     rejectPoints=0), FITMIN, FITMAX)
    template_EWKInclusive_Baseline.setDefaultFitParam(
        defaultInitialValue=None,
        defaultLowerLimit=[
            par0[1], par1[1], par2[0], par3[0], par4[1], par5[1], par6[1],
            par7[0], par8[1]
        ],
        defaultUpperLimit=[
            par0[2], par1[2], par2[0], par3[0], par4[2], par5[2], par6[2],
            par7[0], par8[2]
        ])

    #=========================================================================================
    # FakeB/QCD
    #=========================================================================================
    par0 = [8.9743e-01, 0.0, 1.0]  # lognorm_norm
    par1 = [2.3242e+02, 300.0, 1000.0]  # lognorm_mean
    par2 = [1.4300e+00, 0.5, 10.0]  # lognorm_shape
    par3 = [2.2589e+02, 100.0, 500.0]  # gaus_mean
    par4 = [4.5060e+01, 0.0, 100.0]  # gaus_sigma

    template_FakeB_Inverted.setFitter(
        QCDNormalization.FitFunction("QCDFunctionAlt",
                                     boundary=0,
                                     norm=1,
                                     rejectPoints=0), FITMIN, FITMAX)
    template_FakeB_Inverted.setDefaultFitParam(
        defaultInitialValue=None,
        defaultLowerLimit=[par0[1], par1[1], par2[1], par3[1], par4[1]],
        defaultUpperLimit=[par0[2], par1[2], par2[2], par3[2], par4[2]])

    #=========================================================================================
    # Set histograms to the templates
    #=========================================================================================
    # When doFakeB is True, EWK_baseline/EWK_inverted were already cloned from
    # the EWKGenuineB folder (ewkFolder = genuineBFolder above), so the same
    # histograms are used in both cases.
    template_EWKFakeB_Baseline.setHistogram(EWK_baseline, "Inclusive")
    template_EWKFakeB_Inverted.setHistogram(EWK_inverted, "Inclusive")
    template_EWKInclusive_Baseline.setHistogram(EWK_baseline, "Inclusive")
    template_EWKInclusive_Inverted.setHistogram(EWK_inverted, "Inclusive")
    template_FakeB_Baseline.setHistogram(FakeB_baseline, "Inclusive")
    template_FakeB_Inverted.setHistogram(FakeB_inverted, "Inclusive")

    #=========================================================================================
    # Make plots of templates
    #=========================================================================================
    manager.plotTemplates()

    #=========================================================================================
    # Fit individual templates to histogram "data_baseline", with custom fit options
    #=========================================================================================
    fitOptions = "R B L W 0 Q M"
    manager.calculateNormalizationCoefficients(Data_baseline, fitOptions,
                                               FITMIN, FITMAX)

    # Only for when the measurement is done in bins
    fileName = os.path.join(
        opts.mcrab,
        "QCDInvertedNormalizationFactors%s.py" % (getModuleInfoString(opts)))
    manager.writeNormFactorFile(fileName, opts)

    if 1:
        saveName = fileName.replace("/", "_")

        # Draw the histograms
        plots.drawPlot(
            p, saveName,
            **GetHistoKwargs(histoName))  #the "**" unpacks the kwargs_

        # Save plot in all formats
        SavePlot(p, saveName, os.path.join(opts.saveDir, "Fit"))
    return
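What calculateNormalizationCoefficients ultimately produces is a transfer factor from the inverted (control) region to the baseline (signal-like) region. A hedged, stand-alone sketch of that arithmetic, assuming the fit returns the QCD fraction of the baseline data (the names below are illustrative, not the framework's API):

# Hedged sketch (illustrative names, not the framework's API): convert the
# fitted QCD fraction of the baseline data into a CR-to-SR transfer factor.
def transfer_factor(fittedQcdFractionBaseline, nBaselineData, nInvertedQcd):
    nBaselineQcd = fittedQcdFractionBaseline * nBaselineData  # QCD yield attributed to baseline
    return nBaselineQcd / nInvertedQcd                        # weight applied to the inverted shape

# Example: the fit attributes 15% of 2000 baseline events to QCD,
# while the inverted selection holds 12000 QCD-like events.
print(transfer_factor(0.15, 2000.0, 12000.0))  # -> 0.025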
Exemplo n.º 21
0
def main(opts):

    #optModes = ["", "OptChiSqrCutValue50", "OptChiSqrCutValue100"]
    optModes = [""]

    if opts.optMode != None:
        optModes = [opts.optMode]
        
    # For-loop: All opt Mode
    for opt in optModes:
        opts.optMode = opt

        # Setup & configure the dataset manager 
        datasetsMgr = GetDatasetsFromDir(opts)
        datasetsMgr.updateNAllEventsToPUWeighted()
        datasetsMgr.loadLuminosities() # from lumi.json

        datasetsMgr_matched = GetDatasetsFromDir(opts)
        datasetsMgr_matched.updateNAllEventsToPUWeighted()
        datasetsMgr_matched.loadLuminosities() # from lumi.json

        plots.mergeRenameReorderForDataMC(datasetsMgr) 
        datasetsMgr.remove(filter(lambda name: "QCD_b" in name, datasetsMgr.getAllDatasetNames())) #soti
        datasetsMgr_matched.remove(filter(lambda name: "QCD" in name, datasetsMgr_matched.getAllDatasetNames())) #soti
        # Set/Overwrite cross-sections
        datasetsToRemove = ["QCD-b"]#, "QCD_HT50to100", "QCD_HT100to200"]#, "QCD_HT200to300"]#, "QCD_HT300to500"]
        for d in datasetsMgr.getAllDatasets():
            if "ChargedHiggs" in d.getName():
                datasetsMgr.getDataset(d.getName()).setCrossSection(1.0) # ATLAS 13 TeV H->tb exclusion limits
                #if d.getName() != opts.signal:
                if "M_650" in d.getName():  #soti fixmi
                    datasetsToRemove.append(d.getName())
                if "M_800" in d.getName():
                    datasetsToRemove.append(d.getName())
                if "M_200" in d.getName():
                    datasetsToRemove.append(d.getName())

        if opts.verbose:
            datasetsMgr.PrintCrossSections()
            datasetsMgr.PrintLuminosities()

        # Custom Filtering of datasets 
        for i, d in enumerate(datasetsToRemove, 0):
            msg = "Removing dataset %s" % d
            Print(ShellStyles.WarningLabel() + msg + ShellStyles.NormalStyle(), i==0)
            datasetsMgr.remove(filter(lambda name: d in name, datasetsMgr.getAllDatasetNames()))
            datasetsMgr_matched.remove(filter(lambda name: d in name, datasetsMgr_matched.getAllDatasetNames())) #soti
        if opts.verbose:
            datasetsMgr.PrintInfo()

        # ZJets and DYJets overlap
        if "ZJetsToQQ_HT600toInf" in datasetsMgr.getAllDatasetNames() and "DYJetsToQQ_HT180" in datasetsMgr.getAllDatasetNames():
            Print("Cannot use both ZJetsToQQ and DYJetsToQQ due to duplicate events? Investigate. Removing ZJetsToQQ datasets for now ..", True)
            datasetsMgr.remove(filter(lambda name: "ZJetsToQQ" in name, datasetsMgr.getAllDatasetNames()))


        #datasetsMgr.merge("QCD", GetListOfQCDatasets())
        #plots._plotStyles["QCD"] = styles.getQCDLineStyle()
        # Merge histograms (see NtupleAnalysis/python/tools/plots.py) 
        # Get Luminosity
        #intLumi = datasetsMgr.getDataset("Data").getLuminosity() Soti
        intLumi = 35920

        # Re-order datasets (different for inverted than default=baseline)

        newOrder = []
        # For-loop: All MC datasets
        for d in datasetsMgr.getMCDatasets():
            newOrder.append(d.getName())
        
        # Move signal to top
#        if opts.signal in newOrder:
#            s = newOrder.pop( newOrder.index(opts.signal) )
#            newOrder.insert(0, s)
        print len(newOrder), "newOrder"
        signalMass = ["M_300", "M_500", "M_1000"]
        for d in datasetsMgr.getMCDatasets():
            for m in signalMass:
                if m in d.getName():
                    s = newOrder.pop( newOrder.index(d.getName()) )
                    newOrder.insert(0, s)
                    #datasetsMgr.selectAndReorder(newOrder)
        print len(newOrder), "newOrder"
        # Add Data to list of samples!
        if not opts.onlyMC:
            newOrder.insert(0, "Data")
            
        # Apply new dataset order!
        datasetsMgr.selectAndReorder(newOrder)

        # Merge EWK samples
        if opts.mergeEWK:
            datasetsMgr.merge("EWK", aux.GetListOfEwkDatasets())
            plots._plotStyles["EWK"] = styles.getAltEWKStyle()

        # Print dataset information
        datasetsMgr.PrintInfo()
        
        # Apply TDR style
        style = tdrstyle.TDRStyle()
        style.setOptStat(True)
        style.setGridX(opts.gridX)
        style.setGridY(opts.gridY)

        # Do Data-MC histograms with DataDriven QCD
        folder     = opts.folder
        histoList  = datasetsMgr.getDataset(datasetsMgr.getAllDatasetNames()[0]).getDirectoryContent(folder)
        histoPaths1 = [os.path.join(folder, h) for h in histoList]
        histoPaths2 = [h for h in histoPaths1]# if "jet" not in h.lower()]
        nHistos     = len(histoPaths2)

        # For-loop: All histograms
        for i, h in enumerate(histoPaths2, 1):
            msg   = "{:<9} {:>3} {:<1} {:<3} {:<50}".format("Histogram", "%i" % i, "/", "%s:" % (nHistos), h)
            Print(ShellStyles.SuccessStyle() + msg + ShellStyles.NormalStyle(), i==1)
            PlotHistograms(datasetsMgr, datasetsMgr_matched, h, intLumi)
        ROOT.gStyle.SetNdivisions(10, "X")
    Print("All plots saved under directory %s" % (ShellStyles.NoteStyle() + aux.convertToURL(opts.saveDir, opts.url) + ShellStyles.NormalStyle()), True)
    return
Exemplo n.º 22
0
    def _doCalculate(self, shape, moduleInfoString, normFactors, optionPrintPurityByBins, optionDoNQCDByBinHistograms):
        '''
        Calculates the result
        '''
        Verbose("Calculate final shape in signal region (shape * w_QCD) & initialize result containers", True)
        nSplitBins = shape.getNumberOfPhaseSpaceSplitBins()

        Verbose("Create Shape", True)
        self._resultShape = aux.Clone(shape.getDataDrivenQCDHistoForSplittedBin(0))
        self._resultShape.Reset()
        self._resultShape.SetTitle("NQCDFinal_Total_%s"%moduleInfoString)
        self._resultShape.SetName("NQCDFinal_Total_%s"%moduleInfoString)

        Verbose("Create EWK shape", True)
        self._resultShapeEWK = aux.Clone(shape.getDataDrivenQCDHistoForSplittedBin(0))
        self._resultShapeEWK.Reset()
        self._resultShapeEWK.SetTitle("NQCDFinal_EWK_%s"%moduleInfoString)
        self._resultShapeEWK.SetName("NQCDFinal_EWK_%s"%moduleInfoString)

        Verbose("Create Purity shape", True)
        self._resultShapePurity = aux.Clone(shape.getDataDrivenQCDHistoForSplittedBin(0))
        self._resultShapePurity.Reset()
        self._resultShapePurity.SetTitle("NQCDFinal_Purity_%s"%moduleInfoString)
        self._resultShapePurity.SetName("NQCDFinal_Purity_%s"%moduleInfoString)

        self._histogramsList = []
        myUncertaintyLabels  = ["statData", "statEWK"]
        self._resultCountObject = extendedCount.ExtendedCount(0.0, [0.0, 0.0], myUncertaintyLabels)

        if optionDoNQCDByBinHistograms:
            for i in range(0, nSplitBins):
                hBin = aux.Clone(self._resultShape)
                hBin.SetTitle("NQCDFinal_%s_%s"%(shape.getPhaseSpaceBinFileFriendlyTitle(i).replace(" ",""), moduleInfoString))
                hBin.SetName("NQCDFinal_%s_%s"%(shape.getPhaseSpaceBinFileFriendlyTitle(i).replace(" ",""), moduleInfoString))
                self._histogramsList.append(hBin)

        if isinstance(self._resultShape, ROOT.TH2):
            self._doCalculate2D(nSplitBins, shape, normFactors, optionPrintPurityByBins, optionDoNQCDByBinHistograms, myUncertaintyLabels)
            return

        # Initialize counters for purity calculation in final shape binning
        myShapeDataSum       = []
        myShapeDataSumUncert = []
        myShapeEwkSum        = []
        myShapeEwkSumUncert  = []

        # For-loop: All Bins
        for j in range(1,self._resultShape.GetNbinsX()+1):
            myShapeDataSum.append(0.0)
            myShapeDataSumUncert.append(0.0)
            myShapeEwkSum.append(0.0)
            myShapeEwkSumUncert.append(0.0)

        Verbose("Calculate results separately for each phase-space bin and then combine", True)
        # For-loop: All measurement bins (e.g. tau pT bins for HToTauNu)
        for i in range(0, nSplitBins):
            # N.B: The "Inclusive" value is in the zeroth bin

            Verbose("Get data-driven QCD, data, and MC EWK shape histogram for the phase-space bin", True)
            h     = shape.getDataDrivenQCDHistoForSplittedBin(i)
            hData = shape.getDataHistoForSplittedBin(i)
            hEwk  = shape.getEwkHistoForSplittedBin(i)
            
            Verbose("Get normalization factor", True)
            wQCDLabel = shape.getPhaseSpaceBinFileFriendlyTitle(i)
            if self._optionUseInclusiveNorm:
                wQCDLabel = "Inclusive"
            wQCD = 0.0
            
            if not wQCDLabel in normFactors.keys():
                msg = "No normalization factors available for bin '%s' when accessing histogram %s! Ignoring this bin..." % (wQCDLabel,shape.getHistoName())
                Print(ShellStyles.WarningLabel() + msg, True)
            else:
                wQCD = normFactors[wQCDLabel]                
            msg = "Weighting bin \"%i\" (label=\"%s\")  with normFactor \"%s\"" % (i, wQCDLabel, wQCD)
            Verbose(ShellStyles.NoteLabel() + msg, True)

            # Construct info table (debugging)
            table  = []
            align  = "{:>6} {:^10} {:^15} {:>10} {:>10} {:>10} {:^3} {:^8} {:^3} {:^8}"
            header = align.format("Bin", "Width", "Range", "Content", "NormFactor", "QCD", "+/-", "Data", "+/-", "EWK")
            hLine  = "="*90
            table.append("{:^90}".format(shape.getHistoName()))
            table.append(hLine)
            table.append(header)
            table.append(hLine)

            binSum    = 0.0
            nBins     = h.GetNbinsX()
            binWidth  = hData.GetBinWidth(0)
            xMin      = hData.GetXaxis().GetBinCenter(0)
            xMax      = hData.GetXaxis().GetBinCenter(nBins+1)

            # For-Loop (nested): All bins in the shape histogram 
            for j in range(1, nBins+1):

                # Initialise values
                myResult         = 0.0
                myStatDataUncert = 0.0
                myStatEwkUncert  = 0.0

                # Ignore zero bins
                if abs(h.GetBinContent(j)) > 0.00001:
                    Verbose("Calculating the result")
                    binContent = h.GetBinContent(j)
                    binRange   = "%.1f -> %.1f" % (h.GetXaxis().GetBinLowEdge(j), h.GetXaxis().GetBinUpEdge(j) )
                    binWidth   = GetTH1BinWidthString(h, j)
                    binSum    += binContent
                    myResult   = binContent * wQCD # apply normalisation factor (transfer from CR to SR)

                    Verbose("Calculate abs. stat. uncert. for data and for MC EWK (Do not calculate here MC EWK syst.)", True)
                    myStatDataUncert = hData.GetBinError(j) * wQCD
                    myStatEwkUncert  = hEwk.GetBinError(j)  * wQCD
                    table.append(align.format(j, binWidth, binRange, "%0.1f" % binContent, wQCD, "%.1f" % myResult, "+/-", "%.1f" % myStatDataUncert, "+/-", "%.1f" % myStatEwkUncert))

                # Get count object
                myCountObject = extendedCount.ExtendedCount(myResult, [myStatDataUncert, myStatEwkUncert], myUncertaintyLabels)
                self._resultCountObject.add(myCountObject)

                if optionDoNQCDByBinHistograms:
                    Verbose("Setting bin content \"%i\"" % (j), True)
                    self._histogramsList[i].SetBinContent(j, myCountObject.value())
                    self._histogramsList[i].SetBinError(j, myCountObject.statUncertainty())

                binContent = self._resultShape.GetBinContent(j) + myCountObject.value()
                binError   = self._resultShape.GetBinError(j) + myCountObject.statUncertainty()**2
                Verbose("Setting bin %i to content %0.1f +/- %0.1f" % (j, binContent, binError), j==0)
                self._resultShape.SetBinContent(j, binContent)
                self._resultShape.SetBinError(j, binError) # Sum squared (take sqrt outside loop on final squared sum)
                
                Verbose("Sum items for purity calculation", True)
                myShapeDataSum[j-1]       += hData.GetBinContent(j)*wQCD
                myShapeDataSumUncert[j-1] += (hData.GetBinError(j)*wQCD)**2
                myShapeEwkSum[j-1]        += hEwk.GetBinContent(j)*wQCD
                myShapeEwkSumUncert[j-1]  += (hEwk.GetBinError(j)*wQCD)**2

            # Delete the shape histograms
            h.Delete()
            hData.Delete()
            hEwk.Delete()

        # For-loop: All shape bins
        for j in range(1,self._resultShape.GetNbinsX()+1):
            # Take square root of uncertainties
            self._resultShape.SetBinError(j, math.sqrt(self._resultShape.GetBinError(j)))

        # Print detailed results in a formatted table
        qcdResults = self._resultCountObject.getResultAndStatErrorsDict()
        bins       = "%.0f-%.0f" % (1, nBins)
        binRange   = "%.1f -> %.1f" % (xMin, xMax)
        binSum     = "%.1f" % binSum
        nQCD       = "%.1f" % qcdResults["value"]
        dataStat   = "%.1f" % qcdResults["statData"]
        ewkStat    = "%.1f" % qcdResults["statEWK"]
        table.append(align.format(bins, binWidth, binRange, binSum, wQCD, nQCD, "+/-", dataStat, "+/-", ewkStat))
        table.append(hLine)
        for i, line in enumerate(table):
            if i == len(table)-2:
                Verbose(ShellStyles.TestPassedStyle()+line+ShellStyles.NormalStyle(), i==0)
            else:
                Verbose(line, i==0)

        if optionPrintPurityByBins:
            Verbose("Printing Shape Purity bin-by-bin.", True)
            self.PrintPurityByBins(nBins, shape, myShapeDataSum, myShapeDataSumUncert, myShapeEwkSum, myShapeEwkSumUncert)
        return
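The debug table above is assembled with Python's str.format alignment mini-language. A shortened, stand-alone illustration of the same column specification (not framework code, fewer columns than the original align string):

# Small stand-alone illustration of the str.format alignment specifiers used
# for the debug table above: ">" right-aligns, "^" centers, the number fixes
# the column width.
align = "{:>6} {:^10} {:^15} {:>10}"
print(align.format("Bin", "Width", "Range", "Content"))
print(align.format(1, "20.0", "80.0 -> 100.0", "12.3"))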
Exemplo n.º 23
0
 def _initialize(self, h):
     if len(self._binLabels) > 0:
         return
     myTitle = h.GetTitle()
     myList = myTitle.split(self._separator)
     myFactorisationBins = int(
         len(myList) / 2
     )  # allows for the title of the histogram to be placed after the last separator
     myOutput = ""
     for i in range(0, myFactorisationBins):
         self._binLabels.append(myList[i * 2])
         if myList[i * 2 + 1].isdigit():
             self._binCount.append(int(myList[i * 2 + 1]))
         else:
             # try a bug fix by taking first character only
             if myList[i * 2 + 1][0].isdigit():
                 print ShellStyles.WarningLabel(
                 ) + "UnfoldedHistogramReader::_initialize(): tried naive bug fix for last factorisation bin dimension (guessed dimension: %s, histo: %s)" % (
                     myList[i * 2 + 1][0], myList[i * 2 + 1][1:])
                 self._binCount.append(int(myList[i * 2 + 1][0]))
             else:
                 raise Exception(
                     ShellStyles.ErrorLabel() +
                     "UnfoldedHistogramReader: failed to decompose histogram title (it should contain the bin label and nbins information for n bins separated with '%s'\nHistogram title was: %s"
                     % (self._separator, myTitle))
         myOutput += "%s nbins=%d " % (self._binLabels[i],
                                       self._binCount[i])
     if self._debugStatus:
         print "UnfoldedHistogramReader: Histogram binning determined as : %s" % myOutput
     if len(self._binLabels) == 0:
         raise Exception(
             ShellStyles.ErrorLabel() +
             "UnfoldedHistogramReader: failed to decompose histogram title (it should contain the bin label and nbins information for n bins separated with '%s'\nHistogram title was: %s"
             % (self._separator, myTitle))
     self._unfoldedBinCount = h.GetNbinsY()
     # Loop over y axis to find axis values
     myBinCaptions = []
     myBinRanges = []
     for i in range(1, h.GetNbinsY() + 1):
         mySplitBin = h.GetYaxis().GetBinLabel(i).split("/")
         # Obtain bin captions
         if len(self._factorisationCaptions) == 0:
             for s in mySplitBin:
                 myCaption = ""
                 if "=" in s:
                     myCaption = s.split("=")[0]
                 elif ">" in s:
                     myCaption = s.split(">")[0]
                 elif "<" in s:
                     myCaption = s.split("<")[0]
                 self._factorisationFullBinLabels.append([])
                 self._factorisationCaptions.append(myCaption)
                 self._factorisationRanges.append([])
         # Obtain range information
         for k in range(0, len(mySplitBin)):
             if not mySplitBin[k] in self._factorisationFullBinLabels[k]:
                 self._factorisationFullBinLabels[k].append(mySplitBin[k])
             # Remove label and equal signs
             s = mySplitBin[k].replace(self._factorisationCaptions[k],
                                       "").replace("=", "")
             if not s in self._factorisationRanges[k]:
                 self._factorisationRanges[k].append(s)
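A compact illustration of the title decomposition performed in _initialize(): the title is split on the separator into (label, nBins) pairs, and any trailing text after the last pair (the histogram's own title) is ignored. The separator and the example title below are assumptions for illustration only:

# Illustrative sketch only (":" as separator and the title are made up):
# split the title into (label, nBins) pairs, ignoring any trailing text.
def decompose_title(title, separator=":"):
    parts = title.split(separator)
    nPairs = int(len(parts) / 2)  # trailing histogram title is ignored
    return [(parts[2 * i], int(parts[2 * i + 1])) for i in range(nPairs)]

# Example: two factorisation axes, tau pT with 4 bins and |eta| with 3 bins
print(decompose_title("tauPt:4:absEta:3:MyHistogram"))
# -> [('tauPt', 4), ('absEta', 3)]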
Exemplo n.º 24
0
    def __init__(self,
                 dataPath,
                 ewkPath,
                 dsetMgr,
                 luminosity,
                 moduleInfoString,
                 normFactors,
                 optionCalculateQCDNormalizationSyst=True,
                 normDataSrc = None,
                 normEWKSrc  = None,
                 optionUseInclusiveNorm=False,
                 verbose=False):
        self._shapePlots = []
        self._shapePlotLabels = []
        self._QCDNormalizationSystPlots = []
        self._QCDNormalizationSystPlotLabels = []
        self._moduleInfoString = moduleInfoString
        self._useInclusiveNorm = optionUseInclusiveNorm
        if len(normFactors.keys()) == 1 and normFactors.keys()[0] == "Inclusive":
            self._useInclusiveNorm = True
        self._verbose = verbose

        msg = "Obtaining final shape from data path \"%s\"" % (dataPath) 
        Verbose(ShellStyles.HighlightStyle() + msg + ShellStyles.NormalStyle(), True)

        # Determine list of plots to consider
        myObjects = dsetMgr.getDataset("Data").getDirectoryContent(dataPath)

        # Ignore unwanted histograms and those designed for HToTauNu 
        keywordList = ["JetEtaPhi"]
        ignoreList  = []
        for k in keywordList:
            ignoreList.extend(filter(lambda name: k in name, myObjects))
            
        msg = "Ignoring a total of %s histograms:" % (len(ignoreList))
        Print(ShellStyles.WarningLabel() + msg, True)
        for hName in ignoreList:
            print "\t", os.path.join(dataPath, hName) 

        # Update myObjects list with filtered results
        myObjects = list(x for x in myObjects if x not in ignoreList)
        
        # For-Loop: All plots to consider
        for i, plotName in enumerate(myObjects, 1):
            
            # For testing
            #if "LdgTrijetMass_AfterAllSelections" not in plotName:
            #    continue

            msg = "{:<9} {:>3} {:<1} {:<3} {:<50}".format("Histogram", "%i" % i, "/", "%s:" % (len(myObjects)), os.path.join(dataPath, plotName) )
            Print(ShellStyles.HighlightAltStyle() + msg + ShellStyles.NormalStyle(), i==1)

            # Ensure that histograms exist
            dataOk = self._sanityChecks(dsetMgr, dataPath, plotName) 
            ewkOk  = self._sanityChecks(dsetMgr, ewkPath, plotName)

            Verbose("Obtaining shape plots (the returned object is not owned)", True)
            myShapeHisto = self._obtainShapeHistograms(i, dataPath, ewkPath, dsetMgr, plotName, luminosity, normFactors)
            
            # Obtain plots for systematics coming from met shape difference for control plots #FIXME-Systematics
            if optionCalculateQCDNormalizationSyst:
                if isinstance(myShapeHisto, ROOT.TH2):
                    msg = "Skipping met shape uncertainty because histogram has more than 1 dimensions!"
                    Print(ShellStyles.WarningLabel() + msg, True)
                else:
                    self._obtainQCDNormalizationSystHistograms(myShapeHisto, dsetMgr, plotName, luminosity, normDataSrc, normEWKSrc)
        return
Exemplo n.º 25
0
 def _doCalculate(self, shape, moduleInfoString, normFactors,
                  optionPrintPurityByBins, optionDoNQCDByBinHistograms):
     # Calculate final shape in signal region (shape * w_QCD)
     nSplitBins = shape.getNumberOfPhaseSpaceSplitBins()
     # Initialize result containers
     self._resultShape = aux.Clone(
         shape.getDataDrivenQCDHistoForSplittedBin(0))
     self._resultShape.Reset()
     self._resultShape.SetTitle("NQCDFinal_Total_%s" % moduleInfoString)
     self._resultShape.SetName("NQCDFinal_Total_%s" % moduleInfoString)
     self._resultShapeEWK = aux.Clone(
         shape.getDataDrivenQCDHistoForSplittedBin(0))
     self._resultShapeEWK.Reset()
     self._resultShapeEWK.SetTitle("NQCDFinal_EWK_%s" % moduleInfoString)
     self._resultShapeEWK.SetName("NQCDFinal_EWK_%s" % moduleInfoString)
     self._resultShapePurity = aux.Clone(
         shape.getDataDrivenQCDHistoForSplittedBin(0))
     self._resultShapePurity.Reset()
     self._resultShapePurity.SetTitle("NQCDFinal_Purity_%s" %
                                      moduleInfoString)
     self._resultShapePurity.SetName("NQCDFinal_Purity_%s" %
                                     moduleInfoString)
     self._histogramsList = []
     myUncertaintyLabels = ["statData", "statEWK"]
     self._resultCountObject = extendedCount.ExtendedCount(
         0.0, [0.0, 0.0], myUncertaintyLabels)
     if optionDoNQCDByBinHistograms:
         for i in range(0, nSplitBins):
             hBin = aux.Clone(self._resultShape)
             hBin.SetTitle(
                 "NQCDFinal_%s_%s" %
                 (shape.getPhaseSpaceBinFileFriendlyTitle(i).replace(
                     " ", ""), moduleInfoString))
             hBin.SetName(
                 "NQCDFinal_%s_%s" %
                 (shape.getPhaseSpaceBinFileFriendlyTitle(i).replace(
                     " ", ""), moduleInfoString))
             self._histogramsList.append(hBin)
     if isinstance(self._resultShape, ROOT.TH2):
         self._doCalculate2D(nSplitBins, shape, normFactors,
                             optionPrintPurityByBins,
                             optionDoNQCDByBinHistograms,
                             myUncertaintyLabels)
         return
      # Initialize counters for purity calculation in final shape binning
     myShapeDataSum = []
     myShapeDataSumUncert = []
     myShapeEwkSum = []
     myShapeEwkSumUncert = []
     for j in range(1, self._resultShape.GetNbinsX() + 1):
         myShapeDataSum.append(0.0)
         myShapeDataSumUncert.append(0.0)
         myShapeEwkSum.append(0.0)
         myShapeEwkSumUncert.append(0.0)
     # Calculate results separately for each phase space bin and then combine
     for i in range(0, nSplitBins):
         # Get data-driven QCD, data, and MC EWK shape histogram for the phase space bin
         h = shape.getDataDrivenQCDHistoForSplittedBin(i)
         hData = shape.getDataHistoForSplittedBin(i)
         hEwk = shape.getEwkHistoForSplittedBin(i)
         # Get normalization factor
         wQCDLabel = shape.getPhaseSpaceBinFileFriendlyTitle(i)
         if self._optionUseInclusiveNorm:
             wQCDLabel = "Inclusive"
         wQCD = 0.0
         if not wQCDLabel in normFactors.keys():
             print ShellStyles.WarningLabel(
             ) + "No normalization factors available for bin '%s' when accessing histogram %s! Ignoring this bin..." % (
                 wQCDLabel, shape.getHistoName())
         else:
             wQCD = normFactors[wQCDLabel]
         # Loop over bins in the shape histogram
         for j in range(1, h.GetNbinsX() + 1):
             myResult = 0.0
             myStatDataUncert = 0.0
             myStatEwkUncert = 0.0
             if abs(h.GetBinContent(j)) > 0.00001:  # Ignore zero bins
                 # Calculate result
                 myResult = h.GetBinContent(j) * wQCD
                 # Calculate abs. stat. uncert. for data and for MC EWK
                 myStatDataUncert = hData.GetBinError(j) * wQCD
                 myStatEwkUncert = hEwk.GetBinError(j) * wQCD
                 #errorPropagation.errorPropagationForProduct(hLeg1.GetBinContent(j), hLeg1Data.GetBinError(j), myEffObject.value(), myEffObject.uncertainty("statData"))
                 # Do not calculate here MC EWK syst.
             myCountObject = extendedCount.ExtendedCount(
                 myResult, [myStatDataUncert, myStatEwkUncert],
                 myUncertaintyLabels)
             self._resultCountObject.add(myCountObject)
             if optionDoNQCDByBinHistograms:
                 self._histogramsList[i].SetBinContent(
                     j, myCountObject.value())
                 self._histogramsList[i].SetBinError(
                     j, myCountObject.statUncertainty())
             self._resultShape.SetBinContent(
                 j,
                 self._resultShape.GetBinContent(j) + myCountObject.value())
             self._resultShape.SetBinError(
                 j,
                 self._resultShape.GetBinError(j) +
                 myCountObject.statUncertainty()**2)  # Sum squared
             # Sum items for purity calculation
             myShapeDataSum[j - 1] += hData.GetBinContent(j) * wQCD
             myShapeDataSumUncert[j - 1] += (hData.GetBinError(j) * wQCD)**2
             myShapeEwkSum[j - 1] += hEwk.GetBinContent(j) * wQCD
             myShapeEwkSumUncert[j - 1] += (hEwk.GetBinError(j) * wQCD)**2
         h.Delete()
         hData.Delete()
         hEwk.Delete()
     # Take square root of uncertainties
     for j in range(1, self._resultShape.GetNbinsX() + 1):
         self._resultShape.SetBinError(
             j, math.sqrt(self._resultShape.GetBinError(j)))
     # Print result
     print "NQCD Integral(%s) = %s " % (
         shape.getHistoName(),
         self._resultCountObject.getResultStringFull("%.1f"))
     # Print purity as function of final shape bins
     if optionPrintPurityByBins:
         print "Purity of shape %s" % shape.getHistoName()
         print "shapeBin purity purityUncert"
     for j in range(1, self._resultShape.GetNbinsX() + 1):
         myPurity = 0.0
         myPurityUncert = 0.0
         if abs(myShapeDataSum[j - 1]) > 0.000001:
             myPurity = 1.0 - myShapeEwkSum[j - 1] / myShapeDataSum[j - 1]
             myPurityUncert = errorPropagation.errorPropagationForDivision(
                 myShapeEwkSum[j - 1],
                 math.sqrt(myShapeEwkSumUncert[j - 1]),
                 myShapeDataSum[j - 1],
                 math.sqrt(myShapeDataSumUncert[j - 1]))
         # Store MC EWK content
         self._resultShapeEWK.SetBinContent(j, myShapeEwkSum[j - 1])
         self._resultShapeEWK.SetBinError(
             j, math.sqrt(myShapeEwkSumUncert[j - 1]))
         self._resultShapePurity.SetBinContent(j, myPurity)
         self._resultShapePurity.SetBinError(j, myPurityUncert)
         # Print purity info of final shape
         if optionPrintPurityByBins:
             myString = ""
             if j < self._resultShape.GetNbinsX():
                 myString = "%d..%d" % (
                     self._resultShape.GetXaxis().GetBinLowEdge(j),
                     self._resultShape.GetXaxis().GetBinUpEdge(j))
             else:
                 myString = ">%d" % (
                     self._resultShape.GetXaxis().GetBinLowEdge(j))
             myString += " %.3f %.3f" % (myPurity, myPurityUncert)
             print myString
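The purity uncertainty above relies on errorPropagation.errorPropagationForDivision; assuming it implements the standard uncorrelated error propagation for a ratio, the calculation for a single bin looks like this (stand-alone sketch, not framework code):

# Stand-alone sketch, assuming standard uncorrelated error propagation for a
# ratio; the framework's errorPropagationForDivision is expected to match.
import math

def purity_with_uncert(ewkSum, ewkUncert, dataSum, dataUncert):
    if abs(dataSum) < 0.000001:
        return 0.0, 0.0
    if ewkSum == 0.0:
        return 1.0, 0.0  # no EWK contamination measured
    ratio = ewkSum / dataSum
    ratioUncert = abs(ratio) * math.sqrt((ewkUncert / ewkSum) ** 2 +
                                         (dataUncert / dataSum) ** 2)
    return 1.0 - ratio, ratioUncert  # purity = 1 - EWK/Data

# Example: 12 +/- 2 EWK events under 100 +/- 10 data events in a bin
print(purity_with_uncert(12.0, 2.0, 100.0, 10.0))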
Exemplo n.º 26
0
def importNormFactors(era, searchMode, optimizationMode, multicrabDirName):
    '''
    Imports the auto-generated FakeBTransferFactors.py file, which is
    created by the plotting/fitting templates script (plotQCD_Fit.py).

    This file contains the results of fitting the m_{jjb} template shapes
    from QCD (Inverted Data) and EWK (Baseline MC) to the Baseline Data.

    Results include the fit details for each shape and the QCD NormFactor for
    moving from the Control Region (CR) to the Signal Region (SR).

    The script also produces a folder with the histogram ROOT files and the
    individual fits. The folder name will be normalisationPlots/<OptsMode> and
    it will be placed inside the <pseudomulticrab_dir>. The auto-generated
    python file will be placed in the cwd (i.e. work/).
    '''
    # Find candidates for normalisation scripts
    scriptList = getNormFactorFileList(dirName=multicrabDirName,
                                       fileBaseName=opts.normFactorsSrc)

    # Create a string with the module information used
    moduleInfoString = getModuleInfoString(era, searchMode, optimizationMode)

    # Construct source file name
    src = getGetNormFactorsSrcFilename(multicrabDirName,
                                       opts.normFactorsSrc % moduleInfoString)

    # Check if normalization coefficients are suitable for the chosen era
    Verbose("Reading normalisation factors from:\n\t%s" % src, True)

    # Split the path to get just the file name of src
    pathList = src.replace(".py", "").split("/")

    # Insert the directory where the normFactor files reside into the path so that they are found
    if len(pathList) > 1:
        cwd = os.getenv("PWD")
        # Get directories to src in a list [i.e. remove the last entry (file-name) from the pathList]
        dirList = map(str, pathList[:(len(pathList) - 1)])
        srcDir = "/".join(dirList)
        sys.path.insert(0, os.path.join(cwd, srcDir))

    # Import the (normFactor) src file
    Print(
        "Importing the transfer factors from src file %s" %
        (ShellStyles.NoteStyle() + src + ShellStyles.NormalStyle()), True)
    srcBase = os.path.basename("/".join(pathList))
    normFactorsImport = __import__(srcBase)

    # Get the function definition
    myNormFactorsSafetyCheck = getattr(normFactorsImport,
                                       "QCDInvertedNormalizationSafetyCheck")
    Verbose(
        "Check that the era=%s, searchMode=%s, optimizationMode=%s info matches!"
        % (era, searchMode, optimizationMode))
    myNormFactorsSafetyCheck(era, searchMode, optimizationMode)

    # Obtain normalization factors
    myNormFactorsImport = getattr(normFactorsImport, "QCDNormalization")

    # Systematic Variations
    msg = "Disabled NormFactors SystVar Fake Weighting Up/Down"
    Print(ShellStyles.WarningLabel() + msg, True)
    # myNormFactorsImportSystVarFakeWeightingDown = getattr(normFactorsImport, "QCDPlusEWKFakeTausNormalizationSystFakeWeightingVarDown") #FIXME
    # myNormFactorsImportSystVarFakeWeightingUp   = getattr(normFactorsImport, "QCDPlusEWKFakeTausNormalizationSystFakeWeightingVarUp")   #FIXME

    # Import the normalisation factors and inform user
    myNormFactors = {}
    if "FakeB" in opts.analysisName:
        myNormFactors[opts.normFactorKey] = myNormFactorsImport
    elif "GenuineB" in opts.analysisName:
        myNormFactors[opts.normFactorKey] = {'Inclusive': 1.0}
    else:
        raise Exception("This should not be reached!")

    # Inform user of normalisation factors
    msg = "Obtained %s normalisation factor dictionary. The values are:" % (
        ShellStyles.NoteStyle() + opts.normFactorKey +
        ShellStyles.NormalStyle())
    Print(msg, True)
    for i, k in enumerate(myNormFactors[opts.normFactorKey], 1):
        keyName = k
        keyValue = myNormFactors[opts.normFactorKey][k]
        #msg += "%s = %s" % (keyName, keyValue)
        msg = "%s = %s" % (keyName, keyValue)
        Print(msg, i == 0)

    # Inform user of weighting up/down
    msg = "Disabled NormFactors Weighting Up/Down"
    Verbose(ShellStyles.WarningLabel() + msg, True)  #fixme
    # myNormFactors["FakeWeightingDown"] = myNormFactorsImportSystVarFakeWeightingDown # FIXME
    # myNormFactors["FakeWeightingUp"]   = myNormFactorsImportSystVarFakeWeightingUp   # FIXME
    return myNormFactors
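The dynamic import above (sys.path.insert followed by __import__ on the file's base name) is the core mechanism; a minimal, hypothetical sketch with a placeholder file path (the "QCDNormalization" attribute name follows the getattr call above, everything else is illustrative):

# Minimal hypothetical sketch of the dynamic-import pattern used above;
# the file path below is a placeholder, not the framework's actual output.
import os
import sys

def import_norm_factors(srcPath, attributeName="QCDNormalization"):
    srcDir, srcFile = os.path.split(os.path.abspath(srcPath))
    moduleName = srcFile.replace(".py", "")
    sys.path.insert(0, srcDir)              # make the generated file importable
    module = __import__(moduleName)         # import by base name, no package
    return getattr(module, attributeName)   # pull out the factor dictionary

# Usage (hypothetical path):
# normFactors = import_norm_factors("work/QCDInvertedNormalizationFactors_Run2016.py")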