def getHistFromPkl(subdirs, plotName, sys, *selectors):
    """Sum the cached histograms selected by each selector for one plot.

    The cache file is ``<plotDir>/<subdirs...>/<plotName>.pkl``. Its unpickled
    content is kept in the module-level ``loadedPkls`` dict, so every file is
    read from disk only once.

    Args:
        subdirs: tuple of sub-directory names below ``plotDir``.
        plotName: name of the plot; also the base name of the cache file.
        sys: suffix appended to ``plotName`` to form the first-level key of
            the cache ('' selects the nominal entry).
        *selectors: each selector is an iterable of substrings; a cached
            histogram matches a selector when its key contains all of them.

    Returns:
        The histograms that were matched unambiguously, combined with
        ``addHist``, or a falsy value when nothing was found (an error is
        logged in that case).

    Raises:
        KeyError: if the cache file has no ``plotName + sys`` entry.
    """
    global loadedPkls  # shared pickle cache; only mutated here, never rebound
    hist = None
    resultFile = os.path.join(*((plotDir,) + subdirs + (plotName + '.pkl',)))

    if os.path.isdir(os.path.dirname(resultFile)):
        if resultFile not in loadedPkls:
            with lock(resultFile, 'rb') as f:
                loadedPkls[resultFile] = pickle.load(f)
        for selector in selectors:
            filtered = {
                s: h
                for s, h in loadedPkls[resultFile][plotName + sys].items()
                if all(s.count(sel) for sel in selector)
            }
            if len(filtered) == 1:
                hist = addHist(hist, next(iter(filtered.values())))
            elif len(filtered) > 1:
                # Ambiguous selector: nothing is added for it
                log.error('Multiple possibilities to look for ' + str(selector) + ': ' + str(list(filtered.keys())))
    else:
        log.error('Missing cache file ' + resultFile)

    # For 'Scale' systematics requested on something that is not selected with
    # 'MuonEG', fetch the nominal and varied data histograms and let
    # applySysToOtherHist combine them with the selected histogram.
    if 'Scale' in sys and not any('MuonEG' in sel for sel in selectors):
        data = getHistFromPkl(subdirs, plotName, '', ['MuonEG'], ['DoubleEG'], ['DoubleMuon'])
        dataSys = getHistFromPkl(subdirs, plotName, sys, ['MuonEG'], ['DoubleEG'], ['DoubleMuon'])
        hist = applySysToOtherHist(data, dataSys, hist)

    if not hist:
        log.error('Missing ' + str(selectors) + ' for plot ' + plotName + ' in ' + resultFile)
    return hist
def saveToCache(self, dir, sys):
    """Store the histograms of this plot in ``<dir>/<self.name>.pkl``.

    The cache file holds a dict keyed by ``self.name + sys``; each value is a
    dict mapping ``sample.name + sample.texName`` to the histogram. An
    existing file is read first so that entries written earlier are kept;
    when it cannot be read, a fresh dict with only this entry is written.

    Args:
        dir: directory of the cache file; created when missing.
        sys: suffix identifying the systematic variation; a falsy value
            selects the nominal key ``self.name``.
    """
    try:
        os.makedirs(dir)
    except OSError:
        # Best effort: usually the directory exists already. A real problem
        # shows up when the cache file is opened for writing below.
        pass

    resultFile = os.path.join(dir, self.name + '.pkl')
    histos = {s.name + s.texName: h for s, h in self.histos.items()}
    plotName = self.name + (sys if sys else '')

    try:
        # keepLock/existingLock are options of the project's lock helper;
        # presumably the lock taken for reading is held until the write below
        # is done - confirm against the lock implementation.
        with lock(resultFile, 'rb', keepLock=True) as f:
            allPlots = pickle.load(f)
            allPlots.update({plotName: histos})
    except Exception:
        # Missing or unreadable cache: start a new one. KeyboardInterrupt and
        # SystemExit are deliberately NOT caught, otherwise an interrupted read
        # would go on to overwrite the existing cache with a single entry.
        allPlots = {plotName: histos}

    with lock(resultFile, 'wb', existingLock=True) as f:
        pickle.dump(allPlots, f)
    log.info("Plot " + plotName + " saved to cache")
def loadFromCache(self, resultsDir):
    """Replace the histograms in ``self.histos`` by those stored in the cache.

    Reads ``<resultsDir>/<self.name>.pkl`` and, for every sample key ``s`` of
    ``self.histos``, takes the nominal entry
    ``allPlots[self.name][s.name + s.texName]``.

    Args:
        resultsDir: directory containing the cache file.

    Returns:
        True when the cache file could not be read or lacks a needed entry (a
        warning is logged); None otherwise.
        NOTE(review): the source this was reconstructed from had lost its
        indentation; ``return True`` is placed in the failure branch here -
        confirm against the callers.
    """
    resultsFile = os.path.join(resultsDir, self.name + '.pkl')
    try:
        with lock(resultsFile, 'rb') as f:
            allPlots = pickle.load(f)
        for s in self.histos.keys():
            self.histos[s] = allPlots[self.name][s.name + s.texName]
    except Exception:
        # Exception instead of a bare except, so Ctrl-C / SystemExit are not
        # reported as a missing results file.
        log.warning('No resultsfile for ' + self.name + '.pkl')
        return True
def calcSystematics(self, stackForSys, systematics, linearSystematics, resultsDir, postFitInfo=None, addMCStat=True):
    """Compute the relative up and down systematic uncertainty per bin.

    For the nominal case and for every systematic key the histograms of all
    samples in ``stackForSys`` are taken from ``<resultsDir>/<self.name>.pkl``
    and summed. The per-bin differences with respect to the nominal sum are
    then added in quadrature, separately for upward and downward shifts,
    together with the linear (percentage) systematics.

    Args:
        stackForSys: samples (objects with ``name`` and ``texName``) to sum.
        systematics: dict whose keys are the names of the variations,
            presumably ending in 'Up'/'Down' - confirm against the caller.
        linearSystematics: dict whose values are ``(sampleFilter, unc)`` with
            ``unc`` a percentage; a falsy ``sampleFilter`` applies it to all
            histograms in ``self.histos``, otherwise only to samples whose
            name contains one of the filter entries.
        resultsDir: directory containing the cache file.
        postFitInfo: optional dict ``{substring: scale}``; histograms whose
            name contains the substring are scaled by the given factor.
        addMCStat: when true, add per-sample 'StatUp'/'StatDown' variations
            built from the bin errors.

    Returns:
        dict with keys 'Up' and 'Down'; each value is a histogram holding the
        square root of the summed squared uncertainties divided by the nominal
        summed histogram.
    """
    resultsFile = os.path.join(resultsDir, self.name + '.pkl')
    with lock(resultsFile, 'rb') as f:
        allPlots = pickle.load(f)

    sysKeys = list(systematics.keys())
    if addMCStat:
        sysKeys += [s.name + s.texName + 'StatUp' for s in stackForSys]
        sysKeys += [s.name + s.texName + 'StatDown' for s in stackForSys]

    histos_summed = {}
    for sys in [None] + sysKeys:
        if sys and not any(x in sys for x in ['Stat', 'sideBand', 'Scale']):
            plotName = self.name + sys  # in the 2D cache, the first key is plotname+sys
        else:
            plotName = self.name  # for nominal and some exceptions
        if plotName not in allPlots:  # check if sys variation has been run already
            # %-style arguments: sys is None for the nominal case, which made
            # the former string concatenation raise a TypeError.
            log.error('No %s variation found for %s', sys if sys else 'nominal', self.name)

        histos_summed[sys] = None
        for histName in [s.name + s.texName for s in stackForSys]:  # in the 2D cache, the second key is name+texName of the sample
            if sys and 'Scale' in sys and not 'noData' in resultsDir:
                # ugly hack to apply scale systematics on MC instead of data (only when data is available)
                data, dataSys = None, None
                for d in [d for d in allPlots[self.name] if d.count('data')]:  # for data (if available depending on ee, mumu, emu, SF)
                    data = addHist(data, allPlots[self.name][d])  # get nominal for data
                    dataSys = addHist(dataSys, allPlots[self.name + sys][d])  # and the eScale or phScale sys for data
                h = applySysToOtherHist(data, dataSys, allPlots[plotName][histName].Clone())  # apply the eScale or phScale sys on MC
            elif sys and 'sideBand' in sys:
                # ugly hack to apply side band uncertainty
                h = applySidebandUnc(allPlots[self.name][histName].Clone(), self.name, resultsDir, 'Up' in sys)
            else:
                # normal case, simply taken from cache
                h = allPlots[plotName][histName].Clone()

            # MC statistics for plots: shift only the sample the variation is named after.
            # NOTE(review): the loops cover bins 0..GetNbinsX(); bin GetNbinsX()+1 is not touched - confirm intended.
            if sys and 'StatUp' in sys and sys.replace('StatUp', '') in histName:
                for i in range(0, h.GetNbinsX() + 1):
                    h.SetBinContent(i, h.GetBinContent(i) + h.GetBinError(i))
            if sys and 'StatDown' in sys and sys.replace('StatDown', '') in histName:
                for i in range(0, h.GetNbinsX() + 1):
                    h.SetBinContent(i, h.GetBinContent(i) - h.GetBinError(i))

            if postFitInfo:  # apply post-fit scalefactors if available
                for i in postFitInfo:
                    if histName.count(i):
                        h.Scale(postFitInfo[i])

            if h.Integral() == 0:
                log.debug("Found empty histogram %s:%s in %s/%s.pkl", plotName, histName, resultsDir, self.name)
            if self.scaleFactor:
                h.Scale(self.scaleFactor)
            normalizeBinWidth(h, self.normBinWidth)
            self.addOverFlowBin1D(h, self.overflowBin)
            histos_summed[sys] = addHist(histos_summed[sys], h)

    # Adding the systematics in quadrature
    nominal = histos_summed[None]
    relErrors = {}
    for variation in ['Up', 'Down']:  # Consider both up and down variations separately
        summedErrors = nominal.Clone()
        summedErrors.Reset()
        nBins = summedErrors.GetNbinsX()

        for sys in sysKeys:
            sysOther = sys.replace('Up', 'Down') if 'Up' in sys else sys.replace('Down', 'Up')
            for i in range(nBins + 1):
                uncertainty = histos_summed[sys].GetBinContent(i) - nominal.GetBinContent(i)
                uncertaintyOther = histos_summed[sysOther].GetBinContent(i) - nominal.GetBinContent(i)
                if uncertainty * uncertaintyOther > 0 and abs(uncertainty) < abs(uncertaintyOther):
                    continue  # Check if both up and down go to same direction, only take the maximum
                if (variation == 'Up' and uncertainty > 0) or (variation == 'Down' and uncertainty < 0):
                    if sys.count('fsr'):
                        # Hacky, scale fsr uncertainty with 1/sqrt(2) as recommended for TOP pag (in the fit this is handled in the cards)
                        uncertainty *= 1 / sqrt(2)
                    summedErrors.SetBinContent(i, summedErrors.GetBinContent(i) + uncertainty**2)

        for sampleFilter, unc in linearSystematics.values():
            # The selection of affected histograms does not depend on the bin
            if sampleFilter:
                affected = [h for s, h in self.histos.items() if any(s.name.count(f) for f in sampleFilter)]
            else:
                affected = list(self.histos.values())
            for i in range(nBins + 1):
                # 100. forces float division: an integer percentage would be
                # truncated to 0 by Python 2 integer division.
                uncertainty = unc / 100. * sum(h.GetBinContent(i) for h in affected)
                summedErrors.SetBinContent(i, summedErrors.GetBinContent(i) + uncertainty**2)

        for i in range(nBins + 1):
            summedErrors.SetBinContent(i, sqrt(summedErrors.GetBinContent(i)))

        summedErrors.Divide(nominal)  # turn the absolute into a relative uncertainty
        relErrors[variation] = summedErrors

    return relErrors
def getSysHistos(self, stackForSys, resultsDir, systematics, postFitInfo=None, addMCStat=True):
    """Collect the nominal and systematically varied histograms of this plot.

    The content of ``<resultsDir>/<self.name>.pkl`` is read once and kept in
    the module-level ``loadedPkls`` dict; all work is done on clones so the
    cached histograms stay untouched.

    Args:
        stackForSys: samples (objects with ``name`` and ``texName``) to use.
        resultsDir: directory containing the cache file.
        systematics: iterable of systematic names without the 'Up'/'Down'
            suffix; both suffixes are appended here.
        postFitInfo: optional post-fit information handed to
            ``applyPostFitScaling``.
        addMCStat: when true, add per-sample 'StatUp'/'StatDown' variations
            built from the bin errors.

    Returns:
        Tuple ``(histos_summed, histos_splitted)``, both keyed by the
        systematic (``None`` for nominal): the first holds the sum over all
        samples, the second a dict ``{sample.name + sample.texName: hist}``.
    """
    resultsFile = os.path.join(resultsDir, self.name + '.pkl')
    if resultsFile not in loadedPkls:  # Speed optimization: check if plots already loaded
        with lock(resultsFile, 'rb') as f:
            loadedPkls[resultsFile] = pickle.load(f)
    allPlots = {
        i: {k: l.Clone() for k, l in j.items()}
        for i, j in loadedPkls[resultsFile].items()
    }

    if postFitInfo:  # Apply postfit scaling
        # Get first the sys histos without post-fit scaling. postFitInfo is
        # not passed on, so the recursion stops after one level; addMCStat is
        # not passed on either and takes its default.
        _, sysHistos = self.getSysHistos(stackForSys, resultsDir, systematics)
        for p in allPlots:
            # Then use it to apply the same post-fit as for the central value
            allPlots[p] = applyPostFitScaling(allPlots[p], postFitInfo, sysHistos)

    sysKeys = [i + 'Up' for i in systematics] + [i + 'Down' for i in systematics]
    if addMCStat:
        sysKeys += [s.name + s.texName + 'StatUp' for s in stackForSys]
        sysKeys += [s.name + s.texName + 'StatDown' for s in stackForSys]

    # Return values are ignored: presumably these helpers add the q2/pdf
    # entries to allPlots in place - confirm against their implementation.
    if 'q2' in systematics:
        constructQ2Sys(allPlots, self.name, stackForSys)
    if 'pdf' in systematics:
        constructPdfSys(allPlots, self.name, stackForSys)

    histos_summed = {}
    histos_splitted = {}
    for sys in [None] + sysKeys:
        histos_splitted[sys] = {}
        if sys and not any(x in sys for x in ['Stat', 'sideBand', 'Scale']):
            plotName = self.name + sys  # in the 2D cache, the first key is plotname+sys
        else:
            plotName = self.name  # for nominal and some exceptions
        if plotName not in allPlots:  # check if sys variation has been run already
            # %-style arguments: sys is None for the nominal case, which made
            # the former string concatenation raise a TypeError.
            log.error('No %s variation found for %s', sys if sys else 'nominal', self.name)

        histos_summed[sys] = None
        for histName in [s.name + s.texName for s in stackForSys]:  # in the 2D cache, the second key is name+texName of the sample
            if sys and 'Scale' in sys and not 'noData' in resultsDir:
                # ugly hack to apply scale systematics on MC instead of data (only when data is available)
                data, dataSys = None, None
                for d in [d for d in allPlots[self.name] if d.count('data')]:  # for data (if available depending on ee, mumu, emu, SF)
                    data = addHist(data, allPlots[self.name][d])  # get nominal for data
                    dataSys = addHist(dataSys, allPlots[self.name + sys][d])  # and the eScale or phScale sys for data
                h = applySysToOtherHist(data, dataSys, allPlots[plotName][histName].Clone())  # apply the eScale or phScale sys on MC
            elif sys and 'sideBand' in sys:
                # ugly hack to apply side band uncertainty
                h = applySidebandUnc(allPlots[self.name][histName].Clone(), self.name, resultsDir, 'Up' in sys)
            else:
                # normal case, simply taken from cache
                h = allPlots[plotName][histName].Clone()

            # MC statistics for plots: shift only the sample the variation is named after.
            # NOTE(review): the loops cover bins 0..GetNbinsX(); bin GetNbinsX()+1 is not touched - confirm intended.
            if sys and 'StatUp' in sys and sys.replace('StatUp', '') in histName:
                for i in range(0, h.GetNbinsX() + 1):
                    h.SetBinContent(i, h.GetBinContent(i) + h.GetBinError(i))
            if sys and 'StatDown' in sys and sys.replace('StatDown', '') in histName:
                for i in range(0, h.GetNbinsX() + 1):
                    h.SetBinContent(i, h.GetBinContent(i) - h.GetBinError(i))

            if h.Integral() == 0:
                log.debug("Found empty histogram %s:%s in %s/%s.pkl", plotName, histName, resultsDir, self.name)
            if self.scaleFactor:
                h.Scale(self.scaleFactor)
            histos_splitted[sys][histName] = h
            histos_summed[sys] = addHist(histos_summed[sys], h)

    return histos_summed, histos_splitted