def getEfficiency(datasets, numerator="Numerator", denominator="Denominator"): # statOption = ROOT.TEfficiency.kFNormal statOption = ROOT.TEfficiency.kFCP # Clopper-Pearson # statOption = ROOT.TEfficiency.kFFC # Feldman-Cousins first = True isData = False teff = ROOT.TEfficiency() for dataset in datasets: n = dataset.getDatasetRootHisto(numerator).getHistogram() d = dataset.getDatasetRootHisto(denominator).getHistogram() if d.GetEntries() == 0: continue checkNegatives(n, d) # removeNegatives(n) # removeNegatives(d) print dataset.getName(), "entries", n.GetEntries(), d.GetEntries() print " bins", n.GetNbinsX(), d.GetNbinsX() print " lowedge", n.GetBinLowEdge(1), d.GetBinLowEdge(1) eff = ROOT.TEfficiency(n, d) eff.SetStatisticOption(statOption) weight = 1 if dataset.isMC(): weight = dataset.getCrossSection() / d.GetEntries() for i in range(1, d.GetNbinsX() + 1): print " bin", i, d.GetBinLowEdge(i), n.GetBinContent( i), d.GetBinContent(i) eff.SetWeight(weight) if first: teff = eff if dataset.isData(): tn = n td = d first = False else: teff.Add(eff) if dataset.isData(): tn.Add(n) td.Add(d) if isData: teff = ROOT.TEfficiency(tn, td) teff.SetStatisticOption(self.statOption) return teff
def getEfficiency(datasets,numerator="Numerator",denominator="Denominator"): # statOption = ROOT.TEfficiency.kFNormal statOption = ROOT.TEfficiency.kFCP # Clopper-Pearson # statOption = ROOT.TEfficiency.kFFC # Feldman-Cousins first = True isData = False teff = ROOT.TEfficiency() for dataset in datasets: n = dataset.getDatasetRootHisto(numerator).getHistogram() d = dataset.getDatasetRootHisto(denominator).getHistogram() if d.GetEntries() == 0: continue checkNegatives(n,d) # removeNegatives(n) # removeNegatives(d) print dataset.getName(),"entries",n.GetEntries(),d.GetEntries() print " bins",n.GetNbinsX(),d.GetNbinsX() print " lowedge",n.GetBinLowEdge(1),d.GetBinLowEdge(1) eff = ROOT.TEfficiency(n,d) eff.SetStatisticOption(statOption) weight = 1 if dataset.isMC(): weight = dataset.getCrossSection()/d.GetEntries() for i in range(1,d.GetNbinsX()+1): print " bin",i,d.GetBinLowEdge(i),n.GetBinContent(i),d.GetBinContent(i) eff.SetWeight(weight) if first: teff = eff if dataset.isData(): tn = n td = d first = False else: teff.Add(eff) if dataset.isData(): tn.Add(n) td.Add(d) if isData: teff = ROOT.TEfficiency(tn, td) teff.SetStatisticOption(self.statOption) return teff
def GetEfficiency(datasetsMgr, datasets, numPath, denPath, intLumi): # Definitions myList = [] myList_MC = [] myList_Data = [] index = 0 _kwargs = GetHistoKwargs(numPath, opts) # For-loop: All datasets for dataset in datasets: if dataset.isMC(): n = dataset.getDatasetRootHisto(numPath) n.normalizeToLuminosity(intLumi) num = n.getHistogram() d = dataset.getDatasetRootHisto(denPath) d.normalizeToLuminosity(intLumi) den = d.getHistogram() else: num = dataset.getDatasetRootHisto(numPath).getHistogram() den = dataset.getDatasetRootHisto(denPath).getHistogram() total = den.Integral(0, den.GetXaxis().GetNbins()+1) selected = num.Integral(0, num.GetXaxis().GetNbins()+1) print "Numerical Efficiency", numPath, dataset.getName(), ":", round(selected/total, 3) print "Pass :"******" events" if "binList" in _kwargs: #if len(_kwargs["binList"]) == 1: # continue xBins = _kwargs["binList"] nx = len(xBins)-1 num = num.Rebin(nx, "", xBins) den = den.Rebin(nx, "", xBins) #elif "Eta" in numPath or "Phi" in numPath: # num = num.Rebin(2) # den = den.Rebin(2) # Sanity checks if den.GetEntries() == 0 or num.GetEntries() == 0: continue if num.GetEntries() > den.GetEntries(): continue # Create Efficiency plots with Clopper-Pearson stats eff = ROOT.TEfficiency(num, den) # fixme: investigate warnings eff.SetStatisticOption(ROOT.TEfficiency.kFCP) #FCP # Convert to TGraph eff = convert2TGraph(eff) return eff
def GetEfficiency(datasetsMgr, datasets, numPath, denPath, intLumi): # Definitions myList = [] myList_MC = [] myList_Data = [] index = 0 _kwargs = GetHistoKwargs(numPath, opts) # For-loop: All datasets for dataset in datasets: if dataset.isMC(): n = dataset.getDatasetRootHisto(numPath) n.normalizeToLuminosity(intLumi) num = n.getHistogram() d = dataset.getDatasetRootHisto(denPath) d.normalizeToLuminosity(intLumi) den = d.getHistogram() else: num = dataset.getDatasetRootHisto(numPath).getHistogram() den = dataset.getDatasetRootHisto(denPath).getHistogram() total = den.Integral(0, den.GetXaxis().GetNbins() + 1) selected = num.Integral(0, num.GetXaxis().GetNbins() + 1) print "Numerical Efficiency", numPath, dataset.getName(), ":", round( selected / total, 3) print "Pass :"******" events" if "binList" in _kwargs: #if len(_kwargs["binList"]) == 1: # continue xBins = _kwargs["binList"] nx = len(xBins) - 1 num = num.Rebin(nx, "", xBins) den = den.Rebin(nx, "", xBins) #elif "Eta" in numPath or "Phi" in numPath: # num = num.Rebin(2) # den = den.Rebin(2) # Sanity checks if den.GetEntries() == 0 or num.GetEntries() == 0: continue if num.GetEntries() > den.GetEntries(): continue # Create Efficiency plots with Clopper-Pearson stats eff = ROOT.TEfficiency(num, den) # fixme: investigate warnings eff.SetStatisticOption(ROOT.TEfficiency.kFCP) #FCP # Convert to TGraph eff = convert2TGraph(eff) return eff
def PlotEfficiency(datasetsMgr, numPath, denPath, intLumi): # Definitions myList = [] myList_MC = [] myList_Data = [] index = 0 _kwargs = GetHistoKwargs(numPath, opts) # For-loop: All datasets for dataset in datasetsMgr.getAllDatasets(): if "Fake" in numPath and "TT" in dataset.getName(): continue if dataset.isMC(): n = dataset.getDatasetRootHisto(numPath) n.normalizeToLuminosity(intLumi) num = n.getHistogram() d = dataset.getDatasetRootHisto(denPath) d.normalizeToLuminosity(intLumi) den = d.getHistogram() else: num = dataset.getDatasetRootHisto(numPath).getHistogram() den = dataset.getDatasetRootHisto(denPath).getHistogram() total = den.Integral(0, den.GetXaxis().GetNbins()+1) selected = num.Integral(0, num.GetXaxis().GetNbins()+1) print "Pass :"******" events" print "Numerical Efficiency", numPath, dataset.getName(), ":", round(selected/total, 3) if "binList" in _kwargs: #if len(_kwargs["binList"]) == 1: # continue xBins = _kwargs["binList"] nx = len(xBins)-1 num = num.Rebin(nx, "", xBins) den = den.Rebin(nx, "", xBins) elif "Eta" in numPath or "Phi" in numPath: num = num.Rebin(2) den = den.Rebin(2) # Sanity checks if den.GetEntries() == 0 or num.GetEntries() == 0: continue if num.GetEntries() > den.GetEntries(): continue # Create Efficiency plots with Clopper-Pearson stats eff = ROOT.TEfficiency(num, den) # fixme: investigate warnings eff.SetStatisticOption(ROOT.TEfficiency.kFCP) #FCP # Convert to TGraph eff = convert2TGraph(eff) # Apply default style (according to dataset name) plots._plotStyles[dataset.getName()].apply(eff) # Append in list myList.append(histograms.HistoGraph(eff, plots._legendLabels[dataset.getName()], "lp", "P")) if dataset.isMC(): eff_MC = eff if "QCD" in dataset.getName(): eff_QCD = eff elif "TT" in dataset.getName(): eff_TT= eff myList_MC.append(histograms.HistoGraph(eff, plots._legendLabels[dataset.getName()], "lp", "P")) else: eff_Data = eff plots._plotStyles[dataset.getName()].apply(eff_Data) #styles.dataStyle.apply(eff_Data) 
eff_Data.SetMarkerSize(1.2) myList_Data.append(histograms.HistoGraph(eff_Data, plots._legendLabels[dataset.getName()], "p", "P")) numPath = numPath.replace("AfterAllSelections_","") # Define save name saveName = "Eff_" + numPath.split("/")[-1] # Plot the efficiency p = plots.PlotBase(datasetRootHistos=myList, saveFormats=[]) plots.drawPlot(p, saveName, **_kwargs) p1 = plots.ComparisonManyPlot(histograms.HistoGraph(eff_Data, "Data", drawStyle="P"), myList_MC, saveFormats=[]) # Save plot in all formats savePath = os.path.join(opts.saveDir, "HplusMasses", numPath.split("/")[0], opts.optMode) save_path = savePath + opts.MVAcut print "save_path", save_path # Draw and save the plot p1.setLuminosity(intLumi) _kwargs["ratio"] = True _kwargs["ratioInvert"] = True _kwargs["cutBoxY"] = {"cutValue": 1.0, "fillColor": 16, "box": False, "line": True, "greaterThan": True, "mainCanvas": True, "ratioCanvas": True} plots.drawPlot(p1, save_path1, **_kwargs) SavePlot(p1, saveName, save_path, saveFormats = [".png", ".pdf", ".C"]) return
def main(opts):
    '''
    For every optimisation mode: set up the datasets, plot the Data and MC
    efficiency for each variable and write the results to a JSON file.

    The efficiency of a variable is the ratio of its
    "AfterAllSelections_" histogram to its "AfterStandardSelections_"
    histogram, both taken from opts.folder.

    opts : parsed command line options; opts.optMode is overwritten with the
           mode currently being processed

    NOTE(review): optList and pythonWriter are not defined in this function;
    presumably module-level objects - confirm.
    '''
    # Apply TDR style
    style = tdrstyle.TDRStyle()
    style.setOptStat(True)
    style.setGridX(False)
    style.setGridY(False)

    # If user does not define optimisation mode do all of them
    if opts.optMode is None:
        if len(optList) < 1:
            optList.append("")
        optModes = optList
    else:
        optModes = [opts.optMode]

    # Datasets whose name contains one of these keys are removed. Defined
    # here so that it exists whether or not a "Data" dataset is present.
    filterKeys = ["TTW"]

    # For-loop: All optimisation modes
    for opt in optModes:
        opts.optMode = opt

        # Setup & configure the dataset manager
        datasetsMgr = GetDatasetsFromDir(opts)
        datasetsMgr.updateNAllEventsToPUWeighted()
        datasetsMgr.loadLuminosities()  # from lumi.json

        if opts.verbose:
            datasetsMgr.PrintCrossSections()
            datasetsMgr.PrintLuminosities()

        # Set/Overwrite cross-sections
        for d in datasetsMgr.getAllDatasets():
            if "ChargedHiggs" in d.getName():
                datasetsMgr.getDataset(d.getName()).setCrossSection(1.0)

        # Print dataset information before removing anything
        datasetsMgr.PrintInfo()

        # Determine integrated Lumi before removing data
        if "Data" in datasetsMgr.getAllDatasetNames():
            intLumi = datasetsMgr.getDataset("Data").getLuminosity()
        else:
            intLumi = 35920

        # Remove datasets
        for key in filterKeys:
            datasetsMgr.remove([name for name in datasetsMgr.getAllDatasetNames() if key in name])

        # Re-order datasets
        datasetOrder = [d.getName() for d in datasetsMgr.getAllDatasets()]
        datasetsMgr.selectAndReorder(datasetOrder)

        # Define the mapping histograms in numerator->denominator pairs
        VariableList = [
            "LdgTop_Pt",
        ]

        minRunRange, maxRunRange, runRange = GetRunRange(datasetsMgr)

        # Merge histograms (see NtupleAnalysis/python/tools/plots.py)
        plots.mergeRenameReorderForDataMC(datasetsMgr)

        dataset_Data = datasetsMgr.getDataDatasets()
        dataset_MC = datasetsMgr.getMCDatasets()

        # Print dataset information
        datasetsMgr.PrintInfo()

        # For-loop: All numerator-denominator pairs
        counter = 0
        nPlots = len(VariableList)
        for var in VariableList:
            histoN = "AfterAllSelections_" + var
            histoD = "AfterStandardSelections_" + var
            numerator = os.path.join(opts.folder, histoN)
            denominator = os.path.join(opts.folder, histoD)

            counter += 1
            msg = "{:<9} {:>3} {:<1} {:<3} {:<50}".format("Histogram", "%i" % counter, "/", "%s:" % (nPlots), "%s" % (var))
            Print(ShellStyles.SuccessStyle() + msg + ShellStyles.NormalStyle(), counter == 1)

            plotName = "Eff_%s" % (var)

            # Get Efficiency Plots
            _kwargs = GetHistoKwargs(var, opts)
            eff_Data = GetEfficiency(datasetsMgr, dataset_Data, numerator, denominator, intLumi)
            eff_MC = GetEfficiency(datasetsMgr, dataset_MC, numerator, denominator, intLumi)

            # Apply Styles
            styles.dataStyle.apply(eff_Data)
            styles.qcdStyle.apply(eff_MC)

            # Create the plot
            p = plots.ComparisonPlot(histograms.HistoGraph(eff_Data, "eff_Data", "p", "P"),
                                     histograms.HistoGraph(eff_MC, "eff_MC", "p", "P"),
                                     saveFormats=[])

            # Define the legend entries
            p.histoMgr.setHistoLegendLabelMany(
                {
                    "eff_Data": "Data",
                    "eff_MC": "QCD"
                }
            )

            p.setLuminosity(intLumi)
            _kwargs["ratio"] = True
            _kwargs["opts"] = {"xmin": 0.0, "xmax": 600.0, "ymin": 0.0, "ymax": 0.16, "ymaxfactor": 1.8}
            _kwargs["cutBoxY"] = {"cutValue": 1.10, "fillColor": ROOT.kGray+1, "fillStyle": 3001,
                                  "box": False, "line": True, "greaterThan": True,
                                  "mainCanvas": False, "ratioCanvas": True, "mirror": True}

            # Draw
            plots.drawPlot(p, plotName, **_kwargs)
            savePath = os.path.join(opts.saveDir, opts.optMode)
            SavePlot(p, plotName, savePath, saveFormats=[".png", ".pdf", ".C"])

            # Save results in JSON
            name = opts.mcrab.split("_")[-3]
            name = name.replace(opts.analysisName, "")
            jsonName = "topMisID_" + name + "_TopMassCut400.json"
            label = "2016"
            plotDir = os.path.join(opts.folder, jsonName)
            pythonWriter.addParameters(plotDir, label, runRange, opts.intLumi, eff_Data)
            pythonWriter.addMCParameters(label, eff_MC)
            pythonWriter.writeJSON(jsonName)
    return
def PlotEfficiency(datasetsMgr, numPath, denPath, eff_def):
    '''
    Compare the efficiency numPath / denPath of the "TT" dataset with that of
    every other dataset and save the comparison plot.

    The "TT" efficiency is the reference of the comparison; all other
    datasets are drawn as variations. For eff_def == "genuineTop" the
    reference and the variation graphs are additionally handed to an
    UncertaintyWriter, which writes them to a JSON file.

    datasetsMgr : dataset manager providing getAllDatasets() / getDataset()
    numPath     : path of the numerator histogram
    denPath     : path of the denominator histogram
    eff_def     : "fakeTop", "inclusiveTop", "genuineTop" or anything else;
                  selects the x-axis label and is part of the save name

    Raises an Exception if the "TT" dataset does not yield a reference
    efficiency.
    '''
    myList = []
    default_eff = None
    refGraph = None
    datasetList = []
    ttVariationEff = []
    _kwargs = GetHistoKwargs(numPath, opts)

    # Rebin only if a non-empty bin list is configured
    xBins = _kwargs.get("binList", [])
    nx = 0
    if len(xBins) > 0:
        nx = len(xBins) - 1

    counter = 0

    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():
        num = dataset.getDatasetRootHisto(numPath).getHistogram()
        den = dataset.getDatasetRootHisto(denPath).getHistogram()
        if nx > 0:
            num = num.Rebin(nx, "", xBins)
            den = den.Rebin(nx, "", xBins)

        # Sanity checks
        if den.GetEntries() == 0 or num.GetEntries() == 0:
            continue
        if num.GetEntries() > den.GetEntries():
            continue

        if dataset.getName() == "TT":
            # Reference efficiency; it is only needed for the TT dataset, so
            # it is not rebuilt for every variation
            datasetTT = datasetsMgr.getDataset("TT")
            numTT = datasetTT.getDatasetRootHisto(numPath).getHistogram()
            denTT = datasetTT.getDatasetRootHisto(denPath).getHistogram()
            if nx > 0:
                numTT = numTT.Rebin(nx, "", xBins)
                denTT = denTT.Rebin(nx, "", xBins)

            # Clopper-Pearson stats
            eff_ref = ROOT.TEfficiency(numTT, denTT)
            eff_ref.SetStatisticOption(ROOT.TEfficiency.kFCP)

            # Convert to TGraph
            gEffRef = convert2TGraph(eff_ref)

            # Keep an unstyled copy for the uncertainty calculation
            default_eff = gEffRef.Clone()

            styles.ttStyle.apply(gEffRef)
            legend_ref = "t#bar{t}"
            if opts.type == "partonShower":
                legend_ref = "t#bar{t} (Pythia8)"
            elif opts.type == "evtGen":
                legend_ref = "t#bar{t} (Powheg)"
            refGraph = histograms.HistoGraph(gEffRef, legend_ref, "p", "P")
        else:
            # Clopper-Pearson stats
            eff = ROOT.TEfficiency(num, den)
            eff.SetStatisticOption(ROOT.TEfficiency.kFCP)

            # Convert to TGraph
            gEff = convert2TGraph(eff)

            datasetList.append(dataset.getName())
            ttVariationEff.append(gEff)

            styles.markerStyles[counter].apply(gEff)
            legend = dataset.getName().replace("TT_", "t#bar{t} (").replace("isr", "ISR ").replace("fsr", "FSR ")
            legend = legend.replace("hdamp", "hdamp ").replace("DOWN", "down").replace("UP", "up")
            legend = legend.replace("mtop1665", "m_{t} = 166.5 GeV")
            legend = legend.replace("mtop1695", "m_{t} = 169.5 GeV")
            legend = legend.replace("mtop1715", "m_{t} = 171.5 GeV")
            legend = legend.replace("mtop1735", "m_{t} = 173.5 GeV")
            legend = legend.replace("mtop1755", "m_{t} = 175.5 GeV")
            legend = legend.replace("mtop1785", "m_{t} = 178.5 GeV")
            legend = legend.replace("TuneEE5C", "Herwig++")
            legend += ")"
            counter += 1
            myList.append(histograms.HistoGraph(gEff, legend, "p", "P"))

    units = "GeV/c"
    if eff_def == "fakeTop":
        _kwargs["xlabel"] = "candidate p_{T} (%s)" % (units)
    elif eff_def == "inclusiveTop" or eff_def == "genuineTop":
        _kwargs["xlabel"] = "generated top p_{T} (%s)" % (units)
    else:
        _kwargs["xlabel"] = "p_{T} (%s)" % (units)

    # Define stuff
    saveName = "Efficiency_%s_%s" % (eff_def, opts.type)

    if refGraph is None:
        raise Exception("No reference efficiency for dataset \"TT\" (histogram \"%s\"); cannot draw the comparison" % (numPath))

    # Plot the efficiency
    p = plots.ComparisonManyPlot(refGraph, myList, saveFormats=[])
    savePath = os.path.join(opts.saveDir, opts.optMode)
    plots.drawPlot(p, savePath, **_kwargs)

    # Save plot in all formats
    SavePlot(p, saveName, savePath, saveFormats=[".png", ".pdf", ".C"])

    # ==============================================================================
    # I need the uncertainties from the ratio of all plots (ONLY for Genuine)
    # ==============================================================================
    if eff_def == "genuineTop":
        uncWriter = UncertaintyWriter()
        jsonName = "uncertainties_%s_BDT_%s.json" % (opts.type, opts.BDT)
        analysis = opts.analysisName
        saveDir = os.path.join("", jsonName)

        for datasetName, variationEff in zip(datasetList, ttVariationEff):
            uncWriter.addParameters(datasetName, analysis, saveDir, default_eff, variationEff)
        uncWriter.writeJSON(jsonName)
    return
def PlotProb(datasets, numPath, denPath): EfficiencyList = [] index = 0 for dataset in datasets: datasetName = dataset.getName() print "Dataset = ", datasetName statOption = ROOT.TEfficiency.kFNormal ## n = dataset.getDatasetRootHisto(numPath).getHistogram() # n.normalizeToOne() ## d = dataset.getDatasetRootHisto(denPath).getHistogram() nn = dataset.getDatasetRootHisto(numPath) nn.normalizeToLuminosity(35.8*(10**3)) n = nn.getHistogram() dd = dataset.getDatasetRootHisto(denPath) # dd.normalizeToOne() # dd.normalizeToLuminosity(36.3*(10**3)) dd.normalizeToLuminosity(35.8*(10**3)) # dd.normalizeByCrossSection() d = dd.getHistogram() # if "TT" in datasetName and ("Higgs" in numPath or "LdgBjetPt_isLdgFreeBjet" in numPath): # continue # elif "M_" in datasetName and not ("Higgs" in numPath or "LdgBjetPt_isLdgFreeBjet" in numPath): # continue if "Event" in numPath: n.Rebin(10) d.Rebin(10) else: n.Rebin(5) d.Rebin(5) if d.GetEntries() == 0 or n.GetEntries() == 0: continue if n.GetEntries() > d.GetEntries(): continue # Check Negatives CheckNegatives(n, d, True) # Remove Negatives RemoveNegatives(n) nBins = d.GetNbinsX() xMin = d.GetXaxis().GetXmin() xMax = d.GetXaxis().GetXmax() binwidth = int(n.GetBinWidth(0)) # ----------------------------------------------------------------------------------------- # # Ugly hack to ignore EMPTY (in the wanted range) histograms with overflows/underflows # ----------------------------------------------------------------------------------------- # if (0): print "\n" print "=========== getEfficiency:" print "Dataset = ", dataset.getName() print "Numerator: entries=", n.GetEntries(), " Bins=", n.GetNbinsX(), " Low edge=", n.GetBinLowEdge(1) print "Denominator: entries=", d.GetEntries(), " Bins=", d.GetNbinsX(), " Low edge=", d.GetBinLowEdge(1) print "\n" print ">>>>>> Sanity Check: <<<<<<" print "Numerator Mean = ", n.GetMean() print "Numerator RMS = ", n.GetRMS() print "Numerator Integral = ", n.Integral(1, nBins) print "Denominator Mean = 
", d.GetMean() print "Denominator RMS = ", d.GetRMS() print "Denominator Integral = ", d.Integral(1, nBins) if (n.GetMean() == 0 or d.GetMean() == 0): continue if (n.GetRMS() == 0 or d.GetRMS() == 0): continue if (n.Integral(1,nBins) == 0 or d.Integral(1,nBins) == 0): continue # if not (ROOT.TEfficiency.CheckConsistency(n,d)): continue; effic = ROOT.TEfficiency(n,d) effic.SetStatisticOption(statOption) weight = 1 if dataset.isMC(): weight = dataset.getCrossSection() effic.SetWeight(weight) eff = convert2TGraph(effic) # Apply Styles if "TT" in datasetName: if index == 0: styles.signalStyleHToTB500.apply(eff) # styles.ttStyle.apply(eff) eff.SetLineStyle(1) eff.SetLineWidth(3) eff.SetLineColor(619) legend = "Default: t#bar{t}" index = 1 else: styles.signalStyleHToTB500.apply(eff) # styles.ttStyle.apply(eff) eff.SetLineStyle(1) eff.SetLineWidth(3) eff.SetLineColor(417) legend = "#Delta R(q,q')>0.8: t#bar{t}" elif "M_500" in datasetName: styles.signalStyleHToTB500.apply(eff) legend = "H^{+} m_{H^{+}} = 500 GeV" elif "M_300" in datasetName: styles.signalStyleHToTB300.apply(eff) legend = "H^{+} m_{H^{+}} = 300 GeV" elif "M_1000" in datasetName: styles.signalStyleHToTB1000.apply(eff) legend = "H^{+} m_{H^{+}} = 1000 GeV" elif "M_800" in datasetName: styles.signalStyleHToTB800.apply(eff) legend = "H^{+} m_{H^{+}} = 800 GeV" elif "M_200" in datasetName: styles.signalStyleHToTB200.apply(eff) legend = "H^{+} m_{H^{+}} = 200 GeV" else: styles.ttStyle.apply(eff) legend = "other" EfficiencyList.append(histograms.HistoGraph(eff, legend, "lp", "P")) saveName = "Eff_"+numPath.split("/")[-1]+"Over"+denPath.split("/")[-1] if "Pt" in numPath: xMin = 0.0 # rebinX = 2 xMax = 805.0 # xMax = 555.0 # For topPt < 500GeV xTitle = "p_{T} (GeV/c)" units = "GeV/c" _format = "%0.1f" + units yTitle = "Efficiency / " + str(binwidth) + " "+units yMin = 0.0 yMax = 1.1 elif "_Eta" in numPath: xMin = -3.0 xMax = +3.0 xTitle = "#eta" yTitle = "Efficiency" yMin = 0.0 yMax = 1.1 elif "_Mass" in numPath: 
xMin = 50.0 xMax = 300 xTitle = "M (GeV/c^{2})" yTitle = "Efficiency" yMin = 0.0 yMax = 1.1 elif "_Phi" in numPath: xMin = -3 xMax = +3 xTitle = "#phi" yTitle = "Efficiency" yMin = 0.0 yMax = 1.1 else: xMin = 0.0 xMax = 250.0 xTitle = "xTitle" yTitle = "yTitle" yMin = 0.0 yMax = 1.1 if "Fake" in numPath: # xMin = 95.0 # rebinX = 4 xMax = 805.0 xTitle = "candidate p_{T} (GeV/c)" units = "GeV/c" _format = "%0.1f" + units yTitle = "Misid rate / " + str(binwidth) + " " +units yMin = 0.0 yMax = 0.11 if "Event" in numPath: rebinX = 2 # xMin = 95.0 xMax = 805.0 xTitle = "candidate p_{T} (GeV/c)" units = "GeV/c" _format = "%0.1f" + units yTitle = "Efficiency / " + str(binwidth) + " "+ units yMin = 0.0 yMax = 1.1 if "NonMatched" in numPath: xMin = 90.0 rebinX = 4 xMax = 700.0 xMax = 555.0 # For topPt < 500GeV xTitle = "p_{T} (GeV)" yTitle = "Efficiency" yMin = 0.0 yMax = 0.15 if "AllTopQuarkPt_MatchedBDT" in numPath and "TopQuarkPt" in denPath: xMin = 0.0 # rebinX = 4 xMax = 805.0 #705 units = "GeV/c" xTitle = "generated top p_{T} (GeV/c)" yTitle = "Efficiency / " + str(binwidth) + " " + units yMin = 0.0 yMax = 1.1 if "SameFake" in numPath: xMin = 95.0 rebinX = 4 xMax = 705.0 xMax = 555.0 # For topPt < 500GeV xTitle = "p_{T} [GeV]" yTitle = "Efficiency" yMin = 0.0 yMax = 1.1 options = {"ymin": yMin , "ymax": yMax, "xmin":xMin, "xMax":xMax} # if "TT" in datasetName and ("Higgs" in numPath or "LdgBjetPt_isLdgFreeBjet" in numPath): # return # if "M_" in datasetName and not ("Higgs" in numPath or "LdgBjetPt_isLdgFreeBjet" in numPath): # return p = plots.PlotBase(datasetRootHistos=EfficiencyList, saveFormats=kwargs.get("saveFormats")) #p = plots.ComparisonManyPlot(refEff, EfficiencyList, saveFormats=[]) p.createFrame(saveName, opts=options) # p.histoMgr.forEachHisto(lambda h: h.getRootHisto().RebinX(kwargs.get("rebinX"))) # Set Titles # p.getFrame().GetYaxis().SetTitle(kwargs.get("ylabel")) #"ylabel" p.getFrame().GetXaxis().SetTitle(xTitle) 
p.getFrame().GetYaxis().SetTitle(yTitle) # Set range p.getFrame().GetXaxis().SetRangeUser(xMin, xMax) moveLegend = {"dx": -0.55, "dy": -0.02, "dh": -0.2} # moveLegend = {"dx": -0.55, "dy": -0.01, "dh": -0.1} p.setLegend(histograms.moveLegend(histograms.createLegend(), **moveLegend)) # Add Standard Texts to plot histograms.addStandardTexts() p.draw() # Save plot in all formats savePath = os.path.join(opts.saveDir, "HplusMasses", numPath.split("/")[0], opts.optMode) save_path = savePath + opts.MVAcut # SavePlot(p, saveName, savePath) SavePlot(p, saveName, save_path) return
def GetCutEfficiencyHisto(dataset, histoName, statOpt, **kwargs): ''' See https://root.cern.ch/doc/master/classTEfficiency.html ''' HasKeys(["verbose", "normalizeTo", "cutDirection"], **kwargs) verbose = kwargs.get("verbose") normalizeTo = kwargs.get("normalizeTo") cutDirection = kwargs.get("cutDirection") Verbose("Calculating the cut-efficiency (%s) for histo with name %s" % (cutDirection, histoName)) # Choose statistics options statOpts = [ "kFCP", "kFNormal", "KFWilson", "kFAC", "kFFC", "kBJeffrey", "kBUniform", "kBayesian" ] if statOpt not in statOpts: raise Exception( "Invalid statistics option \"%s\". Please choose one from the following:\n\t%s" % (statOpt, "\n\t".join(statOpts))) if statOpt == "kFCP": statOption = ROOT.TEfficiency.kFCP # Clopper-Pearson elif statOpt == "kFNormal": statOption = ROOT.TEfficiency.kFNormal # Normal Approximation elif statOpt == "kFWilson": statOption = ROOT.TEfficiency.kFWilson # Wilson elif statOpt == "kFAC": statOption = ROOT.TEfficiency.kFAC # Agresti-Coull elif statOpt == "kFFC": statOption = ROOT.TEfficiency.kFFC # Feldman-Cousins elif statOpt == "kBJeffrey": statOption = ROOT.TEfficiency.kBJeffrey # Jeffrey elif statOpt == "kBUniform": statOption = ROOT.TEfficiency.kBUniform # Uniform Prior elif statOpt == "kBayesian": statOption = ROOT.TEfficiency.kBayesian # Custom Prior else: raise Exception("This should never be reached") # Declare variables & options first = True isData = False teff = ROOT.TEfficiency() # Get the ROOT histogram rootHisto = dataset.getDatasetRootHisto(histoName) # Normalise the histogram NormalizeRootHisto(datasetsMgr, rootHisto, dataset.isMC(), normalizeTo) #NormalizeRootHisto(datasetsMgr, rootHisto, d.isMC(), normalizeTo) ## Get a clone of the wrapped histogram normalized as requested. 
h = rootHisto.getHistogram() titleX = h.GetXaxis().GetTitle() binWidth = h.GetXaxis().GetBinWidth(0) titleY = "efficiency (%s) / %s" % (cutDirection, GetBinwidthDecimals(binWidth) % (binWidth)) # If empty return if h.GetEntries() == 0: return # Create the numerator/denominator histograms numerator = h.Clone("Numerator") denominator = h.Clone("Denominator") # Reset the numerator/denominator histograms numerator.Reset() denominator.Reset() # Calculate the instances passing a given cut (all bins) nBinsX = h.GetNbinsX() + 1 for iBin in range(1, nBinsX): nTotal = h.Integral(0, nBinsX) if cutDirection == ">": nPass = h.Integral(iBin + 1, nBinsX) elif cutDirection == "<": nPass = nTotal - h.Integral(iBin + 1, nBinsX) else: raise Exception( "Invalid cut direction \"%s\". Please choose either \">\" or \"<\"" % (cutDirection)) # Sanity check if nPass < 0: nPass = 0 # Fill the numerator/denominator histograms # print "iBin = %s, nPass = %s, nTotal = %s" % (iBin, nPass, nTotal) numerator.SetBinContent(iBin, nPass) numerator.SetBinError(iBin, math.sqrt(nPass) / 10) # denominator.SetBinContent(iBin, nTotal) denominator.SetBinError(iBin, math.sqrt(nTotal) / 10) # Check for negative values CheckNegatives(numerator, denominator) # Create TEfficiency object using the two histos eff = ROOT.TEfficiency(numerator, denominator) eff.SetStatisticOption(statOption) Verbose("The statistic option was set to %s" % (eff.GetStatisticOption())) # Save info in a table (debugging) table = [] hLine = "=" * 70 msgAlign = '{:<5} {:<20} {:<20} {:<20}' title = msgAlign.format("Bin", "Efficiency", "Error-Low", "Error-Up") table.append("\n" + hLine) table.append(title) table.append(hLine) for iBin in range(1, nBinsX): e = eff.GetEfficiency(iBin) errLow = eff.GetEfficiencyErrorLow(iBin) errUp = eff.GetEfficiencyErrorUp(iBin) values = msgAlign.format(iBin, e, errLow, errUp) table.append(values) table.append(hLine) # Verbose mode if verbose: for l in table: print l weight = 1 if dataset.isMC(): weight = 
dataset.getCrossSection() eff.SetWeight(weight) if first: teff = eff if dataset.isData(): tn = numerator td = denominator first = False else: teff.Add(eff) if dataset.isData(): tn.Add(numerator) td.Add(denominator) if isData: teff = ROOT.TEfficiency(tn, td) teff.SetStatisticOption(self.statOption) style = styleDict[dataset.getName()] return Convert2TGraph(teff, dataset, style, titleX, titleY)
def PlotEfficiency(datasetsMgr, numPath, denPath, intLumi):
    '''
    Draw the efficiency numPath / denPath of every dataset on one canvas and
    save it as .png, .pdf and .C.

    Both histograms of every dataset are normalised to intLumi and, if the
    histogram kwargs contain "binList", rebinned with that list. Datasets
    with an empty histogram, or with more numerator than denominator
    entries, are left out.

    datasetsMgr : dataset manager providing getAllDatasets()
    numPath     : path of the numerator histogram
    denPath     : path of the denominator histogram
    intLumi     : luminosity passed to normalizeToLuminosity()
    '''
    histoKwargs = GetHistoKwargs(numPath, opts)
    graphs = []

    for dataset in datasetsMgr.getAllDatasets():
        # Numerator first, then denominator, each normalised before the
        # histogram is extracted
        numRootHisto = dataset.getDatasetRootHisto(numPath)
        numRootHisto.normalizeToLuminosity(intLumi)
        num = numRootHisto.getHistogram()

        denRootHisto = dataset.getDatasetRootHisto(denPath)
        denRootHisto.normalizeToLuminosity(intLumi)
        den = denRootHisto.getHistogram()

        if "binList" in histoKwargs:
            binEdges = histoKwargs["binList"]
            nBins = len(binEdges) - 1
            num = num.Rebin(nBins, "", binEdges)
            den = den.Rebin(nBins, "", binEdges)

        # Skip empty or inconsistent histogram pairs
        if den.GetEntries() == 0 or num.GetEntries() == 0 or num.GetEntries() > den.GetEntries():
            continue

        # Negative-bin handling is delegated to the helper
        CheckNegatives(num, den, True)

        # Clopper-Pearson efficiency (fixme: investigate warnings); no
        # weight is set on it
        tEff = ROOT.TEfficiency(num, den)
        tEff.SetStatisticOption(ROOT.TEfficiency.kFCP)

        graph = convert2TGraph(tEff)

        # Default style and legend label are looked up by dataset name
        plots._plotStyles[dataset.getName()].apply(graph)
        label = plots._legendLabels[dataset.getName()]
        graphs.append(histograms.HistoGraph(graph, label, "lp", "P"))

    # Name of the output: Eff_<numerator>Over<denominator>
    saveName = "Eff_" + numPath.split("/")[-1] + "Over" + denPath.split("/")[-1]

    # Draw all graphs
    p = plots.PlotBase(datasetRootHistos=graphs, saveFormats=[])
    plots.drawPlot(p, saveName, **histoKwargs)

    # Save plot in all formats
    saveDir = os.path.join(opts.saveDir, "HplusMasses", numPath.split("/")[0], opts.optMode) + opts.MVAcut
    SavePlot(p, saveName, saveDir, saveFormats=[".png", ".pdf", ".C"])
    return
def PlotEfficiency(datasetsMgr, numPath, denPath, intLumi):
    '''
    Draw the efficiency numPath/denPath of every dataset and save the
    Data-vs-MC comparison (with ratio pad) as .png, .pdf and .C.

    datasetsMgr : dataset manager; all datasets from getAllDatasets() are used
    numPath     : path of the numerator histogram
    denPath     : path of the denominator histogram
    intLumi     : luminosity the MC histograms are normalised to; also set
                  on the comparison plot

    Reads the module-level "opts" (saveDir, optMode, MVAcut). Returns None.
    If no data dataset yields an efficiency graph the comparison plot is
    skipped, because there is no reference graph to compare against.
    '''
    _kwargs = GetHistoKwargs(numPath, opts)
    allGraphs = []
    mcGraphs = []
    eff_Data = None

    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():
        dsetName = dataset.getName()
        if "Fake" in numPath and "TT" in dsetName:
            continue

        # Numerator first, then denominator; only MC is scaled to intLumi
        histos = []
        for path in (numPath, denPath):
            rootHisto = dataset.getDatasetRootHisto(path)
            if dataset.isMC():
                rootHisto.normalizeToLuminosity(intLumi)
            histos.append(rootHisto.getHistogram())
        num, den = histos

        # Integrals include the underflow (0) and overflow (N+1) bins
        total = den.Integral(0, den.GetXaxis().GetNbins() + 1)
        selected = num.Integral(0, num.GetXaxis().GetNbins() + 1)
        # NOTE(review): the original statement was garbled in the source;
        # printing "selected" here is a reconstruction - confirm.
        print("Pass : %s  events" % selected)
        if total != 0:
            print("Numerical Efficiency %s %s : %s"
                  % (numPath, dsetName, round(selected / total, 3)))
        else:
            # An empty denominator used to raise ZeroDivisionError here,
            # before the sanity checks below had a chance to skip it
            print("Numerical Efficiency %s %s : undefined (empty denominator)"
                  % (numPath, dsetName))

        if "binList" in _kwargs:
            xBins = _kwargs["binList"]
            nx = len(xBins) - 1
            num = num.Rebin(nx, "", xBins)
            den = den.Rebin(nx, "", xBins)
        elif "Eta" in numPath or "Phi" in numPath:
            num = num.Rebin(2)
            den = den.Rebin(2)

        # Sanity checks
        if den.GetEntries() == 0 or num.GetEntries() == 0:
            continue
        if num.GetEntries() > den.GetEntries():
            continue

        # Create Efficiency plots with Clopper-Pearson stats
        eff = ROOT.TEfficiency(num, den)  # fixme: investigate warnings
        eff.SetStatisticOption(ROOT.TEfficiency.kFCP)

        # Convert to TGraph and apply default style (according to dataset name)
        eff = convert2TGraph(eff)
        plots._plotStyles[dsetName].apply(eff)
        legendLabel = plots._legendLabels[dsetName]
        allGraphs.append(histograms.HistoGraph(eff, legendLabel, "lp", "P"))

        if dataset.isMC():
            mcGraphs.append(histograms.HistoGraph(eff, legendLabel, "lp", "P"))
        else:
            # The last data dataset seen is the reference of the comparison
            eff_Data = eff
            eff_Data.SetMarkerSize(1.2)

    # Define save name
    numPath = numPath.replace("AfterAllSelections_", "")
    saveName = "Eff_" + numPath.split("/")[-1]

    # Plot the efficiency
    p = plots.PlotBase(datasetRootHistos=allGraphs, saveFormats=[])
    plots.drawPlot(p, saveName, **_kwargs)

    if eff_Data is None:
        print("No data efficiency available for %s: skipping the Data/MC comparison"
              % numPath)
        return

    p1 = plots.ComparisonManyPlot(
        histograms.HistoGraph(eff_Data, "Data", drawStyle="P"),
        mcGraphs,
        saveFormats=[])

    # Save plot in all formats
    savePath = os.path.join(opts.saveDir, "HplusMasses",
                            numPath.split("/")[0], opts.optMode)
    save_path = savePath + opts.MVAcut
    print("save_path %s" % save_path)

    # Draw and save the plot
    p1.setLuminosity(intLumi)
    _kwargs["ratio"] = True
    _kwargs["ratioInvert"] = True
    _kwargs["cutBoxY"] = {
        "cutValue": 1.0,
        "fillColor": 16,
        "box": False,
        "line": True,
        "greaterThan": True,
        "mainCanvas": True,
        "ratioCanvas": True
    }
    # The original passed the undefined name "save_path1"; the plot name is
    # used instead, as in the drawPlot() call above
    plots.drawPlot(p1, saveName, **_kwargs)
    SavePlot(p1, saveName, save_path, saveFormats=[".png", ".pdf", ".C"])
    return
def main(opts):
    '''
    For every optimisation mode: load the datasets, build the Data and MC
    efficiencies of each variable in VariableList, plot them as a comparison
    with a ratio pad, save the plot and export the efficiencies to JSON.

    opts : parsed options object; the attributes read here are optMode,
           verbose, folder, saveDir, mcrab, analysisName and intLumi.
           opts.optMode is overwritten with the mode being processed.

    Relies on the module-level names optList and pythonWriter. Returns None.
    '''
    # Apply TDR style
    style = tdrstyle.TDRStyle()
    style.setOptStat(True)
    style.setGridX(False)
    style.setGridY(False)

    # If user does not define optimisation mode do all of them
    if opts.optMode is None:
        if not optList:
            optList.append("")
        optModes = optList
    else:
        optModes = [opts.optMode]

    # Datasets whose name contains any of these keys are dropped. Defined
    # before the luminosity branch so that both branches can use it (it was
    # previously only defined when a "Data" dataset existed).
    filterKeys = ["TTW"]

    # For-loop: All optimisation modes
    for opt in optModes:
        opts.optMode = opt

        # Setup & configure the dataset manager
        datasetsMgr = GetDatasetsFromDir(opts)
        datasetsMgr.updateNAllEventsToPUWeighted()
        datasetsMgr.loadLuminosities()  # from lumi.json

        if opts.verbose:
            datasetsMgr.PrintCrossSections()
            datasetsMgr.PrintLuminosities()

        # Set/Overwrite cross-sections
        for d in datasetsMgr.getAllDatasets():
            if "ChargedHiggs" in d.getName():
                datasetsMgr.getDataset(d.getName()).setCrossSection(1.0)

        # Print dataset information before removing anything
        datasetsMgr.PrintInfo()

        # Determine integrated Lumi before removing data
        if "Data" in datasetsMgr.getAllDatasetNames():
            intLumi = datasetsMgr.getDataset("Data").getLuminosity()
        else:
            intLumi = 35920

        # Remove datasets
        for key in filterKeys:
            datasetsMgr.remove(
                [name for name in datasetsMgr.getAllDatasetNames() if key in name])

        # Re-order datasets (keeps the current order)
        datasetOrder = [d.getName() for d in datasetsMgr.getAllDatasets()]
        datasetsMgr.selectAndReorder(datasetOrder)

        # Define the mapping histograms in numerator->denominator pairs
        VariableList = [
            "LdgTop_Pt",
        ]

        _minRunRange, _maxRunRange, runRange = GetRunRange(datasetsMgr)

        # Merge histograms (see NtupleAnalysis/python/tools/plots.py)
        plots.mergeRenameReorderForDataMC(datasetsMgr)

        datasets_ = datasetsMgr.getAllDatasets()
        dataset_Data = datasetsMgr.getDataDatasets()
        dataset_MC = datasetsMgr.getMCDatasets()

        # Print dataset information
        datasetsMgr.PrintInfo()

        # For-loop: All numerator-denominator pairs
        nPlots = len(VariableList)
        for counter, var in enumerate(VariableList, 1):
            histoN = "AfterAllSelections_" + var
            histoD = "AfterStandardSelections_" + var
            numerator = os.path.join(opts.folder, histoN)
            denominator = os.path.join(opts.folder, histoD)

            msg = "{:<9} {:>3} {:<1} {:<3} {:<50}".format(
                "Histogram", "%i" % counter, "/", "%s:" % (nPlots), "%s" % (var))
            Print(ShellStyles.SuccessStyle() + msg + ShellStyles.NormalStyle(),
                  counter == 1)

            # Retrieve both histograms of every dataset, exactly as before.
            # NOTE(review): the values derived from them (bin below x=20,
            # integrals) were never used and could leave "my_bin" unbound,
            # so that arithmetic is gone; confirm whether this retrieval is
            # still wanted or the loop can be dropped entirely.
            for dataset in datasets_:
                for path in (numerator, denominator):
                    rootHisto = dataset.getDatasetRootHisto(path)
                    if dataset.isMC():
                        rootHisto.normalizeToLuminosity(intLumi)
                    rootHisto.getHistogram()

            plotName = "Eff_%s" % (var)

            # Get Efficiency Plots
            _kwargs = GetHistoKwargs(var, opts)
            eff_Data = GetEfficiency(datasetsMgr, dataset_Data, numerator,
                                     denominator, intLumi)
            eff_MC = GetEfficiency(datasetsMgr, dataset_MC, numerator,
                                   denominator, intLumi)

            # Apply Styles
            styles.dataStyle.apply(eff_Data)
            styles.qcdStyle.apply(eff_MC)

            # Create the plot
            p = plots.ComparisonPlot(
                histograms.HistoGraph(eff_Data, "eff_Data", "p", "P"),
                histograms.HistoGraph(eff_MC, "eff_MC", "p", "P"),
                saveFormats=[])

            # Define the legend entries
            p.histoMgr.setHistoLegendLabelMany({
                "eff_Data": "Data",
                "eff_MC": "QCD"
            })

            p.setLuminosity(intLumi)
            _kwargs["ratio"] = True
            _kwargs["opts"] = {
                "xmin": 0.0,
                "xmax": 600.0,
                "ymin": 0.0,
                "ymax": 0.16,
                "ymaxfactor": 1.8
            }
            _kwargs["cutBoxY"] = {
                "cutValue": 1.10,
                "fillColor": ROOT.kGray + 1,
                "fillStyle": 3001,
                "box": False,
                "line": True,
                "greaterThan": True,
                "mainCanvas": False,
                "ratioCanvas": True,
                "mirror": True
            }

            # Draw
            plots.drawPlot(p, plotName, **_kwargs)
            savePath = os.path.join(opts.saveDir, opts.optMode)
            SavePlot(p, plotName, savePath, saveFormats=[".png", ".pdf", ".C"])

            # Save results in JSON
            name = opts.mcrab.split("_")[-3]
            name = name.replace(opts.analysisName, "")
            jsonName = "topMisID_" + name + "_TopMassCut400.json"
            label = "2016"
            plotDir = os.path.join(opts.folder, jsonName)
            # NOTE(review): opts.intLumi (not the local intLumi) is written
            # to the JSON, as in the original - confirm this is intended.
            pythonWriter.addParameters(plotDir, label, runRange, opts.intLumi,
                                       eff_Data)
            pythonWriter.addMCParameters(label, eff_MC)
            pythonWriter.writeJSON(jsonName)
    return
def PlotEfficiency(datasetsMgr, numPath, denPath, eff_def):
    '''
    Compare the efficiency numPath/denPath of the default "TT" dataset with
    that of every other dataset, save the comparison plot and, for
    eff_def == "genuineTop", write the per-dataset uncertainties to JSON.

    datasetsMgr : dataset manager; all datasets from getAllDatasets() are used
    numPath     : path of the numerator histogram
    denPath     : path of the denominator histogram
    eff_def     : efficiency definition ("fakeTop", "inclusiveTop",
                  "genuineTop" or anything else); selects the x-axis label
                  and is part of the saved file name

    Reads the module-level "opts" (type, saveDir, optMode, BDT,
    analysisName). Returns None.
    Raises Exception if no dataset named "TT" passes the sanity checks,
    since the comparison has no reference graph in that case.
    '''
    _kwargs = GetHistoKwargs(numPath, opts)

    # Optional variable binning; a missing or empty "binList" means that the
    # histograms are used with their original binning
    xBins = _kwargs.get("binList", [])
    nx = len(xBins) - 1 if len(xBins) > 0 else 0

    # Ordered (old, new) replacements turning a dataset name into a legend
    legendReplacements = [
        ("TT_", "t#bar{t} ("),
        ("isr", "ISR "),
        ("fsr", "FSR "),
        ("hdamp", "hdamp "),
        ("DOWN", "down"),
        ("UP", "up"),
        ("mtop1665", "m_{t} = 166.5 GeV"),
        ("mtop1695", "m_{t} = 169.5 GeV"),
        ("mtop1715", "m_{t} = 171.5 GeV"),
        ("mtop1735", "m_{t} = 173.5 GeV"),
        ("mtop1755", "m_{t} = 175.5 GeV"),
        ("mtop1785", "m_{t} = 178.5 GeV"),
        ("TuneEE5C", "Herwig++"),
    ]

    refGraph = None
    default_eff = None
    variationGraphs = []
    datasetList = []
    ttVariationEff = []
    counter = 0

    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():
        num = dataset.getDatasetRootHisto(numPath).getHistogram()
        den = dataset.getDatasetRootHisto(denPath).getHistogram()
        if nx > 0:
            num = num.Rebin(nx, "", xBins)
            den = den.Rebin(nx, "", xBins)

        # Sanity checks
        if den.GetEntries() == 0 or num.GetEntries() == 0:
            continue
        if num.GetEntries() > den.GetEntries():
            continue

        # Create Efficiency plots with Clopper-Pearson stats
        eff = ROOT.TEfficiency(num, den)
        eff.SetStatisticOption(ROOT.TEfficiency.kFCP)

        # Convert to TGraph
        gEff = convert2TGraph(eff)

        if dataset.getName() == "TT":
            # The reference efficiency is the one of the "TT" dataset itself;
            # it used to be rebuilt from scratch for every dataset in the loop
            default_eff = gEff.Clone()  # unstyled copy for the uncertainties
            styles.ttStyle.apply(gEff)
            legend_ref = "t#bar{t}"
            if opts.type == "partonShower":
                legend_ref = "t#bar{t} (Pythia8)"
            elif opts.type == "evtGen":
                legend_ref = "t#bar{t} (Powheg)"
            refGraph = histograms.HistoGraph(gEff, legend_ref, "p", "P")
        else:
            datasetList.append(dataset.getName())
            ttVariationEff.append(gEff)

            styles.markerStyles[counter].apply(gEff)
            counter += 1

            legend = dataset.getName()
            for old, new in legendReplacements:
                legend = legend.replace(old, new)
            legend += ")"
            variationGraphs.append(histograms.HistoGraph(gEff, legend, "p", "P"))

    if refGraph is None:
        raise Exception("No usable \"TT\" reference efficiency for %s / %s"
                        % (numPath, denPath))

    units = "GeV/c"
    if eff_def == "fakeTop":
        _kwargs["xlabel"] = "candidate p_{T} (%s)" % (units)
    elif eff_def == "inclusiveTop" or eff_def == "genuineTop":
        _kwargs["xlabel"] = "generated top p_{T} (%s)" % (units)
    else:
        _kwargs["xlabel"] = "p_{T} (%s)" % (units)

    saveName = "Efficiency_%s_%s" % (eff_def, opts.type)

    # Plot the efficiency
    p = plots.ComparisonManyPlot(refGraph, variationGraphs, saveFormats=[])
    savePath = os.path.join(opts.saveDir, opts.optMode)
    # NOTE(review): savePath (not saveName) is passed as the plot name, as in
    # the original - confirm this is intended.
    plots.drawPlot(p, savePath, **_kwargs)

    # Save plot in all formats
    SavePlot(p, saveName, savePath, saveFormats=[".png", ".pdf", ".C"])

    # The uncertainties from the ratio of all plots are needed ONLY for
    # the genuine-top efficiency
    if eff_def == "genuineTop":
        uncWriter = UncertaintyWriter()
        jsonName = "uncertainties_%s_BDT_%s.json" % (opts.type, opts.BDT)
        analysis = opts.analysisName
        saveDir = os.path.join("", jsonName)

        for variationName, variationEff in zip(datasetList, ttVariationEff):
            uncWriter.addParameters(variationName, analysis, saveDir,
                                    default_eff, variationEff)
        uncWriter.writeJSON(jsonName)
    return
def PlotProb(datasets, numPath, denPath):
    '''
    Plot the efficiency numPath/denPath of every dataset on a common frame
    and save it.

    datasets : iterable of dataset objects
    numPath  : path of the numerator histogram; its content also selects the
               rebinning, the axis ranges and the axis titles
    denPath  : path of the denominator histogram

    Reads the module-level "opts" (saveDir, optMode, MVAcut). Returns None.
    Nothing is drawn when no dataset survives the sanity checks.
    '''
    statOption = ROOT.TEfficiency.kFNormal
    lumi = 35.8 * (10**3)

    # (tag in dataset name, attribute of "styles", legend), tested in order
    signalStyles = [
        ("M_500", "signalStyleHToTB500", "H^{+} m_{H^{+}} = 500 GeV"),
        ("M_300", "signalStyleHToTB300", "H^{+} m_{H^{+}} = 300 GeV"),
        ("M_1000", "signalStyleHToTB1000", "H^{+} m_{H^{+}} = 1000 GeV"),
        ("M_800", "signalStyleHToTB800", "H^{+} m_{H^{+}} = 800 GeV"),
        ("M_200", "signalStyleHToTB200", "H^{+} m_{H^{+}} = 200 GeV"),
    ]

    EfficiencyList = []
    index = 0
    for dataset in datasets:
        datasetName = dataset.getName()
        print("Dataset =  %s" % datasetName)

        nn = dataset.getDatasetRootHisto(numPath)
        nn.normalizeToLuminosity(lumi)
        n = nn.getHistogram()

        dd = dataset.getDatasetRootHisto(denPath)
        dd.normalizeToLuminosity(lumi)
        d = dd.getHistogram()

        rebin = 10 if "Event" in numPath else 5
        n.Rebin(rebin)
        d.Rebin(rebin)

        if d.GetEntries() == 0 or n.GetEntries() == 0:
            continue
        if n.GetEntries() > d.GetEntries():
            continue

        # Check Negatives
        CheckNegatives(n, d, True)

        # Remove Negatives
        RemoveNegatives(n)

        nBins = d.GetNbinsX()
        binwidth = int(n.GetBinWidth(0))

        # Ugly hack to ignore EMPTY (in the wanted range) histograms with
        # overflows/underflows
        if (n.GetMean() == 0 or d.GetMean() == 0):
            continue
        if (n.GetRMS() == 0 or d.GetRMS() == 0):
            continue
        if (n.Integral(1, nBins) == 0 or d.Integral(1, nBins) == 0):
            continue

        effic = ROOT.TEfficiency(n, d)
        effic.SetStatisticOption(statOption)

        weight = 1
        if dataset.isMC():
            weight = dataset.getCrossSection()
        effic.SetWeight(weight)

        eff = convert2TGraph(effic)

        # Apply Styles
        if "TT" in datasetName:
            # The first TT graph is the default one, any later TT graph is
            # labelled as the Delta R(q,q') > 0.8 selection
            styles.signalStyleHToTB500.apply(eff)
            eff.SetLineStyle(1)
            eff.SetLineWidth(3)
            if index == 0:
                eff.SetLineColor(619)
                legend = "Default: t#bar{t}"
                index = 1
            else:
                eff.SetLineColor(417)
                legend = "#Delta R(q,q')>0.8: t#bar{t}"
        else:
            for tag, styleName, signalLegend in signalStyles:
                if tag in datasetName:
                    getattr(styles, styleName).apply(eff)
                    legend = signalLegend
                    break
            else:
                styles.ttStyle.apply(eff)
                legend = "other"

        EfficiencyList.append(histograms.HistoGraph(eff, legend, "lp", "P"))

    if not EfficiencyList:
        # Without any graph there is nothing to frame, and "binwidth" (used
        # in the y-axis titles below) would be undefined
        print("PlotProb: no usable efficiency for %s / %s" % (numPath, denPath))
        return

    saveName = "Eff_" + numPath.split("/")[-1] + "Over" + denPath.split("/")[-1]

    units = "GeV/c"
    perBin = " / " + str(binwidth) + " " + units
    yMin = 0.0
    yMax = 1.1

    # Defaults chosen from the histogram name
    if "Pt" in numPath:
        xMin = 0.0
        xMax = 805.0
        xTitle = "p_{T} (GeV/c)"
        yTitle = "Efficiency" + perBin
    elif "_Eta" in numPath:
        xMin = -3.0
        xMax = +3.0
        xTitle = "#eta"
        yTitle = "Efficiency"
    elif "_Mass" in numPath:
        xMin = 50.0
        xMax = 300
        xTitle = "M (GeV/c^{2})"
        yTitle = "Efficiency"
    elif "_Phi" in numPath:
        xMin = -3
        xMax = +3
        xTitle = "#phi"
        yTitle = "Efficiency"
    else:
        xMin = 0.0
        xMax = 250.0
        xTitle = "xTitle"
        yTitle = "yTitle"

    # Overrides; these are independent tests, so a later match wins
    if "Fake" in numPath:
        xMax = 805.0
        xTitle = "candidate p_{T} (GeV/c)"
        yTitle = "Misid rate" + perBin
        yMin = 0.0
        yMax = 0.11

    if "Event" in numPath:
        xMax = 805.0
        xTitle = "candidate p_{T} (GeV/c)"
        yTitle = "Efficiency" + perBin
        yMin = 0.0
        yMax = 1.1

    if "NonMatched" in numPath:
        xMin = 90.0
        xMax = 555.0  # For topPt < 500GeV
        xTitle = "p_{T} (GeV)"
        yTitle = "Efficiency"
        yMin = 0.0
        yMax = 0.15

    if "AllTopQuarkPt_MatchedBDT" in numPath and "TopQuarkPt" in denPath:
        xMin = 0.0
        xMax = 805.0
        xTitle = "generated top p_{T} (GeV/c)"
        yTitle = "Efficiency" + perBin
        yMin = 0.0
        yMax = 1.1

    if "SameFake" in numPath:
        xMin = 95.0
        xMax = 555.0  # For topPt < 500GeV
        xTitle = "p_{T} [GeV]"
        yTitle = "Efficiency"
        yMin = 0.0
        yMax = 1.1

    # The upper x bound used to be stored under the key "xMax", unlike the
    # lower-case "xmin"/"ymin"/"ymax" keys next to it
    options = {"ymin": yMin, "ymax": yMax, "xmin": xMin, "xmax": xMax}

    # NOTE(review): "kwargs" is not a parameter of this function; presumably
    # a module-level dict - confirm.
    p = plots.PlotBase(datasetRootHistos=EfficiencyList,
                       saveFormats=kwargs.get("saveFormats"))
    p.createFrame(saveName, opts=options)

    # Set Titles
    p.getFrame().GetXaxis().SetTitle(xTitle)
    p.getFrame().GetYaxis().SetTitle(yTitle)

    # Set range
    p.getFrame().GetXaxis().SetRangeUser(xMin, xMax)

    moveLegend = {"dx": -0.55, "dy": -0.02, "dh": -0.2}
    p.setLegend(histograms.moveLegend(histograms.createLegend(), **moveLegend))

    # Add Standard Texts to plot
    histograms.addStandardTexts()

    p.draw()

    # Save plot in all formats
    savePath = os.path.join(opts.saveDir, "HplusMasses",
                            numPath.split("/")[0], opts.optMode)
    save_path = savePath + opts.MVAcut
    SavePlot(p, saveName, save_path)
    return
def GetEfficiency(datasetsMgr, datasets, numerator="Numerator", denominator="Denominator", **kwargs):
    '''
    Combine the efficiencies numerator/denominator of the given datasets
    into one TEfficiency and return it converted with convert2TGraph().

    TEfficiency method:
    See https://root.cern.ch/doc/master/classTEfficiency.html

    datasetsMgr : dataset manager, only used to obtain the luminosity
    datasets    : iterable of dataset objects to combine
    numerator   : path of the numerator histogram
    denominator : path of the denominator histogram
    kwargs      : accepted for interface compatibility; not used

    Datasets that have empty histograms or fail the sanity checks are
    skipped. If the last dataset of the list is data, the result is rebuilt
    from the summed data numerator and denominator histograms. If no dataset
    is usable, an empty TEfficiency is converted and returned.
    Raises Exception if numerator and denominator differ in number of bins.
    '''
    lumi = GetLumi(datasetsMgr)

    # Statistic option. Alternatives offered by TEfficiency: kFNormal,
    # kFWilson, kFAC, kFFC, kBJeffrey, kBUniform, kBBayesian
    statOption = ROOT.TEfficiency.kFCP  # Clopper-Pearson

    teff = ROOT.TEfficiency()
    first = True
    lastIsData = False
    # Running sums of the data histograms; None until a data dataset is
    # accepted, so they can never be referenced unbound
    dataNum = None
    dataDen = None

    # For-loop: All datasets
    for dataset in datasets:
        lastIsData = dataset.isData()

        num = dataset.getDatasetRootHisto(numerator)
        den = dataset.getDatasetRootHisto(denominator)

        # NOTE(review): the "if dataset.isMC()" guard is commented out in the
        # original, so data are normalised as well - confirm this is intended.
        num.normalizeToLuminosity(lumi)
        den.normalizeToLuminosity(lumi)

        # Get Numerator and Denominator
        n = num.getHistogram()
        d = den.getHistogram()

        if d.GetEntries() == 0 or n.GetEntries() == 0:
            msg = "Denominator Or Numerator has no entries"
            Print(ErrorStyle() + msg + NormalStyle(), True)
            continue

        # Check Negatives
        CheckNegatives(n, d, True)

        # Remove Negatives
        RemoveNegatives(n)

        # Sanity Check
        if n.GetNbinsX() != d.GetNbinsX():
            raise Exception("Numerator and Denominator Bins are NOT equal!")
        nBins = d.GetNbinsX()

        # Ugly hack to ignore EMPTY (in the wanted range) histograms with
        # overflows/underflows
        if (n.GetMean() == 0 or d.GetMean() == 0):
            continue
        if (n.GetRMS() == 0 or d.GetRMS() == 0):
            continue
        if (n.Integral(1, nBins) == 0 or d.Integral(1, nBins) == 0):
            continue

        Verbose("Passed the sanity check", True)

        eff = ROOT.TEfficiency(n, d)
        eff.SetStatisticOption(statOption)

        weight = 1
        if dataset.isMC():
            weight = dataset.getCrossSection()
        eff.SetWeight(weight)

        if first:
            teff = eff
            first = False
        else:
            teff.Add(eff)

        if lastIsData:
            if dataNum is None:
                dataNum = n
                dataDen = d
            else:
                dataNum.Add(n)
                dataDen.Add(d)

    # Previously the loop variable was inspected after the loop, which failed
    # for an empty list and whenever no data histogram had been stored
    if lastIsData and dataNum is not None:
        teff = ROOT.TEfficiency(dataNum, dataDen)
        teff.SetStatisticOption(statOption)

    Verbose("Final tEff", True)
    return convert2TGraph(teff)
def GetCutEfficiencyHisto(dataset, histoName, statOpt, **kwargs):
    '''
    Build the cut efficiency of a histogram: for every bin, the numerator is
    the integral passing a cut placed at that bin and the denominator is the
    total integral (underflow and overflow included).

    See https://root.cern.ch/doc/master/classTEfficiency.html

    dataset   : dataset object providing the histogram
    histoName : name of the histogram
    statOpt   : name of the TEfficiency statistic option: "kFCP",
                "kFNormal", "kFWilson", "kFAC", "kFFC", "kBJeffrey",
                "kBUniform", "kBBayesian" ("kBayesian" is kept as an alias)
    kwargs    : must contain "verbose", "normalizeTo" and "cutDirection"
                (">" or "<")

    Returns the result of Convert2TGraph(), or None if the histogram has no
    entries. Raises Exception for an unknown statOpt or cutDirection.
    '''
    HasKeys(["verbose", "normalizeTo", "cutDirection"], **kwargs)
    verbose = kwargs.get("verbose")
    normalizeTo = kwargs.get("normalizeTo")
    cutDirection = kwargs.get("cutDirection")
    Verbose("Calculating the cut-efficiency (%s) for histo with name %s" % (cutDirection, histoName) )

    # Accepted option name -> attribute of ROOT.TEfficiency. The list used to
    # contain "KFWilson", which made the Wilson option unreachable, and
    # "kBayesian" was looked up under that name although the TEfficiency
    # option is called kBBayesian.
    statOptions = [
        ("kFCP", "kFCP"),            # Clopper-Pearson
        ("kFNormal", "kFNormal"),    # Normal Approximation
        ("kFWilson", "kFWilson"),    # Wilson
        ("kFAC", "kFAC"),            # Agresti-Coull
        ("kFFC", "kFFC"),            # Feldman-Cousins
        ("kBJeffrey", "kBJeffrey"),  # Jeffrey
        ("kBUniform", "kBUniform"),  # Uniform Prior
        ("kBBayesian", "kBBayesian"),  # Custom Prior
        ("kBayesian", "kBBayesian"),   # legacy spelling of the above
    ]
    statAttributes = dict(statOptions)
    if statOpt not in statAttributes:
        validNames = [name for name, _ in statOptions]
        raise Exception("Invalid statistics option \"%s\". Please choose one from the following:\n\t%s" % (statOpt, "\n\t".join(validNames)))
    statOption = getattr(ROOT.TEfficiency, statAttributes[statOpt])

    # Get the ROOT histogram
    rootHisto = dataset.getDatasetRootHisto(histoName)

    # Normalise the histogram
    # NOTE(review): "datasetsMgr" is not a parameter of this function;
    # presumably a module-level object - confirm.
    NormalizeRootHisto(datasetsMgr, rootHisto, dataset.isMC(), normalizeTo)

    # Get a clone of the wrapped histogram normalized as requested
    h = rootHisto.getHistogram()
    titleX = h.GetXaxis().GetTitle()
    binWidth = h.GetXaxis().GetBinWidth(0)
    titleY = "efficiency (%s) / %s" % (cutDirection, GetBinwidthDecimals(binWidth) % (binWidth) )

    # If empty return
    if h.GetEntries() == 0:
        return

    if cutDirection not in (">", "<"):
        raise Exception("Invalid cut direction \"%s\". Please choose either \">\" or \"<\"" % (cutDirection))

    # Create the (empty) numerator/denominator histograms
    numerator = h.Clone("Numerator")
    denominator = h.Clone("Denominator")
    numerator.Reset()
    denominator.Reset()

    # Calculate the instances passing a given cut (all bins)
    nBinsX = h.GetNbinsX()+1
    nTotal = h.Integral(0, nBinsX)  # same for every bin
    for iBin in range(1, nBinsX):
        nAbove = h.Integral(iBin+1, nBinsX)
        if cutDirection == ">":
            nPass = nAbove
        else:
            nPass = nTotal - nAbove

        # Sanity check
        if nPass < 0:
            nPass = 0

        # Fill the numerator/denominator histograms
        numerator.SetBinContent(iBin, nPass)
        numerator.SetBinError(iBin, math.sqrt(nPass)/10)
        denominator.SetBinContent(iBin, nTotal)
        denominator.SetBinError(iBin, math.sqrt(nTotal)/10)

    # Check for negative values
    CheckNegatives(numerator, denominator)

    # Create TEfficiency object using the two histos
    eff = ROOT.TEfficiency(numerator, denominator)
    eff.SetStatisticOption(statOption)
    Verbose("The statistic option was set to %s" % (eff.GetStatisticOption()) )

    # Print the per-bin efficiencies as a table (debugging)
    if verbose:
        hLine = "="*70
        msgAlign = '{:<5} {:<20} {:<20} {:<20}'
        table = ["\n" + hLine,
                 msgAlign.format("Bin", "Efficiency", "Error-Low", "Error-Up"),
                 hLine]
        for iBin in range(1, nBinsX):
            table.append(msgAlign.format(iBin,
                                         eff.GetEfficiency(iBin),
                                         eff.GetEfficiencyErrorLow(iBin),
                                         eff.GetEfficiencyErrorUp(iBin)))
        table.append(hLine)
        for row in table:
            print(row)

    weight = 1
    if dataset.isMC():
        weight = dataset.getCrossSection()
    eff.SetWeight(weight)

    # Only one dataset is handled, so this efficiency is the final one
    style = styleDict[dataset.getName()]
    return Convert2TGraph(eff, dataset, style, titleX, titleY)
def getEfficiency2D(datasetsMgr, datasets, numerator="Numerator", denominator="Denominator", **kwargs):
    '''
    Combine the efficiencies numerator/denominator of the given datasets
    into one TEfficiency and return it.

    TEfficiency method:
    See https://root.cern.ch/doc/master/classTEfficiency.html

    datasetsMgr : dataset manager, only used to obtain the luminosity
    datasets    : iterable of dataset objects to combine
    numerator   : path of the numerator histogram
    denominator : path of the denominator histogram
    kwargs      : must contain "verbose" (checked by HasKeys; the value
                  itself is not used, progress is always printed)

    Datasets that have empty histograms or fail the sanity checks are
    skipped. If the last dataset of the list is data, the result is rebuilt
    from the summed data numerator and denominator histograms. If no dataset
    is usable, an empty TEfficiency is returned.
    Raises Exception if numerator and denominator differ in number of x bins.
    '''
    HasKeys(["verbose"], **kwargs)
    lumi = GetLumi(datasetsMgr)

    # Statistic option. Alternatives offered by TEfficiency: kFNormal,
    # kFWilson, kFAC, kFFC, kBJeffrey, kBUniform, kBBayesian
    statOption = ROOT.TEfficiency.kFCP  # Clopper-Pearson

    print("getEfficiency function")
    teff = ROOT.TEfficiency()
    first = True
    lastIsData = False
    # Running sums of the data histograms; None until a data dataset is
    # accepted, so they can never be referenced unbound
    dataNum = None
    dataDen = None

    print("Loop over Datasets")
    # NOTE(review): read as two consecutive loops; the original indentation
    # is lost - confirm the second loop was not nested in this one.
    for dataset in datasets:
        print("Datasets")

    for dataset in datasets:
        lastIsData = dataset.isData()

        num = dataset.getDatasetRootHisto(numerator)
        den = dataset.getDatasetRootHisto(denominator)
        if dataset.isMC():
            num.normalizeToLuminosity(lumi)
            den.normalizeToLuminosity(lumi)

        # Get Numerator and Denominator
        n = num.getHistogram()
        d = den.getHistogram()

        if d.GetEntries() == 0 or n.GetEntries() == 0:
            print("Denominator Or Numerator has no entries")
            continue

        # Check Negatives
        CheckNegatives(n, d, True)

        # Remove Negatives
        RemoveNegatives(n)

        # Sanity Check
        if n.GetNbinsX() != d.GetNbinsX():
            raise Exception("Numerator and Denominator Bins are NOT equal!")

        print("NoProblem till here asdasd...")

        # Ugly hack to ignore EMPTY (in the wanted range) histograms with
        # overflows/underflows
        print("\n")
        print("=========== getEfficiency:")
        print("Dataset =  %s" % dataset.getName())
        print("\n")
        print(">>>>>> Sanity Check: <<<<<<")
        print("Numerator Mean       =  %s" % n.GetMean())
        print("Numerator RMS        =  %s" % n.GetRMS())
        print("Numerator Integral   =  %s" % n.Integral())
        print("Denominator Mean     =  %s" % d.GetMean())
        print("Denominator RMS      =  %s" % d.GetRMS())
        print("Denominator Integral =  %s" % d.Integral())

        if (n.GetMean() == 0 or d.GetMean() == 0):
            continue
        if (n.GetRMS() == 0 or d.GetRMS() == 0):
            continue
        if (n.Integral() == 0 or d.Integral() == 0):
            continue

        print("Passed the sanity check")

        eff = ROOT.TEfficiency(n, d)
        eff.SetStatisticOption(statOption)

        weight = 1
        if dataset.isMC():
            weight = dataset.getCrossSection()
        eff.SetWeight(weight)

        if first:
            teff = eff
            first = False
        else:
            teff.Add(eff)

        if lastIsData:
            if dataNum is None:
                dataNum = n
                dataDen = d
            else:
                dataNum.Add(n)
                dataDen.Add(d)

    # Previously the loop variable was inspected after the loop, which failed
    # for an empty list and whenever no data histogram had been stored
    if lastIsData and dataNum is not None:
        teff = ROOT.TEfficiency(dataNum, dataDen)
        teff.SetStatisticOption(statOption)

    print(" -----------------> Final tEff")
    return teff
def PlotEfficiency(datasetsMgr, numPath, denPath):
    """Compare the efficiency of every dataset against the nominal "TT" one.

    For each dataset of datasetsMgr.getAllDatasets() the histograms stored at
    numPath (passed) and denPath (total) are fetched, optionally rebinned to
    the "binList" entry of the plot kwargs, and turned into a
    ROOT.TEfficiency with Clopper-Pearson intervals that is converted to a
    graph through convert2TGraph. The dataset named "TT" provides the
    reference graph, all other datasets are compared to it in a
    plots.ComparisonManyPlot, and the plot is written by SavePlot as .png,
    .pdf and .C.

    Relies on the module-level names opts, GetHistoKwargs, convert2TGraph
    and SavePlot.

    Args:
        datasetsMgr: dataset manager providing getAllDatasets().
        numPath: path of the numerator histogram inside each dataset.
        denPath: path of the denominator histogram inside each dataset.

    Returns:
        None

    Raises:
        RuntimeError: if no dataset named "TT" passed the sanity checks, so
            that there is no reference graph to compare against.
    """
    _kwargs = GetHistoKwargs(numPath, opts)

    # Optional variable-width binning. A missing "binList" entry means
    # "do not rebin" rather than a KeyError.
    xBins = _kwargs.get("binList")
    nx = len(xBins) - 1 if xBins is not None else 0

    def _getHisto(dset, path):
        # Fetch one histogram, rebinned to xBins when a binning was requested
        histo = dset.getDatasetRootHisto(path).getHistogram()
        if nx > 0:
            histo = histo.Rebin(nx, "", xBins)
        return histo

    def _getEffGraph(passed, total):
        # Efficiency with Clopper-Pearson stats, converted to a graph
        eff = ROOT.TEfficiency(passed, total)
        eff.SetStatisticOption(ROOT.TEfficiency.kFCP)
        return convert2TGraph(eff)

    # Dataset-name fragments and their legend text, applied in this order
    legendSubstitutions = [
        ("TT_", "t#bar{t} ("),
        ("isr", "ISR "),
        ("fsr", "FSR "),
        ("hdamp", "hdamp "),
        ("DOWN", "down"),
        ("UP", "up"),
        ("mtop1665", "m_{t} = 166.5 GeV"),
        ("mtop1695", "m_{t} = 169.5 GeV"),
        ("mtop1715", "m_{t} = 171.5 GeV"),
        ("mtop1735", "m_{t} = 173.5 GeV"),
        ("mtop1755", "m_{t} = 175.5 GeV"),
        ("mtop1785", "m_{t} = 178.5 GeV"),
        ("TuneEE5C", "Herwig++"),
    ]

    refGraph = None
    myList = []
    counter = 0

    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():
        num = _getHisto(dataset, numPath)
        den = _getHisto(dataset, denPath)

        # Sanity checks
        if den.GetEntries() == 0 or num.GetEntries() == 0:
            continue
        if num.GetEntries() > den.GetEntries():
            continue

        if dataset.getName() == "TT":
            # The nominal sample is the reference of the comparison; it is
            # built once here instead of once per loop iteration.
            gEffRef = _getEffGraph(num, den)
            styles.ttStyle.apply(gEffRef)
            if opts.type == "partonShower":
                legend_ref = "t#bar{t} (Pythia8)"
            elif opts.type == "evtGen":
                legend_ref = "t#bar{t} (Powheg)"
            else:
                legend_ref = "t#bar{t}"
            refGraph = histograms.HistoGraph(gEffRef, legend_ref, "p", "P")
        else:
            gEff = _getEffGraph(num, den)
            # One marker style per compared dataset, in iteration order
            styles.markerStyles[counter].apply(gEff)
            counter += 1

            legend = dataset.getName()
            for old, new in legendSubstitutions:
                legend = legend.replace(old, new)
            legend += ")"
            myList.append(histograms.HistoGraph(gEff, legend, "p", "P"))

    if refGraph is None:
        raise RuntimeError("PlotEfficiency: no usable \"TT\" reference dataset "
                           "for %s / %s" % (numPath, denPath))

    # Define stuff
    saveName = "Efficiency_%s_%s" % (opts.folder, opts.type)
    saveName = saveName.replace("__", "_Inclusive_")

    # Plot the efficiency
    p = plots.ComparisonManyPlot(refGraph, myList, saveFormats=[])
    savePath = os.path.join(opts.saveDir, opts.optMode)
    # NOTE(review): savePath (not saveName) is passed as the second argument,
    # exactly as before - confirm this is intended.
    plots.drawPlot(p, savePath, **_kwargs)

    # Save plot in all formats
    SavePlot(p, saveName, savePath, saveFormats=[".png", ".pdf", ".C"])
    return
def PlotEfficiency(datasetsMgr, numPath, denPath, intLumi):
    """Plot the numPath / denPath efficiency of every dataset in one canvas.

    For each dataset of datasetsMgr.getAllDatasets() the numerator and
    denominator histograms are normalised to intLumi, optionally rebinned to
    the "binList" entry of the plot kwargs, passed through CheckNegatives and
    combined into a ROOT.TEfficiency with Clopper-Pearson intervals. The
    efficiency is converted to a graph via convert2TGraph, styled with the
    dataset's default plot style and drawn with plots.PlotBase. The plot is
    written by SavePlot as .png, .pdf and .C.

    Datasets whose name contains "TT" are skipped when numPath contains
    "Fake". Datasets with an empty numerator or denominator, or with more
    numerator than denominator entries, are skipped as well.

    Relies on the module-level names opts, GetHistoKwargs, CheckNegatives,
    convert2TGraph and SavePlot.

    Args:
        datasetsMgr: dataset manager providing getAllDatasets().
        numPath: path of the numerator histogram inside each dataset.
        denPath: path of the denominator histogram inside each dataset.
        intLumi: integrated luminosity handed to normalizeToLuminosity().

    Returns:
        None
    """
    _kwargs = GetHistoKwargs(numPath, opts)

    # Variable-width rebinning needs at least two bin edges; anything shorter
    # would ask ROOT for a non-positive number of bins.
    xBins = _kwargs.get("binList")
    doRebin = xBins is not None and len(xBins) > 1

    myList = []

    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():
        datasetName = dataset.getName()
        if "Fake" in numPath and "TT" in datasetName:
            continue

        # Get the histograms
        n = dataset.getDatasetRootHisto(numPath)
        n.normalizeToLuminosity(intLumi)
        num = n.getHistogram()

        d = dataset.getDatasetRootHisto(denPath)
        d.normalizeToLuminosity(intLumi)
        den = d.getHistogram()

        if doRebin:
            nx = len(xBins) - 1
            num = num.Rebin(nx, "", xBins)
            den = den.Rebin(nx, "", xBins)

        # Sanity checks
        if den.GetEntries() == 0 or num.GetEntries() == 0:
            continue
        if num.GetEntries() > den.GetEntries():
            continue

        # Remove negative bins and ensure numerator bin <= denominator bin
        CheckNegatives(num, den, True)

        # Create Efficiency plots with Clopper-Pearson stats
        eff = ROOT.TEfficiency(num, den)  # fixme: investigate warnings
        eff.SetStatisticOption(ROOT.TEfficiency.kFCP)

        # Convert to TGraph
        gEff = convert2TGraph(eff)

        # Apply default style (according to dataset name)
        plots._plotStyles[datasetName].apply(gEff)

        # Append in list
        myList.append(histograms.HistoGraph(gEff, plots._legendLabels[datasetName], "lp", "P"))

    # Define save name
    saveName = "Eff_" + numPath.split("/")[-1] + "Over" + denPath.split("/")[-1]

    # Plot the efficiency
    p = plots.PlotBase(datasetRootHistos=myList, saveFormats=[])
    plots.drawPlot(p, saveName, **_kwargs)

    # Save plot in all formats; the MVA cut label is appended to the directory
    savePath = os.path.join(opts.saveDir, numPath.split("/")[0], opts.optMode)
    save_path = savePath + opts.MVAcut
    SavePlot(p, saveName, save_path, saveFormats=[".png", ".pdf", ".C"])
    return
def PlotEfficiency(datasetsMgr, numPath, denPath):
    """Compare the efficiency of every dataset against the nominal "TT" one.

    For each dataset of datasetsMgr.getAllDatasets() the histograms stored at
    numPath (passed) and denPath (total) are fetched, optionally rebinned to
    the "binList" entry of the plot kwargs, and turned into a
    ROOT.TEfficiency with Clopper-Pearson intervals that is converted to a
    graph through convert2TGraph. The dataset named "TT" provides the
    reference graph, all other datasets are compared to it in a
    plots.ComparisonManyPlot, and the plot is written by SavePlot as .png,
    .pdf and .C.

    Relies on the module-level names opts, GetHistoKwargs, convert2TGraph
    and SavePlot.

    Args:
        datasetsMgr: dataset manager providing getAllDatasets().
        numPath: path of the numerator histogram inside each dataset.
        denPath: path of the denominator histogram inside each dataset.

    Returns:
        None

    Raises:
        RuntimeError: if no dataset named "TT" passed the sanity checks, so
            that there is no reference graph to compare against.
    """
    _kwargs = GetHistoKwargs(numPath, opts)

    # Optional variable-width binning. A missing "binList" entry means
    # "do not rebin" rather than a KeyError.
    xBins = _kwargs.get("binList")
    nx = len(xBins) - 1 if xBins is not None else 0

    def _getHisto(dset, path):
        # Fetch one histogram, rebinned to xBins when a binning was requested
        histo = dset.getDatasetRootHisto(path).getHistogram()
        if nx > 0:
            histo = histo.Rebin(nx, "", xBins)
        return histo

    def _getEffGraph(passed, total):
        # Efficiency with Clopper-Pearson stats, converted to a graph
        eff = ROOT.TEfficiency(passed, total)
        eff.SetStatisticOption(ROOT.TEfficiency.kFCP)
        return convert2TGraph(eff)

    # Dataset-name fragments and their legend text, applied in this order
    legendSubstitutions = [
        ("TT_", "t#bar{t} ("),
        ("isr", "ISR "),
        ("fsr", "FSR "),
        ("hdamp", "hdamp "),
        ("DOWN", "down"),
        ("UP", "up"),
        ("mtop1665", "m_{t} = 166.5 GeV"),
        ("mtop1695", "m_{t} = 169.5 GeV"),
        ("mtop1715", "m_{t} = 171.5 GeV"),
        ("mtop1735", "m_{t} = 173.5 GeV"),
        ("mtop1755", "m_{t} = 175.5 GeV"),
        ("mtop1785", "m_{t} = 178.5 GeV"),
        ("TuneEE5C", "Herwig++"),
    ]

    refGraph = None
    myList = []
    counter = 0

    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():
        num = _getHisto(dataset, numPath)
        den = _getHisto(dataset, denPath)

        # Sanity checks
        if den.GetEntries() == 0 or num.GetEntries() == 0:
            continue
        if num.GetEntries() > den.GetEntries():
            continue

        if dataset.getName() == "TT":
            # The nominal sample is the reference of the comparison; it is
            # built once here instead of once per loop iteration.
            gEffRef = _getEffGraph(num, den)
            styles.ttStyle.apply(gEffRef)
            if opts.type == "partonShower":
                legend_ref = "t#bar{t} (Pythia8)"
            elif opts.type == "evtGen":
                legend_ref = "t#bar{t} (Powheg)"
            else:
                legend_ref = "t#bar{t}"
            refGraph = histograms.HistoGraph(gEffRef, legend_ref, "p", "P")
        else:
            gEff = _getEffGraph(num, den)
            # One marker style per compared dataset, in iteration order
            styles.markerStyles[counter].apply(gEff)
            counter += 1

            legend = dataset.getName()
            for old, new in legendSubstitutions:
                legend = legend.replace(old, new)
            legend += ")"
            myList.append(histograms.HistoGraph(gEff, legend, "p", "P"))

    if refGraph is None:
        raise RuntimeError("PlotEfficiency: no usable \"TT\" reference dataset "
                           "for %s / %s" % (numPath, denPath))

    # Define stuff
    saveName = "Efficiency_%s_%s" % (opts.folder, opts.type)
    saveName = saveName.replace("__", "_Inclusive_")

    # Plot the efficiency
    p = plots.ComparisonManyPlot(refGraph, myList, saveFormats=[])
    savePath = os.path.join(opts.saveDir, opts.optMode)
    # NOTE(review): savePath (not saveName) is passed as the second argument,
    # exactly as before - confirm this is intended.
    plots.drawPlot(p, savePath, **_kwargs)

    # Save plot in all formats
    SavePlot(p, saveName, savePath, saveFormats=[".png", ".pdf", ".C"])
    return
def PlotEfficiency(datasetsMgr, numPath, denPath, intLumi):
    """Compare the numPath / denPath efficiency of all datasets to a reference.

    For each dataset of datasetsMgr.getAllDatasets() the numerator and
    denominator histograms are normalised to intLumi, optionally rebinned to
    the "binList" entry of the plot kwargs and combined into a
    ROOT.TEfficiency with Clopper-Pearson intervals, which is converted to a
    graph via convert2TGraph. The dataset whose lower-cased name contains
    "m_500" is the reference of a plots.ComparisonManyPlot; all other
    datasets are compared to it. The plot is written by SavePlot as .png, .C
    and .pdf.

    Datasets with an empty numerator or denominator, or with more numerator
    than denominator entries, are skipped. "error" is printed for every bin
    whose numerator content exceeds the denominator content.

    Relies on the module-level names opts, GetHistoKwargs, convert2TGraph
    and SavePlot.

    Args:
        datasetsMgr: dataset manager providing getAllDatasets().
        numPath: path of the numerator histogram inside each dataset.
        denPath: path of the denominator histogram inside each dataset.
        intLumi: integrated luminosity handed to normalizeToLuminosity().

    Returns:
        None

    Raises:
        RuntimeError: if no "m_500" dataset passed the sanity checks, so
            that there is no reference graph to compare against.
    """
    _kwargs = GetHistoKwargs(denPath, opts)

    # Variable-width rebinning needs at least two bin edges; anything shorter
    # would ask ROOT for a non-positive number of bins.
    xBins = _kwargs.get("binList")
    doRebin = xBins is not None and len(xBins) > 1

    eff_ref = None
    myList = []
    counter = 0

    # For-loop: All datasets
    for dataset in datasetsMgr.getAllDatasets():
        datasetName = dataset.getName()

        # Get the histograms
        n = dataset.getDatasetRootHisto(numPath)
        n.normalizeToLuminosity(intLumi)
        num = n.getHistogram()

        d = dataset.getDatasetRootHisto(denPath)
        d.normalizeToLuminosity(intLumi)
        den = d.getHistogram()

        if doRebin:
            nx = len(xBins) - 1
            num = num.Rebin(nx, "", xBins)
            den = den.Rebin(nx, "", xBins)

        # Flag every bin where more events pass than were counted in total
        for i in range(1, num.GetNbinsX() + 1):
            if num.GetBinContent(i) > den.GetBinContent(i):
                print("error")

        # Sanity checks
        if den.GetEntries() == 0 or num.GetEntries() == 0:
            continue
        if num.GetEntries() > den.GetEntries():
            continue

        # Create Efficiency plots with Clopper-Pearson stats
        eff = ROOT.TEfficiency(num, den)  # fixme: investigate warnings
        eff.SetStatisticOption(ROOT.TEfficiency.kFCP)

        # Convert to TGraph
        gEff = convert2TGraph(eff)

        # Apply default style (according to dataset name)
        plots._plotStyles[datasetName].apply(gEff)

        # Signal samples get their own marker style on top of the default one
        if "charged" in datasetName.lower():
            counter += 1
            mass = datasetName.split("M_")[-1]
            styles.markerStyles[counter].apply(gEff)
            # NOTE(review): the dedicated signal style and the line/marker
            # tweaks are applied to the 300 and 650 mass points only - confirm.
            if "300" in mass or "650" in mass:
                s = styles.getSignalStyleHToTB_M(mass)
                s.apply(gEff)
                gEff.SetLineStyle(ROOT.kSolid)
                gEff.SetLineWidth(3)
                gEff.SetMarkerSize(1.2)

        # The m_500 sample is the reference, everything else is compared to it
        graph = histograms.HistoGraph(gEff, plots._legendLabels[datasetName], "lp", "P")
        if "m_500" in datasetName.lower():
            eff_ref = graph
        else:
            myList.append(graph)

    if eff_ref is None:
        raise RuntimeError("PlotEfficiency: no usable \"m_500\" reference dataset "
                           "for %s / %s" % (numPath, denPath))

    # Define save name
    saveName = "Eff_" + numPath.split("/")[-1] + "Over" + denPath.split("/")[-1]

    # Plot the efficiency
    p = plots.ComparisonManyPlot(eff_ref, myList, saveFormats=[])
    plots.drawPlot(p, saveName, **_kwargs)

    # Save plot in all formats
    savePath = os.path.join(opts.saveDir, numPath.split("/")[0], opts.optMode)
    SavePlot(p, saveName, savePath, saveFormats=[".png", ".C", ".pdf"])
    return