def main(filename1, filename2, outdir, regexp, verbose):
    """Plot a comparison for every selected histogram shared by two ROOT files.

    Args:
        filename1, filename2: paths of the two ROOT files to compare.
        outdir: output directory; when falsy it is obtained from
            guessOutdirFromInputs(filename1, filename2).
        regexp: when not None, the common histogram names are selected with
            filterWithRegexp; otherwise only the names containing one of the
            default variables and one of the default selections are kept.
        verbose: print progress information; also forwarded to plotComparison.
    """
    # there are too many histos; by default compare only the ones vs. pt
    # for few selections
    relevantHistograms = ['l_pt_den', 'l_pt_num']
    relevantSelections = ['CRPremT2', 'CR_SSInc', 'CR_WHSS']
    outdir = outdir if outdir else guessOutdirFromInputs(filename1, filename2)
    if verbose:
        print("saving output plots to '%s'" % outdir)
    file1, file2 = r.TFile.Open(filename1), r.TFile.Open(filename2)
    histonames1 = getAllHistoNames(file1, onlyTH1=True)
    histonames2 = getAllHistoNames(file2, onlyTH1=True)
    if verbose:
        print('\n'.join("%s: %d histograms" % (f, len(hs))
                        for f, hs in [(filename1, histonames1),
                                      (filename2, histonames2)]))
    # sets give O(1) membership tests; the order of file1 is preserved
    names1, names2 = set(histonames1), set(histonames2)
    commonHistos = [h for h in histonames1 if h in names2]
    missFrom1 = list(names2 - names1)
    missFrom2 = list(names1 - names2)
    # Report whenever either file lacks something: comparing only the number
    # of common names with len(histonames2) misses the case where file2 is a
    # strict subset of file1.
    if missFrom1 or missFrom2:
        print('histograms not in common:\n'
              + "%d missing from %s\n" % (len(missFrom1), filename1)
              + "%d missing from %s\n" % (len(missFrom2), filename2))
    label1, label2 = labelFromFilename(filename1), labelFromFilename(filename2)
    if regexp is not None:
        commonHistos = filterWithRegexp(commonHistos, regexp)
    else:
        commonHistos = [h for h in commonHistos
                        if any(s in h for s in relevantHistograms)
                        and any(s in h for s in relevantSelections)]
    canvas = r.TCanvas('diff_fakeMatrix', 'diff_fakeMatrix')
    for h in commonHistos:
        outname = outdir + '/' + h
        h1, h2 = file1.Get(h), file2.Get(h)
        plotComparison(h1, h2, canvas, outname, label1, label2, verbose)
def processFile(filename, outdir, label=''):
    """Draw each num/den probability graph built from a ROOT file and save it.

    The TH1 names found in the file are paired with
    findHistonamePairs(..., '_num', '_den'); each pair is passed to
    buildProbHisto and falsy results are dropped. Every remaining graph is
    drawn with its y axis restricted to [0.0, 1.1] and a reference line
    spanning the x axis, then saved as
    '<outdir>/<graph name>_<input basename with .root replaced by .png>'.

    Args:
        filename: path of the input ROOT file.
        outdir: directory where the images are written.
        label: optional text appended to each graph title.
    """
    # not named 'file', so that the builtin is not shadowed
    infile = r.TFile.Open(filename)
    # Swap only the extension: str.replace would also rewrite a '.root'
    # occurring in the middle of the file name.
    basename = os.path.basename(filename)
    stem, ext = os.path.splitext(basename)
    imageName = stem + '.png' if ext == '.root' else basename
    histonames = getAllHistoNames(infile, onlyTH1=True)
    probHistos = [buildProbHisto(infile, n, d)
                  for n, d in findHistonamePairs(histonames, '_num', '_den')]
    # remove empty graphs that make TAxis complain
    probHistos = [ph for ph in probHistos if ph]
    c = r.TCanvas('p_tight', '')
    for ph in probHistos:
        c.cd()
        c.Clear()
        if label:
            # "<title>, <label>", without a stray blank before the comma
            ph.SetTitle("%s, %s" % (ph.GetTitle(), label))
        ph.Draw('ap')
        xAx, yAx = ph.GetXaxis(), ph.GetYaxis()
        yAx.SetRangeUser(0.0, 1.1)
        l = referenceLine(xAx.GetXmin(), xAx.GetXmax())
        l.Draw()
        c.Update()
        # plain concatenation: a '%' in outdir or in the file name cannot
        # break the path the way a %-template would
        c.SaveAs(outdir + '/' + ph.GetName() + '_' + imageName)
def main(filename1, filename2, outdir, regexp, verbose):
    """Plot a comparison for every selected histogram shared by two ROOT files.

    Args:
        filename1, filename2: paths of the two ROOT files to compare.
        outdir: output directory; when falsy it is obtained from
            guessOutdirFromInputs(filename1, filename2).
        regexp: when not None, the common histogram names are selected with
            filterWithRegexp; otherwise only the names containing one of the
            default variables and one of the default selections are kept.
        verbose: print progress information; also forwarded to plotComparison.
    """
    # there are too many histos; by default compare only the ones vs. pt
    # for few selections
    relevantHistograms = ['l_pt_den', 'l_pt_num']
    relevantSelections = ['CRPremT2', 'CR_SSInc', 'CR_WHSS']
    outdir = outdir if outdir else guessOutdirFromInputs(filename1, filename2)
    if verbose:
        print("saving output plots to '%s'" % outdir)
    file1, file2 = r.TFile.Open(filename1), r.TFile.Open(filename2)
    histonames1 = getAllHistoNames(file1, onlyTH1=True)
    histonames2 = getAllHistoNames(file2, onlyTH1=True)
    if verbose:
        print('\n'.join("%s: %d histograms" % (f, len(hs))
                        for f, hs in [(filename1, histonames1),
                                      (filename2, histonames2)]))
    # sets give O(1) membership tests; the order of file1 is preserved
    names1, names2 = set(histonames1), set(histonames2)
    commonHistos = [h for h in histonames1 if h in names2]
    missFrom1 = list(names2 - names1)
    missFrom2 = list(names1 - names2)
    # Report whenever either file lacks something: comparing only the number
    # of common names with len(histonames2) misses the case where file2 is a
    # strict subset of file1.
    if missFrom1 or missFrom2:
        print('histograms not in common:\n'
              + "%d missing from %s\n" % (len(missFrom1), filename1)
              + "%d missing from %s\n" % (len(missFrom2), filename2))
    label1, label2 = labelFromFilename(filename1), labelFromFilename(filename2)
    if regexp is not None:
        commonHistos = filterWithRegexp(commonHistos, regexp)
    else:
        commonHistos = [h for h in commonHistos
                        if any(s in h for s in relevantHistograms)
                        and any(s in h for s in relevantSelections)]
    canvas = r.TCanvas('diff_fakeMatrix', 'diff_fakeMatrix')
    for h in commonHistos:
        outname = outdir + '/' + h
        h1, h2 = file1.Get(h), file2.Get(h)
        plotComparison(h1, h2, canvas, outname, label1, label2, verbose)
def mergeFiles(targetFile, filenames, regex):
    """Write to targetFile the average over the input files of each histogram.

    The histogram names are taken from the first input file and restricted to
    those matching `regex` (re.search). Each output histogram is a clone of
    the one in the first file, scaled by 1/N, to which the same-named
    histograms of the other files are added with weight 1/N, N being the
    number of input files.

    Args:
        targetFile: path of the output ROOT file (opened with 'recreate').
        filenames: paths of the input ROOT files.
        regex: pattern selecting the histograms to merge.

    Raises:
        ValueError: if `filenames` is empty.
    """
    filenames = list(filenames)
    if not filenames:
        # fail with a clear message instead of an incidental IndexError
        raise ValueError("mergeFiles needs at least one input file")
    infiles = [r.TFile.Open(f) for f in filenames]
    pattern = re.compile(regex)  # compiled once, outside the loop
    histonames = [h for h in getAllHistoNames(infiles[0]) if pattern.search(h)]
    scale = 1.0 / len(infiles)
    outFile = r.TFile.Open(targetFile, 'recreate')
    outFile.cd()
    for hn in histonames:
        h = infiles[0].Get(hn).Clone()
        h.Scale(scale)
        for fname, inf in zip(filenames[1:], infiles[1:]):
            other = inf.Get(hn)
            if not other:
                # Get returned a null object: do not hand it over to Add
                print("warning: histogram '%s' not found in %s, skipping it"
                      % (hn, fname))
                continue
            h.Add(other, scale)
        h.Write()
    outFile.Close()
    # release the inputs as well, now that everything has been written out
    for inf in infiles:
        inf.Close()
def main(filenameOrig, filenameDest, substr1, substr2):
    """Copy the histograms of one ROOT file to another, swapping two substrings.

    Each histogram read from filenameOrig is written to filenameDest under
    its original name with substr1 replaced by substr2 or, when substr1 does
    not occur in the name, with substr2 replaced by substr1. The number of
    names that actually changed is printed at the end.
    """
    source = r.TFile.Open(filenameOrig)
    names = getAllHistoNames(source)
    dest = r.TFile.Open(filenameDest, 'recreate')
    dest.cd()
    renamed = 0
    for oldName in names:
        histo = source.Get(oldName)
        # only one direction is applied per name,
        # to avoid swapping twice s1->s2->s1
        if substr1 in oldName:
            newName = oldName.replace(substr1, substr2)
        else:
            newName = oldName.replace(substr2, substr1)
        histo.Write(newName)
        if newName != oldName:
            renamed += 1
    print("renamed %d histograms" % renamed)
    dest.Close()
    source.Close()
def mergeFiles(targetFile, filenames, regex):
    """Write to targetFile the average over the input files of each histogram.

    The histogram names are taken from the first input file and restricted to
    those matching `regex` (re.search). Each output histogram is a clone of
    the one in the first file, scaled by 1/N, to which the same-named
    histograms of the other files are added with weight 1/N, N being the
    number of input files.

    Args:
        targetFile: path of the output ROOT file (opened with 'recreate').
        filenames: paths of the input ROOT files.
        regex: pattern selecting the histograms to merge.

    Raises:
        ValueError: if `filenames` is empty.
    """
    filenames = list(filenames)
    if not filenames:
        # fail with a clear message instead of an incidental IndexError
        raise ValueError("mergeFiles needs at least one input file")
    infiles = [r.TFile.Open(f) for f in filenames]
    pattern = re.compile(regex)  # compiled once, outside the loop
    histonames = [h for h in getAllHistoNames(infiles[0]) if pattern.search(h)]
    scale = 1.0 / len(infiles)
    outFile = r.TFile.Open(targetFile, 'recreate')
    outFile.cd()
    for hn in histonames:
        h = infiles[0].Get(hn).Clone()
        h.Scale(scale)
        for fname, inf in zip(filenames[1:], infiles[1:]):
            other = inf.Get(hn)
            if not other:
                # Get returned a null object: do not hand it over to Add
                print("warning: histogram '%s' not found in %s, skipping it"
                      % (hn, fname))
                continue
            h.Add(other, scale)
        h.Write()
    outFile.Close()
    # release the inputs as well, now that everything has been written out
    for inf in infiles:
        inf.Close()
def main(filenameOrig, filenameDest, substr1, substr2):
    """Copy the histograms of one ROOT file to another, swapping two substrings.

    Each histogram read from filenameOrig is written to filenameDest under
    its original name with substr1 replaced by substr2 or, when substr1 does
    not occur in the name, with substr2 replaced by substr1. The number of
    names that actually changed is printed at the end.
    """
    source = r.TFile.Open(filenameOrig)
    names = getAllHistoNames(source)
    dest = r.TFile.Open(filenameDest, 'recreate')
    dest.cd()
    renamed = 0
    for oldName in names:
        histo = source.Get(oldName)
        # only one direction is applied per name,
        # to avoid swapping twice s1->s2->s1
        if substr1 in oldName:
            newName = oldName.replace(substr1, substr2)
        else:
            newName = oldName.replace(substr2, substr1)
        histo.Write(newName)
        if newName != oldName:
            renamed += 1
    print("renamed %d histograms" % renamed)
    dest.Close()
    source.Close()
def processFile(filename, outdir, label=''):
    """Draw each num/den probability graph built from a ROOT file and save it.

    The TH1 names found in the file are paired with
    findHistonamePairs(..., '_num', '_den'); each pair is passed to
    buildProbHisto and falsy results are dropped. Every remaining graph is
    drawn with its y axis restricted to [0.0, 1.1] and a reference line
    spanning the x axis, then saved as
    '<outdir>/<graph name>_<input basename with .root replaced by .png>'.

    Args:
        filename: path of the input ROOT file.
        outdir: directory where the images are written.
        label: optional text appended to each graph title.
    """
    # not named 'file', so that the builtin is not shadowed
    infile = r.TFile.Open(filename)
    # Swap only the extension: str.replace would also rewrite a '.root'
    # occurring in the middle of the file name.
    basename = os.path.basename(filename)
    stem, ext = os.path.splitext(basename)
    imageName = stem + '.png' if ext == '.root' else basename
    histonames = getAllHistoNames(infile, onlyTH1=True)
    probHistos = [buildProbHisto(infile, n, d)
                  for n, d in findHistonamePairs(histonames, '_num', '_den')]
    # remove empty graphs that make TAxis complain
    probHistos = [ph for ph in probHistos if ph]
    c = r.TCanvas('p_tight', '')
    for ph in probHistos:
        c.cd()
        c.Clear()
        if label:
            # "<title>, <label>", without a stray blank before the comma
            ph.SetTitle("%s, %s" % (ph.GetTitle(), label))
        ph.Draw('ap')
        xAx, yAx = ph.GetXaxis(), ph.GetYaxis()
        yAx.SetRangeUser(0.0, 1.1)
        l = referenceLine(xAx.GetXmin(), xAx.GetXmax())
        l.Draw()
        c.Update()
        # plain concatenation: a '%' in outdir or in the file name cannot
        # break the path the way a %-template would
        c.SaveAs(outdir + '/' + ph.GetName() + '_' + imageName)
'pickleFile']]) print 'Input files:\n'+'\n'.join(inputFileNames) # navigate the files and collect the histos referenceType = HistoType(pr='', ch=channel, var=referenceHisto, syst=referenceSyst) histosByType = collections.defaultdict(list) classifier = HistoNameClassifier() histoNames = [] for fname, infile in zip(inputFileNames, inputFiles) : sample = guessGroupFromFilename(fname) setType, setSample = setHistoType, setHistoSample def getType(histoName) : return classifier.histoType(histoName) def isRightType(histo) : return referenceType.matchAllAvailabeAttrs(histo.type) histonamesCached = len(histoNames)>0 if not histonamesCached : histoNames = getAllHistoNames(infile, onlyTH1=True, nameStem=histoname) histos = filter(isRightType, map(lambda hn : setSample(setType(infile.Get(hn), getType(hn)), sample), histoNames)) if not histonamesCached : histoNames = [h.GetName() for h in histos] # after filtering organizeHistosByType(histosByType, histos) refHistos = histosByType # already filtered histonames, all histosByType are refHistos allSamples = sorted(list(set([h.sample for histos in refHistos.values() for h in histos]))) allSamples += ['totbkg'] if printTotBkg else [] allSelects = sorted(list(set([k.pr for k in histosByType.keys()]))) if verbose : print 'allSamples : ',allSamples if verbose : print 'allSelects : ',allSelects # get the counts (adding up what needs to be merged by samplename) sampleCountsPerSel = dict() # counts[sample][sel] countsSampleSel = dict([(s, collections.defaultdict(float)) for s in allSamples])
plotRegions = options.regions.split(',') referenceSyst = options.syst verbose = options.verbose assert channel in validChannels,"Invalid channel %s (should be one of %s)" % (channel, str(validChannels)) inputFileNames = glob.glob(inputDir+'/'+'*'+prodTag+'*.root') + glob.glob(signalFname) inputFiles = [r.TFile.Open(f) for f in inputFileNames] assert len(inputFileNames)==len(inputFiles),"Cannot open some of the input files" refHistoType = HistoType(pr='', ch=channel, var=referenceHisto, syst=referenceSyst) histosByType = collections.defaultdict(list) classifier = HistoNameClassifier() for fname, infile in zip(inputFileNames, inputFiles) : samplename = guessSampleFromFilename(fname) histoNames = [n for n in getAllHistoNames(infile, onlyTH1=True) if refHistoType.matchAllAvailabeAttrs( classifier.histoType( n ) )] histos = [infile.Get(hn) for hn in histoNames] for h in histos : setHistoType(h, classifier.histoType(h.GetName())) setHistoSample(h, samplename) histos = [h for h in histos if h.type.pr in plotRegions] organizeHistosByType(histosByType, histos) refHistos = histosByType # already filtered histonames, all histosByType are refHistos def isSignal(sampleName) : return 'WH_' in sampleName allSamples = list(set([h.sample for histos in refHistos.values() for h in histos])) allBkgNames = [s for s in allSamples if not isSignal(s)] sigName = next(s for s in allSamples if isSignal(s)) if verbose : print '\n'.join("%s : %s" % (s,l) for s,l in zip(['bkg','sig'], [str(allBkgNames), sigName]))
'*.root') + glob.glob(signalFname) inputFiles = [r.TFile.Open(f) for f in inputFileNames] assert len(inputFileNames) == len( inputFiles), "Cannot open some of the input files" refHistoType = HistoType(pr='', ch=channel, var=referenceHisto, syst=referenceSyst) histosByType = collections.defaultdict(list) classifier = HistoNameClassifier() for fname, infile in zip(inputFileNames, inputFiles): samplename = guessSampleFromFilename(fname) histoNames = [ n for n in getAllHistoNames(infile, onlyTH1=True) if refHistoType.matchAllAvailabeAttrs(classifier.histoType(n)) ] histos = [infile.Get(hn) for hn in histoNames] for h in histos: setHistoType(h, classifier.histoType(h.GetName())) setHistoSample(h, samplename) histos = [h for h in histos if h.type.pr in plotRegions] organizeHistosByType(histosByType, histos) refHistos = histosByType # already filtered histonames, all histosByType are refHistos def isSignal(sampleName): return 'WH_' in sampleName
signalScale = options.sigScale justTest = options.test verbose = options.verbose inputFileNames = glob.glob(inputDir+'/'+'*'+prodTag+'*.root') + glob.glob(signalFname) print 'input files:\n'+'\n'.join(inputFileNames) inputFiles = [r.TFile.Open(f) for f in inputFileNames] histosByType = collections.defaultdict(list) classifier = HistoNameClassifier() for fname, infile in zip(inputFileNames, inputFiles) : print '-'*3 + fname + '-'*3 samplename = guessSampleFromFilename(fname) histoNames = getAllHistoNames(inputFiles[0], onlyTH1=True) histoNames = [h for h in histoNames if any([h.startswith(p) for p in ['sr6', 'sr7', 'sr8', 'sr9']])] if justTest : histoNames = histoNames[:10] # just get 10 histos to run quick tests histos = [infile.Get(hn) for hn in histoNames] for h in histos : setHistoType(h, classifier.histoType(h.GetName())) setHistoSample(h, samplename) organizeHistosByType(histosByType, histos) def isSignal(sampleName) : return 'WH_' in sampleName def cumsum(l, leftToRight=True) : #return numpy.cumsum(l) # not available ? return [sum(l[:i]) for i in range(1,len(l)+1)] if leftToRight \ else [sum(l[-i:]) for i in range(1,len(l)+1)][::-1] def mergeOuter(bc, nOuter=2) : # add over/underflow in the first/last bin
def printHistoNames(inputFile):
    """Print the name of inputFile, then the histogram names it holds.

    The names returned by getAllHistoNames(inputFile) are printed one per
    line.
    """
    # joined with a blank, as the print statement does for
    # comma-separated items
    header = ' '.join(('histograms from ', inputFile.GetName()))
    print(header)
    listing = '\n'.join(getAllHistoNames(inputFile))
    print(listing)