class CachePlot(object):
    """Cache the trees of one sample identifier for a set of plot regions.

    For every (sub)sample belonging to ``sampleIdentifier`` and every requested
    region a ``TreeCache`` object is set up with the combined cuts and the list
    of branches to keep. Parts which are not cached yet (or all parts if
    ``forceRedo`` is set) are connected to one shared ``SampleTree``, which is
    processed once at the end of ``run()``.
    """

    def __init__(self, config, sampleIdentifier, regions, splitFilesChunks=1, chunkNumber=1, splitFilesChunkSize=-1, forceRedo=False, fileList=None):
        """Read sample and region definitions from the configuration.

        Args:
            config: configuration object; ``get``, ``has_option`` and
                ``sections`` are used.
            sampleIdentifier: identifier of the sample whose subsamples are cached.
            regions: iterable of region names; duplicates are removed. Each
                region needs an entry in the ``Cuts`` config section.
            splitFilesChunks: passed on to ``TreeCache``.
            chunkNumber: number of the chunk of input files to process.
            splitFilesChunkSize: passed on to ``TreeCache`` and ``SampleTree``.
            forceRedo: if true, already cached parts are deleted and redone.
            fileList: optional file list in the form accepted by
                ``FileList.decompress``; if given, ``run()`` compares it with
                the files found for the chunk at run time.
        """
        self.config = config
        self.sampleIdentifier = sampleIdentifier
        # remove duplicate regions (the original order is not preserved by set())
        self.regions = list(set(regions))
        self.forceRedo = forceRedo
        self.sampleTree = None

        self.samplesPath = self.config.get('Directories', 'plottingSamples')
        self.samplesInfo = ParseInfo(samples_path=self.samplesPath, config=self.config)
        self.sampleFilesFolder = self.config.get('Directories', 'samplefiles')

        # NOTE(review): the sample lists are eval()'d from the config file, so the
        # config has to come from a trusted source.
        self.sampleNames = list(eval(self.config.get('Plot_general', 'samples')))
        self.dataNames = list(eval(self.config.get('Plot_general', 'Data')))
        self.samples = self.samplesInfo.get_samples(self.sampleNames + self.dataNames)

        # region name -> {'cut': <cut string from the 'Cuts' section>}
        self.regionsDict = {}
        for region in self.regions:
            self.regionsDict[region] = {'cut': config.get('Cuts', region)}

        # settings which part of the input files to process
        self.splitFilesChunkSize = splitFilesChunkSize
        self.splitFilesChunks = splitFilesChunks
        self.chunkNumber = chunkNumber
        self.fileList = FileList.decompress(fileList) if fileList else None

        # load the library with the functions used in the cut/weight expressions
        VHbbNameSpace = config.get('VHbbNameSpace', 'library')
        returnCode = ROOT.gSystem.Load(VHbbNameSpace)
        if returnCode != 0:
            print("\x1b[31mERROR: loading VHbbNameSpace failed with code %d\x1b[0m" % returnCode)
        else:
            print("INFO: loaded VHbbNameSpace: %s" % VHbbNameSpace)

    def printInfo(self):
        """Print all regions together with their cut."""
        print("REGION:".ljust(24), "CUT:")
        # items() instead of iteritems(): works with Python 2 and Python 3
        for region, regionInfo in self.regionsDict.items():
            print(" > ", region.ljust(20), regionInfo['cut'])

    def _collectBranchesToKeep(self):
        """Return the branches to keep in the cache, as computed by BranchList."""
        # keep additional branches for plotting; both options are optional
        # NOTE(review): eval() of config values, the config has to be trusted.
        try:
            keepBranchesPlot = eval(self.config.get('Branches', 'keep_branches_plot'))
        except Exception:
            keepBranchesPlot = []
        try:
            keepBranchesPlot += eval(self.config.get('Branches', 'keep_branches'))
        except Exception:
            # best effort: no (valid) 'keep_branches' option, nothing to add
            pass

        # also keep some branches which might be used later in variables definition and weights
        try:
            for section in self.config.sections():
                try:
                    if section.startswith('plotDef:') and self.config.has_option(section, 'relPath'):
                        keepBranchesPlot.append(self.config.get(section, 'relPath'))
                except Exception as e:
                    # a broken section is reported and skipped
                    print("\x1b[31mWARNING: config error in:", section, "=>", e, "\x1b[0m")
        except Exception as e2:
            print("\x1b[31mERROR: config file contains an error! automatic selection of branches to keep will not work!\x1b[0m")
            print(e2)

        try:
            keepBranchesPlot.append(self.config.get('Weights', 'weightF'))
        except Exception:
            # best effort: the weight expression is optional here
            pass

        # plotting region cut
        for regionInfo in self.regionsDict.values():
            keepBranchesPlot.append(regionInfo['cut'])

        return BranchList(keepBranchesPlot).getListOfBranches()

    def _buildSampleCuts(self, sample, region, regionInfo):
        """Return the list of cuts for one (sub)sample in one region."""
        sampleCuts = [sample.subcut]
        if regionInfo['cut']:
            sampleCuts.append(regionInfo['cut'])
        configSection = 'Plot:%s' % region
        if self.config.has_option(configSection, 'Datacut'):
            sampleCuts.append(self.config.get(configSection, 'Datacut'))
        if self.config.has_option('Plot_general', 'addBlindingCut'):
            # append the configured cut itself, not the boolean returned by has_option()
            sampleCuts.append(self.config.get('Plot_general', 'addBlindingCut'))
        return sampleCuts

    def run(self):
        """Set up the caches for all subsamples/regions and process the sample tree.

        Raises:
            Exception: "CreationOfSampleTreeFailed" if the sample tree could not
                be created, "SampleFilesHaveChanged" if ``fileList`` was given
                and differs from the files found for the chunk now.
        """
        keepBranchesPlotFinal = self._collectBranchesToKeep()
        print("KEEP:", keepBranchesPlotFinal)

        # ----------------------------------------------------------------------------------------------------------------------
        # cache samples
        # ----------------------------------------------------------------------------------------------------------------------
        sampleToCache = self.sampleIdentifier
        print('*' * 80)
        print(' ', sampleToCache)
        print('*' * 80)

        treeCaches = []

        # for all (sub)samples which come from the same files (sampleIdentifier)
        subsamples = [x for x in self.samples if x.identifier == sampleToCache]
        for sample in subsamples:
            for region, regionInfo in self.regionsDict.items():
                sampleCuts = self._buildSampleCuts(sample, region, regionInfo)

                # arbitrary (optional) name for the output tree, used for print-out (the TreeCache object has no idea what it is doing, e.g. dc, plot etc.)
                cacheName = 'plot:{region}_{sample}'.format(region=region, sample=sample.name)

                # add cache object
                tc = TreeCache.TreeCache(
                    name=cacheName,
                    sample=sample.name,
                    cutList=sampleCuts,
                    inputFolder=self.samplesPath,
                    splitFilesChunks=self.splitFilesChunks,
                    chunkNumber=self.chunkNumber,
                    splitFilesChunkSize=self.splitFilesChunkSize,
                    fileList=self.fileList,
                    branches=keepBranchesPlotFinal,
                    config=self.config,
                    debug=True
                )

                # check if this part of the sample is already cached
                isCached = tc.partIsCached()
                if not isCached or self.forceRedo:
                    if isCached:
                        tc.deleteCachedFiles(chunkNumber=self.chunkNumber)

                    # for the first sample which comes from this files, load the tree
                    if not self.sampleTree:
                        self.sampleTree = SampleTree(
                            {'name': sample.identifier, 'folder': self.samplesPath},
                            splitFilesChunkSize=self.splitFilesChunkSize,
                            chunkNumber=self.chunkNumber,
                            config=self.config,
                            saveMemory=True
                        )
                        if not self.sampleTree or not self.sampleTree.tree:
                            print("\x1b[31mERROR: creation of sample tree failed!!\x1b[0m")
                            raise Exception("CreationOfSampleTreeFailed")

                        # consistency check on the file list at submission time and now
                        fileListNow = self.sampleTree.getSampleFileNameChunk(self.chunkNumber)
                        if self.fileList and (sorted(self.fileList) != sorted(fileListNow)):
                            print("\x1b[31mERROR: sample files have changed between submission and run of the job!\x1b[0m")
                            raise Exception("SampleFilesHaveChanged")

                    treeCaches.append(tc.setSampleTree(self.sampleTree).cache())
                else:
                    print("INFO: already cached!", tc, "(", tc.hash, ")")

        if treeCaches:
            # run on the tree
            self.sampleTree.process()
        else:
            print("nothing to do!")
class CachePlot(object):
    """Cache the trees of one sample identifier for a set of plot regions.

    For each (sub)sample of ``sampleIdentifier`` and each region, a
    ``TreeCache`` is created from the sample's subcut, the region cut and the
    optional extra cuts from the config. Parts that still have to be cached are
    attached to a single ``SampleTree`` which ``run()`` processes at the end.
    """

    def __init__(self, config, sampleIdentifier, regions, splitFilesChunks=1, chunkNumber=1, splitFilesChunkSize=-1, forceRedo=False, fileList=None):
        """Read sample and region definitions from the configuration.

        Args:
            config: configuration object; ``get``, ``has_option`` and
                ``sections`` are used.
            sampleIdentifier: identifier of the sample whose subsamples are cached.
            regions: iterable of region names (duplicates are dropped); every
                region needs an entry in the ``Cuts`` config section.
            splitFilesChunks: passed on to ``TreeCache``.
            chunkNumber: number of the chunk of input files to process.
            splitFilesChunkSize: passed on to ``TreeCache`` and ``SampleTree``.
            forceRedo: if true, already cached parts are deleted and redone.
            fileList: optional file list in the form accepted by
                ``FileList.decompress``; if given, ``run()`` checks it against
                the files found for the chunk at run time.
        """
        self.config = config
        self.sampleIdentifier = sampleIdentifier
        # set() removes duplicate regions; their order is not preserved
        self.regions = list(set(regions))
        self.forceRedo = forceRedo
        self.sampleTree = None

        self.samplesPath = self.config.get('Directories', 'plottingSamples')
        self.samplesDefinitions = self.config.get('Directories', 'samplesinfo')
        self.samplesInfo = ParseInfo(self.samplesDefinitions, self.samplesPath)
        self.sampleFilesFolder = self.config.get('Directories', 'samplefiles')

        # NOTE(review): eval() of config values - the config file has to be trusted.
        self.sampleNames = eval(self.config.get('Plot_general', 'samples'))
        self.dataNames = eval(self.config.get('Plot_general', 'Data'))
        self.samples = self.samplesInfo.get_samples(self.sampleNames + self.dataNames)

        # region name -> {'cut': <cut string from the 'Cuts' section>}
        self.regionsDict = {}
        for region in self.regions:
            self.regionsDict[region] = {'cut': config.get('Cuts', region)}

        # settings which part of the input files to process
        self.splitFilesChunkSize = splitFilesChunkSize
        self.splitFilesChunks = splitFilesChunks
        self.chunkNumber = chunkNumber
        self.fileList = FileList.decompress(fileList) if fileList else None

        # load the library configured in [VHbbNameSpace]; a failure is only reported
        VHbbNameSpace = config.get('VHbbNameSpace', 'library')
        returnCode = ROOT.gSystem.Load(VHbbNameSpace)
        if returnCode != 0:
            print("\x1b[31mERROR: loading VHbbNameSpace failed with code %d\x1b[0m" % returnCode)
        else:
            print("INFO: loaded VHbbNameSpace: %s" % VHbbNameSpace)

    def printInfo(self):
        """Print a table of the regions and their cuts."""
        print("REGION:".ljust(24), "CUT:")
        # items() is available in Python 2 and 3, iteritems() only in Python 2
        for region, regionInfo in self.regionsDict.items():
            print(" > ", region.ljust(20), regionInfo['cut'])

    def _getBranchesToKeep(self):
        """Collect all expressions needed for plotting and return the branch list."""
        # keep additional branches for plotting (both config options are optional)
        # NOTE(review): eval() of config values - the config file has to be trusted.
        try:
            keepBranchesPlot = eval(self.config.get('Branches', 'keep_branches_plot'))
        except Exception:
            keepBranchesPlot = []
        try:
            keepBranchesPlot += eval(self.config.get('Branches', 'keep_branches'))
        except Exception:
            # best effort: option missing or not evaluable, nothing is added
            pass

        # also keep some branches which might be used later in variables definition and weights
        try:
            for section in self.config.sections():
                if section.startswith('plotDef:') and self.config.has_option(section, 'relPath'):
                    keepBranchesPlot.append(self.config.get(section, 'relPath'))
        except Exception as e:
            # the first error stops the scan of the remaining sections
            print("\x1b[31mERROR: config file contains an error! automatic selection of branches to keep will not work!\x1b[0m")
            print(e)

        try:
            keepBranchesPlot.append(self.config.get('Weights', 'weightF'))
        except Exception:
            # best effort: the weight expression is optional here
            pass

        # plotting region cut
        for regionInfo in self.regionsDict.values():
            keepBranchesPlot.append(regionInfo['cut'])

        return BranchList(keepBranchesPlot).getListOfBranches()

    def _getSampleCuts(self, sample, region, regionInfo):
        """Return the cuts applied to one (sub)sample in one region."""
        sampleCuts = [sample.subcut]
        if regionInfo['cut']:
            sampleCuts.append(regionInfo['cut'])
        configSection = 'Plot:%s' % region
        if self.config.has_option(configSection, 'Datacut'):
            sampleCuts.append(self.config.get(configSection, 'Datacut'))
        if self.config.has_option('Plot_general', 'addBlindingCut'):
            # use the value of the option; has_option() would only append True
            sampleCuts.append(self.config.get('Plot_general', 'addBlindingCut'))
        return sampleCuts

    def run(self):
        """Create the caches for all subsamples and regions, then process the tree.

        Raises:
            Exception: "CreationOfSampleTreeFailed" if the sample tree could not
                be created, "SampleFilesHaveChanged" if ``fileList`` was given
                and does not match the files found for the chunk now.
        """
        keepBranchesPlotFinal = self._getBranchesToKeep()
        print("KEEP:", keepBranchesPlotFinal)

        # ----------------------------------------------------------------------------------------------------------------------
        # cache samples
        # ----------------------------------------------------------------------------------------------------------------------
        sampleToCache = self.sampleIdentifier
        print('*' * 80)
        print(' ', sampleToCache)
        print('*' * 80)

        treeCaches = []

        # for all (sub)samples which come from the same files (sampleIdentifier)
        subsamples = [x for x in self.samples if x.identifier == sampleToCache]
        for sample in subsamples:
            for region, regionInfo in self.regionsDict.items():
                sampleCuts = self._getSampleCuts(sample, region, regionInfo)

                # arbitrary (optional) name for the output tree, used for print-out (the TreeCache object has no idea what it is doing, e.g. dc, plot etc.)
                cacheName = 'plot:{region}_{sample}'.format(region=region, sample=sample.name)

                # add cache object
                tc = TreeCache.TreeCache(
                    name=cacheName,
                    sample=sample.name,
                    cutList=sampleCuts,
                    inputFolder=self.samplesPath,
                    splitFilesChunks=self.splitFilesChunks,
                    chunkNumber=self.chunkNumber,
                    splitFilesChunkSize=self.splitFilesChunkSize,
                    fileList=self.fileList,
                    branches=keepBranchesPlotFinal,
                    config=self.config,
                    debug=True
                )

                # check if this part of the sample is already cached
                isCached = tc.partIsCached()
                if isCached and not self.forceRedo:
                    print("INFO: already cached!", tc, "(", tc.hash, ")")
                    continue

                if isCached:
                    # forced redo: remove the existing cache files of this chunk first
                    tc.deleteCachedFiles(chunkNumber=self.chunkNumber)

                # for the first sample which comes from this files, load the tree
                if not self.sampleTree:
                    self.sampleTree = SampleTree(
                        {'name': sample.identifier, 'folder': self.samplesPath},
                        splitFilesChunkSize=self.splitFilesChunkSize,
                        chunkNumber=self.chunkNumber,
                        config=self.config,
                        saveMemory=True
                    )
                    if not self.sampleTree or not self.sampleTree.tree:
                        print("\x1b[31mERROR: creation of sample tree failed!!\x1b[0m")
                        raise Exception("CreationOfSampleTreeFailed")

                    # consistency check on the file list at submission time and now
                    fileListNow = self.sampleTree.getSampleFileNameChunk(self.chunkNumber)
                    if self.fileList and (sorted(self.fileList) != sorted(fileListNow)):
                        print("\x1b[31mERROR: sample files have changed between submission and run of the job!\x1b[0m")
                        raise Exception("SampleFilesHaveChanged")

                treeCaches.append(tc.setSampleTree(self.sampleTree).cache())

        if treeCaches:
            # run on the tree
            self.sampleTree.process()
        else:
            print("nothing to do!")
class CacheDatacards(object):
    """Set up and run the tree caching needed to produce datacards.

    One ``Datacard`` object is created per region. ``prepare()`` creates a
    ``TreeCache`` for every region and every subsample of ``sampleToCache``
    and connects the parts that still have to be cached to one shared
    ``SampleTree``; ``run()`` then processes that tree.
    """

    def __init__(self, config, regions, sampleToCache, splitFilesChunks=1, chunkNumber=1, splitFilesChunkSize=-1, forceRedo=False, fileList=None, verbose=False):
        """Store the job settings and create one Datacard object per region."""
        self.config = config
        self.regions = regions
        self.sampleToCache = sampleToCache
        self.forceRedo = forceRedo
        # verbose output is also switched on if XBBDEBUG is set in the environment
        self.verbose = verbose or 'XBBDEBUG' in os.environ

        self.treeCaches = []
        self.sampleTree = None

        # settings which part of input files to process
        self.splitFilesChunks = splitFilesChunks
        self.splitFilesChunkSize = splitFilesChunkSize
        self.chunkNumber = chunkNumber
        if fileList:
            self.fileList = FileList.decompress(fileList)
        else:
            self.fileList = None

        # initialize Datacard objects
        datacards = []
        for regionName in self.regions:
            datacards.append(Datacard(config=self.config, region=regionName))
        self.dcMakers = datacards

    def getAllSamples(self):
        """Return the samples of all datacard regions, each sample name only once.

        This is the minimum list of samples needed to produce all the Datacard
        regions at the same time; the first sample seen with a given name wins.
        """
        uniqueSamples = []
        for maker in self.dcMakers:
            for candidate in maker.getAllSamples():
                alreadyListed = any(known.name == candidate.name for known in uniqueSamples)
                if not alreadyListed:
                    uniqueSamples.append(candidate)
        return uniqueSamples

    def prepare(self):
        """Create the TreeCache objects for all regions and subsamples.

        Returns:
            self, so that calls can be chained.

        Raises:
            Exception: "CreationOfSampleTreeFailed" if the sample tree could not
                be created, "SampleFilesHaveChanged" if a file list was given
                and differs from the files found for the chunk now.
        """
        if len(self.dcMakers) == 0:
            print("WARNING: no datacard regions added, nothing to do.")
            return self

        self.treeCaches = []
        self.sampleTree = None

        # subsamples which come from the same root tree files
        matchingSamples = [s for s in self.getAllSamples() if s.identifier == self.sampleToCache]

        # loop over all datacard regions
        for maker in self.dcMakers:
            for subsample in matchingSamples:
                # combine subcut and systematics cut with logical AND
                # systematics cuts are combined with logical OR, such that 1 cache file can be used for all the systematics
                isDataSample = (subsample.type == 'DATA')
                cacheCuts = {entry['cachecut'] for entry in maker.getSystematicsList(isData=isDataSample)}
                cutTree = {'AND': [subsample.subcut, {'OR': sorted(cacheCuts)}]}
                if self.verbose:
                    print(json.dumps(cutTree, sort_keys=True, indent=8, default=str))

                # make list of branches to keep in root file
                branches = BranchList(subsample.subcut)
                for key in ('cachecut', 'cut', 'var', 'weight'):
                    branches.addCut([entry[key] for entry in maker.getSystematicsList()])
                branches.addCut(self.config.get('Weights', 'weightF'))
                branches.addCut(eval(self.config.get('Branches', 'keep_branches')))
                keptBranches = branches.getListOfBranches()

                # the name is arbitrary (optional) and only used for print-out
                treeCache = TreeCache.TreeCache(
                    name='dc:{region}_{sample}'.format(region=maker.getRegion(), sample=subsample.name),
                    sample=subsample.name,
                    cutList=cutTree,
                    cutSequenceMode='TREE',
                    branches=keptBranches,
                    inputFolder=maker.path,
                    splitFilesChunks=self.splitFilesChunks,
                    chunkNumber=self.chunkNumber,
                    splitFilesChunkSize=self.splitFilesChunkSize,
                    fileList=self.fileList,
                    config=self.config,
                    debug=self.verbose
                )

                # check if this part of the sample is already cached
                alreadyCached = treeCache.partIsCached()
                print("check if sample \x1b[34m{sample}\x1b[0m part {part} is cached:".format(sample=subsample.name, part=self.chunkNumber), alreadyCached)
                if alreadyCached and not self.forceRedo:
                    continue

                if alreadyCached:
                    treeCache.deleteCachedFiles(chunkNumber=self.chunkNumber)

                # for the first sample which comes from this files, load the tree
                if not self.sampleTree:
                    treeSource = {'name': subsample.identifier, 'folder': maker.path}
                    self.sampleTree = SampleTree(
                        treeSource,
                        splitFilesChunkSize=self.splitFilesChunkSize,
                        chunkNumber=self.chunkNumber,
                        config=self.config,
                        saveMemory=True
                    )
                    if not self.sampleTree or not self.sampleTree.tree:
                        print("\x1b[31mERROR: creation of sample tree failed!!\x1b[0m")
                        raise Exception("CreationOfSampleTreeFailed")

                    # consistency check on the file list at submission time and now
                    currentFiles = self.sampleTree.getSampleFileNameChunk(self.chunkNumber)
                    if self.fileList and (sorted(self.fileList) != sorted(currentFiles)):
                        print("\x1b[31mERROR: sample files have changed between submission and run of the job!\x1b[0m")
                        raise Exception("SampleFilesHaveChanged")

                # connect the TreeCache object to the input sampleTree and add it to the list of cached trees
                self.treeCaches.append(treeCache.setSampleTree(self.sampleTree).cache())

        return self

    def run(self):
        """Process the sample tree if prepare() registered at least one cache."""
        if len(self.treeCaches) == 0:
            print("nothing to do!")
            return
        # run on the tree
        self.sampleTree.process()
class CacheDatacards(object):
    """Set up and run the tree caching needed to produce datacards.

    One ``Datacard`` object is created per region. ``prepare()`` creates a
    ``TreeCache`` for every region and every subsample of ``sampleToCache``
    and connects the parts that still have to be cached to one shared
    ``SampleTree``; ``run()`` then processes that tree.
    """

    def __init__(self, config, regions, sampleToCache, splitFilesChunks=1, chunkNumber=1, splitFilesChunkSize=-1, forceRedo=False, fileList=None, verbose=False):
        """Store the job settings and create one Datacard object per region.

        Args:
            config: configuration object, passed on to ``Datacard``,
                ``TreeCache`` and ``SampleTree``; ``get`` is used here.
            regions: iterable of datacard region names.
            sampleToCache: identifier of the sample whose subsamples are cached.
            splitFilesChunks: passed on to ``TreeCache``.
            chunkNumber: number of the chunk of input files to process.
            splitFilesChunkSize: passed on to ``TreeCache`` and ``SampleTree``.
            forceRedo: if true, already cached parts are deleted and redone.
            fileList: optional file list in the form accepted by
                ``FileList.decompress``; if given, ``prepare()`` checks it
                against the files found for the chunk at run time.
            verbose: if true, the cut structure of every cache is printed and
                ``TreeCache`` is created with ``debug=True``.
        """
        self.verbose = verbose
        self.config = config
        self.regions = regions
        self.treeCaches = []
        self.sampleTree = None
        self.sampleToCache = sampleToCache
        self.forceRedo = forceRedo

        # settings which part of input files to process
        self.splitFilesChunkSize = splitFilesChunkSize
        self.splitFilesChunks = splitFilesChunks
        self.chunkNumber = chunkNumber
        self.fileList = FileList.decompress(fileList) if fileList else None

        # initialize Datacard objects
        self.dcMakers = [Datacard(config=self.config, region=region) for region in self.regions]

    def getAllSamples(self):
        """Return the samples of all datacard regions, each sample name only once.

        This is the minimum list of samples needed to produce all the Datacard
        regions at the same time; the first sample seen with a given name wins.
        """
        samples = []
        for dcMaker in self.dcMakers:
            for sample in dcMaker.getAllSamples():
                if not any(x.name == sample.name for x in samples):
                    samples.append(sample)
        return samples

    def prepare(self):
        """Create the TreeCache objects for all regions and subsamples.

        Returns:
            self, so that calls can be chained.

        Raises:
            Exception: "CreationOfSampleTreeFailed" if the sample tree could not
                be created, "SampleFilesHaveChanged" if a file list was given
                and differs from the files found for the chunk now.
        """
        if self.dcMakers:
            self.treeCaches = []
            self.sampleTree = None

            # subsamples which come from the same root tree files
            allSamples = self.getAllSamples()
            subsamples = [x for x in allSamples if x.identifier == self.sampleToCache]

            # loop over all datacard regions
            for dcMaker in self.dcMakers:
                # loop over all subsamples (which come from the same root tree files)
                for sample in subsamples:

                    # combine subcut and systematics cut with logical AND
                    # systematics cuts are combined with logical OR, such that 1 cache file can be used for all the systematics
                    isData = (sample.type == 'DATA')
                    systematicsCuts = sorted(set(x['cachecut'] for x in dcMaker.getSystematicsList(isData=isData)))
                    sampleCuts = {'AND': [sample.subcut, {'OR': systematicsCuts}]}
                    # only dump the cut structure when asked to (was: 'if True or self.verbose')
                    if self.verbose:
                        print(json.dumps(sampleCuts, sort_keys=True, indent=8, default=str))

                    # make list of branches to keep in root file
                    branchList = BranchList(sample.subcut)
                    branchList.addCut([x['cachecut'] for x in dcMaker.getSystematicsList()])
                    branchList.addCut([x['cut'] for x in dcMaker.getSystematicsList()])
                    branchList.addCut([x['var'] for x in dcMaker.getSystematicsList()])
                    branchList.addCut([x['weight'] for x in dcMaker.getSystematicsList()])
                    branchList.addCut(self.config.get('Weights', 'weightF'))
                    # NOTE(review): eval() of a config value - the config file has to be trusted.
                    branchList.addCut(eval(self.config.get('Branches', 'keep_branches')))
                    branchesToKeep = branchList.getListOfBranches()

                    # arbitrary (optional) name for the output tree, used for print-out (the TreeCache object has no idea what it is doing, e.g. dc, plot etc.)
                    cacheName = 'dc:{region}_{sample}'.format(region=dcMaker.getRegion(), sample=sample.name)

                    # add cache object
                    tc = TreeCache.TreeCache(
                        name=cacheName,
                        sample=sample.name,
                        cutList=sampleCuts,
                        cutSequenceMode='TREE',
                        branches=branchesToKeep,
                        inputFolder=dcMaker.path,
                        splitFilesChunks=self.splitFilesChunks,
                        chunkNumber=self.chunkNumber,
                        splitFilesChunkSize=self.splitFilesChunkSize,
                        fileList=self.fileList,
                        config=self.config,
                        debug=self.verbose
                    )

                    # check if this part of the sample is already cached
                    isCached = tc.partIsCached()
                    print("check if sample \x1b[34m{sample}\x1b[0m part {part} is cached:".format(sample=sample.name, part=self.chunkNumber), isCached)
                    if not isCached or self.forceRedo:
                        if isCached:
                            # forced redo: remove the existing cache files of this chunk first
                            tc.deleteCachedFiles(chunkNumber=self.chunkNumber)

                        # for the first sample which comes from this files, load the tree
                        if not self.sampleTree:
                            self.sampleTree = SampleTree(
                                {'name': sample.identifier, 'folder': dcMaker.path},
                                splitFilesChunkSize=self.splitFilesChunkSize,
                                chunkNumber=self.chunkNumber,
                                config=self.config,
                                saveMemory=True
                            )
                            if not self.sampleTree or not self.sampleTree.tree:
                                print("\x1b[31mERROR: creation of sample tree failed!!\x1b[0m")
                                raise Exception("CreationOfSampleTreeFailed")

                            # consistency check on the file list at submission time and now
                            fileListNow = self.sampleTree.getSampleFileNameChunk(self.chunkNumber)
                            if self.fileList and (sorted(self.fileList) != sorted(fileListNow)):
                                print("\x1b[31mERROR: sample files have changed between submission and run of the job!\x1b[0m")
                                raise Exception("SampleFilesHaveChanged")

                        # connect the TreeCache object to the input sampleTree and add it to the list of cached trees
                        self.treeCaches.append(tc.setSampleTree(self.sampleTree).cache())
        else:
            print("WARNING: no datacard regions added, nothing to do.")
        return self

    def run(self):
        """Process the sample tree if prepare() registered at least one cache."""
        if self.treeCaches:
            # run on the tree
            self.sampleTree.process()
        else:
            print("nothing to do!")