Пример #1
0
class CachePlot(object):
    """Create the plot caches of one sample identifier for a set of regions.

    For every subsample which belongs to the given sample identifier and
    every plot region a TreeCache object is set up. Caches which do not exist
    yet (or all of them if ``forceRedo`` is set) are attached to one shared
    SampleTree, which is then processed once at the end of :meth:`run`.
    """

    def __init__(self,
                 config,
                 sampleIdentifier,
                 regions,
                 splitFilesChunks=1,
                 chunkNumber=1,
                 splitFilesChunkSize=-1,
                 forceRedo=False,
                 fileList=None):
        """Read sample and region definitions from the config.

        Args:
            config: config object offering ``get``, ``has_option`` and
                ``sections`` (ConfigParser-like).
            sampleIdentifier: identifier which is compared to the
                ``identifier`` attribute of the samples.
            regions: iterable of region names, duplicates are dropped. Each
                region needs an option of the same name in section ``Cuts``.
            splitFilesChunks: passed on to TreeCache.
            chunkNumber: number of the chunk to process, passed on to
                TreeCache and SampleTree.
            splitFilesChunkSize: passed on to TreeCache and SampleTree.
            forceRedo: if true, existing cached files of this chunk are
                deleted and produced again.
            fileList: compressed file list (decoded with
                ``FileList.decompress``) or None. If given, it is compared to
                the actual list of input files when the tree is loaded.
        """
        self.config = config
        self.sampleIdentifier = sampleIdentifier
        self.regions = list(set(regions))
        self.forceRedo = forceRedo

        self.sampleTree = None
        self.samplesPath = self.config.get('Directories', 'plottingSamples')
        self.samplesInfo = ParseInfo(samples_path=self.samplesPath,
                                     config=self.config)
        self.sampleFilesFolder = self.config.get('Directories', 'samplefiles')

        # NOTE(review): eval() executes the config values as Python code, this
        # is only acceptable as long as the config files are trusted.
        self.sampleNames = list(
            eval(self.config.get('Plot_general', 'samples')))
        self.dataNames = list(eval(self.config.get('Plot_general', 'Data')))
        self.samples = self.samplesInfo.get_samples(self.sampleNames +
                                                    self.dataNames)

        self.regionsDict = {}
        for region in self.regions:
            self.regionsDict[region] = {'cut': config.get('Cuts', region)}
        self.splitFilesChunkSize = splitFilesChunkSize
        self.splitFilesChunks = splitFilesChunks
        self.chunkNumber = chunkNumber
        self.fileList = FileList.decompress(fileList) if fileList else None

        VHbbNameSpace = config.get('VHbbNameSpace', 'library')
        returnCode = ROOT.gSystem.Load(VHbbNameSpace)
        # TSystem::Load returns 0 on success and 1 if the library has already
        # been loaded, only negative codes indicate a failure
        if returnCode < 0:
            print(
                "\x1b[31mERROR: loading VHbbNameSpace failed with code %d\x1b[0m"
                % returnCode)
        else:
            print("INFO: loaded VHbbNameSpace: %s" % VHbbNameSpace)

    def printInfo(self):
        """Print a table of all regions and their cuts."""
        print("REGION:".ljust(24), "CUT:")
        # items() exists in Python 2 and 3, iteritems() only in Python 2
        for region, regionInfo in self.regionsDict.items():
            print(" > ", region.ljust(20), regionInfo['cut'])

    def _getBranchesToKeep(self):
        """Return the list of branches which have to be kept in the caches."""
        # keep additional branches for plotting, both options are optional
        # NOTE(review): eval() on config values, see remark in __init__
        try:
            keepBranchesPlot = eval(
                self.config.get('Branches', 'keep_branches_plot'))
        except Exception:
            keepBranchesPlot = []
        try:
            keepBranchesPlot += eval(
                self.config.get('Branches', 'keep_branches'))
        except Exception:
            pass

        # also keep some branches which might be used later in variables definition and weights
        try:
            for section in self.config.sections():
                try:
                    if section.startswith(
                            'plotDef:') and self.config.has_option(
                                section, 'relPath'):
                        keepBranchesPlot.append(
                            self.config.get(section, 'relPath'))
                except Exception as e:
                    print("\x1b[31mWARNING: config error in:", section, "=>",
                          e, "\x1b[0m")
        except Exception as e2:
            print(
                "\x1b[31mERROR: config file contains an error! automatic selection of branches to keep will not work!\x1b[0m"
            )
            print(e2)

        # the weight expression is optional
        try:
            keepBranchesPlot.append(self.config.get('Weights', 'weightF'))
        except Exception:
            pass

        # plotting region cuts
        for regionInfo in self.regionsDict.values():
            keepBranchesPlot.append(regionInfo['cut'])
        return BranchList(keepBranchesPlot).getListOfBranches()

    def _getSampleCuts(self, sample, region):
        """Return the list of cuts for one subsample in one region."""
        configSection = 'Plot:%s' % region
        sampleCuts = [sample.subcut]

        regionCut = self.regionsDict[region]['cut']
        if regionCut:
            sampleCuts.append(regionCut)
        if self.config.has_option(configSection, 'Datacut'):
            sampleCuts.append(self.config.get(configSection, 'Datacut'))
        if self.config.has_option('Plot_general', 'addBlindingCut'):
            # append the configured cut expression itself, not the boolean
            # returned by has_option()
            sampleCuts.append(
                self.config.get('Plot_general', 'addBlindingCut'))
        return sampleCuts

    def _loadSampleTree(self, sample):
        """Create self.sampleTree for the files of the given sample and validate it."""
        self.sampleTree = SampleTree(
            {
                'name': sample.identifier,
                'folder': self.samplesPath
            },
            splitFilesChunkSize=self.splitFilesChunkSize,
            chunkNumber=self.chunkNumber,
            config=self.config,
            saveMemory=True)
        if not self.sampleTree or not self.sampleTree.tree:
            print("\x1b[31mERROR: creation of sample tree failed!!\x1b[0m")
            raise Exception("CreationOfSampleTreeFailed")

        # consistency check on the file list at submission time and now
        fileListNow = self.sampleTree.getSampleFileNameChunk(self.chunkNumber)
        if self.fileList and (sorted(self.fileList) != sorted(fileListNow)):
            print(
                "\x1b[31mERROR: sample files have changed between submission and run of the job!\x1b[0m"
            )
            raise Exception("SampleFilesHaveChanged")

    def run(self):
        """Set up all missing caches and fill them by processing the tree once.

        Raises:
            Exception: "CreationOfSampleTreeFailed" if the input tree could
                not be created, "SampleFilesHaveChanged" if the input files
                differ from the file list passed to the constructor.
        """
        keepBranchesPlotFinal = self._getBranchesToKeep()
        print("KEEP:", keepBranchesPlotFinal)

        # ----------------------------------------------------------------------------------------------------------------------
        # cache samples
        # ----------------------------------------------------------------------------------------------------------------------
        sampleToCache = self.sampleIdentifier
        print('*' * 80)
        print(' ', sampleToCache)
        print('*' * 80)

        treeCaches = []

        # for all (sub)samples which come from the same files (sampleIdentifier)
        subsamples = [
            x for x in self.samples if x.identifier == sampleToCache
        ]
        for sample in subsamples:
            for region in self.regionsDict:
                # arbitrary (optional) name for the output tree, used for print-out (the TreeCache object has no idea what it is doing, e.g. dc, plot etc.)
                cacheName = 'plot:{region}_{sample}'.format(
                    region=region, sample=sample.name)

                tc = TreeCache.TreeCache(
                    name=cacheName,
                    sample=sample.name,
                    cutList=self._getSampleCuts(sample, region),
                    inputFolder=self.samplesPath,
                    splitFilesChunks=self.splitFilesChunks,
                    chunkNumber=self.chunkNumber,
                    splitFilesChunkSize=self.splitFilesChunkSize,
                    fileList=self.fileList,
                    branches=keepBranchesPlotFinal,
                    config=self.config,
                    debug=True)

                # check if this part of the sample is already cached
                isCached = tc.partIsCached()
                if isCached and not self.forceRedo:
                    print("INFO: already cached!", tc, "(", tc.hash, ")")
                    continue
                if isCached:
                    # forceRedo: remove the existing files of this chunk first
                    tc.deleteCachedFiles(chunkNumber=self.chunkNumber)

                # for the first sample which comes from this files, load the tree
                if not self.sampleTree:
                    self._loadSampleTree(sample)

                treeCaches.append(tc.setSampleTree(self.sampleTree).cache())

        if treeCaches:
            # run on the tree
            self.sampleTree.process()
        else:
            print("nothing to do!")
Пример #2
0
class CachePlot(object):
    """Create the plot caches of one sample identifier for a set of regions.

    For every subsample which belongs to the given sample identifier and
    every plot region a TreeCache object is set up. Caches which do not exist
    yet (or all of them if ``forceRedo`` is set) are attached to one shared
    SampleTree, which is then processed once at the end of :meth:`run`.
    """

    def __init__(self, config, sampleIdentifier, regions, splitFilesChunks=1, chunkNumber=1, splitFilesChunkSize=-1, forceRedo=False, fileList=None):
        """Read sample and region definitions from the config.

        Args:
            config: config object offering ``get``, ``has_option`` and
                ``sections`` (ConfigParser-like).
            sampleIdentifier: identifier which is compared to the
                ``identifier`` attribute of the samples.
            regions: iterable of region names, duplicates are dropped. Each
                region needs an option of the same name in section ``Cuts``.
            splitFilesChunks: passed on to TreeCache.
            chunkNumber: number of the chunk to process, passed on to
                TreeCache and SampleTree.
            splitFilesChunkSize: passed on to TreeCache and SampleTree.
            forceRedo: if true, existing cached files of this chunk are
                deleted and produced again.
            fileList: compressed file list (decoded with
                ``FileList.decompress``) or None. If given, it is compared to
                the actual list of input files when the tree is loaded.
        """
        self.config = config
        self.sampleIdentifier = sampleIdentifier
        self.regions = list(set(regions))
        self.forceRedo = forceRedo

        self.sampleTree = None
        self.samplesPath = self.config.get('Directories', 'plottingSamples')
        self.samplesDefinitions = self.config.get('Directories', 'samplesinfo')
        self.samplesInfo = ParseInfo(self.samplesDefinitions, self.samplesPath)
        self.sampleFilesFolder = self.config.get('Directories', 'samplefiles')

        # NOTE(review): eval() executes the config values as Python code, this
        # is only acceptable as long as the config files are trusted.
        self.sampleNames = eval(self.config.get('Plot_general', 'samples'))
        self.dataNames = eval(self.config.get('Plot_general', 'Data'))
        self.samples = self.samplesInfo.get_samples(self.sampleNames + self.dataNames)

        self.regionsDict = {}
        for region in self.regions:
            self.regionsDict[region] = {'cut': config.get('Cuts', region)}
        self.splitFilesChunkSize = splitFilesChunkSize
        self.splitFilesChunks = splitFilesChunks
        self.chunkNumber = chunkNumber
        self.fileList = FileList.decompress(fileList) if fileList else None

        VHbbNameSpace = config.get('VHbbNameSpace', 'library')
        returnCode = ROOT.gSystem.Load(VHbbNameSpace)
        # TSystem::Load returns 0 on success and 1 if the library has already
        # been loaded, only negative codes indicate a failure
        if returnCode < 0:
            print ("\x1b[31mERROR: loading VHbbNameSpace failed with code %d\x1b[0m"%returnCode)
        else:
            print ("INFO: loaded VHbbNameSpace: %s"%VHbbNameSpace)

    def printInfo(self):
        """Print a table of all regions and their cuts."""
        print ("REGION:".ljust(24),"CUT:")
        # items() exists in Python 2 and 3, iteritems() only in Python 2
        for region, regionInfo in self.regionsDict.items():
            print (" > ",region.ljust(20), regionInfo['cut'])

    def _getBranchesToKeep(self):
        """Return the list of branches which have to be kept in the caches."""
        # keep additional branches for plotting, both options are optional
        # NOTE(review): eval() on config values, see remark in __init__
        try:
            keepBranchesPlot = eval(self.config.get('Branches', 'keep_branches_plot'))
        except Exception:
            keepBranchesPlot = []
        try:
            keepBranchesPlot += eval(self.config.get('Branches', 'keep_branches'))
        except Exception:
            pass

        # also keep some branches which might be used later in variables definition and weights
        try:
            for section in self.config.sections():
                if section.startswith('plotDef:') and self.config.has_option(section, 'relPath'):
                    keepBranchesPlot.append(self.config.get(section, 'relPath'))
        except Exception as e:
            print("\x1b[31mERROR: config file contains an error! automatic selection of branches to keep will not work!\x1b[0m")
            print(e)

        # the weight expression is optional
        try:
            keepBranchesPlot.append(self.config.get('Weights', 'weightF'))
        except Exception:
            pass

        # plotting region cuts
        for regionInfo in self.regionsDict.values():
            keepBranchesPlot.append(regionInfo['cut'])
        return BranchList(keepBranchesPlot).getListOfBranches()

    def _getSampleCuts(self, sample, region):
        """Return the list of cuts for one subsample in one region."""
        configSection = 'Plot:%s'%region
        sampleCuts = [sample.subcut]

        regionCut = self.regionsDict[region]['cut']
        if regionCut:
            sampleCuts.append(regionCut)
        if self.config.has_option(configSection, 'Datacut'):
            sampleCuts.append(self.config.get(configSection, 'Datacut'))
        if self.config.has_option('Plot_general', 'addBlindingCut'):
            # append the configured cut expression itself, not the boolean
            # returned by has_option()
            sampleCuts.append(self.config.get('Plot_general', 'addBlindingCut'))
        return sampleCuts

    def _loadSampleTree(self, sample):
        """Create self.sampleTree for the files of the given sample and validate it."""
        self.sampleTree = SampleTree({'name': sample.identifier, 'folder': self.samplesPath}, splitFilesChunkSize=self.splitFilesChunkSize, chunkNumber=self.chunkNumber, config=self.config, saveMemory=True)
        if not self.sampleTree or not self.sampleTree.tree:
            print ("\x1b[31mERROR: creation of sample tree failed!!\x1b[0m")
            raise Exception("CreationOfSampleTreeFailed")

        # consistency check on the file list at submission time and now
        fileListNow = self.sampleTree.getSampleFileNameChunk(self.chunkNumber)
        if self.fileList and (sorted(self.fileList) != sorted(fileListNow)):
            print ("\x1b[31mERROR: sample files have changed between submission and run of the job!\x1b[0m")
            raise Exception("SampleFilesHaveChanged")

    def run(self):
        """Set up all missing caches and fill them by processing the tree once.

        Raises:
            Exception: "CreationOfSampleTreeFailed" if the input tree could
                not be created, "SampleFilesHaveChanged" if the input files
                differ from the file list passed to the constructor.
        """
        keepBranchesPlotFinal = self._getBranchesToKeep()
        print("KEEP:", keepBranchesPlotFinal)

        # ----------------------------------------------------------------------------------------------------------------------
        # cache samples
        # ----------------------------------------------------------------------------------------------------------------------
        sampleToCache = self.sampleIdentifier
        print ('*'*80)
        print (' ',sampleToCache)
        print ('*'*80)

        treeCaches = []

        # for all (sub)samples which come from the same files (sampleIdentifier)
        subsamples = [x for x in self.samples if x.identifier == sampleToCache]
        for sample in subsamples:
            for region in self.regionsDict:
                # arbitrary (optional) name for the output tree, used for print-out (the TreeCache object has no idea what it is doing, e.g. dc, plot etc.)
                cacheName = 'plot:{region}_{sample}'.format(region=region, sample=sample.name)

                tc = TreeCache.TreeCache(
                    name=cacheName,
                    sample=sample.name,
                    cutList=self._getSampleCuts(sample, region),
                    inputFolder=self.samplesPath,
                    splitFilesChunks=self.splitFilesChunks,
                    chunkNumber=self.chunkNumber,
                    splitFilesChunkSize=self.splitFilesChunkSize,
                    fileList=self.fileList,
                    branches=keepBranchesPlotFinal,
                    config=self.config,
                    debug=True
                )

                # check if this part of the sample is already cached
                isCached = tc.partIsCached()
                if isCached and not self.forceRedo:
                    print ("INFO: already cached!",tc, "(",tc.hash,")")
                    continue
                if isCached:
                    # forceRedo: remove the existing files of this chunk first
                    tc.deleteCachedFiles(chunkNumber=self.chunkNumber)

                # for the first sample which comes from this files, load the tree
                if not self.sampleTree:
                    self._loadSampleTree(sample)

                treeCaches.append(tc.setSampleTree(self.sampleTree).cache())

        if treeCaches:
            # run on the tree
            self.sampleTree.process()
        else:
            print ("nothing to do!")
Пример #3
0
class CacheDatacards(object):
    """Create the datacard caches of one sample identifier for a set of regions.

    One Datacard object is created per region. :meth:`prepare` sets up a
    TreeCache for every (region, subsample) combination which is not cached
    yet and attaches it to one shared SampleTree, :meth:`run` then processes
    this tree.
    """

    def __init__(self,
                 config,
                 regions,
                 sampleToCache,
                 splitFilesChunks=1,
                 chunkNumber=1,
                 splitFilesChunkSize=-1,
                 forceRedo=False,
                 fileList=None,
                 verbose=False):
        """Store the settings and create the Datacard objects.

        Args:
            config: config object, passed on to Datacard, TreeCache and
                SampleTree and read with ``get``.
            regions: iterable of datacard region names.
            sampleToCache: identifier which is compared to the ``identifier``
                attribute of the samples.
            splitFilesChunks: passed on to TreeCache.
            chunkNumber: number of the chunk to process, passed on to
                TreeCache and SampleTree.
            splitFilesChunkSize: passed on to TreeCache and SampleTree.
            forceRedo: if true, existing cached files of this chunk are
                deleted and produced again.
            fileList: compressed file list (decoded with
                ``FileList.decompress``) or None. If given, it is compared to
                the actual list of input files when the tree is loaded.
            verbose: enables debug print-out, it is also enabled if the
                environment variable ``XBBDEBUG`` is set.
        """
        self.verbose = verbose or ('XBBDEBUG' in os.environ)
        self.config = config
        self.regions = regions
        self.treeCaches = []
        self.sampleTree = None
        self.sampleToCache = sampleToCache
        self.forceRedo = forceRedo

        # settings which part of input files to process
        self.splitFilesChunkSize = splitFilesChunkSize
        self.splitFilesChunks = splitFilesChunks
        self.chunkNumber = chunkNumber
        self.fileList = FileList.decompress(fileList) if fileList else None

        # initialize Datacard objects
        self.dcMakers = [
            Datacard(config=self.config, region=region)
            for region in self.regions
        ]

    # make a minimum list of samples which is needed to produce all the Datacard regions at the same time
    def getAllSamples(self):
        """Return the samples of all regions without duplicates (by name).

        The first occurrence of every sample name is kept and the order of
        first appearance is preserved.
        """
        samples = []
        # set of names already added, avoids scanning the result list for
        # every sample (assumes the names are hashable, e.g. strings)
        seenNames = set()
        for dcMaker in self.dcMakers:
            for sample in dcMaker.getAllSamples():
                if sample.name not in seenNames:
                    seenNames.add(sample.name)
                    samples.append(sample)
        return samples

    def _getSampleCuts(self, dcMaker, sample):
        """Return the cut tree for one subsample in one datacard region."""
        # combine subcut and systematics cut with logical AND
        # systematics cuts are combined with logical OR, such that 1 cache file can be used for all the systematics
        isData = (sample.type == 'DATA')
        systematicsCuts = sorted({
            x['cachecut']
            for x in dcMaker.getSystematicsList(isData=isData)
        })
        sampleCuts = {'AND': [sample.subcut, {'OR': systematicsCuts}]}
        if self.verbose:
            print(
                json.dumps(sampleCuts,
                           sort_keys=True,
                           indent=8,
                           default=str))
        return sampleCuts

    def _getBranchesToKeep(self, dcMaker, sample):
        """Return the list of branches to keep in the root file."""
        # fetch the systematics once instead of once per key
        systematics = dcMaker.getSystematicsList()
        branchList = BranchList(sample.subcut)
        for key in ('cachecut', 'cut', 'var', 'weight'):
            branchList.addCut([x[key] for x in systematics])
        branchList.addCut(self.config.get('Weights', 'weightF'))
        # NOTE(review): eval() executes the config value as Python code, this
        # is only acceptable as long as the config files are trusted.
        branchList.addCut(eval(self.config.get('Branches', 'keep_branches')))
        return branchList.getListOfBranches()

    def _loadSampleTree(self, sample, folder):
        """Create self.sampleTree for the files of the given sample and validate it."""
        self.sampleTree = SampleTree(
            {
                'name': sample.identifier,
                'folder': folder
            },
            splitFilesChunkSize=self.splitFilesChunkSize,
            chunkNumber=self.chunkNumber,
            config=self.config,
            saveMemory=True)
        if not self.sampleTree or not self.sampleTree.tree:
            print("\x1b[31mERROR: creation of sample tree failed!!\x1b[0m")
            raise Exception("CreationOfSampleTreeFailed")

        # consistency check on the file list at submission time and now
        fileListNow = self.sampleTree.getSampleFileNameChunk(self.chunkNumber)
        if self.fileList and (sorted(self.fileList) != sorted(fileListNow)):
            print(
                "\x1b[31mERROR: sample files have changed between submission and run of the job!\x1b[0m"
            )
            raise Exception("SampleFilesHaveChanged")

    def prepare(self):
        """Set up the caches which have to be produced.

        Returns:
            self, such that calls can be chained.

        Raises:
            Exception: "CreationOfSampleTreeFailed" if the input tree could
                not be created, "SampleFilesHaveChanged" if the input files
                differ from the file list passed to the constructor.
        """
        if not self.dcMakers:
            print("WARNING: no datacard regions added, nothing to do.")
            return self

        self.treeCaches = []
        self.sampleTree = None

        allSamples = self.getAllSamples()
        subsamples = [
            x for x in allSamples if x.identifier == self.sampleToCache
        ]

        # loop over all datacard regions
        for dcMaker in self.dcMakers:

            # loop over all subsamples (which come from the same root tree files)
            for sample in subsamples:
                sampleCuts = self._getSampleCuts(dcMaker, sample)
                branchesToKeep = self._getBranchesToKeep(dcMaker, sample)

                # arbitrary (optional) name for the output tree, used for print-out (the TreeCache object has no idea what it is doing, e.g. dc, plot etc.)
                cacheName = 'dc:{region}_{sample}'.format(
                    region=dcMaker.getRegion(), sample=sample.name)

                tc = TreeCache.TreeCache(
                    name=cacheName,
                    sample=sample.name,
                    cutList=sampleCuts,
                    cutSequenceMode='TREE',
                    branches=branchesToKeep,
                    inputFolder=dcMaker.path,
                    splitFilesChunks=self.splitFilesChunks,
                    chunkNumber=self.chunkNumber,
                    splitFilesChunkSize=self.splitFilesChunkSize,
                    fileList=self.fileList,
                    config=self.config,
                    debug=self.verbose)

                # check if this part of the sample is already cached
                isCached = tc.partIsCached()
                print(
                    "check if sample \x1b[34m{sample}\x1b[0m part {part} is cached:"
                    .format(sample=sample.name,
                            part=self.chunkNumber), isCached)
                if isCached and not self.forceRedo:
                    continue
                if isCached:
                    # forceRedo: remove the existing files of this chunk first
                    tc.deleteCachedFiles(chunkNumber=self.chunkNumber)

                # for the first sample which comes from this files, load the tree
                if not self.sampleTree:
                    self._loadSampleTree(sample, dcMaker.path)

                # connect the TreeCache object to the input sampleTree and add it to the list of cached trees
                self.treeCaches.append(
                    tc.setSampleTree(self.sampleTree).cache())
        return self

    def run(self):
        """Process the input tree if any cache has been set up by prepare()."""
        if self.treeCaches:
            # run on the tree
            self.sampleTree.process()
        else:
            print("nothing to do!")
Пример #4
0
class CacheDatacards(object):
    """Create the datacard caches of one sample identifier for a set of regions.

    One Datacard object is created per region. :meth:`prepare` sets up a
    TreeCache for every (region, subsample) combination which is not cached
    yet and attaches it to one shared SampleTree, :meth:`run` then processes
    this tree.
    """

    def __init__(self, config, regions, sampleToCache, splitFilesChunks=1, chunkNumber=1, splitFilesChunkSize=-1, forceRedo=False, fileList=None, verbose=False):
        """Store the settings and create the Datacard objects.

        Args:
            config: config object, passed on to Datacard, TreeCache and
                SampleTree and read with ``get``.
            regions: iterable of datacard region names.
            sampleToCache: identifier which is compared to the ``identifier``
                attribute of the samples.
            splitFilesChunks: passed on to TreeCache.
            chunkNumber: number of the chunk to process, passed on to
                TreeCache and SampleTree.
            splitFilesChunkSize: passed on to TreeCache and SampleTree.
            forceRedo: if true, existing cached files of this chunk are
                deleted and produced again.
            fileList: compressed file list (decoded with
                ``FileList.decompress``) or None. If given, it is compared to
                the actual list of input files when the tree is loaded.
            verbose: enables debug print-out.
        """
        self.verbose = verbose
        self.config = config
        self.regions = regions
        self.treeCaches = []
        self.sampleTree = None
        self.sampleToCache = sampleToCache
        self.forceRedo = forceRedo

        # settings which part of input files to process
        self.splitFilesChunkSize = splitFilesChunkSize
        self.splitFilesChunks = splitFilesChunks
        self.chunkNumber = chunkNumber
        self.fileList = FileList.decompress(fileList) if fileList else None

        # initialize Datacard objects
        self.dcMakers = [Datacard(config=self.config, region=region) for region in self.regions]

    # make a minimum list of samples which is needed to produce all the Datacard regions at the same time
    def getAllSamples(self):
        """Return the samples of all regions without duplicates (by name).

        The first occurrence of every sample name is kept and the order of
        first appearance is preserved.
        """
        samples = []
        # set of names already added, avoids scanning the result list for
        # every sample (assumes the names are hashable, e.g. strings)
        seenNames = set()
        for dcMaker in self.dcMakers:
            for sample in dcMaker.getAllSamples():
                if sample.name not in seenNames:
                    seenNames.add(sample.name)
                    samples.append(sample)
        return samples

    def _getSampleCuts(self, dcMaker, sample):
        """Return the cut tree for one subsample in one datacard region."""
        # combine subcut and systematics cut with logical AND
        # systematics cuts are combined with logical OR, such that 1 cache file can be used for all the systematics
        isData = (sample.type == 'DATA')
        systematicsCuts = sorted({x['cachecut'] for x in dcMaker.getSystematicsList(isData=isData)})
        sampleCuts = {'AND': [sample.subcut, {'OR': systematicsCuts}]}
        # only dump the cuts in verbose mode, so that the verbose flag
        # actually controls this print-out
        if self.verbose:
            print (json.dumps(sampleCuts, sort_keys=True, indent=8, default=str))
        return sampleCuts

    def _getBranchesToKeep(self, dcMaker, sample):
        """Return the list of branches to keep in the root file."""
        # fetch the systematics once instead of once per key
        systematics = dcMaker.getSystematicsList()
        branchList = BranchList(sample.subcut)
        for key in ('cachecut', 'cut', 'var', 'weight'):
            branchList.addCut([x[key] for x in systematics])
        branchList.addCut(self.config.get('Weights', 'weightF'))
        # NOTE(review): eval() executes the config value as Python code, this
        # is only acceptable as long as the config files are trusted.
        branchList.addCut(eval(self.config.get('Branches', 'keep_branches')))
        return branchList.getListOfBranches()

    def _loadSampleTree(self, sample, folder):
        """Create self.sampleTree for the files of the given sample and validate it."""
        self.sampleTree = SampleTree({'name': sample.identifier, 'folder': folder}, splitFilesChunkSize=self.splitFilesChunkSize, chunkNumber=self.chunkNumber, config=self.config, saveMemory=True)
        if not self.sampleTree or not self.sampleTree.tree:
            print ("\x1b[31mERROR: creation of sample tree failed!!\x1b[0m")
            raise Exception("CreationOfSampleTreeFailed")

        # consistency check on the file list at submission time and now
        fileListNow = self.sampleTree.getSampleFileNameChunk(self.chunkNumber)
        if self.fileList and (sorted(self.fileList) != sorted(fileListNow)):
            print ("\x1b[31mERROR: sample files have changed between submission and run of the job!\x1b[0m")
            raise Exception("SampleFilesHaveChanged")

    def prepare(self):
        """Set up the caches which have to be produced.

        Returns:
            self, such that calls can be chained.

        Raises:
            Exception: "CreationOfSampleTreeFailed" if the input tree could
                not be created, "SampleFilesHaveChanged" if the input files
                differ from the file list passed to the constructor.
        """
        if not self.dcMakers:
            print("WARNING: no datacard regions added, nothing to do.")
            return self

        self.treeCaches = []
        self.sampleTree = None

        allSamples = self.getAllSamples()
        subsamples = [x for x in allSamples if x.identifier == self.sampleToCache]

        # loop over all datacard regions
        for dcMaker in self.dcMakers:

            # loop over all subsamples (which come from the same root tree files)
            for sample in subsamples:
                sampleCuts = self._getSampleCuts(dcMaker, sample)
                branchesToKeep = self._getBranchesToKeep(dcMaker, sample)

                # arbitrary (optional) name for the output tree, used for print-out (the TreeCache object has no idea what it is doing, e.g. dc, plot etc.)
                cacheName = 'dc:{region}_{sample}'.format(region=dcMaker.getRegion(), sample=sample.name)

                tc = TreeCache.TreeCache(
                    name=cacheName,
                    sample=sample.name,
                    cutList=sampleCuts,
                    cutSequenceMode='TREE',
                    branches=branchesToKeep,
                    inputFolder=dcMaker.path,
                    splitFilesChunks=self.splitFilesChunks,
                    chunkNumber=self.chunkNumber,
                    splitFilesChunkSize=self.splitFilesChunkSize,
                    fileList=self.fileList,
                    config=self.config,
                    debug=self.verbose
                )

                # check if this part of the sample is already cached
                isCached = tc.partIsCached()
                print ("check if sample \x1b[34m{sample}\x1b[0m part {part} is cached:".format(sample=sample.name, part=self.chunkNumber), isCached)
                if isCached and not self.forceRedo:
                    continue
                if isCached:
                    # forceRedo: remove the existing files of this chunk first
                    tc.deleteCachedFiles(chunkNumber=self.chunkNumber)

                # for the first sample which comes from this files, load the tree
                if not self.sampleTree:
                    self._loadSampleTree(sample, dcMaker.path)

                # connect the TreeCache object to the input sampleTree and add it to the list of cached trees
                self.treeCaches.append(tc.setSampleTree(self.sampleTree).cache())
        return self

    def run(self):
        """Process the input tree if any cache has been set up by prepare()."""
        if self.treeCaches:
            # run on the tree
            self.sampleTree.process()
        else:
            print ("nothing to do!")