Exemplo n.º 1
0
    def __init__(self,
                 fileNames,
                 chunkNumber,
                 submitTime='000000_000000',
                 force=False,
                 config=None,
                 sampleIdentifier=None):
        """Set up a merge job for one chunk of input files.

        Stores the arguments on the instance, creates a FileLocator for the
        given config and derives ``self.mergedFileName``: the first input
        path with its last four '/'-separated components replaced by
        ``<hash>/<submitTime>/0000/tree_<chunkNumber>.root``. ``<hash>`` is
        the SHA-224 digest of the sorted, '-'-joined per-file SHA-224
        digests, so it does not depend on the order of ``fileNames``.
        """
        self.fileNames = fileNames
        self.chunkNumber = chunkNumber
        self.submitTime = submitTime
        self.force = force
        self.config = config
        self.sampleIdentifier = sampleIdentifier
        self.debug = 'XBBDEBUG' in os.environ
        self.fileLocator = FileLocator(config=self.config)

        # -O option (reoptimizing baskets) leads to crashes...
        self.commandTemplate = "hadd -k  -ff {output} {inputs}"

        # use sampleTree class as replacement for hadd
        self.useChain = True

        # one digest per input file, sorted to make the total hash order independent
        sortedDigests = sorted(
            hashlib.sha224(name).hexdigest() for name in self.fileNames)
        totalHash = hashlib.sha224('-'.join(sortedDigests)).hexdigest()

        # keep everything but the last four path components of the first input
        baseFolder = '/'.join(self.fileNames[0].split('/')[:-4])
        self.mergedFileName = '/'.join([
            baseFolder,
            totalHash,
            self.submitTime,
            '0000',
            'tree_%d.root' % chunkNumber,
        ])
Exemplo n.º 2
0
Arquivo: hadd.py Projeto: GLP90/Xbb
    def __init__(self, fileNames, chunkNumber, submitTime='000000_000000', force=False, config=None, sampleIdentifier=None, inputDir=None, outputDir=None):
        """Configure the merging of one chunk of files.

        Besides storing the arguments, the input, output and scratch
        directories are read from the [Directories] section of ``config``
        (options ``inputDir`` / ``outputDir`` if given, otherwise 'HADDin' /
        'HADDout', and 'scratch'). ``self.mergedFileName`` is built from the
        first input path without its last four components, followed by a
        SHA-224 hash of all input names, the submit time, '0000' and
        ``tree_<chunkNumber>.root``.
        """
        self.fileNames = fileNames
        self.chunkNumber = chunkNumber
        self.submitTime = submitTime
        self.sampleIdentifier = sampleIdentifier
        self.force = force
        self.debug = 'XBBDEBUG' in os.environ
        self.config = config
        self.fileLocator = FileLocator(config=self.config)

        # -O option (reoptimizing baskets) leads to crashes...
        self.commandTemplate = "hadd -k  -ff {output} {inputs}"

        # use sampleTree class as replacement for hadd
        # this way baskets will be also optimized and unused branches can be stripped off
        self.useChain = True

        # directories are looked up by option name in the [Directories] section
        inputOption = inputDir or 'HADDin'
        outputOption = outputDir or 'HADDout'
        self.inputDir = self.config.get('Directories', inputOption)
        self.outputDir = self.config.get('Directories', outputOption)
        self.scratchDir = self.config.get('Directories', 'scratch')

        # hash of the sorted per-file hashes: independent of the file order
        perFileHashes = [hashlib.sha224(name).hexdigest() for name in self.fileNames]
        perFileHashes.sort()
        totalHash = hashlib.sha224('-'.join(perFileHashes)).hexdigest()

        pathParts = self.fileNames[0].split('/')[:-4]
        pathParts.extend([totalHash, self.submitTime, '0000', 'tree_%d.root' % chunkNumber])
        self.mergedFileName = '/'.join(pathParts)
Exemplo n.º 3
0
    def prepare(self):
        """Create the histogram stacks and add all cached sample trees to them.

        One StackMaker is created per entry of ``self.vars``. For every data
        and MC sample the cut list (sample subcut, optional region cut,
        optional 'Datacut' of the plot section, optional blinding cut) is
        assembled, the matching tree is requested from the TreeCache and
        added to the stack of every variable.

        Returns:
            self

        Raises:
            Exception: "CachedTreeMissing" if the tree of a sample could not
                be obtained from the cache.
        """
        # use the instance attribute: a bare `region` only resolves if a
        # module-level variable of that name happens to exist
        print(
            "INFO: starting plot for region \x1b[34m{region}\x1b[0m, variables:"
            .format(region=self.region))
        for var in self.vars:
            print("  > {var}".format(var=var))

        self.histogramStacks = {}
        for var in self.vars:
            self.histogramStacks[var] = StackMaker(self.config,
                                                   var,
                                                   self.region,
                                                   self.signalRegion,
                                                   None,
                                                   '_' + self.subcutPlotName,
                                                   title=self.title)

        fileLocator = FileLocator(config=self.config,
                                  useDirectoryListingCache=True)

        # the additional plot cut is the same for all samples and variables
        plotCut = self.subcut if self.subcut else '1'

        # add DATA + MC samples
        for sample in self.dataSamples + self.mcSamples:

            # cuts
            sampleCuts = [sample.subcut]
            if self.config.has_option('Cuts', self.region):
                sampleCuts.append(self.config.get('Cuts', self.region))
            if self.config.has_option(self.configSection, 'Datacut'):
                sampleCuts.append(
                    self.config.get(self.configSection, 'Datacut'))
            if self.addBlindingCut:
                sampleCuts.append(self.addBlindingCut)

            # get sample tree from cache (self.config instead of a global `config`)
            tc = TreeCache.TreeCache(sample=sample,
                                     cutList=sampleCuts,
                                     inputFolder=self.samplesPath,
                                     config=self.config,
                                     fileLocator=fileLocator)
            sampleTree = tc.getTree()

            if not sampleTree:
                print("\x1b[31mERROR: sampleTree not available for ", sample,
                      ", run caching again!!\x1b[0m")
                raise Exception("CachedTreeMissing")

            groupName = self.getSampleGroup(sample)
            print(" > found the tree, #entries = ",
                  sampleTree.tree.GetEntries())
            print("   > group =", groupName)
            print(" > now adding the tree for vars=", self.vars)

            # add the sample tree for all the variables
            for var in self.vars:
                self.histogramStacks[var].addSampleTree(
                    sample=sample,
                    sampleTree=sampleTree,
                    groupName=groupName,
                    cut=plotCut)
        return self
Exemplo n.º 4
0
 def __init__(self, config, region):
     """Keep config and region and create the Datacard maker for the region.

     A FileLocator with directory listing cache enabled is created and
     shared with the Datacard instance.
     """
     locator = FileLocator(config=config, useDirectoryListingCache=True)
     self.config = config
     self.region = region
     self.fileLocator = locator
     self.dcMaker = Datacard(config=config,
                             region=region,
                             fileLocator=locator)
Exemplo n.º 5
0
    def test_xrootd(self):
        """Read a tree via xrootd and check removing/adding the redirector.

        The checks only run if the X509_USER_PROXY environment variable is
        set to a non-blank value; otherwise only an info message is printed.
        """
        if 'X509_USER_PROXY' in os.environ and len(os.environ['X509_USER_PROXY'].strip()) > 0:
            path1 = 'root://xrootd-cms.infn.it//store/group/phys_higgs/hbb/ntuples/VHbbPostNano/2017/V11/TTToSemiLeptonic_TuneCP5_PSweights_13TeV-powheg-pythia8/RunIIFall17NanoAODv4-PU2017_1282/190510_115113/0000/tree_1.root'
            tree1 = self.getTree(path1)
            print ("ENTRIES:", tree1.GetEntries())
            # plain int literal: the 'L' suffix is a syntax error on Python 3
            # and compares equal to the long value on Python 2
            self.assertEqual(tree1.GetEntries(), 552904)

            fileLocator = FileLocator()

            # stripping the redirector must leave the plain /store/... path
            path2 = fileLocator.removeRedirector(path1)
            print ("PATH2:", path2)
            self.assertTrue(path2.startswith('/store/group/phys_higgs/'))
            self.assertTrue(path2.endswith('/tree_1.root'))

            # adding the redirector again must reproduce the original path
            path3 = fileLocator.addRedirector(redirector='root://xrootd-cms.infn.it', fileName=path2)
            self.assertEqual(path1, path3)
        else:
            print("INFO: this test is skipped because no X509 proxy certificate is found which is needed to access the files!")
Exemplo n.º 6
0
    def test_xrootd(self):
        """Open a remote tree through xrootd and round-trip the redirector.

        Without a non-blank X509_USER_PROXY environment variable nothing is
        checked and only an info message is printed.
        """
        proxySetting = os.environ.get('X509_USER_PROXY', '')
        if not proxySetting.strip():
            print("INFO: this test is skipped because no X509 proxy certificate is found which is needed to access the files!")
            return

        path1 = 'root://xrootd-cms.infn.it//store/group/phys_higgs/hbb/ntuples/V25/TT_TuneCUETP8M2T4_13TeV-powheg-pythia8/VHBB_HEPPY_V25_TT_TuneCUETP8M2T4_13TeV-powheg-Py8__RunIISummer16MAv2-PUMoriond17_80r2as_2016_TrancheIV_v6-v1/170202_212737/0000/tree_100.root'
        tree1 = self.getTree(path1)
        print ("ENTRIES:", tree1.GetEntries())
        self.assertEqual(tree1.GetEntries(), 48442)

        locator = FileLocator()

        # path without redirector has to be a plain /store/... path
        path2 = locator.removeRedirector(path1)
        print ("PATH2:", path2)
        self.assertTrue(path2.startswith('/store/group/phys_higgs/'))
        self.assertTrue(path2.endswith('/tree_100.root'))

        # putting the redirector back has to give the original path again
        path3 = locator.addRedirector(redirector='root://xrootd-cms.infn.it', fileName=path2)
        self.assertEqual(path1, path3)
Exemplo n.º 7
0
    def __init__(self, config, region, sampleIdentifier=None, opts=None):
        """Read everything needed to plot ``region`` from the config.

        Loads the VHbb namespace library, reads the directories, the optional
        sub-cut and blinding cut, and the lists of data and MC samples of the
        'Plot:<region>' section. If ``sampleIdentifier`` (comma separated
        identifiers) is given, only the matching samples are kept.

        NOTE: although ``opts`` defaults to None, its ``inputDir`` and
        ``outputDir`` attributes are read unconditionally.
        NOTE(review): 'Datas', 'samples' and 'lumi' are passed to eval();
        the config file has to be trusted.
        """
        self.config = config
        self.region = region
        # "a,b,c" -> ['a', 'b', 'c'], empty or missing -> None
        self.sampleIdentifiers = sampleIdentifier.split(',') if sampleIdentifier else None

        # VHbb namespace
        libraryName = config.get('VHbbNameSpace', 'library')
        loadStatus = ROOT.gSystem.Load(libraryName)
        if loadStatus == 0:
            print ("INFO: loaded VHbbNameSpace: %s"%libraryName)
        else:
            print ("\x1b[31mERROR: loading VHbbNameSpace failed with code %d\x1b[0m"%loadStatus)

        # input/output paths
        self.fileLocator = FileLocator(config=self.config)
        self.pathIN = config.get('Directories', opts.inputDir)
        self.pathOUT = config.get('Directories', opts.outputDir)
        self.tmpDir = config.get('Directories', 'scratch')

        self.samplesPath = config.get('Directories', 'plottingSamples')
        self.samplesDefinitions = config.get('Directories', 'samplesinfo')
        self.samplesInfo = ParseInfo(self.samplesDefinitions, self.samplesPath)
        self.sampleFilesFolder = config.get('Directories', 'samplefiles')
        self.plotPath = config.get('Directories', 'plotpath')

        # plot regions
        self.configSection = 'Plot:%s' % region

        # additional cut to only plot a subset of the region
        self.subcut = None
        if config.has_option(self.configSection, 'subcut'):
            self.subcut = config.get(self.configSection, 'subcut')
            print("INFO: use cut:", self.subcut)

        # additional global blinding cut (contained in plots, cut on the event number)
        self.addBlindingCut = None
        if config.has_option('Plot_general', 'addBlindingCut'):
            self.addBlindingCut = config.get('Plot_general', 'addBlindingCut')
            print ('adding add. blinding cut:', self.addBlindingCut)

        # load samples
        # data samples are defined per control region (section)
        self.data = eval(config.get(self.configSection, 'Datas'))
        # list of mc samples is shared by all regions
        self.mc = eval(config.get('Plot_general', 'samples'))
        self.total_lumi = eval(config.get('General', 'lumi'))
        self.signalRegion = False
        if config.has_option(self.configSection, 'Signal'):
            self.mc.append(config.get(self.configSection, 'Signal'))
            self.signalRegion = True
        self.dataSamples = self.samplesInfo.get_samples(self.data)
        self.mcSamples = self.samplesInfo.get_samples(self.mc)

        # filter samples used in the plot
        if self.sampleIdentifiers:
            self.dataSamples = [s for s in self.dataSamples if s.identifier in self.sampleIdentifiers]
            self.mcSamples = [s for s in self.mcSamples if s.identifier in self.sampleIdentifiers]
Exemplo n.º 8
0
    def test_xrootd(self):
        """Check remote tree access and the redirector helpers of FileLocator.

        Only runs the checks if X509_USER_PROXY is set to a non-blank value,
        otherwise an info message is printed instead.
        """
        hasProxy = bool(os.environ.get('X509_USER_PROXY', '').strip())
        if not hasProxy:
            print(
                "INFO: this test is skipped because no X509 proxy certificate is found which is needed to access the files!"
            )
            return

        redirector = 'root://xrootd-cms.infn.it'
        path1 = 'root://xrootd-cms.infn.it//store/group/phys_higgs/hbb/ntuples/V25/TT_TuneCUETP8M2T4_13TeV-powheg-pythia8/VHBB_HEPPY_V25_TT_TuneCUETP8M2T4_13TeV-powheg-Py8__RunIISummer16MAv2-PUMoriond17_80r2as_2016_TrancheIV_v6-v1/170202_212737/0000/tree_100.root'

        # the remote file has to be readable and contain the known number of entries
        tree1 = self.getTree(path1)
        print("ENTRIES:", tree1.GetEntries())
        self.assertEqual(tree1.GetEntries(), 48442)

        fileLocator = FileLocator()

        # without redirector only the /store/... part remains
        path2 = fileLocator.removeRedirector(path1)
        print("PATH2:", path2)
        self.assertTrue(path2.startswith('/store/group/phys_higgs/'))
        self.assertTrue(path2.endswith('/tree_100.root'))

        # adding the redirector restores the full path
        path3 = fileLocator.addRedirector(redirector=redirector,
                                          fileName=path2)
        self.assertEqual(path1, path3)
Exemplo n.º 9
0
    def __init__(self,
                 fileNames,
                 chunkNumber,
                 submitTime='000000_000000',
                 force=False,
                 config=None,
                 sampleIdentifier=None,
                 inputDir=None,
                 outputDir=None):
        """Prepare merging of the given files into one output chunk.

        The arguments are stored on the instance. Input, output and scratch
        directories come from the [Directories] section of ``config``; the
        option names default to 'HADDin' and 'HADDout' if ``inputDir`` /
        ``outputDir`` are not given. ``self.mergedFileName`` consists of the
        first input path without its last four components, a SHA-224 hash
        computed from all input file names, the submit time, '0000' and
        ``tree_<chunkNumber>.root``.
        """
        self.fileNames = fileNames
        self.debug = 'XBBDEBUG' in os.environ
        self.submitTime = submitTime
        self.chunkNumber = chunkNumber
        self.force = force
        self.sampleIdentifier = sampleIdentifier
        self.config = config
        self.fileLocator = FileLocator(config=self.config)

        # -O option (reoptimizing baskets) leads to crashes...
        self.commandTemplate = "hadd -k  -ff {output} {inputs}"

        # use sampleTree class as replacement for hadd
        # this way baskets will be also optimized and unused branches can be stripped off
        self.useChain = True

        directories = 'Directories'
        self.inputDir = self.config.get(directories, inputDir or 'HADDin')
        self.outputDir = self.config.get(directories, outputDir or 'HADDout')
        self.scratchDir = self.config.get(directories, 'scratch')

        # sorting the per-file hashes makes the total hash independent of the order
        fileHashes = sorted(
            [hashlib.sha224(fileName).hexdigest() for fileName in self.fileNames])
        totalHash = hashlib.sha224('-'.join(fileHashes)).hexdigest()

        outputBase = '/'.join(self.fileNames[0].split('/')[:-4])
        chunkFile = 'tree_%d.root' % chunkNumber
        self.mergedFileName = '/'.join(
            (outputBase, totalHash, self.submitTime, '0000', chunkFile))
Exemplo n.º 10
0
Arquivo: hadd.py Projeto: perrozzi/Xbb
    def __init__(self, fileNames, chunkNumber, submitTime='000000_000000', force=False, config=None, sampleIdentifier=None):
        """Store the job parameters and compute the name of the merged file.

        The merged file name is the first input path with its last four
        components cut off, extended by a SHA-224 hash of all input file
        names, the submit time, '0000' and ``tree_<chunkNumber>.root``.
        """
        self.config = config
        self.fileLocator = FileLocator(config=self.config)
        self.fileNames = fileNames
        self.chunkNumber = chunkNumber
        self.submitTime = submitTime
        self.sampleIdentifier = sampleIdentifier
        self.force = force
        self.debug = 'XBBDEBUG' in os.environ

        # -O option (reoptimizing baskets) leads to crashes...
        self.commandTemplate = "hadd -k  -ff {output} {inputs}"

        # use sampleTree class as replacement for hadd
        self.useChain = True

        # hash every file name, then hash the sorted list of hashes
        treeHashes = [hashlib.sha224(inputName).hexdigest() for inputName in self.fileNames]
        totalHash = hashlib.sha224('-'.join(sorted(treeHashes))).hexdigest()

        firstFileParts = self.fileNames[0].split('/')
        mergedParts = firstFileParts[:-4] + [totalHash, self.submitTime, '0000', 'tree_%d.root'%chunkNumber]
        self.mergedFileName = '/'.join(mergedParts)
Exemplo n.º 11
0
config = BetterConfigParser()
config.read(opts.config)
anaTag = config.get("Analysis","tag")
TrainFlag = eval(config.get('Analysis','TrainFlag'))
btagLibrary = config.get('BTagReshaping','library')
samplesinfo=config.get('Directories','samplesinfo')
channel=config.get('Configuration','channel')
VHbbNameSpace=config.get('VHbbNameSpace','library')
ROOT.gSystem.Load(VHbbNameSpace)
pathIN = config.get('Directories','SYSin')
pathOUT = config.get('Directories','SYSout')
tmpDir = config.get('Directories','scratch')
print 'INput samples:\t%s'%pathIN
print 'OUTput samples:\t%s'%pathOUT

fileLocator = FileLocator(config=config)

# samples
info = ParseInfo(samplesinfo, pathIN)
matchingSamples = [x for x in info if x.identifier==opts.sampleIdentifier and not x.subsample]
if len(matchingSamples) != 1:
    print "need exactly 1 sample identifier as input with -S !!"
    print matchingSamples
    exit(1)
sample = matchingSamples[0]

# TODO: 
collections = [x.strip() for x in opts.addCollections.split(',') if len(x.strip()) > 0] if len(opts.addCollections.strip())>0  else []
if len(collections) < 1:
    print "\x1b[31mWARNING: no collections added! Specify the collections to add with the --addCollections option!\x1b[0m"
print 'collections to add:', collections
Exemplo n.º 12
0
import sys
import os
from myutils.XbbConfig import XbbConfigReader, XbbConfigTools
from myutils import ParseInfo
from myutils.FileLocator import FileLocator
from myutils.XbbTools import XbbTools

# parse the command line options: config tag, directory option name and sample
argv = sys.argv
parser = OptionParser()
parser.add_option("-T","--tag", dest="tag", default='', help="config tag")
parser.add_option("-D","--directory", dest="directory", default='MVAout', help="directory name, e.g. MVAout")
parser.add_option("-S","--sample", dest="sample", default='TT*', help="sample")
(opts, args) = parser.parse_args(argv)

# read the config for the given tag; the directory given with -D is looked up
# as an option of the [Directories] section
config = XbbConfigTools(config=XbbConfigReader.read(opts.tag))
path = config.get("Directories", opts.directory)
sampleInfoDirectory = config.get('Directories', 'samplefiles')
info = ParseInfo(samples_path=path, config=config)

# only take first sample which matches
# NOTE(review): the [0] raises an IndexError if the filtered list is empty
sampleIdentifier = XbbTools.filterSampleList(info.getSampleIdentifiers(), XbbTools.parseSamplesList(opts.sample))[0]

# get list of ORIGINAL file names for this sample: /store/...
sampleTreeFileNames = XbbTools.getSampleTreeFileNames(sampleInfoDirectory, sampleIdentifier)

fileLocator = FileLocator(config=config)

# get local name of first file and print it
localFilename     = fileLocator.getFilePath(path, sampleIdentifier, sampleTreeFileNames[0])
print(localFilename)
Exemplo n.º 13
0
# fall back to the default config name if none was given
if opts.config == "":
    opts.config = "config"

weight = opts.weight
# a non-empty weight option switches on the evaluation of the optimisation
evaluate_optimisation = weight != ''

#Import after configure to get help message
from myutils import BetterConfigParser, ParseInfo, MvaEvaluator

config = BetterConfigParser()
config.read(opts.config)
anaTag = config.get("Analysis", "tag")

fileLocator = FileLocator(config=config)
print ("OPTS", opts)
if len(opts.fileList) > 0:
    # explicit (compressed) file list passed on the command line
    filelist = FileList.decompress(opts.fileList)
    print ("len(filelist)", len(filelist))
    if len(filelist) > 0:
        print ("filelist[0]:", filelist[0])
else:
    # no list given: take the first chunk of file names of the sample in MVAin
    sampleDefinition = {'name': opts.sampleIdentifier, 'folder': config.get('Directories', 'MVAin')}
    filelist = SampleTree(
        sampleDefinition,
        countOnly=True,
        splitFilesChunkSize=-1,
        config=config,
    ).getSampleFileNameChunks()[0]
    print ("INFO: no file list given, use all files!")
    print (len(filelist), filelist)

#get locations:
Wdir = config.get('Directories', 'Wdir')
samplesinfo = config.get('Directories', 'samplesinfo')
Exemplo n.º 14
0
    def __init__(self, config, region, sampleIdentifier=None, opts=None):
        """Collect all settings needed to make the plots of one region.

        The VHbb namespace library is loaded, directories and cuts are read
        from the config and the data/MC samples of the 'Plot:<region>'
        section are looked up. A comma separated ``sampleIdentifier``
        restricts the samples to the given identifiers.

        NOTE: ``opts.inputDir`` and ``opts.outputDir`` are always accessed,
        so ``opts`` has to be given despite its None default.
        NOTE(review): the options 'Datas', 'samples' and 'lumi' are evaluated
        with eval(); only use trusted config files.
        """
        self.config = config
        self.region = region
        if sampleIdentifier:
            self.sampleIdentifiers = sampleIdentifier.split(',')
        else:
            self.sampleIdentifiers = None

        # VHbb namespace
        namespaceLibrary = config.get('VHbbNameSpace', 'library')
        returnCode = ROOT.gSystem.Load(namespaceLibrary)
        if returnCode == 0:
            print("INFO: loaded VHbbNameSpace: %s" % namespaceLibrary)
        else:
            print(
                "\x1b[31mERROR: loading VHbbNameSpace failed with code %d\x1b[0m"
                % returnCode)

        # input/output paths
        self.fileLocator = FileLocator(config=self.config)
        self.pathIN = config.get('Directories', opts.inputDir)
        self.pathOUT = config.get('Directories', opts.outputDir)
        self.tmpDir = config.get('Directories', 'scratch')

        self.samplesPath = config.get('Directories', 'plottingSamples')
        self.samplesInfo = ParseInfo(samples_path=self.samplesPath,
                                     config=self.config)
        self.sampleFilesFolder = config.get('Directories', 'samplefiles')
        self.plotPath = config.get('Directories', 'plotpath')

        # plot regions
        self.configSection = 'Plot:%s' % region
        plotSection = self.configSection

        # additional cut to only plot a subset of the region
        self.subcut = None
        if config.has_option(plotSection, 'subcut'):
            self.subcut = config.get(plotSection, 'subcut')
            print("INFO: use cut:", self.subcut)

        # additional global blinding cut:
        # contained in plots, cut on the event number
        self.addBlindingCut = None
        if config.has_option('Plot_general', 'addBlindingCut'):
            self.addBlindingCut = config.get('Plot_general',
                                             'addBlindingCut')
            print('adding add. blinding cut:', self.addBlindingCut)

        # load samples
        # read the data corresponding to each CR (section)
        self.data = eval(config.get(plotSection, 'Datas'))
        # read the list of mc samples
        self.mc = eval(config.get('Plot_general', 'samples'))
        self.total_lumi = eval(config.get('General', 'lumi'))
        self.signalRegion = False
        if config.has_option(plotSection, 'Signal'):
            self.mc.append(config.get(plotSection, 'Signal'))
            self.signalRegion = True
        self.dataSamples = self.samplesInfo.get_samples(self.data)
        self.mcSamples = self.samplesInfo.get_samples(self.mc)

        # filter samples used in the plot
        if self.sampleIdentifiers:
            self.dataSamples = [
                sample for sample in self.dataSamples
                if sample.identifier in self.sampleIdentifiers
            ]
            self.mcSamples = [
                sample for sample in self.mcSamples
                if sample.identifier in self.sampleIdentifiers
            ]
Exemplo n.º 15
0
# read the analysis configuration from the file given on the command line
config = BetterConfigParser()
config.read(opts.config)
anaTag = config.get("Analysis", "tag")
# NOTE(review): the option value is passed to eval(); the config has to be trusted
TrainFlag = eval(config.get('Analysis', 'TrainFlag'))
btagLibrary = config.get('BTagReshaping', 'library')
samplesinfo = config.get('Directories', 'samplesinfo')
channel = config.get('Configuration', 'channel')
# load the VHbb namespace library into ROOT
VHbbNameSpace = config.get('VHbbNameSpace', 'library')
ROOT.gSystem.Load(VHbbNameSpace)
# input, output and scratch directories
pathIN = config.get('Directories', 'SYSin')
pathOUT = config.get('Directories', 'SYSout')
tmpDir = config.get('Directories', 'scratch')
print 'INput samples:\t%s' % pathIN
print 'OUTput samples:\t%s' % pathOUT

fileLocator = FileLocator(config=config)

# samples
# keep only entries with the requested identifier which are not subsamples;
# the script stops with exit code 1 unless exactly one sample matches
info = ParseInfo(samplesinfo, pathIN)
matchingSamples = [
    x for x in info
    if x.identifier == opts.sampleIdentifier and not x.subsample
]
if len(matchingSamples) != 1:
    print "need exactly 1 sample identifier as input with -S !!"
    print matchingSamples
    exit(1)
sample = matchingSamples[0]

# TODO:
collections = [
Exemplo n.º 16
0
class XbbRun:

    def __init__(self, opts):

        # get file list
        self.filelist = FileList.decompress(opts.fileList) if len(opts.fileList) > 0 else None
        print "len(filelist)",len(self.filelist),
        if len(self.filelist) > 0:
            print "filelist[0]:", self.filelist[0]
        else:
            print ''

        # config
        self.debug = 'XBBDEBUG' in os.environ
        self.verifyCopy = True
        self.opts = opts
        self.config = BetterConfigParser()
        self.config.read(opts.config)
        self.channel = self.config.get('Configuration', 'channel')

        # load namespace, TODO
        VHbbNameSpace = self.config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(VHbbNameSpace)

        # directories
        self.pathIN = self.config.get('Directories', opts.inputDir)
        self.pathOUT = self.config.get('Directories', opts.outputDir)
        self.tmpDir = self.config.get('Directories', 'scratch')
        print 'INput samples:\t%s'%self.pathIN
        print 'OUTput samples:\t%s'%self.pathOUT

        self.fileLocator = FileLocator(config=self.config)

        # check if given sample identifier uniquely matches a samples from config
        matchingSamples = ParseInfo(samples_path=self.pathIN, config=self.config).find(identifier=opts.sampleIdentifier)
        if len(matchingSamples) != 1:
            print "ERROR: need exactly 1 sample identifier as input with -S !!"
            print matchingSamples
            exit(1)
        self.sample = matchingSamples[0]

        # collections
        self.collections = [x.strip() for x in opts.addCollections.split(',') if len(x.strip()) > 0] if len(opts.addCollections.strip())>0  else []
        if len(self.collections) < 1:
            print "\x1b[31mWARNING: no collections added! Specify the collections to add with the --addCollections option!\x1b[0m"
        print 'collections to add:', self.collections
        self.collections = self.parseCollectionList(self.collections)
        print 'after parsing:', self.collections

        # temorary folder to save the files of this job on the scratch
        temporaryName = self.sample.identifier + '/' + uuid.uuid4().hex

        # input files
        self.subJobs = []
        if opts.join:
            print("INFO: join input files! This is an experimental feature!")

            # translate naming convention of .txt file to imported files after the prep step
            inputFileNamesAfterPrep = [self.fileLocator.getFilenameAfterPrep(x) for x in self.filelist]

            self.subJobs.append({
                'inputFileNames': self.filelist,
                'localInputFileNames': ["{path}/{subfolder}/{filename}".format(path=self.pathIN, subfolder=self.sample.identifier, filename=localFileName) for localFileName in inputFileNamesAfterPrep],
                'outputFileName': "{path}/{subfolder}/{filename}".format(path=self.pathOUT, subfolder=self.sample.identifier, filename=inputFileNamesAfterPrep[0]),
                'tmpFileName': "{path}/{subfolder}/{filename}".format(path=self.tmpDir, subfolder=temporaryName, filename=inputFileNamesAfterPrep[0]),
                })

        else:
            
            # create separate subjob for all files (default!)
            for inputFileName in self.filelist:
                inputFileNamesAfterPrep = [self.fileLocator.getFilenameAfterPrep(inputFileName)]

                self.subJobs.append({
                    'inputFileNames': [inputFileName],
                    'localInputFileNames': ["{path}/{subfolder}/{filename}".format(path=self.pathIN, subfolder=self.sample.identifier, filename=localFileName) for localFileName in inputFileNamesAfterPrep],
                    'outputFileName': "{path}/{subfolder}/{filename}".format(path=self.pathOUT, subfolder=self.sample.identifier, filename=inputFileNamesAfterPrep[0]),
                    'tmpFileName': "{path}/{subfolder}/{filename}".format(path=self.tmpDir, subfolder=temporaryName, filename=inputFileNamesAfterPrep[0]),
                    })

    # lists of single modules can be given instead of a module, "--addCollections Sys.all"
    # [Sys]
    # all = ['Sys.Vtype', 'Sys.Leptons', ...]
    # TODO: make it fully recursive
    def parseCollectionList(self, collections):
        """Expand collections which refer to a list in the config.

        An entry of the form 'Section.key' whose config value is a list
        expression ('[...]') is replaced by the elements of that list. All
        other entries are passed through unchanged. This includes entries
        which contain a '.' but do not name an existing config option (e.g.
        Python code like 'Module.Class()'), so that run() can handle them;
        looking those up unconditionally raised a config error before.

        Only one level of lists is expanded.

        Args:
            collections: list of collection strings.

        Returns:
            New list with the list entries replaced by their elements.
        """
        collectionsListsReplaced = []
        for collection in collections:
            if '.' not in collection:
                collectionsListsReplaced.append(collection)
                continue

            section, key = collection.split('.')[:2]

            # not a config option: keep the entry as it is
            if not (self.config.has_section(section) and self.config.has_option(section, key)):
                collectionsListsReplaced.append(collection)
                continue

            listExpression = self.config.get(section, key).strip()
            if listExpression.startswith('[') and listExpression.endswith(']'):
                # NOTE(review): eval() of a config value, the config has to be trusted
                collectionsListsReplaced.extend(eval(listExpression))
            else:
                collectionsListsReplaced.append(collection)
        return collectionsListsReplaced

    # run all subjobs
    def run(self):

        nFilesProcessed = 0
        nFilesFailed = 0

        for subJob in self.subJobs:

            # only process if output is non-existing/broken or --force was used
            if self.opts.force or not self.fileLocator.isValidRootFile(subJob['outputFileName']):

                # create directories
                outputFolder = '/'.join(subJob['outputFileName'].split('/')[:-1])
                tmpFolder = '/'.join(subJob['tmpFileName'].split('/')[:-1])
                self.fileLocator.makedirs(outputFolder)
                self.fileLocator.makedirs(tmpFolder)

                # load sample tree
                sampleTree = SampleTree(subJob['localInputFileNames'], config=self.config)
                if not sampleTree.tree:
                    print "trying fallback...", len(subJob['inputFileNames'])

                    if len(subJob['inputFileNames']) == 1:
                        # try original naming scheme if reading directly from Heppy/Nano ntuples (without prep)
                        fileNameOriginal = self.pathIN + '/' + subJob['inputFileNames'][0]
                        print "FO:", fileNameOriginal
                        xrootdRedirector = self.fileLocator.getRedirector(fileNameOriginal)
                        sampleTree = SampleTree([fileNameOriginal], config=self.config, xrootdRedirector=xrootdRedirector)
                        if not sampleTree.tree:
                            print "\x1b[31mERROR: file does not exist or is broken, will be SKIPPED!\x1b[0m"
                            nFilesFailed += 1
                            continue
                    else:
                        print "\x1b[31mERROR: file does not exist or is broken, will be SKIPPED! (old naming scheme not supported for joining multipel files)\x1b[0m"
                        nFilesFailed += 1
                        continue

                # to use this syntax, use "--addCollections Sys.Vtype" for a config file entry like this:
                # [Sys]
                # Vtype = VtypeCorrector.VtypeCorrector(channel='Zll')
                # (instead of passing the tree in the constructor, the setTree method can be used)
                pyModules = []
                versionTable = []
                for collection in self.collections:
                    if '.' in collection:
                        section = collection.split('.')[0]
                        key = collection.split('.')[1]
                        if self.config.has_section(section) and self.config.has_option(section, key):
                            pyCode = self.config.get(section, key)
                        elif '(' in collection and collection.endswith(')'):
                            print "WARNING: config option", collection, " not found, interpreting it as Python code!"
                            pyCode = collection 
                        else:
                            print "\x1b[31mERROR: config option not found:", collection, ". To specify Python code directly, pass a complete constructor, e.g. --addCollections 'Module.Class()'. Module has to be placed in python/myutils/ folder.\x1b[0m"
                            raise Exception("ConfigError")

                        # import module from myutils
                        moduleName = pyCode.split('(')[0].split('.')[0].strip()
                        if self.debug:
                            print "DEBUG: import module:", moduleName
                            print("\x1b[33mDEBUG: " + collection + ": run PYTHON code:\n"+pyCode+"\x1b[0m")
                        globals()[moduleName] = importlib.import_module(".{module}".format(module=moduleName), package="myutils")

                        # get object
                        wObject = eval(pyCode)

                        # pass the tree and other variables if needed to finalize initialization
                        if hasattr(wObject, "customInit") and callable(getattr(wObject, "customInit")):
                            wObject.customInit({'config': self.config,
                                                'sampleTree': sampleTree,
                                                'tree': sampleTree.tree,
                                                'sample': self.sample,
                                                'channel': self.channel,
                                                'pathIN': self.pathIN,
                                                'pathOUT': self.pathOUT,
                                                })

                        # add callbacks if the objects provides any
                        if hasattr(wObject, "processEvent") and callable(getattr(wObject, "processEvent")):
                            sampleTree.addCallback('event', wObject.processEvent)
                        for cb in ["finish", "prepareOutput"]:
                            if hasattr(wObject, cb) and callable(getattr(wObject, cb)):
                                sampleTree.addCallback(cb, getattr(wObject, cb))

                        # add branches
                        if hasattr(wObject, "getBranches") and callable(getattr(wObject, "getBranches")):
                            sampleTree.addOutputBranches(wObject.getBranches())

                        pyModules.append(wObject)

                        versionTable.append([moduleName, wObject.getVersion() if hasattr(wObject, "getVersion") else 0])
                    else:
                        print "\x1b[31mERROR: config option not found:", collection, " the format should be: [Section].[Option]\x1b[0m"
                        raise Exception("ConfigError")

                for moduleName, moduleVersion in versionTable:
                    print " > {m}:{v}".format(m=moduleName, v=moduleVersion)

                # DEPRECATED, do not use anymore ---> use BranchTools.TreeFormulas()
                if 'addbranches' in self.collections:
                    writeNewVariables = eval(self.config.get("Regression", "writeNewVariablesDict"))
                    sampleTree.addOutputBranches(writeNewVariables)
                
                # DEPRECATED, do not use anymore ---> use BranchTools.Drop()
                if 'removebranches' in self.collections:
                    bl_branch = eval(config.get('Branches', 'useless_branch'))
                    for br in bl_branch:
                        sampleTree.addBranchToBlacklist(br)
                    bl_branch = eval(config.get('Branches', 'useless_after_sys'))
                    for br in bl_branch:
                        sampleTree.addBranchToBlacklist(br)

                # define output file 
                sampleTree.addOutputTree(subJob['tmpFileName'], cut='1', branches='*', friend=self.opts.friend)

                # run processing
                for pyModule in pyModules:
                    if hasattr(pyModule, "beforeProcessing"):
                        getattr(pyModule, "beforeProcessing")()

                sampleTree.process()

                for pyModule in pyModules:
                    if hasattr(pyModule, "afterProcessing"):
                        getattr(pyModule, "afterProcessing")()

                # if output trees have been produced: copy temporary file to output folder
                if sampleTree.getNumberOfOutputTrees() > 0: 
                    try:
                        self.fileLocator.cp(subJob['tmpFileName'], subJob['outputFileName'], force=True)
                        print 'copy ', subJob['tmpFileName'], subJob['outputFileName']

                        if self.verifyCopy:
                            if not self.fileLocator.isValidRootFile(subJob['outputFileName']):
                                print 'INFO: output at final destination broken, try to copy again from scratch disk to final destination...'
                                self.fileLocator.cp(subJob['tmpFileName'], subJob['outputFileName'], force=True)
                                print 'INFO: second attempt copy done!'
                                if not self.fileLocator.isValidRootFile(subJob['outputFileName']):
                                    print '\x1b[31mERROR: output still broken!\x1b[0m'
                                    nFilesFailed += 1
                                    raise Exception("FileCopyError")
                                else:
                                    print 'INFO: file is good after second attempt!'
                    except Exception as e:
                        print e
                        print "\x1b[31mERROR: copy from scratch to final destination failed!!\x1b[0m"

                    # delete temporary file
                    try:
                        self.fileLocator.rm(subJob['tmpFileName'])
                    except Exception as e:
                        print e
                        print "WARNING: could not delete file on scratch!"


                # clean up
                if hasattr(wObject, "cleanUp") and callable(getattr(wObject, "cleanUp")):
                    getattr(wObject, "cleanUp")()

            else:
                print 'SKIP:', subJob['inputFileNames']

        if nFilesFailed > 0:
            raise Exception("ProcessingIncomplete")
Exemplo n.º 17
0
class SkimsHelper(object):
    """Write the cached trees of all samples of one plot region into a single skim file.

    prepare() collects the names of the cached files and returns self, so that it can be chained
    with run(), which processes these files with a SampleTree and copies the output file from the
    scratch folder to the output folder.
    """

    def __init__(self, config, region, sampleIdentifier=None, opts=None):
        """Read paths, cuts and sample definitions for the given region from the config.

        config:           config parser object, queried with get()/has_option()
        region:           name of the plot region; its options are read from section 'Plot:<region>'
        sampleIdentifier: optional comma separated list of sample identifiers; if given, only
                          samples with one of these identifiers are kept
        opts:             object with attributes inputDir and outputDir, which are used as option
                          names in the [Directories] section. It is required although it defaults
                          to None, since both attributes are read unconditionally.
        """
        self.config = config
        self.region = region
        self.sampleIdentifiers = sampleIdentifier.split(',') if sampleIdentifier and len(sampleIdentifier) > 0 else None

        # VHbb namespace (a failure to load the library is only reported, not raised)
        VHbbNameSpace=config.get('VHbbNameSpace','library')
        returnCode = ROOT.gSystem.Load(VHbbNameSpace)
        if returnCode != 0:
            print ("\x1b[31mERROR: loading VHbbNameSpace failed with code %d\x1b[0m"%returnCode)
        else:
            print ("INFO: loaded VHbbNameSpace: %s"%VHbbNameSpace)

        # input/output paths
        self.fileLocator = FileLocator(config=self.config)
        self.pathIN = self.config.get('Directories', opts.inputDir)
        self.pathOUT = self.config.get('Directories', opts.outputDir)
        self.tmpDir = self.config.get('Directories', 'scratch')

        self.samplesPath = config.get('Directories', 'plottingSamples')
        self.samplesDefinitions = config.get('Directories','samplesinfo') 
        self.samplesInfo = ParseInfo(self.samplesDefinitions, self.samplesPath)
        self.sampleFilesFolder = config.get('Directories', 'samplefiles')
        self.plotPath = config.get('Directories', 'plotpath')

        # plot regions
        self.configSection='Plot:%s'%region

        # additional cut to only plot a subset of the region
        self.subcut = None
        if self.config.has_option(self.configSection, 'subcut'):
            self.subcut = self.config.get(self.configSection, 'subcut')
            print("INFO: use cut:", self.subcut)

        # additional global blinding cut:
        self.addBlindingCut = None
        if self.config.has_option('Plot_general','addBlindingCut'): #contained in plots, cut on the event number
            self.addBlindingCut = self.config.get('Plot_general','addBlindingCut')
            print ('adding add. blinding cut:', self.addBlindingCut)

        # load samples
        # NOTE(review): the config values below are passed to eval(), so the config file has to be trusted
        self.data = eval(self.config.get(self.configSection, 'Datas')) # read the data corresponding to each CR (section)
        self.mc = eval(self.config.get('Plot_general', 'samples')) # read the list of mc samples
        self.total_lumi = eval(self.config.get('General', 'lumi'))
        self.signalRegion = False
        if self.config.has_option(self.configSection, 'Signal'):
            self.mc.append(self.config.get(self.configSection, 'Signal'))
            self.signalRegion = True
        self.dataSamples = self.samplesInfo.get_samples(self.data)
        self.mcSamples = self.samplesInfo.get_samples(self.mc)

        # filter samples used in the plot
        if self.sampleIdentifiers:
            self.dataSamples = [x for x in self.dataSamples if x.identifier in self.sampleIdentifiers]
            self.mcSamples =   [x for x in self.mcSamples   if x.identifier in self.sampleIdentifiers]

    def prepare(self):
        """Collect the names of the cached files of all selected DATA and MC samples.

        The result is stored in self.fileNames. An error message is printed (no exception is
        raised) if no file has been found.

        Returns self.
        """
        # add DATA + MC samples
        self.fileNames = []
        for sample in self.dataSamples + self.mcSamples:
            print(sample.identifier)
            
            # cuts: the cut of the sample itself plus the optional region, data and blinding cuts
            sampleCuts = [sample.subcut]
            if self.config.has_option('Cuts', self.region):
                sampleCuts.append(self.config.get('Cuts', self.region))
            if self.config.has_option(self.configSection, 'Datacut'):
                sampleCuts.append(self.config.get(self.configSection, 'Datacut'))
            if self.addBlindingCut:
                sampleCuts.append(self.addBlindingCut)
            
            # get sample tree from cache
            # (the config of this instance is passed on, not a module level variable 'config')
            self.fileNames += TreeCache.TreeCache(
                    sample=sample,
                    cutList=sampleCuts,
                    inputFolder=self.samplesPath,
                    config=self.config
                ).findCachedFileNames()
        if len(self.fileNames) < 1:
            print("\x1b[31mERROR: no files found, run cacheplot!\x1b[0m")
        return self

    def run(self):
        """Process the files found by prepare() and copy the resulting skim to the output folder.

        The output is first written to the scratch folder and then copied to
        <pathOUT>/skim_<channel>_<region>_<yymmdd>.root. The temporary file is only removed if the
        copy at the destination is a valid ROOT file. Copy errors are printed, not raised.
        """
        name = self.config.get('Configuration', 'channel') if self.config.has_option('Configuration', 'channel') else '_'
        timestamp = datetime.datetime.now().strftime("%y%m%d")
        # the region of this instance is used, not a module level variable 'region'
        tmpName = self.tmpDir + '/skim_' + name + '_' + self.region + '_' + timestamp + '_tmp.root'
        destName = self.pathOUT + '/skim_' + name + '_' + self.region + '_' + timestamp + '.root'

        sampleTree = SampleTree(self.fileNames, config=self.config) 

        # optional constant branch with the control sample number of this region (-1 if not defined)
        if self.config.has_option('Plot_general', 'controlSample'):
            controlSampleDict = eval(self.config.get('Plot_general', 'controlSample'))
            controlSample = controlSampleDict[self.region] if self.region in controlSampleDict else -1
            sampleTree.addOutputBranch("controlSample", lambda x: controlSample, branchType="i")
            print("INFO: setting controlSample to", controlSample)

        sampleTree.addOutputTree(tmpName, cut='1', branches='*', friend=False)
        sampleTree.process()

        # copy to final destination
        if sampleTree.getNumberOfOutputTrees() > 0:
            try:
                self.fileLocator.cp(tmpName, destName, force=True)
                print('copy ', tmpName, destName)

                if not self.fileLocator.isValidRootFile(destName):
                    print("\x1b[31mERROR: copy failed, output is broken!\x1b[0m")
                else:
                    # keep the temporary file if it cannot be removed, the skim itself is fine
                    try:
                        self.fileLocator.rm(tmpName)
                    except Exception as e:
                        print(e)
            except Exception as e:
                print("\x1b[31mERROR: copy failed!", e, "\x1b[0m")
Exemplo n.º 18
0
Arquivo: sys_new.py Projeto: GLP90/Xbb
    def __init__(self, opts):

        # get file list
        self.filelist = FileList.decompress(opts.fileList) if len(opts.fileList) > 0 else None
        print "len(filelist)",len(self.filelist),
        if len(self.filelist) > 0:
            print "filelist[0]:", self.filelist[0]
        else:
            print ''

        # config
        self.debug = 'XBBDEBUG' in os.environ
        self.verifyCopy = True
        self.opts = opts
        self.config = BetterConfigParser()
        self.config.read(opts.config)
        samplesinfo = self.config.get('Directories', 'samplesinfo')
        self.channel = self.config.get('Configuration', 'channel')

        # load namespace, TODO
        VHbbNameSpace = self.config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(VHbbNameSpace)

        # directories
        self.pathIN = self.config.get('Directories', opts.inputDir)
        self.pathOUT = self.config.get('Directories', opts.outputDir)
        self.tmpDir = self.config.get('Directories', 'scratch')
        print 'INput samples:\t%s'%self.pathIN
        print 'OUTput samples:\t%s'%self.pathOUT

        self.fileLocator = FileLocator(config=self.config)

        # check if given sample identifier uniquely matches a samples from config
        matchingSamples = ParseInfo(samplesinfo, self.pathIN).find(identifier=opts.sampleIdentifier)
        if len(matchingSamples) != 1:
            print "ERROR: need exactly 1 sample identifier as input with -S !!"
            print matchingSamples
            exit(1)
        self.sample = matchingSamples[0]

        # collections
        self.collections = [x.strip() for x in opts.addCollections.split(',') if len(x.strip()) > 0] if len(opts.addCollections.strip())>0  else []
        if len(self.collections) < 1:
            print "\x1b[31mWARNING: no collections added! Specify the collections to add with the --addCollections option!\x1b[0m"
        print 'collections to add:', self.collections
        self.collections = self.parseCollectionList(self.collections)
        print 'after parsing:', self.collections

        # temorary folder to save the files of this job on the scratch
        temporaryName = self.sample.identifier + '/' + uuid.uuid4().hex

        # input files
        self.subJobs = []
        if opts.join:
            print("INFO: join input files! This is an experimental feature!")

            # translate naming convention of .txt file to imported files after the prep step
            inputFileNamesAfterPrep = [self.fileLocator.getFilenameAfterPrep(x) for x in self.filelist]

            self.subJobs.append({
                'inputFileNames': self.filelist,
                'localInputFileNames': ["{path}/{subfolder}/{filename}".format(path=self.pathIN, subfolder=self.sample.identifier, filename=localFileName) for localFileName in inputFileNamesAfterPrep],
                'outputFileName': "{path}/{subfolder}/{filename}".format(path=self.pathOUT, subfolder=self.sample.identifier, filename=inputFileNamesAfterPrep[0]),
                'tmpFileName': "{path}/{subfolder}/{filename}".format(path=self.tmpDir, subfolder=temporaryName, filename=inputFileNamesAfterPrep[0]),
                })

        else:
            
            # create separate subjob for all files (default!)
            for inputFileName in self.filelist:
                inputFileNamesAfterPrep = [self.fileLocator.getFilenameAfterPrep(inputFileName)]

                self.subJobs.append({
                    'inputFileNames': [inputFileName],
                    'localInputFileNames': ["{path}/{subfolder}/{filename}".format(path=self.pathIN, subfolder=self.sample.identifier, filename=localFileName) for localFileName in inputFileNamesAfterPrep],
                    'outputFileName': "{path}/{subfolder}/{filename}".format(path=self.pathOUT, subfolder=self.sample.identifier, filename=inputFileNamesAfterPrep[0]),
                    'tmpFileName': "{path}/{subfolder}/{filename}".format(path=self.tmpDir, subfolder=temporaryName, filename=inputFileNamesAfterPrep[0]),
                    })
Exemplo n.º 19
0
Arquivo: sys_new.py Projeto: GLP90/Xbb
class XbbRun:

    def __init__(self, opts):

        # get file list
        self.filelist = FileList.decompress(opts.fileList) if len(opts.fileList) > 0 else None
        print "len(filelist)",len(self.filelist),
        if len(self.filelist) > 0:
            print "filelist[0]:", self.filelist[0]
        else:
            print ''

        # config
        self.debug = 'XBBDEBUG' in os.environ
        self.verifyCopy = True
        self.opts = opts
        self.config = BetterConfigParser()
        self.config.read(opts.config)
        samplesinfo = self.config.get('Directories', 'samplesinfo')
        self.channel = self.config.get('Configuration', 'channel')

        # load namespace, TODO
        VHbbNameSpace = self.config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(VHbbNameSpace)

        # directories
        self.pathIN = self.config.get('Directories', opts.inputDir)
        self.pathOUT = self.config.get('Directories', opts.outputDir)
        self.tmpDir = self.config.get('Directories', 'scratch')
        print 'INput samples:\t%s'%self.pathIN
        print 'OUTput samples:\t%s'%self.pathOUT

        self.fileLocator = FileLocator(config=self.config)

        # check if given sample identifier uniquely matches a samples from config
        matchingSamples = ParseInfo(samplesinfo, self.pathIN).find(identifier=opts.sampleIdentifier)
        if len(matchingSamples) != 1:
            print "ERROR: need exactly 1 sample identifier as input with -S !!"
            print matchingSamples
            exit(1)
        self.sample = matchingSamples[0]

        # collections
        self.collections = [x.strip() for x in opts.addCollections.split(',') if len(x.strip()) > 0] if len(opts.addCollections.strip())>0  else []
        if len(self.collections) < 1:
            print "\x1b[31mWARNING: no collections added! Specify the collections to add with the --addCollections option!\x1b[0m"
        print 'collections to add:', self.collections
        self.collections = self.parseCollectionList(self.collections)
        print 'after parsing:', self.collections

        # temorary folder to save the files of this job on the scratch
        temporaryName = self.sample.identifier + '/' + uuid.uuid4().hex

        # input files
        self.subJobs = []
        if opts.join:
            print("INFO: join input files! This is an experimental feature!")

            # translate naming convention of .txt file to imported files after the prep step
            inputFileNamesAfterPrep = [self.fileLocator.getFilenameAfterPrep(x) for x in self.filelist]

            self.subJobs.append({
                'inputFileNames': self.filelist,
                'localInputFileNames': ["{path}/{subfolder}/{filename}".format(path=self.pathIN, subfolder=self.sample.identifier, filename=localFileName) for localFileName in inputFileNamesAfterPrep],
                'outputFileName': "{path}/{subfolder}/{filename}".format(path=self.pathOUT, subfolder=self.sample.identifier, filename=inputFileNamesAfterPrep[0]),
                'tmpFileName': "{path}/{subfolder}/{filename}".format(path=self.tmpDir, subfolder=temporaryName, filename=inputFileNamesAfterPrep[0]),
                })

        else:
            
            # create separate subjob for all files (default!)
            for inputFileName in self.filelist:
                inputFileNamesAfterPrep = [self.fileLocator.getFilenameAfterPrep(inputFileName)]

                self.subJobs.append({
                    'inputFileNames': [inputFileName],
                    'localInputFileNames': ["{path}/{subfolder}/{filename}".format(path=self.pathIN, subfolder=self.sample.identifier, filename=localFileName) for localFileName in inputFileNamesAfterPrep],
                    'outputFileName': "{path}/{subfolder}/{filename}".format(path=self.pathOUT, subfolder=self.sample.identifier, filename=inputFileNamesAfterPrep[0]),
                    'tmpFileName': "{path}/{subfolder}/{filename}".format(path=self.tmpDir, subfolder=temporaryName, filename=inputFileNamesAfterPrep[0]),
                    })

    # lists of single modules can be given instead of a module, "--addCollections Sys.all"
    # [Sys]
    # all = ['Sys.Vtype', 'Sys.Leptons', ...]
    # TODO: make it fully recursive
    def parseCollectionList(self, collections): 
        collectionsListsReplaced = []
        for collection in collections:
            if '.' in collection:
                section = collection.split('.')[0]
                key = collection.split('.')[1]
                listExpression = self.config.get(section, key).strip()
                if listExpression.startswith('[') and listExpression.endswith(']'):
                    listParsed = eval(listExpression)
                    for i in listParsed:
                        collectionsListsReplaced.append(i)
                else:
                    collectionsListsReplaced.append(collection)
            else:
                collectionsListsReplaced.append(collection)
        return collectionsListsReplaced

    # run all subjobs
    def run(self):

        nFilesProcessed = 0
        nFilesFailed = 0

        for subJob in self.subJobs:

            # only process if output is non-existing/broken or --force was used
            if self.opts.force or not self.fileLocator.isValidRootFile(subJob['outputFileName']):

                # create directories
                outputFolder = '/'.join(subJob['outputFileName'].split('/')[:-1])
                tmpFolder = '/'.join(subJob['tmpFileName'].split('/')[:-1])
                self.fileLocator.makedirs(outputFolder)
                self.fileLocator.makedirs(tmpFolder)

                # load sample tree
                sampleTree = SampleTree(subJob['localInputFileNames'], config=self.config)
                if not sampleTree.tree:
                    print "trying fallback...", len(subJob['inputFileNames'])

                    if len(subJob['inputFileNames']) == 1:
                        # try original naming scheme if reading directly from Heppy/Nano ntuples (without prep)
                        fileNameOriginal = self.pathIN + '/' + subJob['inputFileNames'][0]
                        print "FO:", fileNameOriginal
                        xrootdRedirector = self.fileLocator.getRedirector(fileNameOriginal)
                        sampleTree = SampleTree([fileNameOriginal], config=self.config, xrootdRedirector=xrootdRedirector)
                        if not sampleTree.tree:
                            print "\x1b[31mERROR: file does not exist or is broken, will be SKIPPED!\x1b[0m"
                            nFilesFailed += 1
                            continue
                    else:
                        print "\x1b[31mERROR: file does not exist or is broken, will be SKIPPED! (old naming scheme not supported for joining multipel files)\x1b[0m"
                        nFilesFailed += 1
                        continue

                # to use this syntax, use "--addCollections Sys.Vtype" for a config file entry like this:
                # [Sys]
                # Vtype = VtypeCorrector.VtypeCorrector(channel='Zll')
                # (instead of passing the tree in the constructor, the setTree method can be used)
                pyModules = []
                for collection in self.collections:
                    if '.' in collection:
                        section = collection.split('.')[0]
                        key = collection.split('.')[1]
                        pyCode = self.config.get(section, key)

                        # import module from myutils
                        moduleName = pyCode.split('(')[0].split('.')[0].strip()
                        if self.debug:
                            print "DEBUG: import module:", moduleName
                            print("\x1b[33mDEBUG: " + collection + ": run PYTHON code:\n"+pyCode+"\x1b[0m")
                        globals()[moduleName] = importlib.import_module(".{module}".format(module=moduleName), package="myutils")

                        # get object
                        wObject = eval(pyCode)

                        # pass the tree and other variables if needed to finalize initialization
                        if hasattr(wObject, "customInit") and callable(getattr(wObject, "customInit")):
                            wObject.customInit({'config': self.config,
                                                'sampleTree': sampleTree,
                                                'tree': sampleTree.tree,
                                                'sample': self.sample,
                                                'channel': self.channel,
                                                'pathIN': self.pathIN,
                                                'pathOUT': self.pathOUT,
                                                })

                        # add callbacks if the objects provides any
                        if hasattr(wObject, "processEvent") and callable(getattr(wObject, "processEvent")):
                            sampleTree.addCallback('event', wObject.processEvent)

                        # add branches
                        if hasattr(wObject, "getBranches") and callable(getattr(wObject, "getBranches")):
                            sampleTree.addOutputBranches(wObject.getBranches())

                        pyModules.append(wObject)

                # DEPRECATED, do not use anymore ---> use BranchTools.TreeFormulas()
                if 'addbranches' in self.collections:
                    writeNewVariables = eval(self.config.get("Regression", "writeNewVariablesDict"))
                    sampleTree.addOutputBranches(writeNewVariables)
                
                # DEPRECATED, do not use anymore ---> use BranchTools.Drop()
                if 'removebranches' in self.collections:
                    bl_branch = eval(config.get('Branches', 'useless_branch'))
                    for br in bl_branch:
                        sampleTree.addBranchToBlacklist(br)
                    bl_branch = eval(config.get('Branches', 'useless_after_sys'))
                    for br in bl_branch:
                        sampleTree.addBranchToBlacklist(br)

                # define output file 
                sampleTree.addOutputTree(subJob['tmpFileName'], cut='1', branches='*', friend=self.opts.friend)

                # run processing
                for pyModule in pyModules:
                    if hasattr(pyModule, "beforeProcessing"):
                        getattr(pyModule, "beforeProcessing")()

                sampleTree.process()

                for pyModule in pyModules:
                    if hasattr(pyModule, "afterProcessing"):
                        getattr(pyModule, "afterProcessing")()

                # if output trees have been produced: copy temporary file to output folder
                if sampleTree.getNumberOfOutputTrees() > 0: 
                    try:
                        self.fileLocator.cp(subJob['tmpFileName'], subJob['outputFileName'], force=True)
                        print 'copy ', subJob['tmpFileName'], subJob['outputFileName']

                        if self.verifyCopy:
                            if not self.fileLocator.isValidRootFile(subJob['outputFileName']):
                                print 'INFO: output at final destination broken, try to copy again from scratch disk to final destination...'
                                self.fileLocator.cp(subJob['tmpFileName'], subJob['outputFileName'], force=True)
                                print 'INFO: second attempt copy done!'
                                if not self.fileLocator.isValidRootFile(subJob['outputFileName']):
                                    print '\x1b[31mERROR: output still broken!\x1b[0m'
                                    nFilesFailed += 1
                                    raise Exception("FileCopyError")
                                else:
                                    print 'INFO: file is good after second attempt!'
                    except Exception as e:
                        print e
                        print "\x1b[31mERROR: copy from scratch to final destination failed!!\x1b[0m"

                    # delete temporary file
                    try:
                        self.fileLocator.rm(subJob['tmpFileName'])
                    except Exception as e:
                        print e
                        print "WARNING: could not delete file on scratch!"


                # clean up
                if hasattr(wObject, "cleanUp") and callable(getattr(wObject, "cleanUp")):
                    getattr(wObject, "cleanUp")()

            else:
                print 'SKIP:', subJob['inputFileNames']

        if nFilesFailed > 0:
            raise Exception("ProcessingIncomplete")
Exemplo n.º 20
0
config = BetterConfigParser()
config.read(opts.config)
anaTag = config.get("Analysis","tag")
TrainFlag = eval(config.get('Analysis','TrainFlag'))
btagLibrary = config.get('BTagReshaping','library')
samplesinfo=config.get('Directories','samplesinfo')
channel=config.get('Configuration','channel')
VHbbNameSpace=config.get('VHbbNameSpace','library')
ROOT.gSystem.Load(VHbbNameSpace)
pathIN = config.get('Directories','SYSin')
pathOUT = config.get('Directories','SYSout')
tmpDir = config.get('Directories','scratch')
print 'INput samples:\t%s'%pathIN
print 'OUTput samples:\t%s'%pathOUT

fileLocator = FileLocator(config=config)

# samples
info = ParseInfo(samplesinfo, pathIN)
matchingSamples = [x for x in info if x.identifier==opts.sampleIdentifier and not x.subsample]
if len(matchingSamples) != 1:
    print "need exactly 1 sample identifier as input with -S !!"
    print matchingSamples
    exit(1)
sample = matchingSamples[0]

# TODO: 
collections = [x.strip() for x in opts.addCollections.split(',') if len(x.strip()) > 0] if len(opts.addCollections.strip())>0  else []
if len(collections) < 1:
    print "\x1b[31mWARNING: no collections added! Specify the collections to add with the --addCollections option!\x1b[0m"
print 'collections to add:', collections
Exemplo n.º 21
0
# build the configuration for the evaluator in memory:
# one section for the MVA itself, one with the list of systematics and one with the input
# variables for each systematic variation
config = BetterConfigParser()
config.add_section(mvaName)
for _option, _value in [
        ("tensorflowConfig", tensorflowConfig),
        ("scalerDump", scalerDump),
        ("checkpoint", checkpoint),
        ("branchName", branchName),
        ("nClasses", "%d"%nClasses),
        ("treeVarSet", "dnnVars"),
    ]:
    config.set(mvaName, _option, _value)

config.add_section("systematics")
config.set("systematics", "systematics", " ".join(systematics))

config.add_section("dnnVars")
for syst in systematics:
    config.set("dnnVars", syst, treeVarSet[syst])

# helper for fs operations
fileLocator = FileLocator(
    config=config,
    xrootdRedirector=xrootdRedirector,
)
fileLocator.mkdir(outputFolder)

# load input files
sampleTree = SampleTree(
    [inputFile],
    treeName=inputTreeName,
    xrootdRedirector=xrootdRedirector,
)

# load tensorflow evaluator
tfe = tensorflowEvaluator.tensorflowEvaluator(mvaName)
tfe.customInit({
    'config': config,
    'sample': sample,
    'sampleTree': sampleTree,
})

# register callbacks for processing
sampleTree.addCallback('event', tfe.processEvent)

# define new branches to add
sampleTree.addOutputBranches(tfe.getBranches())
Exemplo n.º 22
0
class PartialFileMerger(object):
    """Merge one chunk of input files of a sample into a single ROOT file.

    The name of the merged file is derived from the SHA-224 hashes of the
    input file names, the submit time and the chunk number. run() writes the
    merged file below the 'scratch' directory first and then copies it to the
    'HADDout' directory.
    """

    def __init__(self,
                 fileNames,
                 chunkNumber,
                 submitTime='000000_000000',
                 force=False,
                 config=None,
                 sampleIdentifier=None):
        """Store the job parameters and compute the merged (fake) file name.

        Args:
            fileNames: list of input file names. Must not be empty: the
                directory prefix of the first entry is reused for the merged
                file name (IndexError otherwise).
            chunkNumber: integer used in the 'tree_<n>.root' part of the name.
            submitTime: string used as one path component of the merged name.
            force: passed as third argument to fileLocator.cp() in run().
            config: config object providing get(); also handed to
                FileLocator and SampleTree.
            sampleIdentifier: sample name, used as sub-folder in all paths.
        """
        self.fileNames = fileNames
        self.debug = 'XBBDEBUG' in os.environ
        self.submitTime = submitTime
        self.chunkNumber = chunkNumber
        self.config = config
        self.fileLocator = FileLocator(config=self.config)
        # -O option (reoptimizing baskets) leads to crashes...
        self.commandTemplate = "hadd -k  -ff {output} {inputs}"
        self.sampleIdentifier = sampleIdentifier
        self.force = force

        # use sampleTree class as replacement for hadd
        self.useChain = True

        self.mergedFileName = self._buildMergedFileName(
            self.fileNames, self.submitTime, chunkNumber)

    @staticmethod
    def _sha224Hex(text):
        """Return the SHA-224 hex digest of text.

        hashlib only accepts bytes on Python 3, so text strings are encoded
        as UTF-8 first; byte strings (this includes 'str' on Python 2) are
        hashed unchanged.
        """
        if not isinstance(text, bytes):
            text = text.encode('utf-8')
        return hashlib.sha224(text).hexdigest()

    @classmethod
    def _buildMergedFileName(cls, fileNames, submitTime, chunkNumber):
        """Return the fake name of the merged file for the given inputs.

        The per-file hashes are sorted before they are combined, so the hash
        part does not depend on the order of fileNames. The directory prefix
        is the first file name without its last four path components.
        """
        treeHashes = sorted(cls._sha224Hex(fileName) for fileName in fileNames)
        totalHash = cls._sha224Hex('-'.join(treeHashes))
        basePath = '/'.join(fileNames[0].split('/')[:-4])
        return (basePath + '/' + totalHash + '/' + submitTime +
                '/0000/tree_%d.root' % chunkNumber)

    # return a fake name which is written to sample list .txt files in order to keep compatibility to the method of converting file names in .txt
    # files to file names after prep step. This conversion applied to the fake name will give the real file name.
    def getMergedFakeFileName(self):
        """Return the fake merged file name computed in the constructor."""
        return self.mergedFileName

    # real output file name where the file is stored
    def getOutputFileName(self):
        """Return the final output path below the 'HADDout' directory."""
        fakeFileName = self.getMergedFakeFileName()
        outputFileName = self.fileLocator.getFilenameAfterPrep(fakeFileName)
        return "{path}/{sample}/{fileName}".format(
            path=self.config.get('Directories', 'HADDout'),
            sample=self.sampleIdentifier,
            fileName=outputFileName)

    def getTemporaryFileName(self):
        """Return the temporary output path below '<scratch>/hadd'."""
        fakeFileName = self.getMergedFakeFileName()
        outputFileName = self.fileLocator.getFilenameAfterPrep(fakeFileName)
        return "{path}/hadd/{sample}/{fileName}".format(
            path=self.config.get('Directories', 'scratch'),
            sample=self.sampleIdentifier,
            fileName=outputFileName)

    def run(self):
        """Merge the input files and copy the result to its final location.

        Raises:
            Exception: "HaddError" if the merge result is non-zero,
                "FileCopyError" if the copy to the final destination fails.
        """
        inputFileNames = [
            "{path}/{sample}/{fileName}".format(
                path=self.config.get('Directories', 'HADDin'),
                sample=self.sampleIdentifier,
                fileName=self.fileLocator.getFilenameAfterPrep(fileName))
            for fileName in self.fileNames
        ]
        outputFileName = self.getTemporaryFileName()
        self.fileLocator.makedirs('/'.join(outputFileName.split('/')[:-1]))
        # NOTE(review): the template has no {f} placeholder, so the 'f'
        # keyword is ignored by str.format() and self.force never reaches the
        # hadd command line. Confirm whether this is intended.
        command = self.commandTemplate.format(output=outputFileName,
                                              inputs=' '.join(inputFileNames),
                                              f="-f" if self.force else "")
        if self.debug:
            print("DEBUG: run \x1b[34m", command, "\x1b[0m")

        if self.useChain:
            # use sampleTree class (can e.g. drop branches at the same time)
            sampleTree = SampleTree(inputFileNames, config=self.config)

            # best effort: a missing or malformed option only prints a message
            try:
                # NOTE(review): eval() executes the config value as Python
                # code; only use with trusted config files.
                removeBranches = eval(
                    self.config.get('General', 'remove_branches'))
                for removeBranch in removeBranches:
                    sampleTree.addBranchToBlacklist(removeBranch)
                    print("DEBUG: disable branch ", removeBranch)
            except Exception as e:
                print("DEBUG: could not disable branch:", e)
            sampleTree.addOutputTree(outputFileName, cut='1', branches='*')
            sampleTree.process()
            # this path reports success unconditionally; errors can only
            # surface as exceptions raised above
            result = 0
        else:
            # standard hadd
            result = self.fileLocator.runCommand(command)

        print("INFO: hadd returned ", result)
        if result == 0:
            finalOutputFileName = self.getOutputFileName()
            print("move file to final destination: \x1b[34m",
                  finalOutputFileName, "\x1b[0m")
            self.fileLocator.makedirs('/'.join(
                finalOutputFileName.split('/')[:-1]))
            resultCopy = self.fileLocator.cp(outputFileName,
                                             finalOutputFileName, self.force)
            if not resultCopy:
                print("\x1b[31mERROR: copy failed\n from:", outputFileName,
                      "\n to:", finalOutputFileName, "\n force:", self.force,
                      "\x1b[0m")
                raise Exception("FileCopyError")
            # try to delete temporary file
            try:
                self.fileLocator.rm(outputFileName)
            except Exception as e:
                print("ERROR: could not delete temporary file:",
                      outputFileName, " => ", e)
            print("INFO: done.")
        else:
            raise Exception("HaddError")
Exemplo n.º 23
0
Arquivo: hadd.py Projeto: perrozzi/Xbb
class PartialFileMerger(object):
    """Merge one chunk of input files of a sample into a single ROOT file.

    The merged file name is built from the SHA-224 hashes of the input file names, the submit time and the
    chunk number. run() writes the merged file below the 'scratch' directory and copies it to 'HADDout'.
    """

    def __init__(self, fileNames, chunkNumber, submitTime='000000_000000', force=False, config=None, sampleIdentifier=None):
        """Store the job parameters and compute the merged (fake) file name.

        Args:
            fileNames: non-empty list of input file names; the directory prefix of the first entry is reused
                for the merged file name (an empty list raises IndexError).
            chunkNumber: integer used in the 'tree_<n>.root' part of the name.
            submitTime: string used as one path component of the merged name.
            force: passed as third argument to fileLocator.cp() in run().
            config: config object providing get(); also handed to FileLocator and SampleTree.
            sampleIdentifier: sample name, used as sub-folder in all paths.
        """
        self.fileNames = fileNames
        self.debug = 'XBBDEBUG' in os.environ
        self.submitTime = submitTime
        self.chunkNumber = chunkNumber
        self.config = config
        self.fileLocator = FileLocator(config=self.config)
        # -O option (reoptimizing baskets) leads to crashes...
        self.commandTemplate = "hadd -k  -ff {output} {inputs}"
        self.sampleIdentifier = sampleIdentifier
        self.force = force

        # use sampleTree class as replacement for hadd
        self.useChain = True

        self.mergedFileName = self._buildMergedFileName(self.fileNames, self.submitTime, chunkNumber)

    @staticmethod
    def _sha224Hex(text):
        """Return the SHA-224 hex digest of text; text strings are UTF-8 encoded, byte strings hashed as they are.

        hashlib rejects text strings on Python 3, on Python 2 'str' already is a byte string.
        """
        if not isinstance(text, bytes):
            text = text.encode('utf-8')
        return hashlib.sha224(text).hexdigest()

    @classmethod
    def _buildMergedFileName(cls, fileNames, submitTime, chunkNumber):
        """Return the fake merged file name; the hash part is independent of the order of fileNames."""
        treeHashes = sorted(cls._sha224Hex(fileName) for fileName in fileNames)
        totalHash = cls._sha224Hex('-'.join(treeHashes))
        # directory prefix: first file name without its last four path components
        basePath = '/'.join(fileNames[0].split('/')[:-4])
        return basePath + '/' + totalHash + '/' + submitTime + '/0000/tree_%d.root'%chunkNumber

    # return a fake name which is written to sample list .txt files in order to keep compatibility to the method of converting file names in .txt
    # files to file names after prep step. This conversion applied to the fake name will give the real file name.
    def getMergedFakeFileName(self):
        """Return the fake merged file name computed in the constructor."""
        return self.mergedFileName

    # real output file name where the file is stored
    def getOutputFileName(self):
        """Return the final output path below the 'HADDout' directory."""
        fakeFileName = self.getMergedFakeFileName()
        outputFileName = self.fileLocator.getFilenameAfterPrep(fakeFileName)
        return "{path}/{sample}/{fileName}".format(path=self.config.get('Directories','HADDout'), sample=self.sampleIdentifier, fileName=outputFileName)

    def getTemporaryFileName(self):
        """Return the temporary output path below '<scratch>/hadd'."""
        fakeFileName = self.getMergedFakeFileName()
        outputFileName = self.fileLocator.getFilenameAfterPrep(fakeFileName)
        return "{path}/hadd/{sample}/{fileName}".format(path=self.config.get('Directories','scratch'), sample=self.sampleIdentifier, fileName=outputFileName)

    def run(self):
        """Merge the input files and copy the result to its final location.

        Raises:
            Exception: "HaddError" if the merge result is non-zero, "FileCopyError" if the final copy fails.
        """
        inputFileNames = ["{path}/{sample}/{fileName}".format(path=self.config.get('Directories','HADDin'), sample=self.sampleIdentifier, fileName=self.fileLocator.getFilenameAfterPrep(fileName)) for fileName in self.fileNames]
        outputFileName = self.getTemporaryFileName()
        self.fileLocator.makedirs('/'.join(outputFileName.split('/')[:-1]))
        # NOTE(review): the template has no {f} placeholder, so the 'f' keyword is ignored by str.format() and
        # self.force never reaches the hadd command line. Confirm whether this is intended.
        command = self.commandTemplate.format(output=outputFileName, inputs=' '.join(inputFileNames), f="-f" if self.force else "")
        if self.debug:
            print ("DEBUG: run \x1b[34m", command, "\x1b[0m")

        if self.useChain:
            # use sampleTree class (can e.g. drop branches at the same time)
            sampleTree = SampleTree(inputFileNames, config=self.config)

            # best effort: a missing or malformed option only prints a message
            try:
                # NOTE(review): eval() executes the config value as Python code; only use with trusted config files.
                removeBranches = eval(self.config.get('General', 'remove_branches'))
                for removeBranch in removeBranches:
                    sampleTree.addBranchToBlacklist(removeBranch)
                    print("DEBUG: disable branch ", removeBranch)
            except Exception as e:
                print("DEBUG: could not disable branch:", e)
            sampleTree.addOutputTree(outputFileName, cut='1', branches='*')
            sampleTree.process()
            # this path reports success unconditionally; errors can only surface as exceptions raised above
            result = 0
        else:
            # standard hadd
            result = self.fileLocator.runCommand(command)

        print ("INFO: hadd returned ", result)
        if result == 0:
            finalOutputFileName = self.getOutputFileName()
            print("move file to final destination: \x1b[34m", finalOutputFileName, "\x1b[0m")
            self.fileLocator.makedirs('/'.join(finalOutputFileName.split('/')[:-1]))
            resultCopy = self.fileLocator.cp(outputFileName, finalOutputFileName, self.force)
            if not resultCopy:
                print("\x1b[31mERROR: copy failed\n from:", outputFileName, "\n to:", finalOutputFileName, "\n force:", self.force, "\x1b[0m")
                raise Exception("FileCopyError")
            # try to delete temporary file
            try:
                self.fileLocator.rm(outputFileName)
            except Exception as e:
                print("ERROR: could not delete temporary file:", outputFileName, " => ", e)
            print("INFO: done.")
        else:
            raise Exception("HaddError")
Exemplo n.º 24
0
# minimal Xbb config, built in memory instead of being read from a file
config = BetterConfigParser()

# section named after the MVA; options are set in the order listed
config.add_section(mvaName)
for _mvaOption, _mvaValue in (
        ("tensorflowConfig", tensorflowConfig),
        ("scalerDump", scalerDump),
        ("checkpoint", checkpoint),
        ("branchName", branchName),
        ("treeVarSet", "dnnVars"),
):
    config.set(mvaName, _mvaOption, _mvaValue)

# the systematics are stored as one space-separated string
config.add_section("systematics")
config.set("systematics", "systematics", " ".join(systematics))

# section referenced by the "treeVarSet" option above: one entry per systematic
config.add_section("dnnVars")
for syst in systematics:
    config.set("dnnVars", syst, treeVarSet[syst])

# file system helper, used here to create the output folder
fileLocator = FileLocator(config=config, xrootdRedirector=xrootdRedirector)
fileLocator.mkdir(outputFolder)

# open the single input file as sample tree
sampleTree = SampleTree(
    [inputFile],
    treeName="tree",
    xrootdRedirector=xrootdRedirector,
)

# create the tensorflow evaluator and hand it config, sample and tree
tfe = tensorflowEvaluator.tensorflowEvaluator(mvaName)
tfe.customInit(dict(config=config, sample=sample, sampleTree=sampleTree))

# the evaluator is called back for every 'event'
sampleTree.addCallback('event', tfe.processEvent)

# the branches provided by the evaluator are added to the output
sampleTree.addOutputBranches(tfe.getBranches())
Exemplo n.º 25
0
class SkimsHelper(object):
    """Write the cached trees of all samples of one plot region into a single skim file.

    Usage as far as visible here: construct, call prepare() to collect the
    cached file names, then run() to write and copy the skim.
    """

    def __init__(self, config, region, sampleIdentifier=None, opts=None):
        """Read paths, cuts and sample lists for the region from the config.

        Args:
            config: config object providing get() and has_option().
            region: name of the plot region; the section 'Plot:<region>' is
                read.
            sampleIdentifier: optional comma-separated string of sample
                identifiers; if given, only these samples are kept.
            opts: options object whose inputDir and outputDir attributes name
                options of the 'Directories' section. The default None is not
                usable: the attributes are accessed unconditionally
                (AttributeError).
        """
        self.config = config
        self.region = region
        self.sampleIdentifiers = sampleIdentifier.split(
            ',') if sampleIdentifier and len(sampleIdentifier) > 0 else None

        # VHbb namespace; a failed load is only reported, not fatal
        VHbbNameSpace = config.get('VHbbNameSpace', 'library')
        returnCode = ROOT.gSystem.Load(VHbbNameSpace)
        if returnCode != 0:
            print(
                "\x1b[31mERROR: loading VHbbNameSpace failed with code %d\x1b[0m"
                % returnCode)
        else:
            print("INFO: loaded VHbbNameSpace: %s" % VHbbNameSpace)

        # input/output paths
        self.fileLocator = FileLocator(config=self.config)
        self.pathIN = self.config.get('Directories', opts.inputDir)
        self.pathOUT = self.config.get('Directories', opts.outputDir)
        self.tmpDir = self.config.get('Directories', 'scratch')

        self.samplesPath = config.get('Directories', 'plottingSamples')
        self.samplesInfo = ParseInfo(samples_path=self.samplesPath,
                                     config=self.config)
        self.sampleFilesFolder = config.get('Directories', 'samplefiles')
        self.plotPath = config.get('Directories', 'plotpath')

        # plot regions
        self.configSection = 'Plot:%s' % region

        # additional cut to only plot a subset of the region
        self.subcut = None
        if self.config.has_option(self.configSection, 'subcut'):
            self.subcut = self.config.get(self.configSection, 'subcut')
            print("INFO: use cut:", self.subcut)

        # additional global blinding cut:
        self.addBlindingCut = None
        if self.config.has_option(
                'Plot_general', 'addBlindingCut'
        ):  #contained in plots, cut on the event number
            self.addBlindingCut = self.config.get('Plot_general',
                                                  'addBlindingCut')
            print('adding add. blinding cut:', self.addBlindingCut)

        # load samples
        # NOTE(review): eval() executes the config values as Python code;
        # only use with trusted config files.
        self.data = eval(self.config.get(
            self.configSection,
            'Datas'))  # read the data corresponding to each CR (section)
        self.mc = eval(self.config.get(
            'Plot_general', 'samples'))  # read the list of mc samples
        self.total_lumi = eval(self.config.get('General', 'lumi'))
        self.signalRegion = False
        if self.config.has_option(self.configSection, 'Signal'):
            self.mc.append(self.config.get(self.configSection, 'Signal'))
            self.signalRegion = True
        self.dataSamples = self.samplesInfo.get_samples(self.data)
        self.mcSamples = self.samplesInfo.get_samples(self.mc)

        # filter samples used in the plot
        if self.sampleIdentifiers:
            self.dataSamples = [
                x for x in self.dataSamples
                if x.identifier in self.sampleIdentifiers
            ]
            self.mcSamples = [
                x for x in self.mcSamples
                if x.identifier in self.sampleIdentifiers
            ]

    def prepare(self):
        """Collect the cached file names of all selected samples in self.fileNames.

        Returns:
            self, so that calls can be chained.

        Raises:
            Exception: "NotCached" if the tree cache of a sample is missing.
        """
        # add DATA + MC samples
        self.fileNames = []
        for sample in self.dataSamples + self.mcSamples:
            print(sample.identifier)

            # cuts
            sampleCuts = [sample.subcut]
            if self.config.has_option('Cuts', self.region):
                sampleCuts.append(self.config.get('Cuts', self.region))
            if self.config.has_option(self.configSection, 'Datacut'):
                sampleCuts.append(
                    self.config.get(self.configSection, 'Datacut'))
            if self.addBlindingCut:
                sampleCuts.append(self.addBlindingCut)

            # get sample tree from cache; use the config of this instance
            # instead of relying on a module-level 'config' variable
            tc = TreeCache.TreeCache(sample=sample,
                                     cutList=sampleCuts,
                                     inputFolder=self.samplesPath,
                                     config=self.config)
            if tc.isCached():
                self.fileNames += tc.findCachedFileNames()
            else:
                print("ERROR: not cached, run cacheplot again")
                raise Exception("NotCached")
        # an empty result is only reported, the caller still gets self back
        if len(self.fileNames) < 1:
            print("\x1b[31mERROR: no files found, run cacheplot!\x1b[0m")
        return self

    def _getSkimFileNames(self):
        """Return (temporary file name, final file name) of the skim.

        Both names contain the channel from the config ('_' if the option is
        not set), the region of this instance and the current date as yymmdd.
        """
        name = self.config.get('Configuration',
                               'channel') if self.config.has_option(
                                   'Configuration', 'channel') else '_'
        timestamp = datetime.datetime.now().strftime("%y%m%d")
        # use the region of this instance instead of relying on a
        # module-level 'region' variable
        baseName = '/skim_' + name + '_' + self.region + '_' + timestamp
        return (self.tmpDir + baseName + '_tmp.root',
                self.pathOUT + baseName + '.root')

    def run(self):
        """Write all collected trees to a temporary file and copy it to the output path.

        prepare() has to be called first, it fills self.fileNames. Copy
        problems are printed and not raised.
        """
        tmpName, destName = self._getSkimFileNames()

        sampleTree = SampleTree(self.fileNames, config=self.config)

        if self.config.has_option('Plot_general', 'controlSample'):
            # NOTE(review): eval() executes the config value as Python code;
            # only use with trusted config files.
            controlSampleDict = eval(
                self.config.get('Plot_general', 'controlSample'))
            # regions missing in the dictionary get the value -1
            controlSample = controlSampleDict[
                self.region] if self.region in controlSampleDict else -1
            sampleTree.addOutputBranch("controlSample",
                                       lambda x: controlSample,
                                       branchType="i")
            print("INFO: setting controlSample to", controlSample)

        sampleTree.addOutputTree(tmpName, cut='1', branches='*', friend=False)
        sampleTree.process()

        # copy to final destination
        if sampleTree.getNumberOfOutputTrees() > 0:
            try:
                self.fileLocator.cp(tmpName, destName, force=True)
                print('copy ', tmpName, destName)

                # the temporary file is only removed after the copy has been
                # checked
                if not self.fileLocator.isValidRootFile(destName):
                    print(
                        "\x1b[31mERROR: copy failed, output is broken!\x1b[0m")
                else:
                    try:
                        self.fileLocator.rm(tmpName)
                    except Exception as e:
                        print(e)
            except Exception as e:
                print("\x1b[31mERROR: copy failed!", e, "\x1b[0m")
Exemplo n.º 26
0
    def __init__(self, opts):

        # get file list
        self.filelist = FileList.decompress(opts.fileList) if len(opts.fileList) > 0 else None
        print "len(filelist)",len(self.filelist),
        if len(self.filelist) > 0:
            print "filelist[0]:", self.filelist[0]
        else:
            print ''

        # config
        self.debug = 'XBBDEBUG' in os.environ
        self.verifyCopy = True
        self.opts = opts
        self.config = BetterConfigParser()
        self.config.read(opts.config)
        self.channel = self.config.get('Configuration', 'channel')

        # load namespace, TODO
        VHbbNameSpace = self.config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(VHbbNameSpace)

        # directories
        self.pathIN = self.config.get('Directories', opts.inputDir)
        self.pathOUT = self.config.get('Directories', opts.outputDir)
        self.tmpDir = self.config.get('Directories', 'scratch')
        print 'INput samples:\t%s'%self.pathIN
        print 'OUTput samples:\t%s'%self.pathOUT

        self.fileLocator = FileLocator(config=self.config)

        # check if given sample identifier uniquely matches a samples from config
        matchingSamples = ParseInfo(samples_path=self.pathIN, config=self.config).find(identifier=opts.sampleIdentifier)
        if len(matchingSamples) != 1:
            print "ERROR: need exactly 1 sample identifier as input with -S !!"
            print matchingSamples
            exit(1)
        self.sample = matchingSamples[0]

        # collections
        self.collections = [x.strip() for x in opts.addCollections.split(',') if len(x.strip()) > 0] if len(opts.addCollections.strip())>0  else []
        if len(self.collections) < 1:
            print "\x1b[31mWARNING: no collections added! Specify the collections to add with the --addCollections option!\x1b[0m"
        print 'collections to add:', self.collections
        self.collections = self.parseCollectionList(self.collections)
        print 'after parsing:', self.collections

        # temorary folder to save the files of this job on the scratch
        temporaryName = self.sample.identifier + '/' + uuid.uuid4().hex

        # input files
        self.subJobs = []
        if opts.join:
            print("INFO: join input files! This is an experimental feature!")

            # translate naming convention of .txt file to imported files after the prep step
            inputFileNamesAfterPrep = [self.fileLocator.getFilenameAfterPrep(x) for x in self.filelist]

            self.subJobs.append({
                'inputFileNames': self.filelist,
                'localInputFileNames': ["{path}/{subfolder}/{filename}".format(path=self.pathIN, subfolder=self.sample.identifier, filename=localFileName) for localFileName in inputFileNamesAfterPrep],
                'outputFileName': "{path}/{subfolder}/{filename}".format(path=self.pathOUT, subfolder=self.sample.identifier, filename=inputFileNamesAfterPrep[0]),
                'tmpFileName': "{path}/{subfolder}/{filename}".format(path=self.tmpDir, subfolder=temporaryName, filename=inputFileNamesAfterPrep[0]),
                })

        else:
            
            # create separate subjob for all files (default!)
            for inputFileName in self.filelist:
                inputFileNamesAfterPrep = [self.fileLocator.getFilenameAfterPrep(inputFileName)]

                self.subJobs.append({
                    'inputFileNames': [inputFileName],
                    'localInputFileNames': ["{path}/{subfolder}/{filename}".format(path=self.pathIN, subfolder=self.sample.identifier, filename=localFileName) for localFileName in inputFileNamesAfterPrep],
                    'outputFileName': "{path}/{subfolder}/{filename}".format(path=self.pathOUT, subfolder=self.sample.identifier, filename=inputFileNamesAfterPrep[0]),
                    'tmpFileName': "{path}/{subfolder}/{filename}".format(path=self.tmpDir, subfolder=temporaryName, filename=inputFileNamesAfterPrep[0]),
                    })
Exemplo n.º 27
0
                  action="store_true",
                  dest="force",
                  default=False,
                  help="force overwriting of already cached files")
opts, args = parser.parse_args(sys.argv)
# an empty config option falls back to the name "config"
if opts.config == "":
    opts.config = "config"

#Import after configure to get help message
from myutils import BetterConfigParser, ParseInfo, MvaEvaluator
config = BetterConfigParser()
config.read(opts.config)
anaTag = config.get("Analysis", "tag")

# get list of files to process
fileLocator = FileLocator(config=config)
if len(opts.fileList) == 0:
    # no explicit list given: take the first chunk of file names the sample
    # tree reports for the sample in the 'MVAin' directory
    filelist = SampleTree(
        dict(name=opts.sampleIdentifier,
             folder=config.get('Directories', 'MVAin')),
        countOnly=True,
        splitFilesChunkSize=-1,
        config=config,
    ).getSampleFileNameChunks()[0]
else:
    # explicit list given on the command line in compressed form
    filelist = FileList.decompress(opts.fileList)
    print("len(filelist)", len(filelist))
    if len(filelist) > 0:
        print("filelist[0]:", filelist[0])