class XbbRun:
    """Apply a list of processing modules ("collections") to the files of one sample.

    The constructor reads the configuration, resolves the sample and builds the
    list of sub-jobs. run() processes every sub-job into a temporary file on the
    scratch directory and copies the result to the output directory.

    All print calls use a single pre-formatted argument so that the output is
    the same with the Python 2 print statement and the print function.
    """

    def __init__(self, opts):
        """Read the configuration and prepare the list of sub-jobs.

        opts is the parsed command line. Attributes read here: fileList, config,
        inputDir, outputDir, sampleIdentifier, addCollections and join. run()
        additionally reads force and friend.

        Exits with status 1 if the sample identifier does not match exactly one
        sample.
        """
        # get file list; an empty --fileList gives an empty list (it used to be
        # None, which crashed in the len() call right below)
        self.filelist = FileList.decompress(opts.fileList) if len(opts.fileList) > 0 else []
        if len(self.filelist) > 0:
            print("len(filelist) %d filelist[0]: %s" % (len(self.filelist), self.filelist[0]))
        else:
            print("len(filelist) 0")

        # config
        self.debug = 'XBBDEBUG' in os.environ
        self.verifyCopy = True
        self.opts = opts
        self.config = BetterConfigParser()
        self.config.read(opts.config)
        self.channel = self.config.get('Configuration', 'channel')

        # load namespace, TODO
        VHbbNameSpace = self.config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(VHbbNameSpace)

        # directories
        self.pathIN = self.config.get('Directories', opts.inputDir)
        self.pathOUT = self.config.get('Directories', opts.outputDir)
        self.tmpDir = self.config.get('Directories', 'scratch')
        print('INput samples:\t%s' % self.pathIN)
        print('OUTput samples:\t%s' % self.pathOUT)

        self.fileLocator = FileLocator(config=self.config)

        # check if given sample identifier uniquely matches a samples from config
        matchingSamples = ParseInfo(samples_path=self.pathIN, config=self.config).find(identifier=opts.sampleIdentifier)
        if len(matchingSamples) != 1:
            print("ERROR: need exactly 1 sample identifier as input with -S !!")
            print(matchingSamples)
            exit(1)
        self.sample = matchingSamples[0]

        # collections: comma separated list, blank entries are dropped
        self.collections = [x.strip() for x in opts.addCollections.split(',') if len(x.strip()) > 0]
        if len(self.collections) < 1:
            print("\x1b[31mWARNING: no collections added! Specify the collections to add with the --addCollections option!\x1b[0m")
        print('collections to add: %s' % (self.collections,))
        self.collections = self.parseCollectionList(self.collections)
        print('after parsing: %s' % (self.collections,))

        # temporary folder to save the files of this job on the scratch
        temporaryName = self.sample.identifier + '/' + uuid.uuid4().hex

        # input files
        self.subJobs = []
        if opts.join:
            print("INFO: join input files! This is an experimental feature!")
            # the joined output is named after the first input file, so at
            # least one input file is needed
            if len(self.filelist) > 0:
                self.subJobs.append(self._makeSubJob(self.filelist, temporaryName))
        else:
            # create separate subjob for all files (default!)
            for inputFileName in self.filelist:
                self.subJobs.append(self._makeSubJob([inputFileName], temporaryName))

    def _makeSubJob(self, inputFileNames, temporaryName):
        """Build the sub-job dictionary for a non-empty list of input file names."""
        # translate naming convention of .txt file to imported files after the prep step
        inputFileNamesAfterPrep = [self.fileLocator.getFilenameAfterPrep(x) for x in inputFileNames]
        template = "{path}/{subfolder}/{filename}"
        return {
            'inputFileNames': inputFileNames,
            'localInputFileNames': [
                template.format(path=self.pathIN, subfolder=self.sample.identifier, filename=localFileName)
                for localFileName in inputFileNamesAfterPrep
            ],
            # output and temporary file are named after the first input file
            'outputFileName': template.format(path=self.pathOUT, subfolder=self.sample.identifier, filename=inputFileNamesAfterPrep[0]),
            'tmpFileName': template.format(path=self.tmpDir, subfolder=temporaryName, filename=inputFileNamesAfterPrep[0]),
        }

    # lists of single modules can be given instead of a module, "--addCollections Sys.all"
    # [Sys]
    # all = ['Sys.Vtype', 'Sys.Leptons', ...]
    # TODO: make it fully recursive
    def parseCollectionList(self, collections):
        """Return collections with every config entry holding a list replaced by its items.

        A name "Section.Key" whose config value looks like "[...]" is replaced by
        the elements of that list (one level only). Every other name is kept
        unchanged, including names without a matching config option, so that
        run() can report them or interpret them as Python code.
        """
        collectionsListsReplaced = []
        for collection in collections:
            if '.' in collection:
                section, key = collection.split('.')[:2]
                # only look up options which exist: a constructor given directly
                # on the command line has no config entry
                if self.config.has_section(section) and self.config.has_option(section, key):
                    listExpression = self.config.get(section, key).strip()
                    if listExpression.startswith('[') and listExpression.endswith(']'):
                        # NOTE: eval() executes text from the config file, which has to be trusted
                        collectionsListsReplaced.extend(eval(listExpression))
                        continue
            collectionsListsReplaced.append(collection)
        return collectionsListsReplaced

    def _copyOutput(self, subJob):
        """Copy the temporary file of a sub-job to its final destination.

        With verifyCopy enabled the copy is validated and repeated once if the
        destination file is not a valid ROOT file. Returns True on success and
        False if an exception occurred or the output is still broken.
        """
        tmpFileName = subJob['tmpFileName']
        outputFileName = subJob['outputFileName']
        try:
            self.fileLocator.cp(tmpFileName, outputFileName, force=True)
            print('copy  %s %s' % (tmpFileName, outputFileName))

            if self.verifyCopy and not self.fileLocator.isValidRootFile(outputFileName):
                print('INFO: output at final destination broken, try to copy again from scratch disk to final destination...')
                self.fileLocator.cp(tmpFileName, outputFileName, force=True)
                print('INFO: second attempt copy done!')
                if not self.fileLocator.isValidRootFile(outputFileName):
                    print('\x1b[31mERROR: output still broken!\x1b[0m')
                    raise Exception("FileCopyError")
                print('INFO: file is good after second attempt!')
        except Exception as e:
            print(e)
            print("\x1b[31mERROR: copy from scratch to final destination failed!!\x1b[0m")
            return False
        return True

    # run all subjobs
    def run(self):
        """Process all sub-jobs.

        Sub-jobs whose output file is already a valid ROOT file are skipped unless
        opts.force is set.

        Raises Exception("ConfigError") for a collection which cannot be resolved
        and Exception("ProcessingIncomplete") at the end if an input could not be
        opened or an output could not be copied.
        """
        nFilesFailed = 0
        for subJob in self.subJobs:

            # only process if output is non-existing/broken or --force was used
            if not self.opts.force and self.fileLocator.isValidRootFile(subJob['outputFileName']):
                print('SKIP: %s' % (subJob['inputFileNames'],))
                continue

            # create directories
            outputFolder = '/'.join(subJob['outputFileName'].split('/')[:-1])
            tmpFolder = '/'.join(subJob['tmpFileName'].split('/')[:-1])
            self.fileLocator.makedirs(outputFolder)
            self.fileLocator.makedirs(tmpFolder)

            # load sample tree
            sampleTree = SampleTree(subJob['localInputFileNames'], config=self.config)
            if not sampleTree.tree:
                print("trying fallback... %d" % len(subJob['inputFileNames']))
                if len(subJob['inputFileNames']) == 1:
                    # try original naming scheme if reading directly from Heppy/Nano ntuples (without prep)
                    fileNameOriginal = self.pathIN + '/' + subJob['inputFileNames'][0]
                    print("FO: %s" % fileNameOriginal)
                    xrootdRedirector = self.fileLocator.getRedirector(fileNameOriginal)
                    sampleTree = SampleTree([fileNameOriginal], config=self.config, xrootdRedirector=xrootdRedirector)
                    if not sampleTree.tree:
                        print("\x1b[31mERROR: file does not exist or is broken, will be SKIPPED!\x1b[0m")
                        nFilesFailed += 1
                        continue
                else:
                    print("\x1b[31mERROR: file does not exist or is broken, will be SKIPPED! (old naming scheme not supported for joining multipel files)\x1b[0m")
                    nFilesFailed += 1
                    continue

            # to use this syntax, use "--addCollections Sys.Vtype" for a config file entry like this:
            # [Sys]
            # Vtype = VtypeCorrector.VtypeCorrector(channel='Zll')
            # (instead of passing the tree in the constructor, the setTree method can be used)
            #
            # The module code is evaluated inside this method on purpose, so the
            # local names visible to the evaluated expression stay the same.
            pyModules = []
            versionTable = []
            for collection in self.collections:
                if '.' in collection:
                    section = collection.split('.')[0]
                    key = collection.split('.')[1]
                    if self.config.has_section(section) and self.config.has_option(section, key):
                        pyCode = self.config.get(section, key)
                    elif '(' in collection and collection.endswith(')'):
                        print("WARNING: config option %s  not found, interpreting it as Python code!" % collection)
                        pyCode = collection
                    else:
                        print("\x1b[31mERROR: config option not found: %s . To specify Python code directly, pass a complete constructor, e.g. --addCollections 'Module.Class()'. Module has to be placed in python/myutils/ folder.\x1b[0m" % collection)
                        raise Exception("ConfigError")

                    # import module from myutils
                    moduleName = pyCode.split('(')[0].split('.')[0].strip()
                    if self.debug:
                        print("DEBUG: import module: %s" % moduleName)
                        print("\x1b[33mDEBUG: " + collection + ": run PYTHON code:\n" + pyCode + "\x1b[0m")
                    # the module has to be a global name for the eval() below
                    globals()[moduleName] = importlib.import_module(".{module}".format(module=moduleName), package="myutils")

                    # get object
                    # NOTE: eval() executes code from the config file / command line, which has to be trusted
                    wObject = eval(pyCode)

                    # pass the tree and other variables if needed to finalize initialization
                    if hasattr(wObject, "customInit") and callable(getattr(wObject, "customInit")):
                        wObject.customInit({
                            'config': self.config,
                            'sampleTree': sampleTree,
                            'tree': sampleTree.tree,
                            'sample': self.sample,
                            'channel': self.channel,
                            'pathIN': self.pathIN,
                            'pathOUT': self.pathOUT,
                        })

                    # add callbacks if the objects provides any
                    if hasattr(wObject, "processEvent") and callable(getattr(wObject, "processEvent")):
                        sampleTree.addCallback('event', wObject.processEvent)

                    for cb in ["finish", "prepareOutput"]:
                        if hasattr(wObject, cb) and callable(getattr(wObject, cb)):
                            sampleTree.addCallback(cb, getattr(wObject, cb))

                    # add branches
                    if hasattr(wObject, "getBranches") and callable(getattr(wObject, "getBranches")):
                        sampleTree.addOutputBranches(wObject.getBranches())

                    pyModules.append(wObject)
                    versionTable.append([moduleName, wObject.getVersion() if hasattr(wObject, "getVersion") else 0])
                else:
                    print("\x1b[31mERROR: config option not found: %s  the format should be: [Section].[Option]\x1b[0m" % collection)
                    raise Exception("ConfigError")

            for moduleName, moduleVersion in versionTable:
                print(" > {m}:{v}".format(m=moduleName, v=moduleVersion))

            # NOTE(review): names without '.' raise ConfigError in the loop above,
            # so the two deprecated options below look unreachable - confirm
            # before removing them.

            # DEPRECATED, do not use anymore ---> use BranchTools.TreeFormulas()
            if 'addbranches' in self.collections:
                writeNewVariables = eval(self.config.get("Regression", "writeNewVariablesDict"))
                sampleTree.addOutputBranches(writeNewVariables)

            # DEPRECATED, do not use anymore ---> use BranchTools.Drop()
            if 'removebranches' in self.collections:
                # self.config: the bare name 'config' is not defined in this method
                bl_branch = eval(self.config.get('Branches', 'useless_branch'))
                for br in bl_branch:
                    sampleTree.addBranchToBlacklist(br)
                bl_branch = eval(self.config.get('Branches', 'useless_after_sys'))
                for br in bl_branch:
                    sampleTree.addBranchToBlacklist(br)

            # define output file
            sampleTree.addOutputTree(subJob['tmpFileName'], cut='1', branches='*', friend=self.opts.friend)

            # run processing
            for pyModule in pyModules:
                if hasattr(pyModule, "beforeProcessing"):
                    getattr(pyModule, "beforeProcessing")()

            sampleTree.process()

            for pyModule in pyModules:
                if hasattr(pyModule, "afterProcessing"):
                    getattr(pyModule, "afterProcessing")()

            # if output trees have been produced: copy temporary file to output folder
            if sampleTree.getNumberOfOutputTrees() > 0:
                # every failed copy counts, also when the copy itself raised
                if not self._copyOutput(subJob):
                    nFilesFailed += 1

                # delete temporary file (best effort)
                try:
                    self.fileLocator.rm(subJob['tmpFileName'])
                except Exception as e:
                    print(e)
                    print("WARNING: could not delete file on scratch!")

            # clean up all modules of this sub-job (not only the one created last)
            for pyModule in pyModules:
                if hasattr(pyModule, "cleanUp") and callable(getattr(pyModule, "cleanUp")):
                    getattr(pyModule, "cleanUp")()

        if nFilesFailed > 0:
            raise Exception("ProcessingIncomplete")
class PartialFileMerger(object):
    """Merge one chunk of input files of a sample into a single output file.

    The merged file is written to the scratch directory first and then copied
    to the directory configured as Directories/HADDout.

    All print calls use a single pre-formatted argument: with the Python 2 print
    statement the former multi-argument calls printed tuples, which showed the
    color escape codes as text.
    """

    def __init__(self, fileNames, chunkNumber, submitTime='000000_000000', force=False, config=None, sampleIdentifier=None):
        """Store the settings and compute the name of the merged file.

        fileNames must not be empty: the merged name is derived from its first
        entry.
        """
        self.fileNames = fileNames
        self.debug = 'XBBDEBUG' in os.environ
        self.submitTime = submitTime
        self.chunkNumber = chunkNumber
        self.config = config
        self.fileLocator = FileLocator(config=self.config)

        # -O option (reoptimizing baskets) leads to crashes...
        self.commandTemplate = "hadd -k -ff {output} {inputs}"
        self.sampleIdentifier = sampleIdentifier
        self.force = force

        # use sampleTree class as replacement for hadd
        self.useChain = True

        self.mergedFileName = self._mergedFileNameFor(self.fileNames, self.submitTime, chunkNumber)

    @staticmethod
    def _sha224Hex(text):
        """Return the hex SHA-224 digest of text."""
        # hashlib needs bytes: byte strings pass through, text is UTF-8 encoded
        data = text if isinstance(text, bytes) else text.encode('utf-8')
        return hashlib.sha224(data).hexdigest()

    @classmethod
    def _mergedFileNameFor(cls, fileNames, submitTime, chunkNumber):
        """Return the merged (fake) file name for a non-empty list of input file names."""
        # sorting makes the name independent of the order of the hashed names
        treeHashes = sorted(cls._sha224Hex(fileName) for fileName in fileNames)
        totalHash = cls._sha224Hex('-'.join(treeHashes))
        # the last four path components of the first input are replaced
        prefix = '/'.join(fileNames[0].split('/')[:-4])
        return prefix + '/' + totalHash + '/' + submitTime + '/0000/tree_%d.root' % chunkNumber

    # return a fake name which is written to sample list .txt files in order to keep compatibility to the method of converting file names in .txt
    # files to file names after prep step. This conversion applied to the fake name will give the real file name.
    def getMergedFakeFileName(self):
        return self.mergedFileName

    # real output file name where the file is stored
    def getOutputFileName(self):
        fakeFileName = self.getMergedFakeFileName()
        outputFileName = self.fileLocator.getFilenameAfterPrep(fakeFileName)
        return "{path}/{sample}/{fileName}".format(
            path=self.config.get('Directories', 'HADDout'),
            sample=self.sampleIdentifier,
            fileName=outputFileName)

    # file name on the scratch directory where the merged file is written first
    def getTemporaryFileName(self):
        fakeFileName = self.getMergedFakeFileName()
        outputFileName = self.fileLocator.getFilenameAfterPrep(fakeFileName)
        return "{path}/hadd/{sample}/{fileName}".format(
            path=self.config.get('Directories', 'scratch'),
            sample=self.sampleIdentifier,
            fileName=outputFileName)

    def run(self):
        """Merge the input files and move the result to its final destination.

        Raises Exception("HaddError") if merging returned a non-zero result and
        Exception("FileCopyError") if the copy to the final destination failed.
        """
        inputFileNames = [
            "{path}/{sample}/{fileName}".format(
                path=self.config.get('Directories', 'HADDin'),
                sample=self.sampleIdentifier,
                fileName=self.fileLocator.getFilenameAfterPrep(fileName))
            for fileName in self.fileNames
        ]
        outputFileName = self.getTemporaryFileName()
        self.fileLocator.makedirs('/'.join(outputFileName.split('/')[:-1]))

        # the default template has no {f} placeholder, so 'f' only has an effect
        # if commandTemplate is replaced by one which uses it
        command = self.commandTemplate.format(output=outputFileName, inputs=' '.join(inputFileNames), f="-f" if self.force else "")
        if self.debug:
            print("DEBUG: run \x1b[34m %s \x1b[0m" % (command,))

        if self.useChain:
            # use sampleTree class (can e.g. drop branches at the same time)
            sampleTree = SampleTree(inputFileNames, config=self.config)
            # dropping branches is optional: any problem is only reported
            try:
                # NOTE: eval() executes text from the config file, which has to be trusted
                removeBranches = eval(self.config.get('General', 'remove_branches'))
                for removeBranch in removeBranches:
                    sampleTree.addBranchToBlacklist(removeBranch)
                    print("DEBUG: disable branch  %s" % (removeBranch,))
            except Exception as e:
                print("DEBUG: could not disable branch: %s" % (e,))
            sampleTree.addOutputTree(outputFileName, cut='1', branches='*')
            sampleTree.process()
            result = 0
        else:
            # standard hadd
            result = self.fileLocator.runCommand(command)

        print("INFO: hadd returned  %s" % (result,))
        if result != 0:
            raise Exception("HaddError")

        finalOutputFileName = self.getOutputFileName()
        print("move file to final destination: \x1b[34m %s \x1b[0m" % (finalOutputFileName,))
        self.fileLocator.makedirs('/'.join(finalOutputFileName.split('/')[:-1]))
        resultCopy = self.fileLocator.cp(outputFileName, finalOutputFileName, self.force)
        if not resultCopy:
            print("\x1b[31mERROR: copy failed\n from: %s \n to: %s \n force: %s \x1b[0m" % (outputFileName, finalOutputFileName, self.force))
            raise Exception("FileCopyError")

        # try to delete temporary file
        try:
            self.fileLocator.rm(outputFileName)
        except Exception as e:
            print("ERROR: could not delete temporary file: %s  =>  %s" % (outputFileName, e))
        print("INFO: done.")
# TODO: collections = [x.strip() for x in opts.addCollections.split(',') if len(x.strip()) > 0] if len(opts.addCollections.strip())>0 else [] if len(collections) < 1: print "\x1b[31mWARNING: no collections added! Specify the collections to add with the --addCollections option!\x1b[0m" print 'collections to add:', collections for fileName in filelist: localFileName = fileLocator.getFilenameAfterPrep(fileName) inputFileName = "{path}/{subfolder}/{filename}".format(path=pathIN, subfolder=sample.identifier, filename=localFileName) outputFileName = "{path}/{subfolder}/{filename}".format(path=pathOUT, subfolder=sample.identifier, filename=localFileName) tmpFileName = "{path}/{subfolder}/{filename}".format(path=tmpDir, subfolder=sample.identifier, filename=localFileName) outputFolder = '/'.join(outputFileName.split('/')[:-1]) tmpFolder = '/'.join(tmpFileName.split('/')[:-1]) fileLocator.makedirs(tmpFolder) fileLocator.makedirs(outputFolder) if opts.force or not fileLocator.isValidRootFile(outputFileName): # load sample tree and initialize vtype corrector sampleTree = SampleTree([inputFileName], config=config) if not sampleTree.tree: # try original naming scheme if reading directly from Heppy/Nano ntuples (without prep) fileNameOriginal = pathIN + '/' + fileName print "FO:", fileNameOriginal xrootdRedirector = fileLocator.getRedirector(fileNameOriginal) sampleTree = SampleTree([fileNameOriginal], config=config, xrootdRedirector=xrootdRedirector) if not sampleTree.tree: print "\x1b[31mERROR: file does not exist or is broken, will be SKIPPED!\x1b[0m" continue
# TODO: collections = [x.strip() for x in opts.addCollections.split(',') if len(x.strip()) > 0] if len(opts.addCollections.strip())>0 else [] if len(collections) < 1: print "\x1b[31mWARNING: no collections added! Specify the collections to add with the --addCollections option!\x1b[0m" print 'collections to add:', collections for fileName in filelist: localFileName = fileLocator.getFilenameAfterPrep(fileName) inputFileName = "{path}/{subfolder}/{filename}".format(path=pathIN, subfolder=sample.identifier, filename=localFileName) outputFileName = "{path}/{subfolder}/{filename}".format(path=pathOUT, subfolder=sample.identifier, filename=localFileName) tmpFileName = "{path}/{subfolder}/{filename}".format(path=tmpDir, subfolder=sample.identifier, filename=localFileName) outputFolder = '/'.join(outputFileName.split('/')[:-1]) tmpFolder = '/'.join(tmpFileName.split('/')[:-1]) fileLocator.makedirs(tmpFolder) fileLocator.makedirs(outputFolder) if not fileLocator.exists(outputFileName) or opts.force: # load sample tree and initialize vtype corrector sampleTree = SampleTree([inputFileName], config=config) if not sampleTree.tree: print "\x1b[31mERROR: file does not exist or is broken, will be SKIPPED!\x1b[0m" continue # lists of single modules can be given instead of a module, "--addCollections Sys.all" # [Sys] # all = ['Sys.Vtype', 'Sys.Leptons', ...] collectionsListsReplaced = [] for collection in collections: if '.' in collection:
class PartialFileMerger(object):
    """Merge one chunk of input files of a sample into a single output file.

    The merged file is written to the scratch directory first and then copied
    to the directory configured as Directories/HADDout.

    All print calls use a single pre-formatted argument: with the Python 2 print
    statement the former multi-argument calls printed tuples, which showed the
    color escape codes as text.
    """

    def __init__(self, fileNames, chunkNumber, submitTime='000000_000000', force=False, config=None, sampleIdentifier=None):
        """Store the settings and compute the name of the merged file.

        fileNames must not be empty: the merged name is derived from its first
        entry.
        """
        self.fileNames = fileNames
        self.debug = 'XBBDEBUG' in os.environ
        self.submitTime = submitTime
        self.chunkNumber = chunkNumber
        self.config = config
        self.fileLocator = FileLocator(config=self.config)

        # -O option (reoptimizing baskets) leads to crashes...
        self.commandTemplate = "hadd -k -ff {output} {inputs}"
        self.sampleIdentifier = sampleIdentifier
        self.force = force

        # use sampleTree class as replacement for hadd
        self.useChain = True

        self.mergedFileName = self._buildMergedFileName()

    @staticmethod
    def _hexDigest(text):
        """Return the hex SHA-224 digest of text."""
        # hashlib needs bytes: byte strings pass through, text is UTF-8 encoded
        if not isinstance(text, bytes):
            text = text.encode('utf-8')
        return hashlib.sha224(text).hexdigest()

    def _buildMergedFileName(self):
        """Return the merged (fake) file name derived from the input file names."""
        treeHashes = [self._hexDigest(fileName) for fileName in self.fileNames]
        # sorting makes the name independent of the order of the hashed names
        totalHash = self._hexDigest('-'.join(sorted(treeHashes)))
        # the last four path components of the first input are replaced
        basePath = '/'.join(self.fileNames[0].split('/')[:-4])
        return basePath + '/' + totalHash + '/' + self.submitTime + '/0000/tree_%d.root' % self.chunkNumber

    # return a fake name which is written to sample list .txt files in order to keep compatibility to the method of converting file names in .txt
    # files to file names after prep step. This conversion applied to the fake name will give the real file name.
    def getMergedFakeFileName(self):
        return self.mergedFileName

    # real output file name where the file is stored
    def getOutputFileName(self):
        fakeFileName = self.getMergedFakeFileName()
        outputFileName = self.fileLocator.getFilenameAfterPrep(fakeFileName)
        return "{path}/{sample}/{fileName}".format(path=self.config.get('Directories','HADDout'), sample=self.sampleIdentifier, fileName=outputFileName)

    # file name on the scratch directory where the merged file is written first
    def getTemporaryFileName(self):
        fakeFileName = self.getMergedFakeFileName()
        outputFileName = self.fileLocator.getFilenameAfterPrep(fakeFileName)
        return "{path}/hadd/{sample}/{fileName}".format(path=self.config.get('Directories','scratch'), sample=self.sampleIdentifier, fileName=outputFileName)

    def run(self):
        """Merge the input files and move the result to its final destination.

        Raises Exception("HaddError") if merging returned a non-zero result and
        Exception("FileCopyError") if the copy to the final destination failed.
        """
        inputFileNames = ["{path}/{sample}/{fileName}".format(path=self.config.get('Directories','HADDin'), sample=self.sampleIdentifier, fileName=self.fileLocator.getFilenameAfterPrep(fileName)) for fileName in self.fileNames]
        outputFileName = self.getTemporaryFileName()
        self.fileLocator.makedirs('/'.join(outputFileName.split('/')[:-1]))

        # the default template has no {f} placeholder, so 'f' only has an effect
        # if commandTemplate is replaced by one which uses it
        command = self.commandTemplate.format(output=outputFileName, inputs=' '.join(inputFileNames), f="-f" if self.force else "")
        if self.debug:
            print("DEBUG: run \x1b[34m %s \x1b[0m" % (command,))

        if self.useChain:
            # use sampleTree class (can e.g. drop branches at the same time)
            sampleTree = SampleTree(inputFileNames, config=self.config)
            # dropping branches is optional: any problem is only reported
            try:
                # NOTE: eval() executes text from the config file, which has to be trusted
                removeBranches = eval(self.config.get('General', 'remove_branches'))
                for removeBranch in removeBranches:
                    sampleTree.addBranchToBlacklist(removeBranch)
                    print("DEBUG: disable branch  %s" % (removeBranch,))
            except Exception as e:
                print("DEBUG: could not disable branch: %s" % (e,))
            sampleTree.addOutputTree(outputFileName, cut='1', branches='*')
            sampleTree.process()
            result = 0
        else:
            # standard hadd
            result = self.fileLocator.runCommand(command)

        print("INFO: hadd returned  %s" % (result,))
        if result == 0:
            finalOutputFileName = self.getOutputFileName()
            print("move file to final destination: \x1b[34m %s \x1b[0m" % (finalOutputFileName,))
            self.fileLocator.makedirs('/'.join(finalOutputFileName.split('/')[:-1]))
            resultCopy = self.fileLocator.cp(outputFileName, finalOutputFileName, self.force)
            if not resultCopy:
                print("\x1b[31mERROR: copy failed\n from: %s \n to: %s \n force: %s \x1b[0m" % (outputFileName, finalOutputFileName, self.force))
                raise Exception("FileCopyError")

            # try to delete temporary file
            try:
                self.fileLocator.rm(outputFileName)
            except Exception as e:
                print("ERROR: could not delete temporary file: %s  =>  %s" % (outputFileName, e))
            print("INFO: done.")
        else:
            raise Exception("HaddError")
class XbbRun:
    """Apply a list of processing modules ("collections") to the files of one sample.

    The constructor reads the configuration, resolves the sample and builds the
    list of sub-jobs. run() processes every sub-job into a temporary file on the
    scratch directory and copies the result to the output directory.

    All print calls use a single pre-formatted argument so that the output is
    the same with the Python 2 print statement and the print function.
    """

    def __init__(self, opts):
        """Read the configuration and prepare the list of sub-jobs.

        opts is the parsed command line. Attributes read here: fileList, config,
        inputDir, outputDir, sampleIdentifier, addCollections and join. run()
        additionally reads force and friend.

        Exits with status 1 if the sample identifier does not match exactly one
        sample.
        """
        # get file list; an empty --fileList gives an empty list (it used to be
        # None, which crashed in the len() call right below)
        self.filelist = FileList.decompress(opts.fileList) if len(opts.fileList) > 0 else []
        if len(self.filelist) > 0:
            print("len(filelist) %d filelist[0]: %s" % (len(self.filelist), self.filelist[0]))
        else:
            print("len(filelist) 0")

        # config
        self.debug = 'XBBDEBUG' in os.environ
        self.verifyCopy = True
        self.opts = opts
        self.config = BetterConfigParser()
        self.config.read(opts.config)
        samplesinfo = self.config.get('Directories', 'samplesinfo')
        self.channel = self.config.get('Configuration', 'channel')

        # load namespace, TODO
        VHbbNameSpace = self.config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(VHbbNameSpace)

        # directories
        self.pathIN = self.config.get('Directories', opts.inputDir)
        self.pathOUT = self.config.get('Directories', opts.outputDir)
        self.tmpDir = self.config.get('Directories', 'scratch')
        print('INput samples:\t%s' % self.pathIN)
        print('OUTput samples:\t%s' % self.pathOUT)

        self.fileLocator = FileLocator(config=self.config)

        # check if given sample identifier uniquely matches a samples from config
        matchingSamples = ParseInfo(samplesinfo, self.pathIN).find(identifier=opts.sampleIdentifier)
        if len(matchingSamples) != 1:
            print("ERROR: need exactly 1 sample identifier as input with -S !!")
            print(matchingSamples)
            exit(1)
        self.sample = matchingSamples[0]

        # collections: comma separated list, blank entries are dropped
        self.collections = [x.strip() for x in opts.addCollections.split(',') if len(x.strip()) > 0]
        if len(self.collections) < 1:
            print("\x1b[31mWARNING: no collections added! Specify the collections to add with the --addCollections option!\x1b[0m")
        print('collections to add: %s' % (self.collections,))
        self.collections = self.parseCollectionList(self.collections)
        print('after parsing: %s' % (self.collections,))

        # temporary folder to save the files of this job on the scratch
        temporaryName = self.sample.identifier + '/' + uuid.uuid4().hex

        # input files
        self.subJobs = []
        if opts.join:
            print("INFO: join input files! This is an experimental feature!")
            # the joined output is named after the first input file, so at
            # least one input file is needed
            if len(self.filelist) > 0:
                self.subJobs.append(self._buildSubJob(self.filelist, temporaryName))
        else:
            # create separate subjob for all files (default!)
            for inputFileName in self.filelist:
                self.subJobs.append(self._buildSubJob([inputFileName], temporaryName))

    def _buildSubJob(self, inputFileNames, temporaryName):
        """Build the sub-job dictionary for a non-empty list of input file names."""
        # translate naming convention of .txt file to imported files after the prep step
        inputFileNamesAfterPrep = [self.fileLocator.getFilenameAfterPrep(x) for x in inputFileNames]
        template = "{path}/{subfolder}/{filename}"
        return {
            'inputFileNames': inputFileNames,
            'localInputFileNames': [
                template.format(path=self.pathIN, subfolder=self.sample.identifier, filename=localFileName)
                for localFileName in inputFileNamesAfterPrep
            ],
            # output and temporary file are named after the first input file
            'outputFileName': template.format(path=self.pathOUT, subfolder=self.sample.identifier, filename=inputFileNamesAfterPrep[0]),
            'tmpFileName': template.format(path=self.tmpDir, subfolder=temporaryName, filename=inputFileNamesAfterPrep[0]),
        }

    # lists of single modules can be given instead of a module, "--addCollections Sys.all"
    # [Sys]
    # all = ['Sys.Vtype', 'Sys.Leptons', ...]
    # TODO: make it fully recursive
    def parseCollectionList(self, collections):
        """Return collections with every config entry holding a list replaced by its items.

        A name "Section.Key" is looked up in the config; if its value looks like
        "[...]" the name is replaced by the elements of that list (one level
        only). All other names are kept unchanged. The lookup of a name which is
        not in the config raises the error of the config parser.
        """
        collectionsListsReplaced = []
        for collection in collections:
            if '.' in collection:
                section, key = collection.split('.')[:2]
                listExpression = self.config.get(section, key).strip()
                if listExpression.startswith('[') and listExpression.endswith(']'):
                    # NOTE: eval() executes text from the config file, which has to be trusted
                    collectionsListsReplaced.extend(eval(listExpression))
                    continue
            collectionsListsReplaced.append(collection)
        return collectionsListsReplaced

    def _copyOutput(self, subJob):
        """Copy the temporary file of a sub-job to its final destination.

        With verifyCopy enabled the copy is validated and repeated once if the
        destination file is not a valid ROOT file. Returns True on success and
        False if an exception occurred or the output is still broken.
        """
        tmpFileName = subJob['tmpFileName']
        outputFileName = subJob['outputFileName']
        try:
            self.fileLocator.cp(tmpFileName, outputFileName, force=True)
            print('copy  %s %s' % (tmpFileName, outputFileName))

            if self.verifyCopy and not self.fileLocator.isValidRootFile(outputFileName):
                print('INFO: output at final destination broken, try to copy again from scratch disk to final destination...')
                self.fileLocator.cp(tmpFileName, outputFileName, force=True)
                print('INFO: second attempt copy done!')
                if not self.fileLocator.isValidRootFile(outputFileName):
                    print('\x1b[31mERROR: output still broken!\x1b[0m')
                    raise Exception("FileCopyError")
                print('INFO: file is good after second attempt!')
        except Exception as e:
            print(e)
            print("\x1b[31mERROR: copy from scratch to final destination failed!!\x1b[0m")
            return False
        return True

    # run all subjobs
    def run(self):
        """Process all sub-jobs.

        Sub-jobs whose output file is already a valid ROOT file are skipped unless
        opts.force is set.

        Raises Exception("ProcessingIncomplete") at the end if an input could not
        be opened or an output could not be copied.
        """
        nFilesFailed = 0
        for subJob in self.subJobs:

            # only process if output is non-existing/broken or --force was used
            if not self.opts.force and self.fileLocator.isValidRootFile(subJob['outputFileName']):
                print('SKIP: %s' % (subJob['inputFileNames'],))
                continue

            # create directories
            outputFolder = '/'.join(subJob['outputFileName'].split('/')[:-1])
            tmpFolder = '/'.join(subJob['tmpFileName'].split('/')[:-1])
            self.fileLocator.makedirs(outputFolder)
            self.fileLocator.makedirs(tmpFolder)

            # load sample tree
            sampleTree = SampleTree(subJob['localInputFileNames'], config=self.config)
            if not sampleTree.tree:
                print("trying fallback... %d" % len(subJob['inputFileNames']))
                if len(subJob['inputFileNames']) == 1:
                    # try original naming scheme if reading directly from Heppy/Nano ntuples (without prep)
                    fileNameOriginal = self.pathIN + '/' + subJob['inputFileNames'][0]
                    print("FO: %s" % fileNameOriginal)
                    xrootdRedirector = self.fileLocator.getRedirector(fileNameOriginal)
                    sampleTree = SampleTree([fileNameOriginal], config=self.config, xrootdRedirector=xrootdRedirector)
                    if not sampleTree.tree:
                        print("\x1b[31mERROR: file does not exist or is broken, will be SKIPPED!\x1b[0m")
                        nFilesFailed += 1
                        continue
                else:
                    print("\x1b[31mERROR: file does not exist or is broken, will be SKIPPED! (old naming scheme not supported for joining multipel files)\x1b[0m")
                    nFilesFailed += 1
                    continue

            # to use this syntax, use "--addCollections Sys.Vtype" for a config file entry like this:
            # [Sys]
            # Vtype = VtypeCorrector.VtypeCorrector(channel='Zll')
            # (instead of passing the tree in the constructor, the setTree method can be used)
            #
            # The module code is evaluated inside this method on purpose, so the
            # local names visible to the evaluated expression stay the same.
            pyModules = []
            for collection in self.collections:
                if '.' in collection:
                    section = collection.split('.')[0]
                    key = collection.split('.')[1]
                    pyCode = self.config.get(section, key)

                    # import module from myutils
                    moduleName = pyCode.split('(')[0].split('.')[0].strip()
                    if self.debug:
                        print("DEBUG: import module: %s" % moduleName)
                        print("\x1b[33mDEBUG: " + collection + ": run PYTHON code:\n" + pyCode + "\x1b[0m")
                    # the module has to be a global name for the eval() below
                    globals()[moduleName] = importlib.import_module(".{module}".format(module=moduleName), package="myutils")

                    # get object
                    # NOTE: eval() executes code from the config file, which has to be trusted
                    wObject = eval(pyCode)

                    # pass the tree and other variables if needed to finalize initialization
                    if hasattr(wObject, "customInit") and callable(getattr(wObject, "customInit")):
                        wObject.customInit({
                            'config': self.config,
                            'sampleTree': sampleTree,
                            'tree': sampleTree.tree,
                            'sample': self.sample,
                            'channel': self.channel,
                            'pathIN': self.pathIN,
                            'pathOUT': self.pathOUT,
                        })

                    # add callbacks if the objects provides any
                    if hasattr(wObject, "processEvent") and callable(getattr(wObject, "processEvent")):
                        sampleTree.addCallback('event', wObject.processEvent)

                    # add branches
                    if hasattr(wObject, "getBranches") and callable(getattr(wObject, "getBranches")):
                        sampleTree.addOutputBranches(wObject.getBranches())

                    pyModules.append(wObject)

            # DEPRECATED, do not use anymore ---> use BranchTools.TreeFormulas()
            if 'addbranches' in self.collections:
                writeNewVariables = eval(self.config.get("Regression", "writeNewVariablesDict"))
                sampleTree.addOutputBranches(writeNewVariables)

            # DEPRECATED, do not use anymore ---> use BranchTools.Drop()
            if 'removebranches' in self.collections:
                # self.config: the bare name 'config' is not defined in this method
                bl_branch = eval(self.config.get('Branches', 'useless_branch'))
                for br in bl_branch:
                    sampleTree.addBranchToBlacklist(br)
                bl_branch = eval(self.config.get('Branches', 'useless_after_sys'))
                for br in bl_branch:
                    sampleTree.addBranchToBlacklist(br)

            # define output file
            sampleTree.addOutputTree(subJob['tmpFileName'], cut='1', branches='*', friend=self.opts.friend)

            # run processing
            for pyModule in pyModules:
                if hasattr(pyModule, "beforeProcessing"):
                    getattr(pyModule, "beforeProcessing")()

            sampleTree.process()

            for pyModule in pyModules:
                if hasattr(pyModule, "afterProcessing"):
                    getattr(pyModule, "afterProcessing")()

            # if output trees have been produced: copy temporary file to output folder
            if sampleTree.getNumberOfOutputTrees() > 0:
                # every failed copy counts, also when the copy itself raised
                if not self._copyOutput(subJob):
                    nFilesFailed += 1

                # delete temporary file (best effort)
                try:
                    self.fileLocator.rm(subJob['tmpFileName'])
                except Exception as e:
                    print(e)
                    print("WARNING: could not delete file on scratch!")

            # clean up all modules of this sub-job (not only the one created last)
            for pyModule in pyModules:
                if hasattr(pyModule, "cleanUp") and callable(getattr(pyModule, "cleanUp")):
                    getattr(pyModule, "cleanUp")()

        if nFilesFailed > 0:
            raise Exception("ProcessingIncomplete")