def run(self):
    """Write all events of the input files to a dated skim file.

    The tree is first written below ``self.tmpDir`` and then copied to
    ``self.pathOUT``; both file names have the form
    ``skim_<channel>_<region>_<yymmdd>``, where ``<channel>`` falls back to
    ``'_'`` if ``[Configuration] channel`` is not set.

    If ``[Plot_general] controlSample`` is configured, an integer branch
    ``controlSample`` is added whose value is looked up by ``self.region``
    (``-1`` if the region is not listed).

    Copy problems are only reported on stdout; nothing is raised for them.
    """
    if self.config.has_option('Configuration', 'channel'):
        name = self.config.get('Configuration', 'channel')
    else:
        name = '_'
    timestamp = datetime.datetime.now().strftime("%y%m%d")

    # Use self.region here: a bare `region` is not defined inside this method,
    # and the controlSample lookup below already uses self.region.
    baseName = '/skim_' + name + '_' + self.region + '_' + timestamp
    tmpName = self.tmpDir + baseName + '_tmp.root'
    destName = self.pathOUT + baseName + '.root'

    sampleTree = SampleTree(self.fileNames, config=self.config)

    if self.config.has_option('Plot_general', 'controlSample'):
        # NOTE: eval() executes the config value as Python code, so the
        # config file has to be trusted.
        controlSampleDict = eval(self.config.get('Plot_general', 'controlSample'))
        controlSample = controlSampleDict[self.region] if self.region in controlSampleDict else -1
        sampleTree.addOutputBranch("controlSample", lambda x: controlSample, branchType="i")
        print("INFO: setting controlSample to", controlSample)

    sampleTree.addOutputTree(tmpName, cut='1', branches='*', friend=False)
    sampleTree.process()

    # copy to final destination
    if sampleTree.getNumberOfOutputTrees() > 0:
        try:
            self.fileLocator.cp(tmpName, destName, force=True)
            print('copy ', tmpName, destName)

            if not self.fileLocator.isValidRootFile(destName):
                # keep the temporary file in this case, it is the only good copy
                print("\x1b[31mERROR: copy failed, output is broken!\x1b[0m")
            else:
                try:
                    self.fileLocator.rm(tmpName)
                except Exception as e:
                    print(e)
        except Exception as e:
            print("\x1b[31mERROR: copy failed!", e, "\x1b[0m")
def run(self):
    """Merge the prepared files of one sample into a single output file.

    The merged file is written to the temporary file name first, either with
    the configured command template or, if ``self.useChain`` is set, with a
    ``SampleTree`` (which can drop the branches listed in
    ``[General] remove_branches`` at the same time).  On success it is copied
    to the final output file name and the temporary file is removed.

    Raises:
        Exception: "HaddError" if merging returned a non-zero result,
            "FileCopyError" if copying to the final destination failed.
    """
    mergeInputs = []
    for rawName in self.fileNames:
        mergeInputs.append("{path}/{sample}/{fileName}".format(
            path=self.config.get('Directories', 'HADDin'),
            sample=self.sampleIdentifier,
            fileName=self.fileLocator.getFilenameAfterPrep(rawName)))

    tmpOutput = self.getTemporaryFileName()
    # the folder is everything in front of the last '/'
    self.fileLocator.makedirs(tmpOutput.rpartition('/')[0])

    command = self.commandTemplate.format(
        output=tmpOutput,
        inputs=' '.join(mergeInputs),
        f="-f" if self.force else "")
    if self.debug:
        print("DEBUG: run \x1b[34m", command, "\x1b[0m")

    if not self.useChain:
        # standard hadd
        result = self.fileLocator.runCommand(command)
    else:
        # use sampleTree class (can e.g. drop branches at the same time)
        sampleTree = SampleTree(mergeInputs, config=self.config)
        try:
            for branchName in eval(self.config.get('General', 'remove_branches')):
                sampleTree.addBranchToBlacklist(branchName)
                print("DEBUG: disable branch ", branchName)
        except Exception as e:
            print("DEBUG: could not disable branch:", e)
        sampleTree.addOutputTree(tmpOutput, cut='1', branches='*')
        sampleTree.process()
        result = 0
    print("INFO: hadd returned ", result)

    if result != 0:
        raise Exception("HaddError")

    finalOutput = self.getOutputFileName()
    print("move file to final destination: \x1b[34m", finalOutput, "\x1b[0m")
    self.fileLocator.makedirs(finalOutput.rpartition('/')[0])

    copied = self.fileLocator.cp(tmpOutput, finalOutput, self.force)
    if not copied:
        print("\x1b[31mERROR: copy failed\n from:", tmpOutput, "\n to:", finalOutput, "\n force:", self.force, "\x1b[0m")
        raise Exception("FileCopyError")

    # try to delete temporary file
    try:
        self.fileLocator.rm(tmpOutput)
    except Exception as e:
        print("ERROR: could not delete temporary file:", tmpOutput, " => ", e)
    print("INFO: done.")
def run(self):
    """Combine all input files of the sample and move the result in place.

    Steps: build the list of input paths below ``[Directories] HADDin``,
    merge them into the temporary file (via the command template, or via
    ``SampleTree`` when ``self.useChain`` is set), copy the temporary file to
    the final output file name and finally try to remove the temporary file.

    Raises:
        Exception: "HaddError" when the merge step did not return 0,
            "FileCopyError" when the copy to the final destination failed.
    """
    def parentFolder(path):
        # same as '/'.join(path.split('/')[:-1])
        return path.rpartition('/')[0]

    sources = [
        "{path}/{sample}/{fileName}".format(
            path=self.config.get('Directories', 'HADDin'),
            sample=self.sampleIdentifier,
            fileName=self.fileLocator.getFilenameAfterPrep(name))
        for name in self.fileNames
    ]
    scratchFile = self.getTemporaryFileName()
    self.fileLocator.makedirs(parentFolder(scratchFile))
    command = self.commandTemplate.format(output=scratchFile, inputs=' '.join(sources), f="-f" if self.force else "")
    if self.debug:
        print("DEBUG: run \x1b[34m", command, "\x1b[0m")

    if self.useChain:
        # use sampleTree class (can e.g. drop branches at the same time)
        chain = SampleTree(sources, config=self.config)
        try:
            droppedBranches = eval(self.config.get('General', 'remove_branches'))
            for dropped in droppedBranches:
                chain.addBranchToBlacklist(dropped)
                print("DEBUG: disable branch ", dropped)
        except Exception as e:
            print("DEBUG: could not disable branch:", e)
        chain.addOutputTree(scratchFile, cut='1', branches='*')
        chain.process()
        result = 0
    else:
        # standard hadd
        result = self.fileLocator.runCommand(command)
    print("INFO: hadd returned ", result)

    if result == 0:
        destination = self.getOutputFileName()
        print("move file to final destination: \x1b[34m", destination, "\x1b[0m")
        self.fileLocator.makedirs(parentFolder(destination))

        if not self.fileLocator.cp(scratchFile, destination, self.force):
            print("\x1b[31mERROR: copy failed\n from:", scratchFile, "\n to:", destination, "\n force:", self.force, "\x1b[0m")
            raise Exception("FileCopyError")

        # try to delete temporary file
        try:
            self.fileLocator.rm(scratchFile)
        except Exception as e:
            print("ERROR: could not delete temporary file:", scratchFile, " => ", e)
        print("INFO: done.")
    else:
        raise Exception("HaddError")
def run(self):
    """Skim the input files into one tree and copy it to the output folder.

    The output is written to ``<tmpDir>/skim_<channel>_<region>_<yymmdd>_tmp.root``
    and afterwards copied to ``<pathOUT>/skim_<channel>_<region>_<yymmdd>.root``.
    ``<channel>`` is ``[Configuration] channel`` or ``'_'`` if that option is
    missing.  When ``[Plot_general] controlSample`` exists, a ``controlSample``
    integer branch is added, filled with the entry for ``self.region`` or
    ``-1`` if there is none.

    Failures while copying are printed, not raised.
    """
    name = '_'
    if self.config.has_option('Configuration', 'channel'):
        name = self.config.get('Configuration', 'channel')
    timestamp = datetime.datetime.now().strftime("%y%m%d")

    # self.region instead of a bare `region`: no such local exists in this
    # method, and the controlSample lookup below reads self.region as well.
    skimName = 'skim_' + name + '_' + self.region + '_' + timestamp
    tmpName = self.tmpDir + '/' + skimName + '_tmp.root'
    destName = self.pathOUT + '/' + skimName + '.root'

    sampleTree = SampleTree(self.fileNames, config=self.config)
    if self.config.has_option('Plot_general', 'controlSample'):
        # NOTE: the option is evaluated as Python code, only use trusted configs
        controlSampleDict = eval(self.config.get('Plot_general', 'controlSample'))
        if self.region in controlSampleDict:
            controlSample = controlSampleDict[self.region]
        else:
            controlSample = -1
        sampleTree.addOutputBranch("controlSample", lambda x: controlSample, branchType="i")
        print("INFO: setting controlSample to", controlSample)
    sampleTree.addOutputTree(tmpName, cut='1', branches='*', friend=False)
    sampleTree.process()

    # copy to final destination
    if sampleTree.getNumberOfOutputTrees() > 0:
        try:
            self.fileLocator.cp(tmpName, destName, force=True)
            print('copy ', tmpName, destName)
            if self.fileLocator.isValidRootFile(destName):
                # the copy is fine, the temporary file is not needed anymore
                try:
                    self.fileLocator.rm(tmpName)
                except Exception as e:
                    print(e)
            else:
                print("\x1b[31mERROR: copy failed, output is broken!\x1b[0m")
        except Exception as e:
            print("\x1b[31mERROR: copy failed!", e, "\x1b[0m")
def run(self):
    """Run the configured modules over every sub-job and store the output trees.

    A sub-job is skipped if its output file is already a valid ROOT file,
    unless ``self.opts.force`` is set.  For all others the input tree is
    loaded (with a fallback to the original file name below ``self.pathIN``
    for single-file jobs), one object per entry of ``self.collections`` is
    created from Python code and hooked into the ``SampleTree``, the tree is
    processed into the temporary file and the result is copied to the output
    file name.

    Raises:
        Exception: "ConfigError" if an entry of ``self.collections`` can
            neither be found in the config nor be read as a constructor call;
            "ProcessingIncomplete" after the loop if at least one sub-job was
            counted as failed.
    """
    nFilesFailed = 0
    for subJob in self.subJobs:
        # only process if output is non-existing/broken or --force was used
        if not self.opts.force and self.fileLocator.isValidRootFile(subJob['outputFileName']):
            print('SKIP: %s' % (subJob['inputFileNames'],))
            continue

        # create directories
        outputFolder = '/'.join(subJob['outputFileName'].split('/')[:-1])
        tmpFolder = '/'.join(subJob['tmpFileName'].split('/')[:-1])
        self.fileLocator.makedirs(outputFolder)
        self.fileLocator.makedirs(tmpFolder)

        # load sample tree
        sampleTree = SampleTree(subJob['localInputFileNames'], config=self.config)
        if not sampleTree.tree:
            print("trying fallback... %d" % len(subJob['inputFileNames']))
            if len(subJob['inputFileNames']) != 1:
                print("\x1b[31mERROR: file does not exist or is broken, will be SKIPPED! (old naming scheme not supported for joining multipel files)\x1b[0m")
                nFilesFailed += 1
                continue

            # try original naming scheme if reading directly from Heppy/Nano ntuples (without prep)
            fileNameOriginal = self.pathIN + '/' + subJob['inputFileNames'][0]
            print("FO: %s" % (fileNameOriginal,))
            xrootdRedirector = self.fileLocator.getRedirector(fileNameOriginal)
            sampleTree = SampleTree([fileNameOriginal], config=self.config, xrootdRedirector=xrootdRedirector)
            if not sampleTree.tree:
                print("\x1b[31mERROR: file does not exist or is broken, will be SKIPPED!\x1b[0m")
                nFilesFailed += 1
                continue

        # to use this syntax, use "--addCollections Sys.Vtype" for a config file entry like this:
        # [Sys]
        # Vtype = VtypeCorrector.VtypeCorrector(channel='Zll')
        # (instead of passing the tree in the constructor, the setTree method can be used)
        pyModules = []
        versionTable = []
        for collection in self.collections:
            if '.' not in collection:
                print("\x1b[31mERROR: config option not found: %s  the format should be: [Section].[Option]\x1b[0m" % (collection,))
                raise Exception("ConfigError")

            section = collection.split('.')[0]
            key = collection.split('.')[1]
            if self.config.has_section(section) and self.config.has_option(section, key):
                pyCode = self.config.get(section, key)
            elif '(' in collection and collection.endswith(')'):
                print("WARNING: config option %s  not found, interpreting it as Python code!" % (collection,))
                pyCode = collection
            else:
                print("\x1b[31mERROR: config option not found: %s . To specify Python code directly, pass a complete constructor, e.g. --addCollections 'Module.Class()'. Module has to be placed in python/myutils/ folder.\x1b[0m" % (collection,))
                raise Exception("ConfigError")

            # import module from myutils; it is stored in globals() so that
            # the eval() below can resolve "<module>.<Class>(...)"
            moduleName = pyCode.split('(')[0].split('.')[0].strip()
            if self.debug:
                print("DEBUG: import module: %s" % (moduleName,))
                print("\x1b[33mDEBUG: " + collection + ": run PYTHON code:\n"+pyCode+"\x1b[0m")
            globals()[moduleName] = importlib.import_module(".{module}".format(module=moduleName), package="myutils")

            # get object
            # NOTE: eval() runs config/command line text as Python code, so
            # both have to come from a trusted source.
            wObject = eval(pyCode)

            # pass the tree and other variables if needed to finalize initialization
            if hasattr(wObject, "customInit") and callable(getattr(wObject, "customInit")):
                wObject.customInit({'config': self.config,
                                    'sampleTree': sampleTree,
                                    'tree': sampleTree.tree,
                                    'sample': self.sample,
                                    'channel': self.channel,
                                    'pathIN': self.pathIN,
                                    'pathOUT': self.pathOUT,
                                    })

            # add callbacks if the objects provides any
            if hasattr(wObject, "processEvent") and callable(getattr(wObject, "processEvent")):
                sampleTree.addCallback('event', wObject.processEvent)
            for cb in ["finish", "prepareOutput"]:
                if hasattr(wObject, cb) and callable(getattr(wObject, cb)):
                    sampleTree.addCallback(cb, getattr(wObject, cb))

            # add branches
            if hasattr(wObject, "getBranches") and callable(getattr(wObject, "getBranches")):
                sampleTree.addOutputBranches(wObject.getBranches())

            pyModules.append(wObject)
            versionTable.append([moduleName, wObject.getVersion() if hasattr(wObject, "getVersion") else 0])

        for moduleName, moduleVersion in versionTable:
            print(" > {m}:{v}".format(m=moduleName, v=moduleVersion))

        # NOTE(review): the format check above raises for every entry without
        # a '.', so the two legacy options below look unreachable - confirm.
        # DEPRECATED, do not use anymore ---> use BranchTools.TreeFormulas()
        if 'addbranches' in self.collections:
            writeNewVariables = eval(self.config.get("Regression", "writeNewVariablesDict"))
            sampleTree.addOutputBranches(writeNewVariables)

        # DEPRECATED, do not use anymore ---> use BranchTools.Drop()
        if 'removebranches' in self.collections:
            # self.config, not a bare `config`: no such name exists in this method
            for option in ('useless_branch', 'useless_after_sys'):
                for br in eval(self.config.get('Branches', option)):
                    sampleTree.addBranchToBlacklist(br)

        # define output file
        sampleTree.addOutputTree(subJob['tmpFileName'], cut='1', branches='*', friend=self.opts.friend)

        # run processing
        for pyModule in pyModules:
            if hasattr(pyModule, "beforeProcessing"):
                getattr(pyModule, "beforeProcessing")()
        sampleTree.process()
        for pyModule in pyModules:
            if hasattr(pyModule, "afterProcessing"):
                getattr(pyModule, "afterProcessing")()

        # if output trees have been produced: copy temporary file to output folder
        if sampleTree.getNumberOfOutputTrees() > 0:
            try:
                self.fileLocator.cp(subJob['tmpFileName'], subJob['outputFileName'], force=True)
                print('copy  %s %s' % (subJob['tmpFileName'], subJob['outputFileName']))

                if self.verifyCopy:
                    if not self.fileLocator.isValidRootFile(subJob['outputFileName']):
                        print('INFO: output at final destination broken, try to copy again from scratch disk to final destination...')
                        self.fileLocator.cp(subJob['tmpFileName'], subJob['outputFileName'], force=True)
                        print('INFO: second attempt copy done!')
                        if not self.fileLocator.isValidRootFile(subJob['outputFileName']):
                            print('\x1b[31mERROR: output still broken!\x1b[0m')
                            nFilesFailed += 1
                            raise Exception("FileCopyError")
                        else:
                            print('INFO: file is good after second attempt!')
            except Exception as e:
                print(e)
                print("\x1b[31mERROR: copy from scratch to final destination failed!!\x1b[0m")

        # delete temporary file
        try:
            self.fileLocator.rm(subJob['tmpFileName'])
        except Exception as e:
            print(e)
            print("WARNING: could not delete file on scratch!")

        # clean up every module that was created for this sub-job (using the
        # leaked loop variable would only reach the last one and would be
        # undefined if no module had been created)
        for pyModule in pyModules:
            if hasattr(pyModule, "cleanUp") and callable(getattr(pyModule, "cleanUp")):
                getattr(pyModule, "cleanUp")()

    if nFilesFailed > 0:
        raise Exception("ProcessingIncomplete")
# Script excerpt (statements collapsed onto one line):
#  - if 'addbranches' / 'removebranches' are in `collections`, output branches are
#    added from [Regression] writeNewVariablesDict and the branches listed in
#    [Branches] useless_branch / useless_after_sys are blacklisted (config values
#    are passed through eval(), so the config has to be trusted)
#  - the tree is written to tmpFileName and processed
#  - with opts.force an existing outputFileName is removed first, then the
#    temporary file is copied to outputFileName and removed from scratch
# NOTE(review): the excerpt ends with an `except Exception as e:` that has no body
# here; the handler is not visible in this view.
# TODO: this can also be made a separate module if 'addbranches' in collections: writeNewVariables = eval(config.get("Regression", "writeNewVariablesDict")) sampleTree.addOutputBranches(writeNewVariables) if 'removebranches' in collections: bl_branch = eval(config.get('Branches', 'useless_branch')) for br in bl_branch: sampleTree.addBranchToBlacklist(br) bl_branch = eval(config.get('Branches', 'useless_after_sys')) for br in bl_branch: sampleTree.addBranchToBlacklist(br) # define output file sampleTree.addOutputTree(tmpFileName, cut='1', branches='*') sampleTree.process() # copy temporary file to output folder if opts.force and fileLocator.exists(outputFileName): fileLocator.rm(outputFileName) try: fileLocator.cp(tmpFileName, outputFileName) except Exception as e: print "\x1b[31mERROR: copy from scratch to final destination failed!!\x1b[0m" print e try: fileLocator.rm(tmpFileName) except Exception as e:
def run(self):
    """Process all sub-jobs with the modules listed in ``self.collections``.

    Sub-jobs whose output file is already a valid ROOT file are skipped unless
    ``self.opts.force`` is set.  Otherwise the input tree is opened (falling
    back to the original file name below ``self.pathIN`` for single-file
    jobs), every ``Section.Option`` entry of ``self.collections`` is turned
    into an object by evaluating the Python code stored in the config, the
    tree is processed into the temporary file and the result is copied to the
    output file name.

    Raises:
        Exception: "ProcessingIncomplete" after the loop if at least one
            sub-job was counted as failed.
    """
    nFilesFailed = 0
    for subJob in self.subJobs:
        # only process if output is non-existing/broken or --force was used
        if not self.opts.force and self.fileLocator.isValidRootFile(subJob['outputFileName']):
            print('SKIP: %s' % (subJob['inputFileNames'],))
            continue

        # create directories
        outputFolder = '/'.join(subJob['outputFileName'].split('/')[:-1])
        tmpFolder = '/'.join(subJob['tmpFileName'].split('/')[:-1])
        self.fileLocator.makedirs(outputFolder)
        self.fileLocator.makedirs(tmpFolder)

        # load sample tree
        sampleTree = SampleTree(subJob['localInputFileNames'], config=self.config)
        if not sampleTree.tree:
            print("trying fallback... %d" % len(subJob['inputFileNames']))
            if len(subJob['inputFileNames']) != 1:
                print("\x1b[31mERROR: file does not exist or is broken, will be SKIPPED! (old naming scheme not supported for joining multipel files)\x1b[0m")
                nFilesFailed += 1
                continue

            # try original naming scheme if reading directly from Heppy/Nano ntuples (without prep)
            fileNameOriginal = self.pathIN + '/' + subJob['inputFileNames'][0]
            print("FO: %s" % (fileNameOriginal,))
            xrootdRedirector = self.fileLocator.getRedirector(fileNameOriginal)
            sampleTree = SampleTree([fileNameOriginal], config=self.config, xrootdRedirector=xrootdRedirector)
            if not sampleTree.tree:
                print("\x1b[31mERROR: file does not exist or is broken, will be SKIPPED!\x1b[0m")
                nFilesFailed += 1
                continue

        # to use this syntax, use "--addCollections Sys.Vtype" for a config file entry like this:
        # [Sys]
        # Vtype = VtypeCorrector.VtypeCorrector(channel='Zll')
        # (instead of passing the tree in the constructor, the setTree method can be used)
        pyModules = []
        for collection in self.collections:
            if '.' not in collection:
                # entries without a '.' are only used by the legacy options below
                continue

            section = collection.split('.')[0]
            key = collection.split('.')[1]
            pyCode = self.config.get(section, key)

            # import module from myutils; it is stored in globals() so that
            # the eval() below can resolve "<module>.<Class>(...)"
            moduleName = pyCode.split('(')[0].split('.')[0].strip()
            if self.debug:
                print("DEBUG: import module: %s" % (moduleName,))
                print("\x1b[33mDEBUG: " + collection + ": run PYTHON code:\n"+pyCode+"\x1b[0m")
            globals()[moduleName] = importlib.import_module(".{module}".format(module=moduleName), package="myutils")

            # get object
            # NOTE: eval() runs the config text as Python code, so the config
            # has to come from a trusted source.
            wObject = eval(pyCode)

            # pass the tree and other variables if needed to finalize initialization
            if hasattr(wObject, "customInit") and callable(getattr(wObject, "customInit")):
                wObject.customInit({'config': self.config,
                                    'sampleTree': sampleTree,
                                    'tree': sampleTree.tree,
                                    'sample': self.sample,
                                    'channel': self.channel,
                                    'pathIN': self.pathIN,
                                    'pathOUT': self.pathOUT,
                                    })

            # add callbacks if the objects provides any
            if hasattr(wObject, "processEvent") and callable(getattr(wObject, "processEvent")):
                sampleTree.addCallback('event', wObject.processEvent)

            # add branches
            if hasattr(wObject, "getBranches") and callable(getattr(wObject, "getBranches")):
                sampleTree.addOutputBranches(wObject.getBranches())

            pyModules.append(wObject)

        # DEPRECATED, do not use anymore ---> use BranchTools.TreeFormulas()
        if 'addbranches' in self.collections:
            writeNewVariables = eval(self.config.get("Regression", "writeNewVariablesDict"))
            sampleTree.addOutputBranches(writeNewVariables)

        # DEPRECATED, do not use anymore ---> use BranchTools.Drop()
        if 'removebranches' in self.collections:
            # self.config, not a bare `config`: no such name exists in this method
            for option in ('useless_branch', 'useless_after_sys'):
                for br in eval(self.config.get('Branches', option)):
                    sampleTree.addBranchToBlacklist(br)

        # define output file
        sampleTree.addOutputTree(subJob['tmpFileName'], cut='1', branches='*', friend=self.opts.friend)

        # run processing
        for pyModule in pyModules:
            if hasattr(pyModule, "beforeProcessing"):
                getattr(pyModule, "beforeProcessing")()
        sampleTree.process()
        for pyModule in pyModules:
            if hasattr(pyModule, "afterProcessing"):
                getattr(pyModule, "afterProcessing")()

        # if output trees have been produced: copy temporary file to output folder
        if sampleTree.getNumberOfOutputTrees() > 0:
            try:
                self.fileLocator.cp(subJob['tmpFileName'], subJob['outputFileName'], force=True)
                print('copy  %s %s' % (subJob['tmpFileName'], subJob['outputFileName']))

                if self.verifyCopy:
                    if not self.fileLocator.isValidRootFile(subJob['outputFileName']):
                        print('INFO: output at final destination broken, try to copy again from scratch disk to final destination...')
                        self.fileLocator.cp(subJob['tmpFileName'], subJob['outputFileName'], force=True)
                        print('INFO: second attempt copy done!')
                        if not self.fileLocator.isValidRootFile(subJob['outputFileName']):
                            print('\x1b[31mERROR: output still broken!\x1b[0m')
                            nFilesFailed += 1
                            raise Exception("FileCopyError")
                        else:
                            print('INFO: file is good after second attempt!')
            except Exception as e:
                print(e)
                print("\x1b[31mERROR: copy from scratch to final destination failed!!\x1b[0m")

        # delete temporary file
        try:
            self.fileLocator.rm(subJob['tmpFileName'])
        except Exception as e:
            print(e)
            print("WARNING: could not delete file on scratch!")

        # clean up every module that was created for this sub-job (using the
        # leaked loop variable would only reach the last one and would be
        # undefined if no module had been created)
        for pyModule in pyModules:
            if hasattr(pyModule, "cleanUp") and callable(getattr(pyModule, "cleanUp")):
                getattr(pyModule, "cleanUp")()

    if nFilesFailed > 0:
        raise Exception("ProcessingIncomplete")
# Script excerpt: hook `tfe` into the sample tree, write the selected events to
# the scratch area, then copy the file to the output folder.

# register callbacks for processing
sampleTree.addCallback('event', tfe.processEvent)

# define new branches to add
sampleTree.addOutputBranches(tfe.getBranches())

try:
    os.makedirs(outputFolder)
except OSError:
    # best effort (e.g. the folder exists already); if the folder is really
    # missing, the copy below reports the problem
    pass

# define output file: same base name on scratch and in the output folder
baseFileName = inputFile.split('/')[-1]
tmpFileName = scratch + '/' + baseFileName
outputFileName = outputFolder + '/' + baseFileName
sampleTree.addOutputTree(tmpFileName, cut='weight<999&&weight>-999', branches='*')

# process tree
sampleTree.process()

# copy to final location
try:
    fileLocator.cp(tmpFileName, outputFileName)
except Exception as e:
    print("\x1b[31mERROR: copy from scratch to final destination failed!!\x1b[0m")
    print(e)

# the scratch file is removed in any case, also after a failed copy
try:
    fileLocator.rm(tmpFileName)
except Exception as e:
    print("ERROR: could not delete file on scratch!")
# Script excerpt: hook `tfe` into the sample tree, write the selected events to
# the scratch area, then copy the file to the output folder.

# register callbacks for processing
sampleTree.addCallback('event', tfe.processEvent)

# define new branches to add
sampleTree.addOutputBranches(tfe.getBranches())

try:
    os.makedirs(outputFolder)
except OSError:
    # best effort (e.g. the folder exists already); if the folder is really
    # missing, the copy below reports the problem
    pass

# define output file: same base name on scratch and in the output folder
baseFileName = inputFile.split('/')[-1]
tmpFileName = scratch + '/' + baseFileName
outputFileName = outputFolder + '/' + baseFileName
sampleTree.addOutputTree(tmpFileName, cut='weight<999&&weight>-999', branches='*')

# process tree
sampleTree.process()

# copy to final location
try:
    fileLocator.cp(tmpFileName, outputFileName)
except Exception as e:
    print("\x1b[31mERROR: copy from scratch to final destination failed!!\x1b[0m")
    print(e)

# the scratch file is removed in any case, also after a failed copy
try:
    fileLocator.rm(tmpFileName)
except Exception as e:
    print("ERROR: could not delete file on scratch!")
    print(e)