def preExecute(self, input=None, output=None):
    """Prepare the BSJobSplitterExecutor substep for execution.

    First unpacks an optional heavy-ion tarball ('hitarFile'), then resolves
    the input bytestream configuration. Two ways to configure are supported:
      - a direct 'inputZeroBiasBSFile' argument, or
      - an 'inputBSCONFIGFile' tarball plus a 'jobNumber', from which the
        per-job file list and lumi-block map are extracted.
    If both are given, 'inputZeroBiasBSFile' is dropped with a warning.

    @param input: set of input data type names for this substep; mutated in
                  place ('ZeroBiasBS' is added when inputs are resolved from
                  the BSCONFIG tarball)
    @param output: set of output data type names for this substep
    @raises trfExceptions.TransformSetupException: on tarball extraction
        failure, missing jobNumber, or input file validation failure
    """
    # None sentinels instead of mutable set() defaults: this method mutates
    # 'input' (input.add below), which would otherwise corrupt the shared
    # default object across calls.
    if input is None:
        input = set()
    if output is None:
        output = set()

    msg.info('Preparing for BSJobSplitterExecutor execution of {0} with inputs {1} and outputs {2}'.format(self.name, input, output))

    # See if we need to unpack a TAR file
    if 'hitarFile' in self.conf.argdict:
        print ("Untarring inputHITARFile", self.conf.argdict['hitarFile'].value)
        try:
            # Context manager guarantees the tarball is closed even when
            # extraction raises (the original leaked the handle on error).
            with tarfile.open(name=self.conf.argdict['hitarFile'].value[0]) as f:
                f.list()
                f.extractall()
        except Exception as e:
            raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), 'Error while unpacking and extracting HI input files for transform: {0}'.format(e))

    # There are two ways to configure this transform:
    # - Give an inputZeroBiasBSFile argument directly
    # - Give a inputBSCONFIGFile and jobNumber argument
    # Check now that we have a configuration that works
    if 'inputZeroBiasBSFile' in self.conf.argdict and 'inputBSCONFIGFile' in self.conf.argdict:
        #raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), 'Both inputZeroBiasBSFile and inputBSCONFIGFile have been specified - please use only one.')
        del self.conf.argdict['inputZeroBiasBSFile']
        print ("WARNING - removed the inputZeroBiasBSFile argument, because inputZeroBiasBSFile and inputBSCONFIGFile were already specified")

    if 'inputBSCONFIGFile' in self.conf.argdict:
        if 'jobNumber' not in self.conf.argdict:
            raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), 'inputBSCONFIGFile is specified, but no jobNumber was given.')

        # Job number has to wrap around from 500, dropping back to 1
        wrappedJobNumber = (self.conf.argdict['jobNumber'].value-1)%500 + 1
        self._inputFilelist = 'filelist_{0}.txt'.format(wrappedJobNumber)
        self._lbnList = 'lbn_anal_map_{0}.txt'.format(wrappedJobNumber)
        try:
            print (self.conf.argdict['inputBSCONFIGFile'].value)
            with tarfile.open(name=self.conf.argdict['inputBSCONFIGFile'].value[0]) as f:
                f.extract('filelist_{0}.txt'.format(wrappedJobNumber))
                f.extract('lbn_anal_map_{0}.txt'.format(wrappedJobNumber))
            # The file list is one comma-separated line of BS input files;
            # 'with' closes it promptly (the original relied on GC).
            with open(self._inputFilelist) as flist:
                bsInputs = flist.readline().rstrip().split(',')
            self.conf.addToArgdict('inputZeroBiasBSFile', trfArgClasses.argBSFile(bsInputs, io='input', type='BS', subtype='BS_ZeroBias'))
            self.conf.addToDataDictionary('ZeroBiasBS', self.conf.argdict['inputZeroBiasBSFile'])
            input.add('ZeroBiasBS')
            msg.info('Validating resolved input bytestream files')
            trfValidation.performStandardFileValidation({'ZeroBiasBS': self.conf.argdict['inputZeroBiasBSFile']}, io='input')
        except Exception as e:
            raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), 'Error while unpacking and extracting input files for transform: {0}'.format(e))

        # Now setup correct input arguments
        self.conf.argdict['InputLbnMapFile'] = trfArgClasses.argString(self._lbnList)
        self.conf.argdict['InputFileMapFile'] = trfArgClasses.argString(self._inputFilelist)
    else:
        #if 'lumiBlockMapFile' not in self.conf.argdict:
        #    raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), 'inputZeroBiasBSFile is specified, but no lumiBlockMapFile was given.')
        #self.conf.argdict['InputLbnMapFile'] = self.conf.argdict['lumiBlockMapFile']
        if 'lumiBlockMapFile' in self.conf.argdict:
            self.conf.argdict['InputLbnMapFile'] = self.conf.argdict['lumiBlockMapFile']

    super(BSJobSplitterExecutor, self).preExecute(input=input, output=output)
def _doSteering(self, steeringDict=None):
    """Apply steering directives to the executors' input/output data sets.

    @param steeringDict: mapping of substep name -> list of steering tuples
        (in/out, +/-, datatype); if None (or empty) it is taken from the
        transform's 'steering' argument
    @raises trfExceptions.TransformSetupException: if a substep is unknown,
        or adding/removing a datatype has no effect (already present /
        already absent)
    """
    if not steeringDict:
        steeringDict = self._argdict['steering'].value
    # dict.items(): the original used iteritems(), which is Python 2 only
    # and crashes under Python 3.
    for substep, steeringValues in steeringDict.items():
        foundSubstep = False
        for executor in self._executors:
            # Steering can address an executor by its name or its substep alias
            if executor.name == substep or executor.substep == substep:
                foundSubstep = True
                msg.debug('Updating {0} with {1}'.format(
                    executor.name, steeringValues))
                # Steering consists of tuples with (in/out, +/-, datatype)
                for steeringValue in steeringValues:
                    if steeringValue[0] == 'in':
                        startSet = executor.inData
                    else:
                        startSet = executor.outData
                    origLen = len(startSet)
                    msg.debug('Data values to be modified are: {0}'.format(
                        startSet))
                    # '==' not 'is': identity of equal string literals is an
                    # interpreter implementation detail.
                    if steeringValue[1] == '+':
                        startSet.add(steeringValue[2])
                        # Length check detects a no-op add (type already there)
                        if len(startSet) != origLen + 1:
                            raise trfExceptions.TransformSetupException(
                                trfExit.nameToCode(
                                    'TRF_GRAPH_STEERING_ERROR'),
                                'Attempting to add data type {0} from {1} {2} fails (original set of data: {3}). Was this datatype already there?'
                                .format(steeringValue[2], executor.name, steeringValue[1], startSet))
                    else:
                        startSet.discard(steeringValue[2])
                        # Length check detects a no-op discard (type absent)
                        if len(startSet) != origLen - 1:
                            raise trfExceptions.TransformSetupException(
                                trfExit.nameToCode(
                                    'TRF_GRAPH_STEERING_ERROR'),
                                'Attempting to remove data type {0} from {1} {2} fails (original set of data: {3}). Was this datatype even present?'
                                .format(steeringValue[2], executor.name, steeringValue[1], startSet))
                    msg.debug('Updated data values to: {0}'.format(startSet))
        if not foundSubstep:
            raise trfExceptions.TransformSetupException(
                trfExit.nameToCode('TRF_GRAPH_STEERING_ERROR'),
                'This transform has no executor/substep {0}'.format(
                    substep))
def __init__(self, executorSet, inputData=None, outputData=None):
    """Build the executor graph from a set of executors.

    @param executorSet: set of executor instances forming the graph nodes
    @param inputData: data types fed into the transform (default: empty set)
    @param outputData: data types the transform must produce (default: empty set)
    @raises trfExceptions.TransformSetupException: if the same datatype
        appears in both inputData and outputData
    """
    # None sentinels replace the mutable set([]) defaults: the default
    # object was assigned to executor.inData/outData below, so mutating it
    # later would have leaked state between graph constructions.
    if inputData is None:
        inputData = set()
    if outputData is None:
        outputData = set()
    # Set basic node list
    self._nodeDict = {}
    msg.info('Transform graph input data: {0}; output data {1}'.format(
        inputData, outputData))
    if len(executorSet) == 1:
        # Single executor - in this case inData/outData is not mandatory, so we set them to the
        # input/output data of the transform
        executor = list(executorSet)[0]
        if len(executor._inData) == 0 and len(executor._outData) == 0:
            executor.inData = inputData
            executor.outData = outputData
    for executor in executorSet:
        self.addNode(executor)
    self._inputData = set(inputData)
    self._outputData = set(outputData)
    # It's forbidden for a transform to consume and produce the same datatype
    dataOverlap = self._inputData & self._outputData
    if len(dataOverlap) > 0:
        raise trfExceptions.TransformSetupException(
            trfExit.nameToCode('TRF_GRAPH_ERROR'),
            'Transform definition error, you cannot produce and consume the same datatypes in a transform. Duplicated input/output types {0}.'
            .format(' '.join(dataOverlap)))
    # Add a pseudo-start/stop nodes, from which input data flows and output data finally arrives
    # This makes the graph 'concrete' for this job
    # This is useful as then data edges all connect properly to a pair of nodes
    # We add a node for every possible output as this enables topo sorting of the graph
    # nodes for any intermediate data end nodes as well
    pseudoNodes = dict()
    pseudoNodes['_start'] = graphNode(name='_start', inData=[], outData=self._inputData, weight=0)
    for node in itervalues(self._nodeDict):
        for dataType in node.outputDataTypes:
            endNodeName = '_end_{0}'.format(dataType)
            pseudoNodes[endNodeName] = graphNode(name=endNodeName, inData=[dataType], outData=[], weight=0)
    self._nodeDict.update(pseudoNodes)
    # Toposort not yet done
    self._toposort = []
    self._toposortData = []
    # Now find connections between nodes
    self.findConnections()
def _tracePath(self): self._executorGraph.findExecutionPath() self._executorPath = self._executorGraph.execution if len(self._executorPath) is 0: raise trfExceptions.TransformSetupException( trfExit.nameToCode('TRF_SETUP'), 'Execution path finding resulted in no substeps being executed' '(Did you correctly specify input data for this transform?)') # Tell the first executor that they are the first self._executorDictionary[self._executorPath[0] ['name']].conf.firstExecutor = True