Exemplo n.º 1
0
 def runImpl(self):
     while True:
         try:
             # get a part of list
             nTasks = 10
             taskList = self.taskList.get(nTasks)
             # no more datasets
             if len(taskList) == 0:
                 self.logger.debug('{0} terminating since no more items'.format(self.__class__.__name__))
                 return
             # loop over all tasks
             for jediTaskID,commandMap in taskList:
                 # make logger
                 tmpLog = MsgWrapper(self.logger,' <jediTaskID={0}>'.format(jediTaskID))
                 commandStr = commandMap['command']
                 commentStr = commandMap['comment']
                 oldStatus  = commandMap['oldStatus']
                 tmpLog.info('start for {0}'.format(commandStr))
                 tmpStat = Interaction.SC_SUCCEEDED
                 if commandStr in ['kill','finish','reassign']:
                     tmpMsg = 'executing {0}'.format(commandStr)
                     tmpLog.sendMsg(tmpMsg,self.msgType)
                     # loop twice to see immediate result
                     for iLoop in range(2):
                         # get active PandaIDs to be killed
                         if commandStr == 'reassign' and commentStr != None and 'soft reassign' in commentStr:
                             pandaIDs = self.taskBufferIF.getQueuedPandaIDsWithTask_JEDI(jediTaskID)
                         else:
                             pandaIDs = self.taskBufferIF.getPandaIDsWithTask_JEDI(jediTaskID,True)
                         if pandaIDs == None:
                             tmpLog.error('failed to get PandaIDs for jediTaskID={0}'.format(jediTaskID))
                             tmpStat = Interaction.SC_FAILED
                         # kill jobs or update task
                         if tmpStat == Interaction.SC_SUCCEEDED:
                             if pandaIDs == []:
                                 # done since no active jobs
                                 tmpMsg = 'completed cleaning jobs'
                                 tmpLog.sendMsg(tmpMsg,self.msgType)
                                 tmpLog.info(tmpMsg)
                                 tmpTaskSpec = JediTaskSpec()
                                 tmpTaskSpec.jediTaskID = jediTaskID
                                 updateTaskStatus = True
                                 if commandStr != 'reassign':
                                     # reset oldStatus
                                     # keep oldStatus for task reassignment since it is reset when actually reassigned
                                     tmpTaskSpec.forceUpdate('oldStatus')
                                 else:
                                     # extract cloud or site
                                     if commentStr != None:
                                         tmpItems = commentStr.split(':')
                                         if tmpItems[0] == 'cloud':
                                             tmpTaskSpec.cloud = tmpItems[1]
                                         else:
                                             tmpTaskSpec.site = tmpItems[1]
                                         tmpMsg = 'set {0}={1}'.format(tmpItems[0],tmpItems[1])
                                         tmpLog.sendMsg(tmpMsg,self.msgType)
                                         tmpLog.info(tmpMsg)
                                         # back to oldStatus if necessary 
                                         if tmpItems[2] == 'y':
                                             tmpTaskSpec.status = oldStatus
                                             tmpTaskSpec.forceUpdate('oldStatus')
                                             updateTaskStatus = False
                                 if commandStr == 'reassign':
                                     tmpTaskSpec.forceUpdate('errorDialog')
                                 if updateTaskStatus:
                                     tmpTaskSpec.status = JediTaskSpec.commandStatusMap()[commandStr]['done']
                                 tmpMsg = 'set task.status={0}'.format(tmpTaskSpec.status)
                                 tmpLog.sendMsg(tmpMsg,self.msgType)
                                 tmpLog.info(tmpMsg)
                                 tmpRet = self.taskBufferIF.updateTask_JEDI(tmpTaskSpec,{'jediTaskID':jediTaskID})
                                 tmpLog.info('done with {0}'.format(str(tmpRet)))
                                 break
                             else:
                                 # kill only in the first loop
                                 if iLoop > 0:
                                     break
                                 # wait or kill jobs 
                                 if 'soft finish' in commentStr:
                                     tmpMsg = "wating {0} jobs for soft finish".format(len(pandaIDs))
                                     tmpLog.info(tmpMsg)
                                     tmpRet = True
                                     tmpLog.info('done with {0}'.format(str(tmpRet)))
                                     break
                                 else:
                                     tmpMsg = "trying to kill {0} jobs".format(len(pandaIDs))
                                     tmpLog.info(tmpMsg)
                                     tmpLog.sendMsg(tmpMsg,self.msgType)
                                     if commandStr in ['reassign','finish']:
                                         # force kill
                                         tmpRet = self.taskBufferIF.killJobs(pandaIDs,commentStr,'52',True)
                                     else:
                                         # normal kill
                                         tmpRet = self.taskBufferIF.killJobs(pandaIDs,commentStr,'50',True)
                                     tmpLog.info('done with {0}'.format(str(tmpRet)))
                 elif commandStr in ['retry','incexec']:
                     tmpMsg = 'executing {0}'.format(commandStr)
                     tmpLog.sendMsg(tmpMsg,self.msgType)
                     # change task params for incexec
                     if commandStr == 'incexec':
                         try:
                             # read task params
                             taskParam = self.taskBufferIF.getTaskParamsWithID_JEDI(jediTaskID)
                             taskParamMap = RefinerUtils.decodeJSON(taskParam)
                             # remove some params
                             for newKey in ['nFiles','fixedSandbox']:
                                 try:
                                     del taskParamMap[newKey]
                                 except:
                                     pass
                             # convert new params
                             newParamMap = RefinerUtils.decodeJSON(commentStr)
                             # change params
                             for newKey,newVal in newParamMap.iteritems():
                                 if newVal == None:
                                     # delete
                                     if newKey in taskParamMap:
                                         del taskParamMap[newKey]
                                 else:
                                     # change
                                     taskParamMap[newKey] = newVal
                             # overwrite sandbox
                             if 'fixedSandbox' in taskParamMap:
                                 # noBuild
                                 for tmpParam in taskParamMap['jobParameters']:
                                     if tmpParam['type'] == 'constant' and re.search('^-a [^ ]+$',tmpParam['value']) != None:
                                         tmpParam['value'] = '-a {0}'.taskParamMap['fixedSandbox']
                                 # build
                                 if taskParamMap.has_key('buildSpec'):
                                     taskParamMap['buildSpec']['archiveName'] = taskParamMap['fixedSandbox']
                                 # merge
                                 if taskParamMap.has_key('mergeSpec'):
                                     taskParamMap['mergeSpec']['jobParameters'] = \
                                         re.sub('-a [^ ]+','-a {0}'.format(taskParamMap['fixedSandbox']),taskParamMap['mergeSpec']['jobParameters'])
                             # encode new param
                             strTaskParams = RefinerUtils.encodeJSON(taskParamMap)
                             tmpRet = self.taskBufferIF.updateTaskParams_JEDI(jediTaskID,strTaskParams)
                             if tmpRet != True:
                                 tmpLog.error('failed to update task params')
                                 continue
                         except:
                             errtype,errvalue = sys.exc_info()[:2]
                             tmpLog.error('failed to change task params with {0}:{1}'.format(errtype.__name__,errvalue))
                             continue
                     # retry failed files
                     tmpRet,newTaskStatus = self.taskBufferIF.retryTask_JEDI(jediTaskID,commandStr)
                     if tmpRet == True:
                         tmpMsg = 'set task.status={0}'.format(newTaskStatus)
                         tmpLog.sendMsg(tmpMsg,self.msgType)
                         tmpLog.info(tmpMsg)
                     tmpLog.info('done with {0}'.format(tmpRet))
                 else:
                     tmpLog.error('unknown command')
         except:
             errtype,errvalue = sys.exc_info()[:2]
             errStr  = '{0} failed in runImpl() with {1}:{2} '.format(self.__class__.__name__,errtype.__name__,errvalue)
             errStr += traceback.format_exc()
             logger.error(errStr)
Exemplo n.º 2
0
 def runImpl(self):
     while True:
         try:
             # get a part of list
             nTasks = 10
             taskList = self.taskList.get(nTasks)
             # no more datasets
             if len(taskList) == 0:
                 self.logger.debug('{0} terminating since no more items'.format(self.__class__.__name__))
                 return
             # loop over all tasks
             for jediTaskID,commandMap in taskList:
                 # make logger
                 tmpLog = MsgWrapper(self.logger,' <jediTaskID={0}>'.format(jediTaskID))
                 commandStr = commandMap['command']
                 commentStr = commandMap['comment']
                 oldStatus  = commandMap['oldStatus']
                 tmpLog.info('start for {0}'.format(commandStr))
                 tmpStat = Interaction.SC_SUCCEEDED
                 if commandStr in ['kill','finish','reassign']:
                     # get active PandaIDs to be killed
                     pandaIDs = self.taskBufferIF.getPandaIDsWithTask_JEDI(jediTaskID,True)
                     if pandaIDs == None:
                         tmpLog.error('failed to get PandaIDs for jediTaskID={0}'.format(jediTaskID))
                         tmpStat = Interaction.SC_FAILED
                     # kill jobs or update task
                     if tmpStat == Interaction.SC_SUCCEEDED:
                         if pandaIDs == []:
                             # done since no active jobs
                             tmpLog.info('completed the command')
                             tmpTaskSpec = JediTaskSpec()
                             tmpTaskSpec.jediTaskID = jediTaskID
                             updateTaskStatus = True
                             if commandStr != 'reassign':
                                 # keep oldStatus for task reassignment since it is reset when actually reassigned
                                 tmpTaskSpec.forceUpdate('oldStatus')
                             else:
                                 # extract cloud or site
                                 tmpItems = commentStr.split(':')
                                 if tmpItems[0] == 'cloud':
                                     tmpTaskSpec.cloud = tmpItems[1]
                                 else:
                                     tmpTaskSpec.site = tmpItems[1]
                                 # back to oldStatus if necessary 
                                 if tmpItems[2] == 'y':
                                     tmpTaskSpec.status = oldStatus
                                     tmpTaskSpec.forceUpdate('oldStatus')
                                     updateTaskStatus = False
                             if updateTaskStatus:
                                 tmpTaskSpec.status = JediTaskSpec.commandStatusMap()[commandStr]['done']
                             tmpRet = self.taskBufferIF.updateTask_JEDI(tmpTaskSpec,{'jediTaskID':jediTaskID})
                         else:
                             tmpLog.info('sending kill command')
                             tmpRet = self.taskBufferIF.killJobs(pandaIDs,commentStr,'50',True)
                         tmpLog.info('done with {0}'.format(str(tmpRet)))
                 elif commandStr in ['retry','incexec']:
                     # change task params for incexec
                     if commandStr == 'incexec':
                         try:
                             # read task params
                             taskParam = self.taskBufferIF.getTaskParamsWithID_JEDI(jediTaskID)
                             taskParamMap = RefinerUtils.decodeJSON(taskParam)
                             # remove some params
                             for newKey in ['nFiles','fixedSandbox']:
                                 try:
                                     del taskParamMap[newKey]
                                 except:
                                     pass
                             # convert new params
                             newParamMap = RefinerUtils.decodeJSON(commentStr)
                             # change params
                             for newKey,newVal in newParamMap.iteritems():
                                 if newVal == None:
                                     # delete
                                     if newKey in taskParamMap:
                                         del taskParamMap[newKey]
                                 else:
                                     # change
                                     taskParamMap[newKey] = newVal
                             # overwrite sandbox
                             if 'fixedSandbox' in taskParamMap:
                                 # noBuild
                                 for tmpParam in taskParamMap['jobParameters']:
                                     if tmpParam['type'] == 'constant' and re.search('^-a [^ ]+$',tmpParam['value']) != None:
                                         tmpParam['value'] = '-a {0}'.taskParamMap['fixedSandbox']
                                 # build
                                 if taskParamMap.has_key('buildSpec'):
                                     taskParamMap['buildSpec']['archiveName'] = taskParamMap['fixedSandbox']
                                 # merge
                                 if taskParamMap.has_key('mergeSpec'):
                                     taskParamMap['mergeSpec']['jobParameters'] = \
                                         re.sub('-a [^ ]+','-a {0}'.format(taskParamMap['fixedSandbox']),taskParamMap['mergeSpec']['jobParameters'])
                             # encode new param
                             strTaskParams = RefinerUtils.encodeJSON(taskParamMap)
                             tmpRet = self.taskBufferIF.updateTaskParams_JEDI(jediTaskID,strTaskParams)
                             if tmpRet != True:
                                 tmpLog.error('failed to update task params')
                                 continue
                         except:
                             errtype,errvalue = sys.exc_info()[:2]
                             tmpLog.error('failed to change task params with {0}:{1}'.format(errtype.__name__,errvalue))
                             continue
                     # retry failed files
                     tmpRet,newTaskStatus = self.taskBufferIF.retryTask_JEDI(jediTaskID,commandStr)
                     if tmpRet == True:
                         tmpMsg = 'set task.status={0}'.format(newTaskStatus)
                         tmpLog.sendMsg(tmpMsg,self.msgType)
                         tmpLog.info(tmpMsg)
                     tmpLog.info('done with {0}'.format(tmpRet))
                 else:
                     tmpLog.error('unknown command')
         except:
             errtype,errvalue = sys.exc_info()[:2]
             logger.error('{0} failed in runImpl() with {1}:{2}'.format(self.__class__.__name__,errtype.__name__,errvalue))
Exemplo n.º 3
0
 def runImpl(self):
     while True:
         try:
             # get a part of list
             nTasks = 10
             taskList = self.taskList.get(nTasks)
             # no more datasets
             if len(taskList) == 0:
                 self.logger.debug('{0} terminating since no more items'.format(self.__class__.__name__))
                 return
             # loop over all tasks
             for jediTaskID,splitRule,taskStatus,parent_tid in taskList:
                 # make logger
                 tmpLog = MsgWrapper(self.logger,'<jediTaskID={0}>'.format(jediTaskID))
                 tmpLog.info('start')
                 tmpStat = Interaction.SC_SUCCEEDED
                 errStr = ''
                 # read task parameters
                 try:
                     taskParam = self.taskBufferIF.getTaskParamsWithID_JEDI(jediTaskID)
                     taskParamMap = RefinerUtils.decodeJSON(taskParam)
                 except:
                     errtype,errvalue = sys.exc_info()[:2]
                     errStr = 'conversion to map from json failed with {0}:{1}'.format(errtype.__name__,errvalue)
                     tmpLog.error(errStr)
                     tmpStat = Interaction.SC_FAILED
                 # get impl
                 if tmpStat == Interaction.SC_SUCCEEDED:
                     tmpLog.info('getting Impl')
                     try:
                         # get VO and sourceLabel
                         vo = taskParamMap['vo']
                         prodSourceLabel = taskParamMap['prodSourceLabel']
                         taskType = taskParamMap['taskType']
                         tmpLog.info('vo={0} sourceLabel={1} taskType={2}'.format(vo,prodSourceLabel,taskType))
                         # get impl
                         impl = self.implFactory.instantiateImpl(vo,prodSourceLabel,taskType,
                                                                 self.taskBufferIF,self.ddmIF)
                         if impl == None:
                             # task refiner is undefined
                             errStr = 'task refiner is undefined for vo={0} sourceLabel={1}'.format(vo,prodSourceLabel)
                             tmpLog.error(errStr)
                             tmpStat = Interaction.SC_FAILED
                     except:
                         errtype,errvalue = sys.exc_info()[:2]
                         errStr = 'failed to get task refiner with {0}:{1}'.format(errtype.__name__,errvalue)
                         tmpLog.error(errStr)
                         tmpStat = Interaction.SC_FAILED
                 # extract common parameters
                 if tmpStat == Interaction.SC_SUCCEEDED:
                     tmpLog.info('extracting common')                    
                     try:
                         # initalize impl
                         impl.initializeRefiner(tmpLog)
                         # extarct common parameters
                         impl.extractCommon(jediTaskID,taskParamMap,self.workQueueMapper,splitRule)
                     except:
                         errtype,errvalue = sys.exc_info()[:2]
                         errStr = 'failed to extract common parameters with {0}:{1}'.format(errtype.__name__,errvalue)
                         tmpLog.error(errStr)
                         tmpStat = Interaction.SC_FAILED
                 # check parent
                 noWaitParent = False
                 if tmpStat == Interaction.SC_SUCCEEDED:
                     if not parent_tid in [None,jediTaskID]:
                         tmpLog.info('check parent task')
                         try:
                             tmpStat = self.taskBufferIF.checkParentTask_JEDI(parent_tid)
                             if tmpStat == 'completed':
                                 # parent is done
                                 tmpStat = Interaction.SC_SUCCEEDED
                             elif tmpStat == 'running':
                                 if not impl.taskSpec.noWaitParent():
                                     # parent is running
                                     errStr = 'pending until parent task {0} is done'.format(parent_tid)
                                     impl.taskSpec.status = taskStatus
                                     impl.taskSpec.setOnHold()
                                     impl.taskSpec.setErrDiag(errStr)
                                     tmpLog.info(errStr)
                                     self.taskBufferIF.updateTask_JEDI(impl.taskSpec,{'jediTaskID':impl.taskSpec.jediTaskID})
                                     continue
                                 else:
                                     # not wait for parent
                                     tmpStat = Interaction.SC_SUCCEEDED
                                     noWaitParent = True
                             else:
                                 # parent is corrupted
                                 tmpStat = Interaction.SC_FAILED
                                 tmpErrStr = 'parent task {0} failed to complete'.format(parent_tid)
                                 impl.taskSpec.setErrDiag(tmpErrStr)
                         except:
                             errtype,errvalue = sys.exc_info()[:2]
                             errStr = 'failed to check parent task with {0}:{1}'.format(errtype.__name__,errvalue)
                             tmpLog.error(errStr)
                             tmpStat = Interaction.SC_FAILED
                 # refine
                 if tmpStat == Interaction.SC_SUCCEEDED:
                     tmpLog.info('refining with {0}'.format(impl.__class__.__name__))
                     try:
                         tmpStat = impl.doRefine(jediTaskID,taskParamMap)
                     except:
                         errtype,errvalue = sys.exc_info()[:2]
                         # no wait for parent
                         if impl.taskSpec.noWaitParent() and errtype == JediException.UnknownDatasetError:
                             impl.taskSpec.status = taskStatus
                             impl.taskSpec.setOnHold()
                             errStr = 'pending until parent produces input'
                             tmpLog.info(errStr)
                             self.taskBufferIF.updateTask_JEDI(impl.taskSpec,{'jediTaskID':impl.taskSpec.jediTaskID})
                             continue
                         else:
                             errStr = 'failed to refine task'
                             tmpLog.error(errStr)
                             tmpStat = Interaction.SC_FAILED
                 # register
                 if tmpStat != Interaction.SC_SUCCEEDED:
                     tmpLog.error('failed to refine the task')
                     if impl == None or impl.taskSpec == None:
                         tmpTaskSpec = JediTaskSpec()
                         tmpTaskSpec.jediTaskID = jediTaskID
                     else:
                         tmpTaskSpec = impl.taskSpec
                     tmpTaskSpec.status = 'tobroken'
                     if errStr != '':
                         tmpTaskSpec.setErrDiag(errStr,True)
                     self.taskBufferIF.updateTask_JEDI(tmpTaskSpec,{'jediTaskID':tmpTaskSpec.jediTaskID})
                 else:
                     tmpLog.info('registering')                    
                     # fill JEDI tables
                     try:
                         # enable protection against task duplication
                         if taskParamMap.has_key('uniqueTaskName') and taskParamMap['uniqueTaskName'] and \
                                 not impl.taskSpec.checkPreProcessed():
                             uniqueTaskName = True
                         else:
                             uniqueTaskName = False
                         strTaskParams = None
                         if impl.updatedTaskParams != None:
                             strTaskParams = RefinerUtils.encodeJSON(impl.updatedTaskParams)
                         if taskStatus == 'registered':
                             # unset pre-process flag
                             if impl.taskSpec.checkPreProcessed():
                                 impl.taskSpec.setPostPreProcess()
                             # full registration
                             tmpStat,newTaskStatus = self.taskBufferIF.registerTaskInOneShot_JEDI(jediTaskID,impl.taskSpec,
                                                                                                  impl.inMasterDatasetSpec,
                                                                                                  impl.inSecDatasetSpecList,
                                                                                                  impl.outDatasetSpecList,
                                                                                                  impl.outputTemplateMap,
                                                                                                  impl.jobParamsTemplate,
                                                                                                  strTaskParams,
                                                                                                  impl.unmergeMasterDatasetSpec,
                                                                                                  impl.unmergeDatasetSpecMap,
                                                                                                  uniqueTaskName) 
                             if not tmpStat:
                                 tmpErrStr = 'failed to register the task to JEDI in a single shot'
                                 tmpLog.error(tmpErrStr)
                                 impl.taskSpec.status = 'tobroken'
                                 impl.taskSpec.setErrDiag(tmpErrStr,True)
                                 self.taskBufferIF.updateTask_JEDI(impl.taskSpec,{'jediTaskID':impl.taskSpec.jediTaskID})
                             tmpMsg = 'set task.status={0}'.format(newTaskStatus)
                             tmpLog.info(tmpMsg)
                             tmpLog.sendMsg(tmpMsg,self.msgType)
                         else:        
                             # appending for incremetnal execution
                             tmpStat = self.taskBufferIF.appendDatasets_JEDI(jediTaskID,impl.inMasterDatasetSpec,
                                                                             impl.inSecDatasetSpecList)
                             if not tmpStat:
                                 tmpLog.error('failed to append datasets for incexec')
                     except:
                         errtype,errvalue = sys.exc_info()[:2]
                         tmpErrStr = 'failed to register the task to JEDI with {0}:{1}'.format(errtype.__name__,errvalue)
                         tmpLog.error(tmpErrStr)
                     else:
                         tmpLog.info('done')
         except:
             errtype,errvalue = sys.exc_info()[:2]
             logger.error('{0} failed in runImpl() with {1}:{2}'.format(self.__class__.__name__,errtype.__name__,errvalue))
Exemplo n.º 4
0
 def runImpl(self):
     while True:
         try:
             # get a part of list
             nTasks = 10
             taskList = self.taskList.get(nTasks)
             # no more datasets
             if len(taskList) == 0:
                 self.logger.debug('{0} terminating since no more items'.format(self.__class__.__name__))
                 return
             # loop over all tasks
             for jediTaskID,commandMap in taskList:
                 # make logger
                 tmpLog = MsgWrapper(self.logger,' < jediTaskID={0} >'.format(jediTaskID))
                 commandStr = commandMap['command']
                 commentStr = commandMap['comment']
                 oldStatus  = commandMap['oldStatus']
                 tmpLog.info('start for {0}'.format(commandStr))
                 tmpStat = Interaction.SC_SUCCEEDED
                 if commandStr in ['kill','finish','reassign']:
                     tmpMsg = 'executing {0}'.format(commandStr)
                     tmpLog.info(tmpMsg)
                     tmpLog.sendMsg(tmpMsg,self.msgType)
                     # loop twice to see immediate result
                     for iLoop in range(2):
                         # get active PandaIDs to be killed
                         if commandStr == 'reassign' and commentStr != None and 'soft reassign' in commentStr:
                             pandaIDs = self.taskBufferIF.getQueuedPandaIDsWithTask_JEDI(jediTaskID)
                         elif commandStr == 'reassign' and commentStr != None and 'nokill reassign' in commentStr:
                             pandaIDs = []
                         else:
                             pandaIDs = self.taskBufferIF.getPandaIDsWithTask_JEDI(jediTaskID,True)
                         if pandaIDs == None:
                             tmpLog.error('failed to get PandaIDs for jediTaskID={0}'.format(jediTaskID))
                             tmpStat = Interaction.SC_FAILED
                         # kill jobs or update task
                         if tmpStat == Interaction.SC_SUCCEEDED:
                             if pandaIDs == []:
                                 # done since no active jobs
                                 tmpMsg = 'completed cleaning jobs'
                                 tmpLog.sendMsg(tmpMsg,self.msgType)
                                 tmpLog.info(tmpMsg)
                                 tmpTaskSpec = JediTaskSpec()
                                 tmpTaskSpec.jediTaskID = jediTaskID
                                 updateTaskStatus = True
                                 if commandStr != 'reassign':
                                     # reset oldStatus
                                     # keep oldStatus for task reassignment since it is reset when actually reassigned
                                     tmpTaskSpec.forceUpdate('oldStatus')
                                 else:
                                     # extract cloud or site
                                     if commentStr != None:
                                         tmpItems = commentStr.split(':')
                                         if tmpItems[0] == 'cloud':
                                             tmpTaskSpec.cloud = tmpItems[1]
                                         elif tmpItems[0] == 'nucleus':
                                             tmpTaskSpec.nucleus = tmpItems[1]
                                         else:
                                             tmpTaskSpec.site = tmpItems[1]
                                         tmpMsg = 'set {0}={1}'.format(tmpItems[0],tmpItems[1])
                                         tmpLog.sendMsg(tmpMsg,self.msgType)
                                         tmpLog.info(tmpMsg)
                                         # back to oldStatus if necessary 
                                         if tmpItems[2] == 'y':
                                             tmpTaskSpec.status = oldStatus
                                             tmpTaskSpec.forceUpdate('oldStatus')
                                             updateTaskStatus = False
                                 if commandStr == 'reassign':
                                     tmpTaskSpec.forceUpdate('errorDialog')
                                 if commandStr == 'finish':
                                     # update datasets
                                     tmpLog.info('updating datasets to finish')
                                     tmpStat = self.taskBufferIF.updateDatasetsToFinishTask_JEDI(jediTaskID, self.pid)
                                     if not tmpStat:
                                         tmpLog.info('wait until datasets are updated to finish')
                                     # ignore failGoalUnreached when manually finished
                                     tmpStat,taskSpec = self.taskBufferIF.getTaskWithID_JEDI(jediTaskID)
                                     tmpTaskSpec.splitRule = taskSpec.splitRule
                                     tmpTaskSpec.unsetFailGoalUnreached()
                                 if updateTaskStatus:
                                     tmpTaskSpec.status = JediTaskSpec.commandStatusMap()[commandStr]['done']
                                 tmpMsg = 'set task_status={0}'.format(tmpTaskSpec.status)
                                 tmpLog.sendMsg(tmpMsg,self.msgType)
                                 tmpLog.info(tmpMsg)
                                 tmpRet = self.taskBufferIF.updateTask_JEDI(tmpTaskSpec,{'jediTaskID':jediTaskID},
                                                                            setOldModTime=True)
                                 tmpLog.info('done with {0}'.format(str(tmpRet)))
                                 break
                             else:
                                 # kill only in the first loop
                                 if iLoop > 0:
                                     break
                                 # wait or kill jobs 
                                 if 'soft finish' in commentStr:
                                     queuedPandaIDs = self.taskBufferIF.getQueuedPandaIDsWithTask_JEDI(jediTaskID)
                                     tmpMsg = "trying to kill {0} queued jobs for soft finish".format(len(queuedPandaIDs))
                                     tmpLog.info(tmpMsg)
                                     tmpRet = self.taskBufferIF.killJobs(queuedPandaIDs,commentStr,'52',True)
                                     tmpMsg = "wating {0} jobs for soft finish".format(len(pandaIDs))
                                     tmpLog.info(tmpMsg)
                                     tmpRet = True
                                     tmpLog.info('done with {0}'.format(str(tmpRet)))
                                     break
                                 else:
                                     tmpMsg = "trying to kill {0} jobs".format(len(pandaIDs))
                                     tmpLog.info(tmpMsg)
                                     tmpLog.sendMsg(tmpMsg,self.msgType)
                                     if commandStr in ['finish']:
                                         # force kill
                                         tmpRet = self.taskBufferIF.killJobs(pandaIDs,commentStr,'52',True)
                                     elif commandStr in ['reassign']:
                                         # force kill
                                         tmpRet = self.taskBufferIF.killJobs(pandaIDs,commentStr,'51',True)
                                     else:
                                         # normal kill
                                         tmpRet = self.taskBufferIF.killJobs(pandaIDs,commentStr,'50',True)
                                     tmpLog.info('done with {0}'.format(str(tmpRet)))
                 elif commandStr in ['retry','incexec']:
                     tmpMsg = 'executing {0}'.format(commandStr)
                     tmpLog.info(tmpMsg)
                     tmpLog.sendMsg(tmpMsg,self.msgType)
                     # change task params for incexec
                     if commandStr == 'incexec':
                         try:
                             # read task params
                             taskParam = self.taskBufferIF.getTaskParamsWithID_JEDI(jediTaskID)
                             taskParamMap = RefinerUtils.decodeJSON(taskParam)
                             # remove some params
                             for newKey in ['nFiles','fixedSandbox']:
                                 try:
                                     del taskParamMap[newKey]
                                 except:
                                     pass
                             # convert new params
                             newParamMap = RefinerUtils.decodeJSON(commentStr)
                             # change params
                             for newKey,newVal in newParamMap.iteritems():
                                 if newVal == None:
                                     # delete
                                     if newKey in taskParamMap:
                                         del taskParamMap[newKey]
                                 else:
                                     # change
                                     taskParamMap[newKey] = newVal
                             # overwrite sandbox
                             if 'fixedSandbox' in taskParamMap:
                                 # noBuild
                                 for tmpParam in taskParamMap['jobParameters']:
                                     if tmpParam['type'] == 'constant' and re.search('^-a [^ ]+$',tmpParam['value']) != None:
                                         tmpParam['value'] = '-a {0}'.taskParamMap['fixedSandbox']
                                 # build
                                 if taskParamMap.has_key('buildSpec'):
                                     taskParamMap['buildSpec']['archiveName'] = taskParamMap['fixedSandbox']
                                 # merge
                                 if taskParamMap.has_key('mergeSpec'):
                                     taskParamMap['mergeSpec']['jobParameters'] = \
                                         re.sub('-a [^ ]+','-a {0}'.format(taskParamMap['fixedSandbox']),taskParamMap['mergeSpec']['jobParameters'])
                             # encode new param
                             strTaskParams = RefinerUtils.encodeJSON(taskParamMap)
                             tmpRet = self.taskBufferIF.updateTaskParams_JEDI(jediTaskID,strTaskParams)
                             if tmpRet != True:
                                 tmpLog.error('failed to update task params')
                                 continue
                         except:
                             errtype,errvalue = sys.exc_info()[:2]
                             tmpLog.error('failed to change task params with {0}:{1}'.format(errtype.__name__,errvalue))
                             continue
                     # retry child tasks
                     if 'sole ' in commentStr:
                         retryChildTasks = False
                     else:
                         retryChildTasks = True
                     # discard events
                     if 'discard ' in commentStr:
                         discardEvents = True
                     else:
                         discardEvents = False
                     tmpRet,newTaskStatus = self.taskBufferIF.retryTask_JEDI(jediTaskID,commandStr,
                                                                             retryChildTasks=retryChildTasks,
                                                                             discardEvents=discardEvents)
                     if tmpRet == True:
                         tmpMsg = 'set task_status={0}'.format(newTaskStatus)
                         tmpLog.sendMsg(tmpMsg,self.msgType)
                         tmpLog.info(tmpMsg)
                     tmpLog.info('done with {0}'.format(tmpRet))
                 else:
                     tmpLog.error('unknown command')
         except:
             errtype,errvalue = sys.exc_info()[:2]
             errStr  = '{0} failed in runImpl() with {1}:{2} '.format(self.__class__.__name__,errtype.__name__,errvalue)
             errStr += traceback.format_exc()
             logger.error(errStr)
Exemplo n.º 5
0
 def runImpl(self):
     while True:
         try:
             # get a part of list
             nTasks = 10
             taskList = self.taskList.get(nTasks)
             # no more datasets
             if len(taskList) == 0:
                 self.logger.debug(
                     '{0} terminating since no more items'.format(
                         self.__class__.__name__))
                 return
             # loop over all tasks
             for jediTaskID, commandMap in taskList:
                 # make logger
                 tmpLog = MsgWrapper(
                     self.logger, ' < jediTaskID={0} >'.format(jediTaskID))
                 commandStr = commandMap['command']
                 commentStr = commandMap['comment']
                 oldStatus = commandMap['oldStatus']
                 tmpLog.info('start for {0}'.format(commandStr))
                 tmpStat = Interaction.SC_SUCCEEDED
                 if commandStr in ['kill', 'finish', 'reassign']:
                     tmpMsg = 'executing {0}'.format(commandStr)
                     tmpLog.info(tmpMsg)
                     tmpLog.sendMsg(tmpMsg, self.msgType)
                     # loop twice to see immediate result
                     for iLoop in range(2):
                         # get active PandaIDs to be killed
                         if commandStr == 'reassign' and commentStr is not None and 'soft reassign' in commentStr:
                             pandaIDs = self.taskBufferIF.getQueuedPandaIDsWithTask_JEDI(
                                 jediTaskID)
                         elif commandStr == 'reassign' and commentStr is not None and 'nokill reassign' in commentStr:
                             pandaIDs = []
                         else:
                             pandaIDs = self.taskBufferIF.getPandaIDsWithTask_JEDI(
                                 jediTaskID, True)
                         if pandaIDs is None:
                             tmpLog.error(
                                 'failed to get PandaIDs for jediTaskID={0}'
                                 .format(jediTaskID))
                             tmpStat = Interaction.SC_FAILED
                         # kill jobs or update task
                         if tmpStat == Interaction.SC_SUCCEEDED:
                             if pandaIDs == []:
                                 # done since no active jobs
                                 tmpMsg = 'completed cleaning jobs'
                                 tmpLog.sendMsg(tmpMsg, self.msgType)
                                 tmpLog.info(tmpMsg)
                                 tmpTaskSpec = JediTaskSpec()
                                 tmpTaskSpec.jediTaskID = jediTaskID
                                 updateTaskStatus = True
                                 if commandStr != 'reassign':
                                     # reset oldStatus
                                     # keep oldStatus for task reassignment since it is reset when actually reassigned
                                     tmpTaskSpec.forceUpdate('oldStatus')
                                 else:
                                     # extract cloud or site
                                     if commentStr is not None:
                                         tmpItems = commentStr.split(':')
                                         if tmpItems[0] == 'cloud':
                                             tmpTaskSpec.cloud = tmpItems[1]
                                         elif tmpItems[0] == 'nucleus':
                                             tmpTaskSpec.nucleus = tmpItems[
                                                 1]
                                         else:
                                             tmpTaskSpec.site = tmpItems[1]
                                         tmpMsg = 'set {0}={1}'.format(
                                             tmpItems[0], tmpItems[1])
                                         tmpLog.sendMsg(
                                             tmpMsg, self.msgType)
                                         tmpLog.info(tmpMsg)
                                         # back to oldStatus if necessary
                                         if tmpItems[2] == 'y':
                                             tmpTaskSpec.status = oldStatus
                                             tmpTaskSpec.forceUpdate(
                                                 'oldStatus')
                                             updateTaskStatus = False
                                 if commandStr == 'reassign':
                                     tmpTaskSpec.forceUpdate('errorDialog')
                                 if commandStr == 'finish':
                                     # update datasets
                                     tmpLog.info(
                                         'updating datasets to finish')
                                     tmpStat = self.taskBufferIF.updateDatasetsToFinishTask_JEDI(
                                         jediTaskID, self.pid)
                                     if not tmpStat:
                                         tmpLog.info(
                                             'wait until datasets are updated to finish'
                                         )
                                     # ignore failGoalUnreached when manually finished
                                     tmpStat, taskSpec = self.taskBufferIF.getTaskWithID_JEDI(
                                         jediTaskID)
                                     tmpTaskSpec.splitRule = taskSpec.splitRule
                                     tmpTaskSpec.unsetFailGoalUnreached()
                                 if updateTaskStatus:
                                     tmpTaskSpec.status = JediTaskSpec.commandStatusMap(
                                     )[commandStr]['done']
                                 tmpMsg = 'set task_status={0}'.format(
                                     tmpTaskSpec.status)
                                 tmpLog.sendMsg(tmpMsg, self.msgType)
                                 tmpLog.info(tmpMsg)
                                 tmpRet = self.taskBufferIF.updateTask_JEDI(
                                     tmpTaskSpec,
                                     {'jediTaskID': jediTaskID},
                                     setOldModTime=True)
                                 tmpLog.info('done with {0}'.format(
                                     str(tmpRet)))
                                 break
                             else:
                                 # kill only in the first loop
                                 if iLoop > 0:
                                     break
                                 # wait or kill jobs
                                 if commentStr and 'soft finish' in commentStr:
                                     queuedPandaIDs = self.taskBufferIF.getQueuedPandaIDsWithTask_JEDI(
                                         jediTaskID)
                                     tmpMsg = "trying to kill {0} queued jobs for soft finish".format(
                                         len(queuedPandaIDs))
                                     tmpLog.info(tmpMsg)
                                     tmpRet = self.taskBufferIF.killJobs(
                                         queuedPandaIDs, commentStr, '52',
                                         True)
                                     tmpMsg = "wating {0} jobs for soft finish".format(
                                         len(pandaIDs))
                                     tmpLog.info(tmpMsg)
                                     tmpRet = True
                                     tmpLog.info('done with {0}'.format(
                                         str(tmpRet)))
                                     break
                                 else:
                                     tmpMsg = "trying to kill {0} jobs".format(
                                         len(pandaIDs))
                                     tmpLog.info(tmpMsg)
                                     tmpLog.sendMsg(tmpMsg, self.msgType)
                                     if commandStr in ['finish']:
                                         # force kill
                                         tmpRet = self.taskBufferIF.killJobs(
                                             pandaIDs, commentStr, '52',
                                             True)
                                     elif commandStr in ['reassign']:
                                         # force kill
                                         tmpRet = self.taskBufferIF.killJobs(
                                             pandaIDs, commentStr, '51',
                                             True)
                                     else:
                                         # normal kill
                                         tmpRet = self.taskBufferIF.killJobs(
                                             pandaIDs, commentStr, '50',
                                             True)
                                     tmpLog.info('done with {0}'.format(
                                         str(tmpRet)))
                 elif commandStr in ['retry', 'incexec']:
                     tmpMsg = 'executing {0}'.format(commandStr)
                     tmpLog.info(tmpMsg)
                     tmpLog.sendMsg(tmpMsg, self.msgType)
                     # change task params for incexec
                     if commandStr == 'incexec':
                         try:
                             # read task params
                             taskParam = self.taskBufferIF.getTaskParamsWithID_JEDI(
                                 jediTaskID)
                             taskParamMap = RefinerUtils.decodeJSON(
                                 taskParam)
                             # remove some params
                             for newKey in ['nFiles', 'fixedSandbox']:
                                 try:
                                     del taskParamMap[newKey]
                                 except Exception:
                                     pass
                             # convert new params
                             newParamMap = RefinerUtils.decodeJSON(
                                 commentStr)
                             # change params
                             for newKey, newVal in iteritems(newParamMap):
                                 if newVal is None:
                                     # delete
                                     if newKey in taskParamMap:
                                         del taskParamMap[newKey]
                                 else:
                                     # change
                                     taskParamMap[newKey] = newVal
                             # overwrite sandbox
                             if 'fixedSandbox' in taskParamMap:
                                 # noBuild
                                 for tmpParam in taskParamMap[
                                         'jobParameters']:
                                     if tmpParam[
                                             'type'] == 'constant' and re.search(
                                                 '^-a [^ ]+$',
                                                 tmpParam['value']
                                             ) is not None:
                                         tmpParam['value'] = '-a {0}'.format(
                                             taskParamMap['fixedSandbox'])
                                 # build
                                 if 'buildSpec' in taskParamMap:
                                     taskParamMap['buildSpec'][
                                         'archiveName'] = taskParamMap[
                                             'fixedSandbox']
                                 # merge
                                 if 'mergeSpec' in taskParamMap:
                                     taskParamMap['mergeSpec']['jobParameters'] = \
                                         re.sub('-a [^ ]+','-a {0}'.format(taskParamMap['fixedSandbox']),taskParamMap['mergeSpec']['jobParameters'])
                             # encode new param
                             strTaskParams = RefinerUtils.encodeJSON(
                                 taskParamMap)
                             tmpRet = self.taskBufferIF.updateTaskParams_JEDI(
                                 jediTaskID, strTaskParams)
                             if tmpRet is not True:
                                 tmpLog.error(
                                     'failed to update task params')
                                 continue
                         except Exception as e:
                             tmpLog.error(
                                 'failed to change task params with {} {}'.
                                 format(str(e), traceback.format_exc()))
                             continue
                     # retry child tasks
                     if 'sole ' in commentStr:
                         retryChildTasks = False
                     else:
                         retryChildTasks = True
                     # discard events
                     if 'discard ' in commentStr:
                         discardEvents = True
                     else:
                         discardEvents = False
                     # release un-staged files
                     if 'staged ' in commentStr:
                         releaseUnstaged = True
                     else:
                         releaseUnstaged = False
                     tmpRet, newTaskStatus = self.taskBufferIF.retryTask_JEDI(
                         jediTaskID,
                         commandStr,
                         retryChildTasks=retryChildTasks,
                         discardEvents=discardEvents,
                         release_unstaged=releaseUnstaged)
                     if tmpRet is True:
                         tmpMsg = 'set task_status={0}'.format(
                             newTaskStatus)
                         tmpLog.sendMsg(tmpMsg, self.msgType)
                         tmpLog.info(tmpMsg)
                     tmpLog.info('done with {0}'.format(tmpRet))
                 else:
                     tmpLog.error('unknown command')
         except Exception as e:
             errStr = '{} failed in runImpl() with {} {} '.format(
                 self.__class__.__name__, str(e), traceback.format_exc())
             logger.error(errStr)
Exemplo n.º 6
0
 def runImpl(self):
     while True:
         try:
             # get a part of list
             nTasks = 10
             taskList = self.taskList.get(nTasks)
             # no more datasets
             if len(taskList) == 0:
                 self.logger.info('{0} terminating since no more items'.format(self.__class__.__name__))
                 return
             # loop over all tasks
             for jediTaskID,splitRule,taskStatus,parent_tid in taskList:
                 # make logger
                 tmpLog = MsgWrapper(self.logger,'< jediTaskID={0} >'.format(jediTaskID))
                 tmpLog.debug('start')
                 tmpStat = Interaction.SC_SUCCEEDED
                 errStr = ''
                 # read task parameters
                 try:
                     taskParam = None
                     taskParam = self.taskBufferIF.getTaskParamsWithID_JEDI(jediTaskID)
                     taskParamMap = RefinerUtils.decodeJSON(taskParam)
                 except:
                     errtype,errvalue = sys.exc_info()[:2]
                     errStr = 'conversion to map from json failed with {0}:{1}'.format(errtype.__name__,errvalue)
                     tmpLog.debug(taskParam)
                     tmpLog.error(errStr)
                     continue
                     tmpStat = Interaction.SC_FAILED
                 # get impl
                 if tmpStat == Interaction.SC_SUCCEEDED:
                     tmpLog.info('getting Impl')
                     try:
                         # get VO and sourceLabel
                         vo = taskParamMap['vo']
                         prodSourceLabel = taskParamMap['prodSourceLabel']
                         taskType = taskParamMap['taskType']
                         tmpLog.info('vo={0} sourceLabel={1} taskType={2}'.format(vo,prodSourceLabel,taskType))
                         # get impl
                         impl = self.implFactory.instantiateImpl(vo,prodSourceLabel,taskType,
                                                                 self.taskBufferIF,self.ddmIF)
                         if impl == None:
                             # task refiner is undefined
                             errStr = 'task refiner is undefined for vo={0} sourceLabel={1}'.format(vo,prodSourceLabel)
                             tmpLog.error(errStr)
                             tmpStat = Interaction.SC_FAILED
                     except:
                         errtype,errvalue = sys.exc_info()[:2]
                         errStr = 'failed to get task refiner with {0}:{1}'.format(errtype.__name__,errvalue)
                         tmpLog.error(errStr)
                         tmpStat = Interaction.SC_FAILED
                 # extract common parameters
                 if tmpStat == Interaction.SC_SUCCEEDED:
                     tmpLog.info('extracting common')
                     try:
                         # initalize impl
                         impl.initializeRefiner(tmpLog)
                         impl.oldTaskStatus = taskStatus
                         # extract common parameters
                         impl.extractCommon(jediTaskID,taskParamMap,self.workQueueMapper,splitRule)
                         # set parent tid
                         if not parent_tid in [None,jediTaskID]:
                             impl.taskSpec.parent_tid = parent_tid
                     except:
                         errtype,errvalue = sys.exc_info()[:2]
                         errStr = 'failed to extract common parameters with {0}:{1} {2}'.format(errtype.__name__,errvalue,
                                                                                                traceback.format_exc())
                         tmpLog.error(errStr)
                         tmpStat = Interaction.SC_FAILED
                 # check attribute length
                 if tmpStat == Interaction.SC_SUCCEEDED:
                     tmpLog.info('checking attribute length')
                     if not impl.taskSpec.checkAttrLength():
                         tmpLog.error(impl.taskSpec.errorDialog)
                         tmpStat = Interaction.SC_FAILED
                 # check parent
                 noWaitParent = False
                 parentState = None
                 if tmpStat == Interaction.SC_SUCCEEDED:
                     if not parent_tid in [None,jediTaskID]:
                         tmpLog.info('check parent task')
                         try:
                             tmpStat = self.taskBufferIF.checkParentTask_JEDI(parent_tid)
                             parentState = tmpStat
                             if tmpStat == 'completed':
                                 # parent is done
                                 tmpStat = Interaction.SC_SUCCEEDED
                             elif tmpStat == 'running':
                                 if not impl.taskSpec.noWaitParent():
                                     # parent is running
                                     errStr = 'pending until parent task {0} is done'.format(parent_tid)
                                     impl.taskSpec.status = taskStatus
                                     impl.taskSpec.setOnHold()
                                     impl.taskSpec.setErrDiag(errStr)
                                     tmpLog.info(errStr)
                                     self.taskBufferIF.updateTask_JEDI(impl.taskSpec,{'jediTaskID':impl.taskSpec.jediTaskID},
                                                                       oldStatus=[taskStatus],setFrozenTime=False)
                                     continue
                                 else:
                                     # not wait for parent
                                     tmpStat = Interaction.SC_SUCCEEDED
                                     noWaitParent = True
                             else:
                                 # parent is corrupted
                                 tmpStat = Interaction.SC_FAILED
                                 tmpErrStr = 'parent task {0} failed to complete'.format(parent_tid)
                                 impl.taskSpec.setErrDiag(tmpErrStr)
                         except:
                             errtype,errvalue = sys.exc_info()[:2]
                             errStr = 'failed to check parent task with {0}:{1}'.format(errtype.__name__,errvalue)
                             tmpLog.error(errStr)
                             tmpStat = Interaction.SC_FAILED
                 # refine
                 if tmpStat == Interaction.SC_SUCCEEDED:
                     tmpLog.info('refining with {0}'.format(impl.__class__.__name__))
                     try:
                         tmpStat = impl.doRefine(jediTaskID,taskParamMap)
                     except:
                         errtype,errvalue = sys.exc_info()[:2]
                         # wait unknown input if noWaitParent or waitInput
                         if ((impl.taskSpec.noWaitParent() or impl.taskSpec.waitInput()) \
                                 and errtype == JediException.UnknownDatasetError) or parentState == 'running' \
                                 or errtype == Interaction.JEDITemporaryError:
                             if impl.taskSpec.noWaitParent() or parentState == 'running':
                                 tmpErrStr = 'pending until parent produces input'
                                 setFrozenTime=False
                             elif errtype == Interaction.JEDITemporaryError:
                                 tmpErrStr = 'pending due to DDM problem. {0}'.format(errvalue)
                                 setFrozenTime=True
                             else:
                                 tmpErrStr = 'pending until input is staged'
                                 setFrozenTime=True
                             impl.taskSpec.status = taskStatus
                             impl.taskSpec.setOnHold()
                             impl.taskSpec.setErrDiag(tmpErrStr)
                             tmpLog.info(tmpErrStr)
                             self.taskBufferIF.updateTask_JEDI(impl.taskSpec,{'jediTaskID':impl.taskSpec.jediTaskID},
                                                               oldStatus=[taskStatus],
                                                               insertUnknown=impl.unknownDatasetList,
                                                               setFrozenTime=setFrozenTime)
                             continue
                         else:
                             errStr  = 'failed to refine task with {0}:{1}'.format(errtype.__name__,errvalue)
                             tmpLog.error(errStr)
                             tmpStat = Interaction.SC_FAILED
                 # register
                 if tmpStat != Interaction.SC_SUCCEEDED:
                     tmpLog.error('failed to refine the task')
                     if impl == None or impl.taskSpec == None:
                         tmpTaskSpec = JediTaskSpec()
                         tmpTaskSpec.jediTaskID = jediTaskID
                     else:
                         tmpTaskSpec = impl.taskSpec
                     tmpTaskSpec.status = 'tobroken'
                     if errStr != '':
                         tmpTaskSpec.setErrDiag(errStr,True)
                     self.taskBufferIF.updateTask_JEDI(tmpTaskSpec,{'jediTaskID':tmpTaskSpec.jediTaskID},oldStatus=[taskStatus])
                 else:
                     tmpLog.info('registering')                    
                     # fill JEDI tables
                     try:
                         # enable protection against task duplication
                         if taskParamMap.has_key('uniqueTaskName') and taskParamMap['uniqueTaskName'] and \
                                 not impl.taskSpec.checkPreProcessed():
                             uniqueTaskName = True
                         else:
                             uniqueTaskName = False
                         strTaskParams = None
                         if impl.updatedTaskParams != None:
                             strTaskParams = RefinerUtils.encodeJSON(impl.updatedTaskParams)
                         if taskStatus == 'registered':
                             # unset pre-process flag
                             if impl.taskSpec.checkPreProcessed():
                                 impl.taskSpec.setPostPreProcess()
                             # full registration
                             tmpStat,newTaskStatus = self.taskBufferIF.registerTaskInOneShot_JEDI(jediTaskID,impl.taskSpec,
                                                                                                  impl.inMasterDatasetSpec,
                                                                                                  impl.inSecDatasetSpecList,
                                                                                                  impl.outDatasetSpecList,
                                                                                                  impl.outputTemplateMap,
                                                                                                  impl.jobParamsTemplate,
                                                                                                  strTaskParams,
                                                                                                  impl.unmergeMasterDatasetSpec,
                                                                                                  impl.unmergeDatasetSpecMap,
                                                                                                  uniqueTaskName,
                                                                                                  taskStatus) 
                             if not tmpStat:
                                 tmpErrStr = 'failed to register the task to JEDI in a single shot'
                                 tmpLog.error(tmpErrStr)
                                 impl.taskSpec.status = newTaskStatus
                                 impl.taskSpec.setErrDiag(tmpErrStr,True)
                                 self.taskBufferIF.updateTask_JEDI(impl.taskSpec,{'jediTaskID':impl.taskSpec.jediTaskID},
                                                                   oldStatus=[taskStatus])
                             tmpMsg = 'set task.status={0}'.format(newTaskStatus)
                             tmpLog.info(tmpMsg)
                             tmpLog.sendMsg(tmpMsg,self.msgType)
                         else:
                             # disable scouts if previous attempt didn't use it
                             if not impl.taskSpec.useScout(splitRule):
                                 impl.taskSpec.setUseScout(False)
                             # update task with new params
                             self.taskBufferIF.updateTask_JEDI(impl.taskSpec,{'jediTaskID':impl.taskSpec.jediTaskID},
                                                               oldStatus=[taskStatus])
                             # appending for incremetnal execution
                             tmpStat = self.taskBufferIF.appendDatasets_JEDI(jediTaskID,impl.inMasterDatasetSpec,
                                                                             impl.inSecDatasetSpecList)
                             if not tmpStat:
                                 tmpLog.error('failed to append datasets for incexec')
                     except:
                         errtype,errvalue = sys.exc_info()[:2]
                         tmpErrStr = 'failed to register the task to JEDI with {0}:{1}'.format(errtype.__name__,errvalue)
                         tmpLog.error(tmpErrStr)
                     else:
                         tmpLog.info('done')
         except:
             errtype,errvalue = sys.exc_info()[:2]
             logger.error('{0} failed in runImpl() with {1}:{2}'.format(self.__class__.__name__,errtype.__name__,errvalue))
Exemplo n.º 7
0
 def runImpl(self):
     while True:
         try:
             # get a part of list
             nTasks = 10
             taskList = self.taskList.get(nTasks)
             # no more datasets
             if len(taskList) == 0:
                 self.logger.info('{0} terminating since no more items'.format(self.__class__.__name__))
                 return
             # loop over all tasks
             for jediTaskID,splitRule,taskStatus,parent_tid in taskList:
                 # make logger
                 tmpLog = MsgWrapper(self.logger,'< jediTaskID={0} >'.format(jediTaskID))
                 tmpLog.debug('start')
                 tmpStat = Interaction.SC_SUCCEEDED
                 errStr = ''
                 # read task parameters
                 try:
                     taskParam = None
                     taskParam = self.taskBufferIF.getTaskParamsWithID_JEDI(jediTaskID)
                     taskParamMap = RefinerUtils.decodeJSON(taskParam)
                 except:
                     errtype,errvalue = sys.exc_info()[:2]
                     errStr = 'conversion to map from json failed with {0}:{1}'.format(errtype.__name__,errvalue)
                     tmpLog.debug(taskParam)
                     tmpLog.error(errStr)
                     continue
                     tmpStat = Interaction.SC_FAILED
                 # get impl
                 if tmpStat == Interaction.SC_SUCCEEDED:
                     tmpLog.info('getting Impl')
                     try:
                         # get VO and sourceLabel
                         vo = taskParamMap['vo']
                         prodSourceLabel = taskParamMap['prodSourceLabel']
                         taskType = taskParamMap['taskType']
                         tmpLog.info('vo={0} sourceLabel={1} taskType={2}'.format(vo,prodSourceLabel,taskType))
                         # get impl
                         impl = self.implFactory.instantiateImpl(vo,prodSourceLabel,taskType,
                                                                 self.taskBufferIF,self.ddmIF)
                         if impl == None:
                             # task refiner is undefined
                             errStr = 'task refiner is undefined for vo={0} sourceLabel={1}'.format(vo,prodSourceLabel)
                             tmpLog.error(errStr)
                             tmpStat = Interaction.SC_FAILED
                     except:
                         errtype,errvalue = sys.exc_info()[:2]
                         errStr = 'failed to get task refiner with {0}:{1}'.format(errtype.__name__,errvalue)
                         tmpLog.error(errStr)
                         tmpStat = Interaction.SC_FAILED
                 # extract common parameters
                 if tmpStat == Interaction.SC_SUCCEEDED:
                     tmpLog.info('extracting common')
                     try:
                         # initalize impl
                         impl.initializeRefiner(tmpLog)
                         impl.oldTaskStatus = taskStatus
                         # extract common parameters
                         impl.extractCommon(jediTaskID, taskParamMap, self.workQueueMapper, splitRule)
                         # set parent tid
                         if not parent_tid in [None,jediTaskID]:
                             impl.taskSpec.parent_tid = parent_tid
                     except:
                         errtype,errvalue = sys.exc_info()[:2]
                         errStr = 'failed to extract common parameters with {0}:{1} {2}'.format(errtype.__name__,errvalue,
                                                                                                traceback.format_exc())
                         tmpLog.error(errStr)
                         tmpStat = Interaction.SC_FAILED
                 # check attribute length
                 if tmpStat == Interaction.SC_SUCCEEDED:
                     tmpLog.info('checking attribute length')
                     if not impl.taskSpec.checkAttrLength():
                         tmpLog.error(impl.taskSpec.errorDialog)
                         tmpStat = Interaction.SC_FAILED
                 # staging
                 if tmpStat == Interaction.SC_SUCCEEDED:
                     if 'toStaging' in taskParamMap and taskStatus <> 'staged':
                         errStr = 'wait until staging is done'
                         impl.taskSpec.status = 'staging'
                         impl.taskSpec.oldStatus = taskStatus
                         impl.taskSpec.setErrDiag(errStr)
                         # not to update some task attributes
                         impl.taskSpec.resetRefinedAttrs()
                         tmpLog.info(errStr)
                         self.taskBufferIF.updateTask_JEDI(impl.taskSpec, {'jediTaskID':impl.taskSpec.jediTaskID},
                                                           oldStatus=[taskStatus], updateDEFT=False, setFrozenTime=False)
                         continue
                 # check parent
                 noWaitParent = False
                 parentState = None
                 if tmpStat == Interaction.SC_SUCCEEDED:
                     if parent_tid not in [None,jediTaskID]:
                         tmpLog.info('check parent task')
                         try:
                             tmpStat = self.taskBufferIF.checkParentTask_JEDI(parent_tid)
                             parentState = tmpStat
                             if tmpStat == 'completed':
                                 # parent is done
                                 tmpStat = Interaction.SC_SUCCEEDED
                             elif tmpStat == 'running':
                                 if not impl.taskSpec.noWaitParent():
                                     # parent is running
                                     errStr = 'pending until parent task {0} is done'.format(parent_tid)
                                     impl.taskSpec.status = taskStatus
                                     impl.taskSpec.setOnHold()
                                     impl.taskSpec.setErrDiag(errStr)
                                     # not to update some task attributes
                                     impl.taskSpec.resetRefinedAttrs()
                                     tmpLog.info(errStr)
                                     self.taskBufferIF.updateTask_JEDI(impl.taskSpec,{'jediTaskID':impl.taskSpec.jediTaskID},
                                                                       oldStatus=[taskStatus],setFrozenTime=False)
                                     continue
                                 else:
                                     # not wait for parent
                                     tmpStat = Interaction.SC_SUCCEEDED
                                     noWaitParent = True
                             else:
                                 # parent is corrupted
                                 tmpStat = Interaction.SC_FAILED
                                 tmpErrStr = 'parent task {0} failed to complete'.format(parent_tid)
                                 impl.taskSpec.setErrDiag(tmpErrStr)
                         except:
                             errtype,errvalue = sys.exc_info()[:2]
                             errStr = 'failed to check parent task with {0}:{1}'.format(errtype.__name__,errvalue)
                             tmpLog.error(errStr)
                             tmpStat = Interaction.SC_FAILED
                 # refine
                 if tmpStat == Interaction.SC_SUCCEEDED:
                     tmpLog.info('refining with {0}'.format(impl.__class__.__name__))
                     try:
                         tmpStat = impl.doRefine(jediTaskID,taskParamMap)
                     except:
                         errtype,errvalue = sys.exc_info()[:2]
                         # wait unknown input if noWaitParent or waitInput
                         if ((impl.taskSpec.noWaitParent() or impl.taskSpec.waitInput()) \
                                 and errtype == JediException.UnknownDatasetError) or parentState == 'running' \
                                 or errtype == Interaction.JEDITemporaryError:
                             if impl.taskSpec.noWaitParent() or parentState == 'running':
                                 tmpErrStr = 'pending until parent produces input'
                                 setFrozenTime=False
                             elif errtype == Interaction.JEDITemporaryError:
                                 tmpErrStr = 'pending due to DDM problem. {0}'.format(errvalue)
                                 setFrozenTime=True
                             else:
                                 tmpErrStr = 'pending until input is staged'
                                 setFrozenTime=True
                             impl.taskSpec.status = taskStatus
                             impl.taskSpec.setOnHold()
                             impl.taskSpec.setErrDiag(tmpErrStr)
                             # not to update some task attributes
                             impl.taskSpec.resetRefinedAttrs()
                             tmpLog.info(tmpErrStr)
                             self.taskBufferIF.updateTask_JEDI(impl.taskSpec,{'jediTaskID':impl.taskSpec.jediTaskID},
                                                               oldStatus=[taskStatus],
                                                               insertUnknown=impl.unknownDatasetList,
                                                               setFrozenTime=setFrozenTime)
                             continue
                         else:
                             errStr  = 'failed to refine task with {0}:{1}'.format(errtype.__name__,errvalue)
                             tmpLog.error(errStr)
                             tmpStat = Interaction.SC_FAILED
                 # register
                 if tmpStat != Interaction.SC_SUCCEEDED:
                     tmpLog.error('failed to refine the task')
                     if impl == None or impl.taskSpec == None:
                         tmpTaskSpec = JediTaskSpec()
                         tmpTaskSpec.jediTaskID = jediTaskID
                     else:
                         tmpTaskSpec = impl.taskSpec
                     tmpTaskSpec.status = 'tobroken'
                     if errStr != '':
                         tmpTaskSpec.setErrDiag(errStr,True)
                     self.taskBufferIF.updateTask_JEDI(tmpTaskSpec,{'jediTaskID':tmpTaskSpec.jediTaskID},oldStatus=[taskStatus])
                 else:
                     tmpLog.info('registering')                    
                     # fill JEDI tables
                     try:
                         # enable protection against task duplication
                         if taskParamMap.has_key('uniqueTaskName') and taskParamMap['uniqueTaskName'] and \
                                 not impl.taskSpec.checkPreProcessed():
                             uniqueTaskName = True
                         else:
                             uniqueTaskName = False
                         strTaskParams = None
                         if impl.updatedTaskParams != None:
                             strTaskParams = RefinerUtils.encodeJSON(impl.updatedTaskParams)
                         if taskStatus in ['registered', 'staged']:
                             # unset pre-process flag
                             if impl.taskSpec.checkPreProcessed():
                                 impl.taskSpec.setPostPreProcess()
                             # full registration
                             tmpStat,newTaskStatus = self.taskBufferIF.registerTaskInOneShot_JEDI(jediTaskID,impl.taskSpec,
                                                                                                  impl.inMasterDatasetSpec,
                                                                                                  impl.inSecDatasetSpecList,
                                                                                                  impl.outDatasetSpecList,
                                                                                                  impl.outputTemplateMap,
                                                                                                  impl.jobParamsTemplate,
                                                                                                  strTaskParams,
                                                                                                  impl.unmergeMasterDatasetSpec,
                                                                                                  impl.unmergeDatasetSpecMap,
                                                                                                  uniqueTaskName,
                                                                                                  taskStatus) 
                             if not tmpStat:
                                 tmpErrStr = 'failed to register the task to JEDI in a single shot'
                                 tmpLog.error(tmpErrStr)
                                 impl.taskSpec.status = newTaskStatus
                                 impl.taskSpec.setErrDiag(tmpErrStr,True)
                                 self.taskBufferIF.updateTask_JEDI(impl.taskSpec,{'jediTaskID':impl.taskSpec.jediTaskID},
                                                                   oldStatus=[taskStatus])
                             tmpMsg = 'set task_status={0}'.format(newTaskStatus)
                             tmpLog.info(tmpMsg)
                             tmpLog.sendMsg(tmpMsg,self.msgType)
                         else:
                             # disable scouts if previous attempt didn't use it
                             if not impl.taskSpec.useScout(splitRule):
                                 impl.taskSpec.setUseScout(False)
                             # disallow to reset some attributes
                             for attName in ['ramCount', 'walltime', 'cpuTime', 'startTime']:
                                 impl.taskSpec.resetChangedAttr(attName)
                             # update task with new params
                             self.taskBufferIF.updateTask_JEDI(impl.taskSpec,{'jediTaskID':impl.taskSpec.jediTaskID},
                                                               oldStatus=[taskStatus])
                             # appending for incremetnal execution
                             tmpStat = self.taskBufferIF.appendDatasets_JEDI(jediTaskID,impl.inMasterDatasetSpec,
                                                                             impl.inSecDatasetSpecList)
                             if not tmpStat:
                                 tmpLog.error('failed to append datasets for incexec')
                     except:
                         errtype,errvalue = sys.exc_info()[:2]
                         tmpErrStr = 'failed to register the task to JEDI with {0}:{1}'.format(errtype.__name__,errvalue)
                         tmpLog.error(tmpErrStr)
                     else:
                         tmpLog.info('done')
         except:
             errtype,errvalue = sys.exc_info()[:2]
             logger.error('{0} failed in runImpl() with {1}:{2}'.format(self.__class__.__name__,errtype.__name__,errvalue))