Примеры использования EventServiceUtils на Python

Язык программирования: Python

Пространство имен/Пакет: pandaserver.taskbuffer

Класс/Тип: EventServiceUtils

Примеров на hotexamples.com: 16

Найдено 16 примеров использования EventServiceUtils на Python. Это лучшие примеры кода на Python для pandaserver.taskbuffer.EventServiceUtils, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать / Скрыть

isEventServiceMerge(5)

isEventServiceJob(4)

isJobCloningJob(3)

isJumboJob(3)

getEsDatasetName(2)

getJobCloningValue(2)

getJobCloningType(1)

isCoJumboJob(1)

Пример #1

Показать файл

 def __init__(self, taskBuffer, jobs, logger, params, defaultMap):
     """Store the collaborators and split the given jobs by kind.

     Jobs for which EventServiceUtils.isJumboJob() is true are collected in
     self.jumboJobs, all other jobs in self.jobs.  Each entry of params is
     then set as an instance attribute, and each entry of defaultMap is set
     only when no attribute with that name exists yet.
     """
     self.jobs, self.jumboJobs = [], []
     # route every job into the list matching its kind
     for job in jobs:
         bucket = self.jumboJobs if EventServiceUtils.isJumboJob(job) else self.jobs
         bucket.append(job)
     self.taskBuffer = taskBuffer
     self.logger = logger
     # named parameters always win: they are applied unconditionally
     for attrName in params:
         setattr(self, attrName, params[attrName])
     # defaults only fill in attributes that are still missing
     for attrName in defaultMap:
         if hasattr(self, attrName):
             continue
         setattr(self, attrName, defaultMap[attrName])

Пример #2

Показать файл

 def doPostProcess(self,taskSpec,tmpLog):
     """Run the post-processing steps for a task and return a status code.

     Steps, in order: pre-check, removal of files that are in DDM but not in
     the list of successful files (output datasets), freezing of output/log
     datasets, deletion of transient output datasets, duplication check,
     deletion of the ES dataset, notification, and basic post-processing.

     Returns:
         self.SC_SUCCEEDED when the pre-check reports a truthy status or when
         all steps complete; self.SC_FAILED when file cleanup, freezing or the
         notification fails; self.SC_FATAL when doPreCheck or
         doBasicPostProcess raises.
     """
     # pre-check: a truthy status ends the post-processing right away
     try:
         if self.doPreCheck(taskSpec,tmpLog):
             return self.SC_SUCCEEDED
     except Exception as exc:
         tmpLog.error('doPreCheck failed with {0}:{1}'.format(type(exc).__name__,exc))
         return self.SC_FATAL
     # DDM interface for the VO of this task
     ddmIF = self.ddmIF.getInterface(taskSpec.vo)
     for dsSpec in taskSpec.datasetSpecList:
         isOutput = dsSpec.type in ['output']
         # pseudo output datasets have nothing to clean up
         if isOutput and dsSpec.isPseudo():
             continue
         # step 1: drop files that DDM has but that are not successful
         try:
             if isOutput:
                 okFiles = self.taskBufferIF.getSuccessfulFiles_JEDI(dsSpec.jediTaskID,dsSpec.datasetID)
                 if okFiles is None:
                     tmpLog.warning('failed to get successful files for {0}'.format(dsSpec.datasetName))
                     return self.SC_FAILED
                 ddmFiles = ddmIF.getFilesInDataset(dsSpec.datasetName,skipDuplicate=False,ignoreUnknown=True)
                 tmpLog.debug('datasetID={0}:Name={1} has {2} files in DB, {3} files in DDM'.format(dsSpec.datasetID,
                                                                                                   dsSpec.datasetName,
                                                                                                   len(okFiles),len(ddmFiles)))
                 staleFiles = []
                 for _guid,fileAttrs in iteritems(ddmFiles):
                     lfn = fileAttrs['lfn']
                     if lfn in okFiles:
                         continue
                     staleFiles.append({'scope':fileAttrs['scope'], 'name':lfn})
                     tmpLog.debug('delete {0} from {1}'.format(lfn,dsSpec.datasetName))
                 if staleFiles:
                     ddmIF.deleteFilesFromDataset(dsSpec.datasetName,staleFiles)
         except Exception as exc:
             tmpLog.warning('failed to remove wrong files with {0}:{1}'.format(type(exc).__name__,exc))
             return self.SC_FAILED
         # step 2: freeze output and log datasets
         try:
             if dsSpec.type in ['output','log','trn_log']:
                 tmpLog.info('freezing datasetID={0}:Name={1}'.format(dsSpec.datasetID,dsSpec.datasetName))
                 ddmIF.freezeDataset(dsSpec.datasetName,ignoreUnknown=True)
         except Exception as exc:
             tmpLog.warning('failed to freeze datasets with {0}:{1}'.format(type(exc).__name__,exc))
             return self.SC_FAILED
         # step 3: delete transient datasets; a failure here is only logged
         try:
             if dsSpec.type in ['trn_output']:
                 tmpLog.debug('deleting datasetID={0}:Name={1}'.format(dsSpec.datasetID,dsSpec.datasetName))
                 deleteMsg = ddmIF.deleteDataset(dsSpec.datasetName,False,ignoreUnknown=True)
                 tmpLog.info(deleteMsg)
         except Exception as exc:
             tmpLog.warning('failed to delete datasets with {0}:{1}'.format(type(exc).__name__,exc))
     # duplication check, skipped for the 'Test' global share
     if self.getFinalTaskStatus(taskSpec) in ['finished','done'] and taskSpec.gshare != 'Test':
         nDup = self.taskBufferIF.checkDuplication_JEDI(taskSpec.jediTaskID)
         tmpLog.debug('checked duplication with {0}'.format(nDup))
         if nDup > 0:
             # pause the task, remembering the status it would have reached
             pauseMsg = 'paused since {0} duplication found'.format(nDup)
             taskSpec.oldStatus = self.getFinalTaskStatus(taskSpec)
             taskSpec.status = 'paused'
             taskSpec.setErrDiag(pauseMsg)
             tmpLog.debug(pauseMsg)
     # delete the ES dataset; a failure here is only logged
     if taskSpec.registerEsFiles():
         try:
             esDatasetName = EventServiceUtils.getEsDatasetName(taskSpec.jediTaskID)
             tmpLog.debug('deleting ES dataset name={0}'.format(esDatasetName))
             deleteMsg = ddmIF.deleteDataset(esDatasetName,False,ignoreUnknown=True)
             tmpLog.debug(deleteMsg)
         except Exception as exc:
             tmpLog.warning('failed to delete ES dataset with {0}:{1}'.format(type(exc).__name__,exc))
     # notify the external system
     try:
         AtlasPostProcessorUtils.send_notification(self.taskBufferIF, ddmIF, taskSpec, tmpLog)
     except Exception as exc:
         tmpLog.error('failed to talk to external system with {0}'.format(str(exc)))
         return self.SC_FAILED
     # common post-processing
     try:
         self.doBasicPostProcess(taskSpec,tmpLog)
     except Exception as exc:
         tmpLog.error('doBasicPostProcess failed with {0}:{1}'.format(type(exc).__name__,exc))
         return self.SC_FATAL
     return self.SC_SUCCEEDED

Пример #3

Показать файл

Файл: TaskRefinerBase.py Проект: ruslan33/panda-jedi

 def extractCommon(self,jediTaskID,taskParamMap,workQueueMapper,splitRule):
     """Build the common part of the task spec from the task parameters.

     Creates a JediTaskSpec, fills it from taskParamMap, resolves the work
     queue, stores the spec in self.taskSpec and then registers the split
     rules through self.setSplitRule() and the task-spec setters.

     Args:
         jediTaskID: task identifier; also used as reqID when taskParamMap
             has no 'reqID'.
         taskParamMap: dict of task parameters.  The keys 'taskName',
             'userName', 'vo', 'prodSourceLabel', 'taskPriority',
             'architecture', 'transUses', 'transHome', 'transPath',
             'processingType' and 'taskType' are read unconditionally.
             NOTE: when 'useJobCloning' is present, the implicit defaults
             for 'nEventsPerWorker', 'nSitesPerJob' and 'nEsConsumers' are
             written back into this dict.
         workQueueMapper: object providing getQueueWithName() and
             getQueueWithSelParams().
         splitRule: initial split rule stored on the task spec.

     Returns:
         None.  The result is available as self.taskSpec; self.cloudName and
         self.siteName are set only when 'cloud' / 'site' are given.

     Raises:
         KeyError: one of the unconditionally read keys is missing.
         RuntimeError: no work queue could be resolved for the task.
     """
     # make task spec
     taskSpec = JediTaskSpec()
     taskSpec.jediTaskID = jediTaskID
     taskSpec.taskName = taskParamMap['taskName']
     taskSpec.userName = taskParamMap['userName']
     taskSpec.vo = taskParamMap['vo']
     taskSpec.prodSourceLabel = taskParamMap['prodSourceLabel']
     taskSpec.taskPriority = taskParamMap['taskPriority']
     taskSpec.currentPriority = taskSpec.taskPriority
     taskSpec.architecture = taskParamMap['architecture']
     taskSpec.transUses = taskParamMap['transUses']
     taskSpec.transHome = taskParamMap['transHome']
     taskSpec.transPath = taskParamMap['transPath']
     taskSpec.processingType = taskParamMap['processingType']
     taskSpec.taskType = taskParamMap['taskType']
     taskSpec.splitRule = splitRule
     taskSpec.startTime = datetime.datetime.utcnow()
     # optional attributes, in assignment order; entries marked noDefault are
     # left untouched on the spec when the parameter is absent
     noDefault = object()
     optionalAttrs = (
         ('workingGroup',     noDefault),
         ('countryGroup',     noDefault),
         ('ticketID',         noDefault),
         ('ticketSystemType', noDefault),
         ('reqID',            jediTaskID),
         ('coreCount',        1),
         ('walltime',         0),
         ('walltimeUnit',     noDefault),
         ('outDiskCount',     0),
         ('outDiskUnit',      noDefault),
         ('workDiskCount',    0),
         ('workDiskUnit',     noDefault),
         ('ramCount',         0),
         ('ramUnit',          noDefault),
         ('baseRamCount',     0),
         # HS06 stuff
         ('cpuTimeUnit',      noDefault),
         ('cpuTime',          noDefault),
         # 90% of cpu efficiency by default
         ('cpuEfficiency',    90),
         # 10min of offset by default
         ('baseWalltime',     10*60),
         # for merge
         ('mergeRamCount',    noDefault),
         ('mergeCoreCount',   noDefault),
     )
     for attrName,defaultValue in optionalAttrs:
         if attrName in taskParamMap:
             setattr(taskSpec,attrName,taskParamMap[attrName])
         elif defaultValue is not noDefault:
             setattr(taskSpec,attrName,defaultValue)
     # scout
     if 'skipScout' not in taskParamMap and not taskSpec.isPostScout():
         taskSpec.setUseScout(True)
     # cloud
     if 'cloud' in taskParamMap:
         self.cloudName = taskParamMap['cloud']
         taskSpec.cloud = self.cloudName
     else:
         # set dummy to force update
         taskSpec.cloud = 'dummy'
         taskSpec.cloud = None
     # site
     if 'site' in taskParamMap:
         self.siteName = taskParamMap['site']
         taskSpec.site = self.siteName
     else:
         # set dummy to force update
         taskSpec.site = 'dummy'
         taskSpec.site = None
     # nucleus
     if 'nucleus' in taskParamMap:
         taskSpec.nucleus = taskParamMap['nucleus']
     # preset some parameters for job cloning
     if 'useJobCloning' in taskParamMap:
         # set implicit parameters (written back into the caller's dict)
         taskParamMap.setdefault('nEventsPerWorker',1)
         taskParamMap.setdefault('nSitesPerJob',2)
         taskParamMap.setdefault('nEsConsumers',taskParamMap['nSitesPerJob'])
     # event service
     taskSpec.eventService = 1 if 'nEventsPerWorker' in taskParamMap else 0
     # ttcr: requested time to completion
     if 'ttcrTimestamp' in taskParamMap:
         try:
             # get rid of the +00:00 timezone string and parse the timestamp
             taskSpec.ttcRequested = datetime.datetime.strptime(taskParamMap['ttcrTimestamp'].split('+')[0], '%Y-%m-%d %H:%M:%S.%f')
         except (IndexError, ValueError):
             pass
     # goal
     if 'goal' in taskParamMap:
         try:
             # stored multiplied by ten; values of 1000 and above are reset to None
             taskSpec.goal = int(float(taskParamMap['goal'])*10)
             if taskSpec.goal >= 1000:
                 taskSpec.goal = None
         except Exception:
             # best effort: an unparsable goal is ignored
             pass
     # campaign
     if 'campaign' in taskParamMap:
         taskSpec.campaign = taskParamMap['campaign']
     # work queue
     workQueue = None
     if 'workQueueName' in taskParamMap:
         # work queue is specified
         workQueue = workQueueMapper.getQueueWithName(taskSpec.vo,taskSpec.prodSourceLabel,taskParamMap['workQueueName'])
     if workQueue is None:
         # get work queue based on task attributes
         workQueue,tmpStr = workQueueMapper.getQueueWithSelParams(taskSpec.vo,
                                                                  taskSpec.prodSourceLabel,
                                                                  processingType=taskSpec.processingType,
                                                                  workingGroup=taskSpec.workingGroup,
                                                                  coreCount=taskSpec.coreCount,
                                                                  site=taskSpec.site)
     if workQueue is None:
         errStr  = 'workqueue is undefined for vo={0} labal={1} '.format(taskSpec.vo,taskSpec.prodSourceLabel)
         errStr += 'processingType={0} workingGroup={1} coreCount={2} '.format(taskSpec.processingType,
                                                                               taskSpec.workingGroup,
                                                                               taskSpec.coreCount)
         raise RuntimeError(errStr)
     taskSpec.workQueue_ID = workQueue.queue_id
     self.taskSpec = taskSpec
     # set split rule
     if 'tgtNumEventsPerJob' in taskParamMap:
         # set nEventsPerJob not respect file boundaries when nFilesPerJob is not used
         if 'nFilesPerJob' not in taskParamMap:
             self.setSplitRule(None,taskParamMap['tgtNumEventsPerJob'],JediTaskSpec.splitRuleToken['nEventsPerJob'])
     # rules whose parameter name equals the token name, in registration order
     sameNameRules = ('nFilesPerJob', 'nEventsPerJob', 'nGBPerJob', 'nMaxFilesPerJob',
                      'nEventsPerWorker', 'useLocalIO', 'disableAutoRetry', 'nEsConsumers',
                      'waitInput', 'addNthFieldToLFN', 'scoutSuccessRate', 't1Weight',
                      'maxAttemptES', 'nSitesPerJob', 'nEventsPerMergeJob', 'nFilesPerMergeJob',
                      'nGBPerMergeJob', 'nMaxFilesPerMergeJob')
     for ruleName in sameNameRules:
         self.setSplitRule(taskParamMap,ruleName,JediTaskSpec.splitRuleToken[ruleName])
     if 'loadXML' in taskParamMap:
         self.setSplitRule(None,3,JediTaskSpec.splitRuleToken['loadXML'])
         self.setSplitRule(None,4,JediTaskSpec.splitRuleToken['groupBoundaryID'])
     if 'pfnList' in taskParamMap:
         self.setSplitRule(None,1,JediTaskSpec.splitRuleToken['pfnList'])
     if 'noWaitParent' in taskParamMap:
         self.setSplitRule(None,1,JediTaskSpec.splitRuleToken['noWaitParent'])
     if 'respectLB' in taskParamMap:
         self.setSplitRule(None,1,JediTaskSpec.splitRuleToken['respectLB'])
     if 'reuseSecOnDemand' in taskParamMap:
         self.setSplitRule(None,1,JediTaskSpec.splitRuleToken['reuseSecOnDemand'])
     if 'ddmBackEnd' in taskParamMap:
         self.taskSpec.setDdmBackEnd(taskParamMap['ddmBackEnd'])
     if 'disableReassign' in taskParamMap:
         self.setSplitRule(None,1,JediTaskSpec.splitRuleToken['disableReassign'])
     if 'allowPartialFinish' in taskParamMap:
         self.setSplitRule(None,1,JediTaskSpec.splitRuleToken['allowPartialFinish'])
     if 'useExhausted' in taskParamMap:
         self.setSplitRule(None,1,JediTaskSpec.splitRuleToken['useExhausted'])
     if 'useRealNumEvents' in taskParamMap:
         self.setSplitRule(None,1,JediTaskSpec.splitRuleToken['useRealNumEvents'])
     if 'ipConnectivity' in taskParamMap:
         self.taskSpec.setIpConnectivity(taskParamMap['ipConnectivity'])
     if 'altStageOut' in taskParamMap:
         self.taskSpec.setAltStageOut(taskParamMap['altStageOut'])
     if 'allowInputLAN' in taskParamMap:
         self.taskSpec.setAllowInputLAN(taskParamMap['allowInputLAN'])
     if 'runUntilClosed' in taskParamMap:
         self.setSplitRule(None,1,JediTaskSpec.splitRuleToken['runUntilClosed'])
     if 'stayOutputOnSite' in taskParamMap:
         self.setSplitRule(None,1,JediTaskSpec.splitRuleToken['stayOutputOnSite'])
     if 'useJobCloning' in taskParamMap:
         scValue = EventServiceUtils.getJobCloningValue(taskParamMap['useJobCloning'])
         self.setSplitRule(None,scValue,JediTaskSpec.splitRuleToken['useJobCloning'])
     if 'failWhenGoalUnreached' in taskParamMap and taskParamMap['failWhenGoalUnreached'] == True:
         self.setSplitRule(None,1,JediTaskSpec.splitRuleToken['failGoalUnreached'])
     if 'switchEStoNormal' in taskParamMap:
         self.setSplitRule(None,1,JediTaskSpec.splitRuleToken['switchEStoNormal'])
     if 'nEventsPerRange' in taskParamMap:
         self.setSplitRule(None,1,JediTaskSpec.splitRuleToken['dynamicNumEvents'])
     if 'allowInputWAN' in taskParamMap and taskParamMap['allowInputWAN'] == True:
         self.setSplitRule(None,1,JediTaskSpec.splitRuleToken['allowInputWAN'])
     if 'putLogToOS' in taskParamMap and taskParamMap['putLogToOS'] == True:
         self.setSplitRule(None,1,JediTaskSpec.splitRuleToken['putLogToOS'])
     # return
     return

Пример #4

Показать файл

Файл: AtlasTaskSetupper.py Проект: PanDAWMS/panda-jedi

    def doSetup(self,taskSpec,datasetToRegister,pandaJobs):
        # make logger
        tmpLog = MsgWrapper(logger,"< jediTaskID={0} >".format(taskSpec.jediTaskID))
        tmpLog.info('start label={0} taskType={1}'.format(taskSpec.prodSourceLabel,taskSpec.taskType))
        # returns
        retFatal    = self.SC_FATAL
        retTmpError = self.SC_FAILED
        retOK       = self.SC_SUCCEEDED
        try:
            # get DDM I/F
            ddmIF = self.ddmIF.getInterface(taskSpec.vo)
            # register datasets
            if datasetToRegister != [] or taskSpec.prodSourceLabel in ['user']:
                # prod vs anal
                userSetup = False
                if taskSpec.prodSourceLabel in ['user']:
                    userSetup = True
                    # collect datasetID to register datasets/containers just in case
                    for tmpPandaJob in pandaJobs:
                        if not tmpPandaJob.produceUnMerge():
                            for tmpFileSpec in tmpPandaJob.Files:
                                if tmpFileSpec.type in ['output','log']:
                                    if tmpFileSpec.datasetID not in datasetToRegister:
                                        datasetToRegister.append(tmpFileSpec.datasetID)
                tmpLog.info('datasetToRegister={0}'.format(str(datasetToRegister)))
                # get site mapper
                siteMapper = self.taskBufferIF.getSiteMapper()

                # loop over all datasets
                avDatasetList = []
                cnDatasetMap  = {}
                for datasetID in datasetToRegister:
                    # get output and log datasets
                    tmpLog.info('getting datasetSpec with datasetID={0}'.format(datasetID))
                    tmpStat,datasetSpec = self.taskBufferIF.getDatasetWithID_JEDI(taskSpec.jediTaskID,
                                                                                  datasetID)
                    if not tmpStat:
                        tmpLog.error('failed to get output and log datasets')
                        return retFatal
                    if datasetSpec.isPseudo():
                        tmpLog.info('skip pseudo dataset')
                        continue
                    # DDM backend
                    ddmBackEnd = taskSpec.getDdmBackEnd()
                    tmpLog.info('checking {0}'.format(datasetSpec.datasetName))
                    # check if dataset and container are available in DDM
                    for targetName in [datasetSpec.datasetName,datasetSpec.containerName]:
                        if targetName is None:
                            continue
                        if targetName not in avDatasetList:
                            # set lifetime
                            if targetName.startswith('panda'):
                                if datasetSpec.type == 'trn_log' and taskSpec.prodSourceLabel == 'managed':
                                    lifetime = 365
                                else:
                                    lifetime = 14
                            else:
                                lifetime = None
                            # check dataset/container in DDM
                            tmpList = ddmIF.listDatasets(targetName)
                            if tmpList == []:
                                # get location
                                location = None
                                locForRule = None
                                if targetName == datasetSpec.datasetName:
                                    # dataset
                                    if datasetSpec.site in ['',None]:
                                        if DataServiceUtils.getDistributedDestination(datasetSpec.storageToken) is not None:
                                            locForRule = datasetSpec.destination
                                        elif DataServiceUtils.getDestinationSE(datasetSpec.storageToken) is not None:
                                            location = DataServiceUtils.getDestinationSE(datasetSpec.storageToken)
                                        elif taskSpec.cloud is not None:
                                            # use T1 SE
                                            tmpT1Name = siteMapper.getCloud(taskSpec.cloud)['source']
                                            location = siteMapper.getDdmEndpoint(tmpT1Name, datasetSpec.storageToken,
                                                                                 taskSpec.prodSourceLabel,
                                                                                 JobUtils.translate_tasktype_to_jobtype(taskSpec.taskType))
                                    else:
                                        tmpLog.info('site={0} token={1}'.format(datasetSpec.site, datasetSpec.storageToken))
                                        location = siteMapper.getDdmEndpoint(datasetSpec.site,datasetSpec.storageToken,
                                                                             taskSpec.prodSourceLabel,
                                                                             JobUtils.translate_tasktype_to_jobtype(taskSpec.taskType))
                                if locForRule is None:
                                    locForRule = location
                                # set metadata
                                if taskSpec.prodSourceLabel in ['managed','test'] and targetName == datasetSpec.datasetName:
                                    metaData = {}
                                    metaData['task_id'] = taskSpec.jediTaskID
                                    if taskSpec.campaign not in [None,'']:
                                        metaData['campaign'] = taskSpec.campaign
                                    if datasetSpec.getTransient() is not None:
                                        metaData['transient'] = datasetSpec.getTransient()
                                else:
                                    metaData = None
                                # register dataset/container
                                tmpLog.info('registering {0} with location={1} backend={2} lifetime={3} meta={4}'.format(targetName,
                                                                                                                         location,
                                                                                                                         ddmBackEnd,
                                                                                                                         lifetime,
                                                                                                                         str(metaData)))
                                tmpStat = ddmIF.registerNewDataset(targetName,backEnd=ddmBackEnd,location=location,
                                                                   lifetime=lifetime,metaData=metaData)
                                if not tmpStat:
                                    tmpLog.error('failed to register {0}'.format(targetName))
                                    return retFatal
                                # procedures for user
                                if userSetup or DataServiceUtils.getDistributedDestination(datasetSpec.storageToken) is not None:
                                    # register location
                                    tmpToRegister = False
                                    if userSetup and targetName == datasetSpec.datasetName and datasetSpec.site not in ['',None]:
                                        if taskSpec.workingGroup:
                                            userName = taskSpec.workingGroup
                                        else:
                                            userName = taskSpec.userName
                                        grouping = None
                                        tmpToRegister = True
                                    elif DataServiceUtils.getDistributedDestination(datasetSpec.storageToken) is not None:
                                        userName = None
                                        grouping = 'NONE'
                                        tmpToRegister = True
                                    if tmpToRegister:
                                        activity = DataServiceUtils.getActivityForOut(taskSpec.prodSourceLabel)
                                        tmpLog.info('registering location={} lifetime={} days activity={} grouping={} '
                                                    'owner={}'.format(locForRule, lifetime, activity, grouping,
                                                                      userName))
                                        tmpStat = ddmIF.registerDatasetLocation(targetName,locForRule,owner=userName,
                                                                                lifetime=lifetime,backEnd=ddmBackEnd,
                                                                                activity=activity,grouping=grouping)
                                        if not tmpStat:
                                            tmpLog.error('failed to register location {0} for {1}'.format(locForRule,
                                                                                                          targetName))
                                            return retFatal
                                        # double copy
                                        if userSetup and datasetSpec.type == 'output':
                                            if datasetSpec.destination != datasetSpec.site:
                                                tmpLog.info('skip making double copy as destination={0} is not site={1}'.format(datasetSpec.destination,
                                                                                                                                datasetSpec.site))
                                            else:

                                                second_copy = True
                                                try:
                                                    if taskSpec.site:
                                                        panda_site = siteMapper.getSite(taskSpec.site)
                                                        if panda_site.catchall and 'skip_2nd_copy' in panda_site.catchall:
                                                            tmpLog.info('skip making double copy as specified in {0} catchall'.format(panda_site))
                                                            second_copy = False
                                                except Exception:
                                                    second_copy = True

                                                if second_copy:
                                                    locForDouble = '(type=SCRATCHDISK)\\notforextracopy=True'
                                                    tmpMsg  = 'registering double copy '
                                                    tmpMsg += 'location="{0}" lifetime={1}days activity={2} for dataset={3}'.format(locForDouble,lifetime,
                                                                                                                                    activity,targetName)
                                                    tmpLog.info(tmpMsg)
                                                    tmpStat = ddmIF.registerDatasetLocation(targetName,locForDouble,copies=2,owner=userName,
                                                                                            lifetime=lifetime,activity=activity,
                                                                                            grouping='NONE',weight='freespace',
                                                                                            ignore_availability=False)
                                                    if not tmpStat:
                                                        tmpLog.error('failed to register double copylocation {0} for {1}'.format(locForDouble,
                                                                                                                               targetName))
                                                        return retFatal
                                avDatasetList.append(targetName)
                            else:
                                tmpLog.info('{0} already registered'.format(targetName))
                    # check if dataset is in the container
                    if datasetSpec.containerName is not None and datasetSpec.containerName != datasetSpec.datasetName:
                        # get list of constituent datasets in the container
                        if datasetSpec.containerName not in cnDatasetMap:
                            cnDatasetMap[datasetSpec.containerName] = ddmIF.listDatasetsInContainer(datasetSpec.containerName)
                        # add dataset
                        if datasetSpec.datasetName not in cnDatasetMap[datasetSpec.containerName]:
                            tmpLog.info('adding {0} to {1}'.format(datasetSpec.datasetName,datasetSpec.containerName))
                            tmpStat = ddmIF.addDatasetsToContainer(datasetSpec.containerName,[datasetSpec.datasetName],
                                                                   backEnd=ddmBackEnd)
                            if not tmpStat:
                                tmpLog.error('failed to add {0} to {1}'.format(datasetSpec.datasetName,
                                                                               datasetSpec.containerName))
                                return retFatal
                            cnDatasetMap[datasetSpec.containerName].append(datasetSpec.datasetName)
                        else:
                            tmpLog.info('{0} already in {1}'.format(datasetSpec.datasetName,datasetSpec.containerName))
                    # update dataset
                    datasetSpec.status = 'registered'
                    self.taskBufferIF.updateDataset_JEDI(datasetSpec,{'jediTaskID':taskSpec.jediTaskID,
                                                                      'datasetID':datasetID})
            # register ES datasets
            if taskSpec.registerEsFiles():
                targetName = EventServiceUtils.getEsDatasetName(taskSpec.jediTaskID)
                location = None
                metaData = {}
                metaData['task_id'] = taskSpec.jediTaskID
                metaData['hidden']  = True
                tmpLog.info('registering ES dataset {0} with location={1} meta={2}'.format(targetName,
                                                                                           location,
                                                                                           str(metaData)))
                tmpStat = ddmIF.registerNewDataset(targetName,location=location,metaData=metaData,
                                                   resurrect=True)
                if not tmpStat:
                    tmpLog.error('failed to register ES dataset {0}'.format(targetName))
                    return retFatal
                # register rule
                location = 'type=DATADISK'
                activity = DataServiceUtils.getActivityForOut(taskSpec.prodSourceLabel)
                grouping = 'NONE'
                tmpLog.info('registering location={0} activity={1} grouping={2}'.format(location,
                                                                                        activity,
                                                                                        grouping))
                tmpStat = ddmIF.registerDatasetLocation(targetName,location,activity=activity,
                                                        grouping=grouping)
                if not tmpStat:
                    tmpLog.error('failed to register location {0} with {2} for {1}'.format(location,
                                                                                           targetName,
                                                                                           activity))
                    return retFatal
            # open datasets
            if taskSpec.prodSourceLabel in ['managed','test']:
                # get the list of output/log datasets
                outDatasetList = []
                for tmpPandaJob in pandaJobs:
                    for tmpFileSpec in tmpPandaJob.Files:
                        if tmpFileSpec.type in ['output','log']:
                            if tmpFileSpec.destinationDBlock not in outDatasetList:
                                outDatasetList.append(tmpFileSpec.destinationDBlock)
                # open datasets
                for outDataset in outDatasetList:
                    tmpLog.info('open {0}'.format(outDataset))
                    ddmIF.openDataset(outDataset)
                    # unset lifetime
                    ddmIF.setDatasetMetadata(outDataset,'lifetime',None)
            # return
            tmpLog.info('done')
            return retOK
        except Exception:
            errtype,errvalue = sys.exc_info()[:2]
            tmpLog.error('doSetup failed with {0}:{1}'.format(errtype.__name__,errvalue))
            taskSpec.setErrDiag(tmpLog.uploadLog(taskSpec.jediTaskID))
            return retFatal

Пример #5

Показать файл

Файл: TaskRefinerBase.py Проект: pavlo-svirin/panda-jedi

    def extractCommon(self, jediTaskID, taskParamMap, workQueueMapper,
                      splitRule):
        """Build the task spec from the task parameter map.

        A new ``JediTaskSpec`` is filled from ``taskParamMap``, stored in
        ``self.taskSpec``, and then completed with the split rule tokens,
        the work queue ID and the global share.

        Args:
            jediTaskID: value assigned to ``taskSpec.jediTaskID``; also used
                as ``reqID`` when ``taskParamMap`` has no ``reqID``.
            taskParamMap: dict of task parameters. ``taskName``, ``userName``,
                ``vo``, ``prodSourceLabel``, ``taskPriority``,
                ``architecture``, ``transUses``, ``transHome``, ``transPath``,
                ``processingType`` and ``taskType`` are read unconditionally.
                When ``useJobCloning`` is present the map is updated in place
                with defaults for ``nEventsPerWorker``, ``nSitesPerJob`` and
                ``nEsConsumers``.
            workQueueMapper: object providing ``getQueueWithName`` and
                ``getQueueWithSelParams`` used to resolve the work queue.
            splitRule: initial value of ``taskSpec.splitRule``.

        Returns:
            None. The result is available as ``self.taskSpec``;
            ``self.cloudName`` / ``self.siteName`` are set only when
            ``cloud`` / ``site`` are given in ``taskParamMap``.

        Raises:
            KeyError: if one of the unconditionally read keys is missing.
            RuntimeError: if no work queue can be resolved for the task.
        """
        tokens = JediTaskSpec.splitRuleToken
        # make task spec
        taskSpec = JediTaskSpec()
        taskSpec.jediTaskID = jediTaskID
        taskSpec.taskName = taskParamMap['taskName']
        taskSpec.userName = taskParamMap['userName']
        taskSpec.vo = taskParamMap['vo']
        taskSpec.prodSourceLabel = taskParamMap['prodSourceLabel']
        taskSpec.taskPriority = taskParamMap['taskPriority']
        taskSpec.currentPriority = taskSpec.taskPriority
        taskSpec.architecture = taskParamMap['architecture']
        taskSpec.transUses = taskParamMap['transUses']
        taskSpec.transHome = taskParamMap['transHome']
        taskSpec.transPath = taskParamMap['transPath']
        taskSpec.processingType = taskParamMap['processingType']
        taskSpec.taskType = taskParamMap['taskType']
        taskSpec.splitRule = splitRule
        taskSpec.startTime = datetime.datetime.utcnow()
        # optional attributes which are left untouched when not given
        for tmpKey in ('workingGroup', 'countryGroup', 'ticketID',
                       'ticketSystemType'):
            if tmpKey in taskParamMap:
                setattr(taskSpec, tmpKey, taskParamMap[tmpKey])
        # optional attributes with defaults
        taskSpec.reqID = taskParamMap.get('reqID', jediTaskID)
        taskSpec.coreCount = taskParamMap.get('coreCount', 1)
        taskSpec.walltime = taskParamMap.get('walltime', 0)
        if 'walltimeUnit' not in taskParamMap:
            # force to set NULL so that retried tasks get data from scouts again
            taskSpec.forceUpdate('walltimeUnit')
        taskSpec.outDiskCount = taskParamMap.get('outDiskCount', 0)
        if 'outDiskUnit' in taskParamMap:
            taskSpec.outDiskUnit = taskParamMap['outDiskUnit']
        taskSpec.workDiskCount = taskParamMap.get('workDiskCount', 0)
        if 'workDiskUnit' in taskParamMap:
            taskSpec.workDiskUnit = taskParamMap['workDiskUnit']
        taskSpec.ramCount = taskParamMap.get('ramCount', 0)
        if 'ramUnit' in taskParamMap:
            taskSpec.ramUnit = taskParamMap['ramUnit']
        taskSpec.baseRamCount = taskParamMap.get('baseRamCount', 0)
        # HS06 stuff
        if 'cpuTimeUnit' in taskParamMap:
            taskSpec.cpuTimeUnit = taskParamMap['cpuTimeUnit']
        if 'cpuTime' in taskParamMap:
            taskSpec.cpuTime = taskParamMap['cpuTime']
        # 90% of cpu efficiency by default
        taskSpec.cpuEfficiency = taskParamMap.get('cpuEfficiency', 90)
        # 10min of offset by default
        taskSpec.baseWalltime = taskParamMap.get('baseWalltime', 10 * 60)
        # for merge
        if 'mergeRamCount' in taskParamMap:
            taskSpec.mergeRamCount = taskParamMap['mergeRamCount']
        if 'mergeCoreCount' in taskParamMap:
            taskSpec.mergeCoreCount = taskParamMap['mergeCoreCount']
        # scout
        if 'skipScout' not in taskParamMap and not taskSpec.isPostScout():
            taskSpec.setUseScout(True)
        # cloud
        if 'cloud' in taskParamMap:
            self.cloudName = taskParamMap['cloud']
            taskSpec.cloud = self.cloudName
        else:
            # set dummy to force update
            taskSpec.cloud = 'dummy'
            taskSpec.cloud = None
        # site
        if 'site' in taskParamMap:
            self.siteName = taskParamMap['site']
            taskSpec.site = self.siteName
        else:
            # set dummy to force update
            taskSpec.site = 'dummy'
            taskSpec.site = None
        # nucleus
        if 'nucleus' in taskParamMap:
            taskSpec.nucleus = taskParamMap['nucleus']
        # preset some parameters for job cloning
        if 'useJobCloning' in taskParamMap:
            # set implicit parameters (the caller's map is modified in place)
            taskParamMap.setdefault('nEventsPerWorker', 1)
            taskParamMap.setdefault('nSitesPerJob', 2)
            taskParamMap.setdefault('nEsConsumers',
                                    taskParamMap['nSitesPerJob'])
        # event service flag: 2 for job cloning, 1 for event service, 0 otherwise
        if 'useJobCloning' in taskParamMap:
            taskSpec.eventService = 2
        elif 'nEventsPerWorker' in taskParamMap:
            taskSpec.eventService = 1
        else:
            taskSpec.eventService = 0
        # ttcr: requested time to completion
        if 'ttcrTimestamp' in taskParamMap:
            try:
                # get rid of the +00:00 timezone string and parse the timestamp
                taskSpec.ttcRequested = datetime.datetime.strptime(
                    taskParamMap['ttcrTimestamp'].split('+')[0],
                    '%Y-%m-%d %H:%M:%S.%f')
            except (IndexError, ValueError):
                pass
        # goal
        if 'goal' in taskParamMap:
            try:
                taskSpec.goal = int(float(taskParamMap['goal']) * 10)
                if taskSpec.goal >= 1000:
                    taskSpec.goal = None
            except Exception:
                # best effort: an unparsable goal is ignored, but interpreter
                # exit requests are no longer swallowed
                pass
        # campaign
        if 'campaign' in taskParamMap:
            taskSpec.campaign = taskParamMap['campaign']
        # request type
        if 'requestType' in taskParamMap:
            taskSpec.requestType = taskParamMap['requestType']
        self.taskSpec = taskSpec
        # set split rule
        # NOTE: the order of the calls below is kept on purpose since the
        # split rule is updated call by call
        if 'tgtNumEventsPerJob' in taskParamMap and \
                'nFilesPerJob' not in taskParamMap:
            # set nEventsPerJob not respect file boundaries when nFilesPerJob is not used
            self.setSplitRule(None, taskParamMap['tgtNumEventsPerJob'],
                              tokens['nEventsPerJob'])
        # parameters whose values are copied from the map when present
        for tmpKey in ('nFilesPerJob', 'nEventsPerJob', 'nGBPerJob',
                       'nMaxFilesPerJob', 'nEventsPerWorker', 'useLocalIO',
                       'disableAutoRetry', 'nEsConsumers', 'waitInput',
                       'addNthFieldToLFN', 'scoutSuccessRate', 't1Weight',
                       'maxAttemptES', 'nSitesPerJob', 'nJumboJobs',
                       'nEventsPerMergeJob', 'nFilesPerMergeJob',
                       'nGBPerMergeJob', 'nMaxFilesPerMergeJob'):
            self.setSplitRule(taskParamMap, tmpKey, tokens[tmpKey])
        if 'loadXML' in taskParamMap:
            self.setSplitRule(None, 3, tokens['loadXML'])
            self.setSplitRule(None, 4, tokens['groupBoundaryID'])
        if 'pfnList' in taskParamMap:
            self.setSplitRule(None, 1, tokens['pfnList'])
        if 'noWaitParent' in taskParamMap and \
                taskParamMap['noWaitParent'] == True:
            self.setSplitRule(None, 1, tokens['noWaitParent'])
        if 'respectLB' in taskParamMap:
            self.setSplitRule(None, 1, tokens['respectLB'])
        if 'reuseSecOnDemand' in taskParamMap:
            self.setSplitRule(None, 1, tokens['reuseSecOnDemand'])
        if 'ddmBackEnd' in taskParamMap:
            self.taskSpec.setDdmBackEnd(taskParamMap['ddmBackEnd'])
        # flags enabled by the mere presence of the key
        for tmpKey in ('disableReassign', 'allowPartialFinish',
                       'useExhausted', 'useRealNumEvents'):
            if tmpKey in taskParamMap:
                self.setSplitRule(None, 1, tokens[tmpKey])
        if 'ipConnectivity' in taskParamMap:
            self.taskSpec.setIpConnectivity(taskParamMap['ipConnectivity'])
        if 'altStageOut' in taskParamMap:
            self.taskSpec.setAltStageOut(taskParamMap['altStageOut'])
        if 'allowInputLAN' in taskParamMap:
            self.taskSpec.setAllowInputLAN(taskParamMap['allowInputLAN'])
        for tmpKey in ('runUntilClosed', 'stayOutputOnSite'):
            if tmpKey in taskParamMap:
                self.setSplitRule(None, 1, tokens[tmpKey])
        if 'useJobCloning' in taskParamMap:
            scValue = EventServiceUtils.getJobCloningValue(
                taskParamMap['useJobCloning'])
            self.setSplitRule(None, scValue, tokens['useJobCloning'])
        if 'failWhenGoalUnreached' in taskParamMap and \
                taskParamMap['failWhenGoalUnreached'] == True:
            self.setSplitRule(None, 1, tokens['failGoalUnreached'])
        if 'switchEStoNormal' in taskParamMap:
            self.setSplitRule(None, 1, tokens['switchEStoNormal'])
        if 'nEventsPerRange' in taskParamMap:
            self.setSplitRule(None, 1, tokens['dynamicNumEvents'])
        # flags enabled only when the value compares equal to True
        for tmpKey in ('allowInputWAN', 'putLogToOS', 'mergeEsOnOS',
                       'writeInputToFile', 'useFileAsSourceLFN',
                       'ignoreMissingInDS'):
            if tmpKey in taskParamMap and taskParamMap[tmpKey] == True:
                self.setSplitRule(None, 1, tokens[tmpKey])
        # work queue
        workQueue = None
        if 'workQueueName' in taskParamMap:
            # work queue is specified
            workQueue = workQueueMapper.getQueueWithName(
                taskSpec.vo, taskSpec.prodSourceLabel,
                taskParamMap['workQueueName'])
        if workQueue is None:
            # get work queue based on task attributes
            workQueue, tmpStr = workQueueMapper.getQueueWithSelParams(
                taskSpec.vo,
                taskSpec.prodSourceLabel,
                processingType=taskSpec.processingType,
                workingGroup=taskSpec.workingGroup,
                coreCount=taskSpec.coreCount,
                site=taskSpec.site,
                eventService=taskSpec.eventService,
                splitRule=taskSpec.splitRule,
                campaign=taskSpec.campaign)
        if workQueue is None:
            errStr = 'workqueue is undefined for vo={0} label={1} '.format(
                taskSpec.vo, taskSpec.prodSourceLabel)
            errStr += 'processingType={0} workingGroup={1} coreCount={2} eventService={3} '.format(
                taskSpec.processingType, taskSpec.workingGroup,
                taskSpec.coreCount, taskSpec.eventService)
            errStr += 'splitRule={0} campaign={1}'.format(
                taskSpec.splitRule, taskSpec.campaign)
            # call form of raise works on both Python 2 and 3
            raise RuntimeError(errStr)
        self.taskSpec.workQueue_ID = workQueue.queue_id

        # Initialize the global share
        if 'gshare' in taskParamMap and self.taskBufferIF.is_valid_share(
                taskParamMap['gshare']):
            # share is specified
            gshare = taskParamMap['gshare']
        else:
            # get share based on definition
            gshare = self.taskBufferIF.get_share_for_task(self.taskSpec)
            if gshare is None:
                gshare = 'No match'
        # set the share in both cases; it used to be set only in the
        # fallback branch so that an explicitly requested share was dropped
        self.taskSpec.gshare = gshare

        # return
        return

Пример #6

Показать файл

Файл: AdderGen.py Проект: PanDAWMS/panda-server

 def parseXML(self):
     """Parse the job output report and update the file records of the job.

     ``self.data`` is parsed as XML first and, if that raises, as JSON to
     collect LFN, GUID, size, checksums, SURL, full LFN and endpoints of the
     reported files. ``self.job.metadata`` is then parsed (XML and JSON,
     both best effort) to collect the number of events and GUIDs per LFN.
     Finally each entry of ``self.job.Files`` gets its status and attributes
     updated and ``self.extraInfo`` is filled.

     Returns:
         0 when the job has no log/output files or when parsing completed.
         2 when ``self.data`` can be parsed neither as XML nor as JSON, when
         ``copyFilesForVariableNumOutputs`` fails, or when a reported LFN is
         not among ``self.job.Files``.
         The ``return 1`` statement is unreachable since it is in the
         ``else`` branch of ``if True``.
     """
     # get LFN and GUID
     # self.logger.debug('XML filename : %s' % self.xmlFile)
     # no outputs
     log_out = [f for f in self.job.Files if f.type in ['log', 'output']]
     if not log_out:
         self.logger.debug("has no outputs")
         self.logger.debug("parseXML end")
         return 0
     # get input files
     inputLFNs = []
     for file in self.job.Files:
         if file.type == 'input':
             inputLFNs.append(file.lfn)
     # parse XML
     # parallel lists: the i-th entries of lfns/guids/fsizes/md5sums/chksums/surls
     # describe the same reported file
     lfns = []
     guids = []
     fsizes = []
     md5sums = []
     chksums = []
     surls = []
     fullLfnMap = {}
     nEventsMap = {}
     guidMap = dict()
     try:
         # root  = xml.dom.minidom.parse(self.xmlFile)
         root = xml.dom.minidom.parseString(self.data)
         files = root.getElementsByTagName('File')
         for file in files:
             # get GUID
             guid = str(file.getAttribute('ID'))
             # get PFN and LFN nodes
             logical = file.getElementsByTagName('logical')[0]
             lfnNode = logical.getElementsByTagName('lfn')[0]
             # convert UTF8 to Raw
             lfn = str(lfnNode.getAttribute('name'))
             # get metadata
             fsize = None
             md5sum = None
             adler32 = None
             surl = None
             fullLFN = None
             for meta in file.getElementsByTagName('metadata'):
                 # dispatch on the name of the metadata attribute
                 name = str(meta.getAttribute('att_name'))
                 if name == 'fsize':
                     fsize = long(meta.getAttribute('att_value'))
                 elif name == 'md5sum':
                     md5sum = str(meta.getAttribute('att_value'))
                     # discard the value unless it is 32 hex digits
                     if re.search("^[a-fA-F0-9]{32}$", md5sum) is None:
                         md5sum = None
                 elif name == 'adler32':
                     adler32 = str(meta.getAttribute('att_value'))
                 elif name == 'surl':
                     surl = str(meta.getAttribute('att_value'))
                 elif name == 'full_lfn':
                     fullLFN = str(meta.getAttribute('att_value'))
             # endpoints
             self.extraInfo['endpoint'][lfn] = []
             for epNode in file.getElementsByTagName('endpoint'):
                 self.extraInfo['endpoint'][lfn].append(
                     str(epNode.firstChild.data))
             # error check: non-input files need a size and at least one checksum
             if (lfn not in inputLFNs) and (fsize is None or
                                            (md5sum is None
                                             and adler32 is None)):
                 if EventServiceUtils.isEventServiceMerge(self.job):
                     # incomplete entries are silently skipped for ES merge jobs
                     continue
                 else:
                     raise RuntimeError('fsize/md5sum/adler32/surl=None')
             # append
             lfns.append(lfn)
             guids.append(guid)
             fsizes.append(fsize)
             md5sums.append(md5sum)
             surls.append(surl)
             if adler32 is not None:
                 # use adler32 if available
                 chksums.append("ad:%s" % adler32)
             else:
                 chksums.append("md5:%s" % md5sum)
             if fullLFN is not None:
                 fullLfnMap[lfn] = fullLFN
     except Exception:
         # any failure above (including the RuntimeError of the error check)
         # falls back to parsing the same data as JSON
         # NOTE(review): entries appended before the failure are not cleared
         # parse json
         try:
             import json
             # with open(self.xmlFile) as tmpF:
             jsonDict = json.loads(self.data)
             for lfn in jsonDict:
                 fileData = jsonDict[lfn]
                 lfn = str(lfn)
                 fsize = None
                 md5sum = None
                 adler32 = None
                 surl = None
                 fullLFN = None
                 guid = str(fileData['guid'])
                 if 'fsize' in fileData:
                     fsize = long(fileData['fsize'])
                 if 'md5sum' in fileData:
                     md5sum = str(fileData['md5sum'])
                     # discard the value unless it is 32 hex digits
                     if re.search("^[a-fA-F0-9]{32}$", md5sum) is None:
                         md5sum = None
                 if 'adler32' in fileData:
                     adler32 = str(fileData['adler32'])
                 if 'surl' in fileData:
                     surl = str(fileData['surl'])
                 if 'full_lfn' in fileData:
                     fullLFN = str(fileData['full_lfn'])
                 # endpoints
                 self.extraInfo['endpoint'][lfn] = []
                 if 'endpoint' in fileData:
                     self.extraInfo['endpoint'][lfn] = fileData['endpoint']
                 # error check: same rule as in the XML branch
                 if (lfn not in inputLFNs) and (fsize is None or
                                                (md5sum is None
                                                 and adler32 is None)):
                     if EventServiceUtils.isEventServiceMerge(self.job):
                         continue
                     else:
                         raise RuntimeError(
                             'fsize/md5sum/adler32/surl=None')
                 # append
                 lfns.append(lfn)
                 guids.append(guid)
                 fsizes.append(fsize)
                 md5sums.append(md5sum)
                 surls.append(surl)
                 if adler32 is not None:
                     # use adler32 if available
                     chksums.append("ad:%s" % adler32)
                 else:
                     chksums.append("md5:%s" % md5sum)
                 if fullLFN is not None:
                     fullLfnMap[lfn] = fullLFN
         except Exception:
             # check if file exists
             # if os.path.exists(self.xmlFile):
             # the file-existence check is disabled, so this branch is always taken
             if True:
                 type, value, traceBack = sys.exc_info()
                 self.logger.error(": %s %s" % (type, value))
                 # set failed anyway
                 self.job.jobStatus = 'failed'
                 # XML error happens when pilot got killed due to wall-time limit or failures in wrapper
                 # set the DDM error only when pilot and payload reported no
                 # error and the job was not flagged with EC_WorkerDone
                 if (self.job.pilotErrorCode in [0,'0','NULL']) and \
                    (self.job.taskBufferErrorCode not in [pandaserver.taskbuffer.ErrorCode.EC_WorkerDone]) and \
                    (self.job.transExitCode  in [0,'0','NULL']):
                     self.job.ddmErrorCode = pandaserver.dataservice.ErrorCode.EC_Adder
                     self.job.ddmErrorDiag = "Could not get GUID/LFN/MD5/FSIZE/SURL from pilot XML"
                 return 2
             else:
                 # XML was deleted
                 # (unreachable while the condition above is the constant True)
                 return 1
     # parse metadata to get nEvents
     nEventsFrom = None
     try:
         root = xml.dom.minidom.parseString(self.job.metadata)
         files = root.getElementsByTagName('File')
         for file in files:
             # get GUID
             guid = str(file.getAttribute('ID'))
             # get PFN and LFN nodes
             logical = file.getElementsByTagName('logical')[0]
             lfnNode = logical.getElementsByTagName('lfn')[0]
             # convert UTF8 to Raw
             lfn = str(lfnNode.getAttribute('name'))
             guidMap[lfn] = guid
             # get metadata
             nevents = None
             for meta in file.getElementsByTagName('metadata'):
                 # look for the number of events
                 name = str(meta.getAttribute('att_name'))
                 if name == 'events':
                     nevents = long(meta.getAttribute('att_value'))
                     nEventsMap[lfn] = nevents
                     break
         nEventsFrom = "xml"
     except Exception:
         # best effort: metadata is not necessarily XML
         pass
     # parse json
     try:
         import json
         jsonDict = json.loads(self.job.metadata)
         for jsonFileItem in jsonDict['files']['output']:
             for jsonSubFileItem in jsonFileItem['subFiles']:
                 lfn = str(jsonSubFileItem['name'])
                 # nentries and file_guid are taken independently when available
                 try:
                     nevents = long(jsonSubFileItem['nentries'])
                     nEventsMap[lfn] = nevents
                 except Exception:
                     pass
                 try:
                     guid = str(jsonSubFileItem['file_guid'])
                     guidMap[lfn] = guid
                 except Exception:
                     pass
         nEventsFrom = "json"
     except Exception:
         # best effort: metadata is not necessarily JSON
         pass
     # use nEvents and GUIDs reported by the pilot if no job report
     if self.job.metadata == 'NULL' and self.jobStatus == 'finished' and self.job.nEvents > 0 \
             and self.job.prodSourceLabel in ['managed']:
         # the job-level number of events is assigned to every output file
         for file in self.job.Files:
             if file.type == 'output':
                 nEventsMap[file.lfn] = self.job.nEvents
         for lfn, guid in zip(lfns, guids):
             guidMap[lfn] = guid
         nEventsFrom = "pilot"
     self.logger.debug('nEventsMap=%s' % str(nEventsMap))
     self.logger.debug('nEventsFrom=%s' % str(nEventsFrom))
     self.logger.debug('guidMap=%s' % str(guidMap))
     self.logger.debug('self.job.jobStatus=%s in parseXML' %
                       self.job.jobStatus)
     self.logger.debug(
         'isES=%s isJumbo=%s' % (EventServiceUtils.isEventServiceJob(
             self.job), EventServiceUtils.isJumboJob(self.job)))
     # get lumi block number
     lumiBlockNr = self.job.getLumiBlockNr()
     # copy files for variable number of outputs
     tmpStat = self.copyFilesForVariableNumOutputs(lfns)
     if not tmpStat:
         self.logger.error(
             "failed to copy files for variable number of outputs")
         return 2
     # check files
     # fileList keeps the LFNs as they were before any replacement with full LFNs
     fileList = []
     for file in self.job.Files:
         fileList.append(file.lfn)
         if file.type == 'input':
             # an input file listed in the report is marked according to the job label
             if file.lfn in lfns:
                 if self.job.prodSourceLabel in ['user', 'panda']:
                     # skipped file
                     file.status = 'skipped'
                 elif self.job.prodSourceLabel in [
                         'managed', 'test'
                 ] + JobUtils.list_ptest_prod_sources:
                     # failed by pilot
                     file.status = 'failed'
         elif file.type == 'output' or file.type == 'log':
             # add only log file for failed jobs
             if self.jobStatus == 'failed' and file.type != 'log':
                 file.status = 'failed'
                 continue
             # set failed if it is missing in XML
             if file.lfn not in lfns:
                 if (self.job.jobStatus == 'finished' and EventServiceUtils.isEventServiceJob(self.job)) \
                         or EventServiceUtils.isJumboJob(self.job):
                     # unset file status for ES jobs
                     pass
                 elif file.isAllowedNoOutput():
                     # allowed not to be produced
                     file.status = 'nooutput'
                     self.logger.debug('set {0} to status={1}'.format(
                         file.lfn, file.status))
                 else:
                     # a missing mandatory output fails the file and the whole job
                     file.status = 'failed'
                     self.job.jobStatus = 'failed'
                     self.job.ddmErrorCode = pandaserver.dataservice.ErrorCode.EC_Adder
                     self.job.ddmErrorDiag = "expected output {0} is missing in pilot XML".format(
                         file.lfn)
                     self.logger.error(self.job.ddmErrorDiag)
                 continue
             # look for GUID with LFN
             try:
                 i = lfns.index(file.lfn)
                 file.GUID = guids[i]
                 file.fsize = fsizes[i]
                 file.md5sum = md5sums[i]
                 file.checksum = chksums[i]
                 surl = surls[i]
                 # status
                 file.status = 'ready'
                 # change to full LFN
                 if file.lfn in fullLfnMap:
                     file.lfn = fullLfnMap[file.lfn]
                 # add SURL to extraInfo
                 # (keyed by the full LFN when the replacement above happened)
                 self.extraInfo['surl'][file.lfn] = surl
                 # add nevents
                 if file.lfn in nEventsMap:
                     self.extraInfo['nevents'][file.lfn] = nEventsMap[
                         file.lfn]
             except Exception:
                 # status
                 file.status = 'failed'
                 type, value, traceBack = sys.exc_info()
                 self.logger.error(": %s %s" % (type, value))
             # set lumi block number
             if lumiBlockNr is not None and file.status != 'failed':
                 self.extraInfo['lbnr'][file.lfn] = lumiBlockNr
     self.extraInfo['guid'] = guidMap
     # check consistency between XML and filesTable
     for lfn in lfns:
         if lfn not in fileList:
             # a reported file unknown to the job fails the job and all its files
             self.logger.error("%s is not found in filesTable" % lfn)
             self.job.jobStatus = 'failed'
             for tmpFile in self.job.Files:
                 tmpFile.status = 'failed'
             self.job.ddmErrorCode = pandaserver.dataservice.ErrorCode.EC_Adder
             self.job.ddmErrorDiag = "pilot produced {0} inconsistently with jobdef".format(
                 lfn)
             return 2
     # return
     self.logger.debug("parseXML end")
     return 0

Пример #7

Показать файл

Файл: AdderGen.py Проект: PanDAWMS/panda-server

    def run(self):
        """Process the output report of one job and finalize the job record.

        The report is fetched with ``taskBuffer.getJobOutputReport`` and the
        job with ``taskBuffer.peekJobs``.  Depending on what is found:

        * job missing, already in a final/merging state, or attemptNr
          mismatch: only a log message is written;
        * ``self.jobStatus == EventServiceUtils.esRegStatus``: the adder
          plugin's ``registerEventServiceFiles`` is called;
        * otherwise: file status is cross-checked with JEDI, the job-cloning
          semaphore is checked, the XML is parsed, the adder plugin is
          executed, retrial rules are applied for failures, job/file
          statuses are set, the DB is updated and Closer is run for the
          destination blocks (and for associated cloned jobs).

        On normal completion the job output report is deleted.  When a
        temporary plugin error is ignored, when the DB update fails, or when
        any exception is raised, the report is unlocked instead.

        Returns:
            None.  Exceptions are caught and logged, never propagated.
        """
        try:
            self.logger.debug("new start: %s attemptNr=%s" %
                              (self.jobStatus, self.attemptNr))

            # got lock, get the report
            report_dict = self.taskBuffer.getJobOutputReport(
                panda_id=self.jobID, attempt_nr=self.attemptNr)
            self.data = report_dict.get('data')

            # query job
            self.job = self.taskBuffer.peekJobs([self.jobID],
                                                fromDefined=False,
                                                fromWaiting=False,
                                                forAnal=True)[0]
            # check if job has finished
            if self.job is None:
                self.logger.debug(': job not found in DB')
            elif self.job.jobStatus in [
                    'finished', 'failed', 'unknown', 'merging'
            ]:
                self.logger.error(': invalid state -> %s' % self.job.jobStatus)
            elif self.attemptNr is not None and self.job.attemptNr != self.attemptNr:
                self.logger.error('wrong attemptNr -> job=%s <> %s' %
                                  (self.job.attemptNr, self.attemptNr))
            elif self.jobStatus == EventServiceUtils.esRegStatus:
                # instantiate concrete plugin
                adderPluginClass = self.getPluginClass(self.job.VO,
                                                       self.job.cloud)
                adderPlugin = adderPluginClass(self.job,
                                               taskBuffer=self.taskBuffer,
                                               siteMapper=self.siteMapper,
                                               logger=self.logger)
                # execute
                self.logger.debug('plugin is ready for ES file registration')
                adderPlugin.registerEventServiceFiles()
            else:
                # check file status in JEDI
                if not self.job.isCancelled() and self.job.taskBufferErrorCode not in \
                                                      [pandaserver.taskbuffer.ErrorCode.EC_PilotRetried]:
                    fileCheckInJEDI = self.taskBuffer.checkInputFileStatusInJEDI(
                        self.job)
                    self.logger.debug("check file status in JEDI : {0}".format(
                        fileCheckInJEDI))
                    if fileCheckInJEDI is None:
                        raise RuntimeError(
                            'failed to check file status in JEDI')
                    if fileCheckInJEDI is False:
                        # set job status to failed since some file status is wrong in JEDI
                        self.jobStatus = 'failed'
                        self.job.ddmErrorCode = pandaserver.dataservice.ErrorCode.EC_Adder
                        errStr = "inconsistent file status between Panda and JEDI. "
                        errStr += "failed to avoid duplicated processing caused by synchronization failure"
                        self.job.ddmErrorDiag = errStr
                        self.logger.debug(
                            "set jobStatus={0} since input is inconsistent between Panda and JEDI"
                            .format(self.jobStatus))
                    elif self.job.jobSubStatus in ['pilot_closed']:
                        # terminated by the pilot
                        self.logger.debug(
                            "going to closed since terminated by the pilot")
                        retClosed = self.taskBuffer.killJobs([self.jobID],
                                                             'pilot', '60',
                                                             True)
                        if retClosed[0] is True:
                            self.logger.debug("end")
                            # remove Catalog
                            self.taskBuffer.deleteJobOutputReport(
                                panda_id=self.jobID, attempt_nr=self.attemptNr)
                            return
                    # check for cloned jobs
                    if EventServiceUtils.isJobCloningJob(self.job):
                        checkJC = self.taskBuffer.checkClonedJob(self.job)
                        if checkJC is None:
                            raise RuntimeError(
                                'failed to check the cloned job')
                        # failed to lock semaphore
                        if checkJC['lock'] is False:
                            self.jobStatus = 'failed'
                            self.job.ddmErrorCode = pandaserver.dataservice.ErrorCode.EC_Adder
                            self.job.ddmErrorDiag = "failed to lock semaphore for job cloning"
                            self.logger.debug(
                                "set jobStatus={0} since did not get semaphore for job cloning"
                                .format(self.jobStatus))
                # use failed for cancelled/closed jobs
                if self.job.isCancelled():
                    self.jobStatus = 'failed'
                    # reset error codes to skip retrial module
                    self.job.pilotErrorCode = 0
                    self.job.exeErrorCode = 0
                    self.job.ddmErrorCode = 0
                # keep old status
                oldJobStatus = self.job.jobStatus
                # set job status
                if self.job.jobStatus not in ['transferring']:
                    self.job.jobStatus = self.jobStatus
                addResult = None
                adderPlugin = None
                # parse XML
                parseResult = self.parseXML()
                if parseResult < 2:
                    # interaction with DDM
                    try:
                        # instantiate concrete plugin
                        adderPluginClass = self.getPluginClass(
                            self.job.VO, self.job.cloud)
                        adderPlugin = adderPluginClass(
                            self.job,
                            taskBuffer=self.taskBuffer,
                            siteMapper=self.siteMapper,
                            extraInfo=self.extraInfo,
                            logger=self.logger)
                        # execute
                        self.logger.debug('plugin is ready')
                        adderPlugin.execute()
                        addResult = adderPlugin.result
                        self.logger.debug('plugin done with %s' %
                                          (addResult.statusCode))
                    except Exception:
                        errtype, errvalue = sys.exc_info()[:2]
                        self.logger.error(
                            "failed to execute AdderPlugin for VO={0} with {1}:{2}"
                            .format(self.job.VO, errtype, errvalue))
                        self.logger.error(
                            "failed to execute AdderPlugin for VO={0} with {1}"
                            .format(self.job.VO, traceback.format_exc()))
                        addResult = None
                        self.job.ddmErrorCode = pandaserver.dataservice.ErrorCode.EC_Adder
                        self.job.ddmErrorDiag = "AdderPlugin failure"

                    # ignore temporary errors
                    if self.ignoreTmpError and addResult is not None and addResult.isTemporary(
                    ):
                        self.logger.debug(': ignore %s ' %
                                          self.job.ddmErrorDiag)
                        self.logger.debug('escape')
                        # unlock job output report
                        self.taskBuffer.unlockJobOutputReport(
                            panda_id=self.jobID,
                            attempt_nr=self.attemptNr,
                            pid=self.pid,
                            lock_offset=self.lock_offset)
                        return
                    # failed
                    if addResult is None or not addResult.isSucceeded():
                        self.job.jobStatus = 'failed'
                # set file status for failed jobs or failed transferring jobs
                self.logger.debug(
                    "status after plugin call :job.jobStatus=%s jobStatus=%s" %
                    (self.job.jobStatus, self.jobStatus))
                if self.job.jobStatus == 'failed' or self.jobStatus == 'failed':
                    # First of all: check if job failed and in this case take first actions according to error table
                    source, error_code, error_diag = None, None, None
                    errors = []
                    if self.job.pilotErrorCode:
                        source = 'pilotErrorCode'
                        error_code = self.job.pilotErrorCode
                        error_diag = self.job.pilotErrorDiag
                        errors.append({
                            'source': source,
                            'error_code': error_code,
                            'error_diag': error_diag
                        })
                    if self.job.exeErrorCode:
                        source = 'exeErrorCode'
                        error_code = self.job.exeErrorCode
                        error_diag = self.job.exeErrorDiag
                        errors.append({
                            'source': source,
                            'error_code': error_code,
                            'error_diag': error_diag
                        })
                    if self.job.ddmErrorCode:
                        source = 'ddmErrorCode'
                        error_code = self.job.ddmErrorCode
                        error_diag = self.job.ddmErrorDiag
                        errors.append({
                            'source': source,
                            'error_code': error_code,
                            'error_diag': error_diag
                        })
                    if self.job.transExitCode:
                        source = 'transExitCode'
                        error_code = self.job.transExitCode
                        error_diag = ''
                        errors.append({
                            'source': source,
                            'error_code': error_code,
                            'error_diag': error_diag
                        })

                    # source/error_code hold the last non-zero error found above
                    if source and error_code:
                        try:
                            self.logger.debug(
                                "AdderGen.run will call apply_retrial_rules")
                            retryModule.apply_retrial_rules(
                                self.taskBuffer, self.job.PandaID, errors,
                                self.job.attemptNr)
                            self.logger.debug("apply_retrial_rules is back")
                        except Exception as e:
                            self.logger.error(
                                "apply_retrial_rules excepted and needs to be investigated (%s): %s"
                                % (e, traceback.format_exc()))

                    self.job.jobStatus = 'failed'
                    for file in self.job.Files:
                        if file.type in ['output', 'log']:
                            if addResult is not None and file.lfn in addResult.mergingFiles:
                                file.status = 'merging'
                            else:
                                file.status = 'failed'
                else:
                    # reset errors
                    self.job.jobDispatcherErrorCode = 0
                    self.job.jobDispatcherErrorDiag = 'NULL'
                    # set status
                    if addResult is not None and addResult.mergingFiles != []:
                        # set status for merging:
                        for file in self.job.Files:
                            if file.lfn in addResult.mergingFiles:
                                file.status = 'merging'
                        self.job.jobStatus = 'merging'
                        # propagate transition to prodDB
                        self.job.stateChangeTime = time.strftime(
                            '%Y-%m-%d %H:%M:%S', time.gmtime())
                    elif addResult is not None and addResult.transferringFiles != []:
                        # set status for transferring
                        for file in self.job.Files:
                            if file.lfn in addResult.transferringFiles:
                                file.status = 'transferring'
                        self.job.jobStatus = 'transferring'
                        self.job.jobSubStatus = None
                        # propagate transition to prodDB
                        self.job.stateChangeTime = time.strftime(
                            '%Y-%m-%d %H:%M:%S', time.gmtime())
                    else:
                        self.job.jobStatus = 'finished'
                # endtime
                if self.job.endTime == 'NULL':
                    self.job.endTime = time.strftime('%Y-%m-%d %H:%M:%S',
                                                     time.gmtime())
                # output size and # of outputs
                self.job.nOutputDataFiles = 0
                self.job.outputFileBytes = 0
                for tmpFile in self.job.Files:
                    if tmpFile.type == 'output':
                        self.job.nOutputDataFiles += 1
                        try:
                            self.job.outputFileBytes += tmpFile.fsize
                        except Exception:
                            # fsize that cannot be added is left out of the total
                            pass
                # protection
                maxOutputFileBytes = 99999999999
                if self.job.outputFileBytes > maxOutputFileBytes:
                    self.job.outputFileBytes = maxOutputFileBytes
                # set cancelled state
                if self.job.commandToPilot == 'tobekilled' and self.job.jobStatus == 'failed':
                    self.job.jobStatus = 'cancelled'
                # update job
                if oldJobStatus in ['cancelled', 'closed']:
                    pass
                else:
                    self.logger.debug("updating DB")
                    retU = self.taskBuffer.updateJobs(
                        [self.job],
                        False,
                        oldJobStatusList=[oldJobStatus],
                        extraInfo=self.extraInfo)
                    self.logger.debug("retU: %s" % retU)
                    # failed
                    if not retU[0]:
                        self.logger.error(
                            'failed to update DB for pandaid={0}'.format(
                                self.job.PandaID))
                        # unlock job output report
                        self.taskBuffer.unlockJobOutputReport(
                            panda_id=self.jobID,
                            attempt_nr=self.attemptNr,
                            pid=self.pid,
                            lock_offset=self.lock_offset)
                        return

                    try:
                        # updateJobs was successful and it failed a job with taskBufferErrorCode
                        self.logger.debug("AdderGen.run will peek the job")
                        job_tmp = self.taskBuffer.peekJobs(
                            [self.job.PandaID],
                            fromDefined=False,
                            fromArchived=True,
                            fromWaiting=False)[0]
                        self.logger.debug(
                            "status {0}, taskBufferErrorCode {1}, taskBufferErrorDiag {2}"
                            .format(job_tmp.jobStatus,
                                    job_tmp.taskBufferErrorCode,
                                    job_tmp.taskBufferErrorDiag))
                        if job_tmp.jobStatus == 'failed' and job_tmp.taskBufferErrorCode:
                            source = 'taskBufferErrorCode'
                            error_code = job_tmp.taskBufferErrorCode
                            error_diag = job_tmp.taskBufferErrorDiag
                            errors = [{
                                'source': source,
                                'error_code': error_code,
                                'error_diag': error_diag
                            }]
                            self.logger.debug(
                                "AdderGen.run 2 will call apply_retrial_rules")
                            retryModule.apply_retrial_rules(
                                self.taskBuffer, job_tmp.PandaID, errors,
                                job_tmp.attemptNr)
                            self.logger.debug("apply_retrial_rules 2 is back")
                    except IndexError:
                        pass
                    except Exception as e:
                        self.logger.error(
                            "apply_retrial_rules 2 excepted and needs to be investigated (%s): %s"
                            % (e, traceback.format_exc()))

                    # setup for closer
                    if not (EventServiceUtils.isEventServiceJob(self.job)
                            and self.job.isCancelled()):
                        destDBList = []
                        guidList = []
                        for file in self.job.Files:
                            # ignore inputs
                            if file.type == 'input':
                                continue
                            # skip pseudo datasets
                            if file.destinationDBlock in ['', None, 'NULL']:
                                continue
                            # start closer for output/log datasets
                            if file.destinationDBlock not in destDBList:
                                destDBList.append(file.destinationDBlock)
                            # collect GUIDs
                            if (self.job.prodSourceLabel=='panda' or (self.job.prodSourceLabel in ['rucio_test'] + JobUtils.list_ptest_prod_sources and \
                                                                      self.job.processingType in ['pathena','prun','gangarobot-rctest','hammercloud'])) \
                                                                      and file.type == 'output':
                                # extract base LFN since LFN was changed to full LFN for CMS
                                baseLFN = file.lfn.split('/')[-1]
                                guidList.append({
                                    'lfn': baseLFN,
                                    'guid': file.GUID,
                                    'type': file.type,
                                    'checksum': file.checksum,
                                    'md5sum': file.md5sum,
                                    'fsize': file.fsize,
                                    'scope': file.scope
                                })
                        if guidList != []:
                            self.taskBuffer.setGUIDs(guidList)
                        if destDBList != []:
                            # start Closer
                            if adderPlugin is not None and hasattr(
                                    adderPlugin, 'datasetMap'
                            ) and adderPlugin.datasetMap != {}:
                                cThr = Closer.Closer(
                                    self.taskBuffer,
                                    destDBList,
                                    self.job,
                                    datasetMap=adderPlugin.datasetMap)
                            else:
                                cThr = Closer.Closer(self.taskBuffer,
                                                     destDBList, self.job)
                            self.logger.debug("start Closer")
                            # run() is called directly instead of start()/join()
                            cThr.run()
                            del cThr
                            self.logger.debug("end Closer")
                        # run closer for associated parallel jobs
                        if EventServiceUtils.isJobCloningJob(self.job):
                            assDBlockMap = self.taskBuffer.getDestDBlocksWithSingleConsumer(
                                self.job.jediTaskID, self.job.PandaID,
                                destDBList)
                            for assJobID in assDBlockMap:
                                assDBlocks = assDBlockMap[assJobID]
                                assJob = self.taskBuffer.peekJobs(
                                    [assJobID],
                                    fromDefined=False,
                                    fromArchived=False,
                                    fromWaiting=False,
                                    forAnal=True)[0]
                                # check the associated job just peeked, not
                                # self.job, which is known to be set here
                                if assJob is None:
                                    self.logger.debug(
                                        ': associated job PandaID={0} not found in DB'
                                        .format(assJobID))
                                else:
                                    cThr = Closer.Closer(
                                        self.taskBuffer, assDBlocks, assJob)
                                    self.logger.debug(
                                        "start Closer for PandaID={0}".format(
                                            assJobID))
                                    # run() is called directly instead of start()/join()
                                    cThr.run()
                                    del cThr
                                    self.logger.debug(
                                        "end Closer for PandaID={0}".format(
                                            assJobID))
            self.logger.debug("end")
            # remove Catalog
            self.taskBuffer.deleteJobOutputReport(panda_id=self.jobID,
                                                  attempt_nr=self.attemptNr)
            del self.data
            del report_dict
        except Exception as e:
            errStr = ": {} {}".format(str(e), traceback.format_exc())
            self.logger.error(errStr)
            self.logger.error("except")
            # unlock job output report
            self.taskBuffer.unlockJobOutputReport(panda_id=self.jobID,
                                                  attempt_nr=self.attemptNr,
                                                  pid=self.pid,
                                                  lock_offset=self.lock_offset)

Пример #8

Показать файл

 def run(self):
     """Freeze the datasets in ``self.datasets`` whose files are all final.

     For each ``(vuid, name, modDate)`` entry the file statuses registered
     for ``destinationDBlock == name`` are read from filesTable4.  When every
     status is one of ready/failed/skipped/merging/finished (and, for event
     service merge jobs, Closer confirms the sub datasets in the jobset),
     the dataset is closed through ``rucioAPI`` where applicable and marked
     'completed' in the Datasets table; ``panda.um.`` datasets trigger a
     Closer run for their merging job instead.  Otherwise only the
     modification date of the dataset is refreshed.

     ``self.lock`` is held for the whole run and ``self.proxyLock`` around
     each taskBuffer/Closer call.  Both are released in ``finally`` blocks
     so that an exception cannot leave them held, and the thread always
     removes itself from ``self.pool``.

     Returns:
         None.  Exceptions raised while processing are logged.
     """
     def _locked(func, *args, **kwargs):
         # call func while holding proxyLock; release it even if func raises
         self.proxyLock.acquire()
         try:
             return func(*args, **kwargs)
         finally:
             self.proxyLock.release()

     self.lock.acquire()
     try:
         for vuid,name,modDate in self.datasets:
             _logger.debug("Freezer start %s %s" % (modDate,name))
             retF,resF = _locked(
                 taskBuffer.querySQLS,
                 "SELECT /*+ index(tab FILESTABLE4_DESTDBLOCK_IDX) */ PandaID,status FROM ATLAS_PANDA.filesTable4 tab WHERE destinationDBlock=:destinationDBlock ",
                 {':destinationDBlock':name})
             if retF < 0:
                 _logger.error("SQL error")
             else:
                 allFinished = True
                 onePandaID = None
                 for tmpPandaID,tmpFileStatus in resF:
                     onePandaID = tmpPandaID
                     if tmpFileStatus not in ['ready', 'failed', 'skipped', 'merging', 'finished']:
                         allFinished = False
                         break
                 # check sub datasets in the jobset for event service job
                 if allFinished:
                     tmpJobs = _locked(taskBuffer.getFullJobStatus, [onePandaID])
                     if len(tmpJobs) > 0 and tmpJobs[0] is not None:
                         if EventServiceUtils.isEventServiceMerge(tmpJobs[0]):
                             self.proxyLock.acquire()
                             try:
                                 cThr = Closer(taskBuffer, [], tmpJobs[0])
                                 allFinished = cThr.checkSubDatasetsInJobset()
                             finally:
                                 self.proxyLock.release()
                             _logger.debug("closer checked sub datasets in the jobset for %s : %s" % (name, allFinished))
                 # all files are final (also true when filesTable has no files)
                 if allFinished:
                     _logger.debug("freeze %s " % name)
                     dsExists = True
                     if name.startswith(('pandaddm_', 'user.', 'group.', 'hc_test.', 'panda.um.')):
                         dsExists = False
                     if name.startswith('panda.um.'):
                         retMer,resMer = _locked(
                             taskBuffer.querySQLS,
                             "SELECT /*+ index(tab FILESTABLE4_DESTDBLOCK_IDX) */ PandaID FROM ATLAS_PANDA.filesTable4 tab WHERE destinationDBlock=:destinationDBlock AND status IN (:statusM,:statusF) ",
                             {':destinationDBlock':name,
                              ':statusM':'merging',
                              ':statusF':'failed'})
                         if resMer is not None and len(resMer)>0:
                             mergeID = resMer[0][0]
                             # get merging jobs
                             mergingJobs = _locked(taskBuffer.peekJobs, [mergeID],
                                                   fromDefined=False,fromArchived=False,fromWaiting=False)
                             mergeJob = mergingJobs[0]
                             if mergeJob is not None:
                                 tmpDestDBlocks = []
                                 # get destDBlock
                                 for tmpFile in mergeJob.Files:
                                     if tmpFile.type in ['output','log']:
                                         if tmpFile.destinationDBlock not in tmpDestDBlocks:
                                             tmpDestDBlocks.append(tmpFile.destinationDBlock)
                                 # run
                                 _logger.debug("start JEDI closer for %s " % name)
                                 self.proxyLock.acquire()
                                 try:
                                     cThr = Closer(taskBuffer,tmpDestDBlocks,mergeJob)
                                     cThr.start()
                                     cThr.join()
                                 finally:
                                     self.proxyLock.release()
                                 _logger.debug("end JEDI closer for %s " % name)
                                 continue
                             else:
                                 _logger.debug("failed to get merging job for %s " % name)
                         else:
                             _logger.debug("failed to get merging file for %s " % name)
                         status,out = True,''
                     elif dsExists:
                         # check if dataset exists
                         status,out = rucioAPI.getMetaData(name)
                         if status == True:
                             if out is not None:
                                 try:
                                     rucioAPI.closeDataset(name)
                                     status = True
                                 except Exception:
                                     errtype,errvalue = sys.exc_info()[:2]
                                     out = 'failed to freeze : {0} {1}'.format(errtype,errvalue)
                                     status = False
                             else:
                                 # dataset not exist
                                 status,out = True,''
                                 dsExists = False
                     else:
                         status,out = True,''
                     if not status:
                         _logger.error('{0} failed to freeze with {1}'.format(name,out))
                     else:
                         varMap = {}
                         varMap[':vuid'] = vuid
                         varMap[':status'] = 'completed'
                         _locked(taskBuffer.querySQLS,
                                 "UPDATE ATLAS_PANDA.Datasets SET status=:status,modificationdate=CURRENT_DATE WHERE vuid=:vuid",
                                 varMap)
                         if name.startswith('pandaddm_') or name.startswith('panda.um.') or not dsExists:
                             continue
                         # set tobedeleted to dis
                         setTobeDeletedToDis(name)
                         # count # of files
                         status,out = rucioAPI.getNumberOfFiles(name)
                         if status is not True:
                             if status is False:
                                 _logger.error(out)
                         else:
                             _logger.debug(out)
                             try:
                                 nFile = int(out)
                                 _logger.debug(nFile)
                                 if nFile == 0:
                                     # erase dataset
                                     _logger.debug('erase %s' % name)
                                     status,out = rucioAPI.eraseDataset(name)
                                     _logger.debug('OK with %s' % name)
                             except Exception:
                                 # errors while counting/erasing are ignored
                                 pass
                 else:
                     _logger.debug("wait %s " % name)
                     _locked(taskBuffer.querySQLS,
                             "UPDATE ATLAS_PANDA.Datasets SET modificationdate=CURRENT_DATE WHERE vuid=:vuid",
                             {':vuid':vuid})
             _logger.debug("end %s " % name)
     except Exception:
         errStr = traceback.format_exc()
         _logger.error(errStr)
     finally:
         # always leave the pool and give the lock back, whatever happened
         try:
             self.pool.remove(self)
         finally:
             self.lock.release()

Пример #9

Показать файл

 def updateJobs(self, jobList, tmpLog):
     """Sort jobs by state and hand each group to the task buffer.

     Jobs are classified as follows:

     * ``jobStatus`` in ``'failed'``/``'cancelled'`` -> failed group
     * ``jobStatus == 'waiting'``                    -> waiting group
     * ``dispatchDBlock == 'NULL'``                  -> activate group
     * anything else                                 -> update group, with
       ``jobStatus`` set to ``"assigned"``

     Jobs in the activate/update groups whose events are all done (see
     ``eventsAllDone`` below) are pushed to the task buffer one by one and
     marked ``"finished"``; the remaining jobs are sent in bulk.

     :param jobList: iterable of job objects; their ``jobStatus`` may be
                     modified in place
     :param tmpLog: logger object providing ``debug``
     """
     toUpdate = []
     failedJobs = []
     toActivate = []
     waitingJobs = []
     # sort out jobs
     for job in jobList:
         if job.jobStatus in ['failed', 'cancelled']:
             # failed jobs
             failedJobs.append(job)
         elif job.jobStatus == 'waiting':
             # waiting
             waitingJobs.append(job)
         elif job.dispatchDBlock == 'NULL':
             # no input jobs
             toActivate.append(job)
         else:
             # normal jobs : change status
             job.jobStatus = "assigned"
             toUpdate.append(job)

     def eventsAllDone(job):
         # same short-circuit order as the original inline condition
         return (job.notDiscardEvents() and job.allOkEvents()
                 and not EventServiceUtils.isEventServiceMerge(job))

     # trigger merge generation if all events are done
     pendingActivate = []
     nFinished = 0
     for job in toActivate:
         if eventsAllDone(job):
             self.taskBuffer.activateJobs([job])
             # change status
             job.jobStatus = "finished"
             self.taskBuffer.updateJobs([job], False)
             nFinished += 1
         else:
             pendingActivate.append(job)
     tmpLog.debug('# of finished jobs in activated : {0}'.format(nFinished))
     pendingUpdate = []
     nFinished = 0
     for job in toUpdate:
         if eventsAllDone(job):
             # the job is updated once as "assigned" and once as "finished"
             self.taskBuffer.updateJobs([job], True)
             # change status
             job.jobStatus = "finished"
             self.taskBuffer.updateJobs([job], True)
             nFinished += 1
         else:
             pendingUpdate.append(job)
     tmpLog.debug('# of finished jobs in defined : {0}'.format(nFinished))
     # update DB
     tmpLog.debug('# of activated jobs : {0}'.format(len(pendingActivate)))
     self.taskBuffer.activateJobs(pendingActivate)
     tmpLog.debug('# of updated jobs : {0}'.format(len(pendingUpdate)))
     self.taskBuffer.updateJobs(pendingUpdate, True)
     tmpLog.debug('# of failed jobs : {0}'.format(len(failedJobs)))
     self.taskBuffer.updateJobs(failedJobs, True)
     tmpLog.debug('# of waiting jobs : {0}'.format(len(waitingJobs)))
     self.taskBuffer.keepJobs(waitingJobs)

Пример #10

Показать файл

Файл: AtlasTaskSetupper.py Проект: PanDAWMS/panda-jedi

 def doSetup(self,taskSpec,datasetToRegister,pandaJobs):
     """Register the datasets/containers a task needs in DDM before it runs.

     Steps, in order:

     1. For every dataset ID in ``datasetToRegister`` (for ``user`` tasks the
        list is first extended in place with the output/log dataset IDs of
        jobs that do not produce unmerged files), register the dataset and
        its container in DDM if ``listDatasets`` returns an empty list for
        them, optionally register a location rule and a double copy, add the
        dataset to its container, and mark the dataset spec ``registered``.
     2. If ``taskSpec.registerEsFiles()`` is true, register the event-service
        dataset and a ``type=DATADISK`` rule for it.
     3. For ``managed``/``test`` tasks, open every output/log destination
        dataset of ``pandaJobs`` and unset its lifetime.

     :param taskSpec: task specification object
     :param datasetToRegister: list of dataset IDs to register; may be
                               appended to in place for ``user`` tasks
     :param pandaJobs: list of job objects whose ``Files`` are inspected
     :return: ``self.SC_SUCCEEDED`` on success, ``self.SC_FATAL`` when any
              step fails or an exception is raised
     """
     # make logger
     tmpLog = MsgWrapper(logger,"<jediTaskID={0}>".format(taskSpec.jediTaskID))
     tmpLog.info('start label={0} taskType={1}'.format(taskSpec.prodSourceLabel,taskSpec.taskType))
     # returns
     retFatal = self.SC_FATAL
     retOK    = self.SC_SUCCEEDED
     try:
         # get DDM I/F
         ddmIF = self.ddmIF.getInterface(taskSpec.vo)
         # register datasets
         if datasetToRegister != [] or taskSpec.prodSourceLabel in ['user']:
             # prod vs anal
             userSetup = False
             if taskSpec.prodSourceLabel in ['user']:
                 userSetup = True
                 # collect datasetID to register datasets/containers just in case
                 for tmpPandaJob in pandaJobs:
                     if not tmpPandaJob.produceUnMerge():
                         for tmpFileSpec in tmpPandaJob.Files:
                             if tmpFileSpec.type in ['output','log']:
                                 if tmpFileSpec.datasetID not in datasetToRegister:
                                     datasetToRegister.append(tmpFileSpec.datasetID)
             tmpLog.info('datasetToRegister={0}'.format(str(datasetToRegister)))
             # get site mapper
             siteMapper = self.taskBufferIF.getSiteMapper()
             # loop over all datasets
             avDatasetList = []   # names registered by this call
             cnDatasetMap  = {}   # container name -> constituent dataset names
             for datasetID in datasetToRegister:
                 # get output and log datasets
                 tmpLog.info('getting datasetSpec with datasetID={0}'.format(datasetID))
                 tmpStat,datasetSpec = self.taskBufferIF.getDatasetWithID_JEDI(taskSpec.jediTaskID,
                                                                               datasetID)
                 if not tmpStat:
                     tmpLog.error('failed to get output and log datasets')
                     return retFatal
                 if datasetSpec.isPseudo():
                     tmpLog.info('skip pseudo dataset')
                     continue
                 # DDM backend
                 ddmBackEnd = taskSpec.getDdmBackEnd()
                 tmpLog.info('checking {0}'.format(datasetSpec.datasetName))
                 # check if dataset and container are available in DDM
                 for targetName in [datasetSpec.datasetName,datasetSpec.containerName]:
                     if targetName is None:
                         continue
                     if targetName not in avDatasetList:
                         # set lifetime (in days, per the log messages below)
                         if targetName.startswith('panda'):
                             if datasetSpec.type == 'trn_log' and taskSpec.prodSourceLabel == 'managed':
                                 lifetime = 365
                             else:
                                 lifetime = 14
                         else:
                             lifetime = None
                         # check dataset/container in DDM
                         tmpList = ddmIF.listDatasets(targetName)
                         if tmpList == []:
                             # get location
                             location = None
                             locForRule = None
                             if targetName == datasetSpec.datasetName:
                                 # dataset
                                 if datasetSpec.site in ['',None]:
                                     if DataServiceUtils.getDistributedDestination(datasetSpec.storageToken) is not None:
                                         locForRule = datasetSpec.destination
                                     elif DataServiceUtils.getDestinationSE(datasetSpec.storageToken) is not None:
                                         location = DataServiceUtils.getDestinationSE(datasetSpec.storageToken)
                                     elif taskSpec.cloud is not None:
                                         # use T1 SE
                                         tmpT1Name = siteMapper.getCloud(taskSpec.cloud)['source']
                                         location = siteMapper.getDdmEndpoint(tmpT1Name,datasetSpec.storageToken)
                                 else:
                                     # the original format string lacked the {1} placeholder,
                                     # so the token was silently dropped from the log
                                     tmpLog.info('site={0} token={1}'.format(datasetSpec.site,datasetSpec.storageToken))
                                     location = siteMapper.getDdmEndpoint(datasetSpec.site,datasetSpec.storageToken)
                             if locForRule is None:
                                 locForRule = location
                             # set metadata
                             if taskSpec.prodSourceLabel in ['managed','test'] and targetName == datasetSpec.datasetName:
                                 metaData = {}
                                 metaData['task_id'] = taskSpec.jediTaskID
                                 if taskSpec.campaign not in [None,'']:
                                     metaData['campaign'] = taskSpec.campaign
                                 if datasetSpec.getTransient() is not None:
                                     metaData['transient'] = datasetSpec.getTransient()
                             else:
                                 metaData = None
                             # register dataset/container
                             tmpLog.info('registering {0} with location={1} backend={2} lifetime={3} meta={4}'.format(
                                 targetName,location,ddmBackEnd,lifetime,str(metaData)))
                             tmpStat = ddmIF.registerNewDataset(targetName,backEnd=ddmBackEnd,location=location,
                                                                lifetime=lifetime,metaData=metaData)
                             if not tmpStat:
                                 tmpLog.error('failed to register {0}'.format(targetName))
                                 return retFatal
                             # procedures for user
                             if userSetup or DataServiceUtils.getDistributedDestination(datasetSpec.storageToken) is not None:
                                 # register location
                                 tmpToRegister = False
                                 if userSetup and targetName == datasetSpec.datasetName and datasetSpec.site not in ['',None]:
                                     userName = taskSpec.userName
                                     grouping = None
                                     tmpToRegister = True
                                 elif DataServiceUtils.getDistributedDestination(datasetSpec.storageToken) is not None:
                                     userName = None
                                     grouping = 'NONE'
                                     tmpToRegister = True
                                 # userName/grouping are only defined when tmpToRegister is True
                                 if tmpToRegister:
                                     activity = DataServiceUtils.getActivityForOut(taskSpec.prodSourceLabel)
                                     tmpLog.info('registering location={0} lifetime={1}days activity={2} grouping={3}'.format(
                                         locForRule,lifetime,activity,grouping))
                                     tmpStat = ddmIF.registerDatasetLocation(targetName,locForRule,owner=userName,
                                                                             lifetime=lifetime,backEnd=ddmBackEnd,
                                                                             activity=activity,grouping=grouping)
                                     if not tmpStat:
                                         tmpLog.error('failed to register location {0} for {1}'.format(locForRule,
                                                                                                       targetName))
                                         return retFatal
                                     # double copy
                                     if userSetup and datasetSpec.type == 'output':
                                         if datasetSpec.destination != datasetSpec.site:
                                             tmpLog.info('skip making double copy as destination={0} is not site={1}'.format(
                                                 datasetSpec.destination,datasetSpec.site))
                                         else:
                                             locForDouble = '(type=SCRATCHDISK)\\notforextracopy=1'
                                             tmpMsg  = 'registering double copy '
                                             tmpMsg += 'location="{0}" lifetime={1}days activity={2} for dataset={3}'.format(
                                                 locForDouble,lifetime,activity,targetName)
                                             tmpLog.info(tmpMsg)
                                             tmpStat = ddmIF.registerDatasetLocation(targetName,locForDouble,copies=2,owner=userName,
                                                                                     lifetime=lifetime,activity=activity,
                                                                                     grouping='NONE',weight='freespace',
                                                                                     ignore_availability=False)
                                             if not tmpStat:
                                                 tmpLog.error('failed to register double copylocation {0} for {1}'.format(
                                                     locForDouble,targetName))
                                                 return retFatal
                             avDatasetList.append(targetName)
                         else:
                             tmpLog.info('{0} already registered'.format(targetName))
                 # check if dataset is in the container
                 if datasetSpec.containerName is not None and datasetSpec.containerName != datasetSpec.datasetName:
                     # get list of constituent datasets in the container
                     # ("in" replaces dict.has_key, which does not exist on Python 3)
                     if datasetSpec.containerName not in cnDatasetMap:
                         cnDatasetMap[datasetSpec.containerName] = ddmIF.listDatasetsInContainer(datasetSpec.containerName)
                     # add dataset
                     if datasetSpec.datasetName not in cnDatasetMap[datasetSpec.containerName]:
                         tmpLog.info('adding {0} to {1}'.format(datasetSpec.datasetName,datasetSpec.containerName))
                         tmpStat = ddmIF.addDatasetsToContainer(datasetSpec.containerName,[datasetSpec.datasetName],
                                                                backEnd=ddmBackEnd)
                         if not tmpStat:
                             tmpLog.error('failed to add {0} to {1}'.format(datasetSpec.datasetName,
                                                                            datasetSpec.containerName))
                             return retFatal
                         cnDatasetMap[datasetSpec.containerName].append(datasetSpec.datasetName)
                     else:
                         tmpLog.info('{0} already in {1}'.format(datasetSpec.datasetName,datasetSpec.containerName))
                 # update dataset
                 datasetSpec.status = 'registered'
                 self.taskBufferIF.updateDataset_JEDI(datasetSpec,{'jediTaskID':taskSpec.jediTaskID,
                                                                   'datasetID':datasetID})
         # register ES datasets
         if taskSpec.registerEsFiles():
             targetName = EventServiceUtils.getEsDatasetName(taskSpec.jediTaskID)
             location = None
             metaData = {}
             metaData['task_id'] = taskSpec.jediTaskID
             metaData['hidden']  = True
             tmpLog.info('registering ES dataset {0} with location={1} meta={2}'.format(targetName,
                                                                                        location,
                                                                                        str(metaData)))
             tmpStat = ddmIF.registerNewDataset(targetName,location=location,metaData=metaData,
                                                resurrect=True)
             if not tmpStat:
                 tmpLog.error('failed to register ES dataset {0}'.format(targetName))
                 return retFatal
             # register rule
             location = 'type=DATADISK'
             activity = DataServiceUtils.getActivityForOut(taskSpec.prodSourceLabel)
             grouping = 'NONE'
             tmpLog.info('registering location={0} activity={1} grouping={2}'.format(location,
                                                                                     activity,
                                                                                     grouping))
             tmpStat = ddmIF.registerDatasetLocation(targetName,location,activity=activity,
                                                     grouping=grouping)
             if not tmpStat:
                 tmpLog.error('failed to register location {0} with {2} for {1}'.format(location,
                                                                                        targetName,
                                                                                        activity))
                 return retFatal
         # open datasets
         if taskSpec.prodSourceLabel in ['managed','test']:
             # get the list of output/log datasets (unique, first-seen order)
             outDatasetList = []
             for tmpPandaJob in pandaJobs:
                 for tmpFileSpec in tmpPandaJob.Files:
                     if tmpFileSpec.type in ['output','log']:
                         if tmpFileSpec.destinationDBlock not in outDatasetList:
                             outDatasetList.append(tmpFileSpec.destinationDBlock)
             # open datasets
             for outDataset in outDatasetList:
                 tmpLog.info('open {0}'.format(outDataset))
                 ddmIF.openDataset(outDataset)
                 # unset lifetime
                 ddmIF.setDatasetMetadata(outDataset,'lifetime',None)
         # return
         tmpLog.info('done')
         return retOK
     except Exception:
         # report the failure on the task and give up; SystemExit and
         # KeyboardInterrupt are deliberately left to propagate
         errtype,errvalue = sys.exc_info()[:2]
         tmpLog.error('doSetup failed with {0}:{1}'.format(errtype.__name__,errvalue))
         taskSpec.setErrDiag(tmpLog.uploadLog(taskSpec.jediTaskID))
         return retFatal

Пример #11

Показать файл

Файл: Watcher.py Проект: virthead/panda-server

    def run(self):
        """Watch one job and fail it when it stops reporting in time.

        Each iteration peeks the job ``self.pandaID`` from the task buffer
        and returns immediately if the job is not found or is no longer in a
        watched state.  If the job's ``modificationTime`` (or its
        ``endTime``, when set) is older than ``self.sleepTime`` minutes, the
        job is marked ``failed`` with a dispatcher error code/diagnostic,
        its output/log files are marked ``failed``, retrial rules are
        applied, and a ``Closer`` is run for the affected destination
        blocks; the method then returns.  Otherwise it returns when
        ``self.single`` is set, or sleeps ``60 * self.sleepTime`` seconds
        and checks again.

        Any exception is logged through ``_logger`` and ends the method.
        """
        try:
            while True:
                _logger.debug('%s start' % self.pandaID)
                # query job
                job = self.taskBuffer.peekJobs([self.pandaID],
                                               fromDefined=False,
                                               fromArchived=False,
                                               fromWaiting=False)[0]
                # check job status; this must come before any attribute
                # access on job, otherwise a missing job raises instead of
                # taking the "not found" exit
                if job is None:
                    _logger.debug('%s escape : not found' % self.pandaID)
                    return
                _logger.debug('%s in %s' % (self.pandaID, job.jobStatus))
                if job.jobStatus not in [
                        'running', 'sent', 'starting', 'holding', 'stagein',
                        'stageout'
                ]:
                    if job.jobStatus == 'transferring' and (
                            job.prodSourceLabel in ['user', 'panda']
                            or job.jobSubStatus not in [None, 'NULL', '']):
                        # keep watching these transferring jobs
                        pass
                    else:
                        _logger.debug('%s escape : %s' %
                                      (self.pandaID, job.jobStatus))
                        return
                # time limit
                timeLimit = datetime.datetime.utcnow() - datetime.timedelta(
                    minutes=self.sleepTime)
                if job.modificationTime < timeLimit or (
                        job.endTime != 'NULL' and job.endTime < timeLimit):
                    _logger.debug(
                        '%s %s lastmod:%s endtime:%s' %
                        (job.PandaID, job.jobStatus, str(
                            job.modificationTime), str(job.endTime)))
                    destDBList = []
                    if job.jobStatus == 'sent':
                        # sent job didn't receive reply from pilot within 30 min
                        job.jobDispatcherErrorCode = ErrorCode.EC_SendError
                        job.jobDispatcherErrorDiag = "Sent job didn't receive reply from pilot within 30 min"
                    elif job.exeErrorDiag == 'NULL' and job.pilotErrorDiag == 'NULL':
                        # lost heartbeat
                        if job.jobDispatcherErrorDiag == 'NULL':
                            if job.endTime == 'NULL':
                                # normal lost heartbeat
                                job.jobDispatcherErrorCode = ErrorCode.EC_Watcher
                                job.jobDispatcherErrorDiag = 'lost heartbeat : %s' % str(
                                    job.modificationTime)
                            else:
                                if job.jobStatus == 'holding':
                                    job.jobDispatcherErrorCode = ErrorCode.EC_Holding
                                elif job.jobStatus == 'transferring':
                                    job.jobDispatcherErrorCode = ErrorCode.EC_Transferring
                                else:
                                    job.jobDispatcherErrorCode = ErrorCode.EC_Timeout
                                job.jobDispatcherErrorDiag = 'timeout in {0} : last heartbeat at {1}'.format(
                                    job.jobStatus, str(job.endTime))
                            # get worker
                            workerSpecs = self.taskBuffer.getWorkersForJob(
                                job.PandaID)
                            if len(workerSpecs) > 0:
                                workerSpec = workerSpecs[0]
                                if workerSpec.status in [
                                        'finished', 'failed', 'cancelled',
                                        'missed'
                                ]:
                                    job.supErrorCode = SupErrors.error_codes[
                                        'WORKER_ALREADY_DONE']
                                    job.supErrorDiag = 'worker already {0} at {1} with {2}'.format(
                                        workerSpec.status,
                                        str(workerSpec.endTime),
                                        workerSpec.diagMessage)
                                    job.supErrorDiag = JobSpec.truncateStringAttr(
                                        'supErrorDiag', job.supErrorDiag)
                    else:
                        # job recovery failed
                        job.jobDispatcherErrorCode = ErrorCode.EC_Recovery
                        job.jobDispatcherErrorDiag = 'job recovery failed for %s hours' % (
                            self.sleepTime / 60)
                    # set job status
                    job.jobStatus = 'failed'
                    # set endTime for lost heartbeat
                    if job.endTime == 'NULL':
                        # normal lost heartbeat
                        job.endTime = job.modificationTime
                    # set files status
                    for tmpFile in job.Files:
                        if tmpFile.type == 'output' or tmpFile.type == 'log':
                            tmpFile.status = 'failed'
                            if tmpFile.destinationDBlock not in destDBList:
                                destDBList.append(tmpFile.destinationDBlock)
                    # event service
                    if EventServiceUtils.isEventServiceJob(
                            job
                    ) and not EventServiceUtils.isJobCloningJob(job):
                        eventStat = self.taskBuffer.getEventStat(
                            job.jediTaskID, job.PandaID)
                        # set sub status when no successful events
                        if EventServiceUtils.ST_finished not in eventStat:
                            job.jobSubStatus = 'es_heartbeat'
                    # update job
                    self.taskBuffer.updateJobs([job], False)
                    # start closer
                    if job.jobStatus == 'failed':

                        source = 'jobDispatcherErrorCode'
                        error_code = job.jobDispatcherErrorCode
                        error_diag = job.jobDispatcherErrorDiag

                        # retrial failures are logged but must not prevent
                        # the Closer from running
                        try:
                            _logger.debug(
                                "Watcher will call apply_retrial_rules")
                            retryModule.apply_retrial_rules(
                                self.taskBuffer, job.PandaID, source,
                                error_code, error_diag, job.attemptNr)
                            _logger.debug("apply_retrial_rules is back")
                        except Exception as e:
                            _logger.debug(
                                "apply_retrial_rules excepted and needs to be investigated (%s): %s"
                                % (e, traceback.format_exc()))

                        # updateJobs was successful and it failed a job with taskBufferErrorCode
                        try:

                            _logger.debug("Watcher.run will peek the job")
                            job_tmp = self.taskBuffer.peekJobs(
                                [job.PandaID],
                                fromDefined=False,
                                fromArchived=True,
                                fromWaiting=False)[0]
                            if job_tmp.taskBufferErrorCode:
                                source = 'taskBufferErrorCode'
                                error_code = job_tmp.taskBufferErrorCode
                                error_diag = job_tmp.taskBufferErrorDiag
                                _logger.debug(
                                    "Watcher.run 2 will call apply_retrial_rules"
                                )
                                retryModule.apply_retrial_rules(
                                    self.taskBuffer, job_tmp.PandaID, source,
                                    error_code, error_diag, job_tmp.attemptNr)
                                _logger.debug("apply_retrial_rules 2 is back")
                        except IndexError:
                            pass
                        except Exception as e:
                            # use the module logger like the rest of this
                            # method; no self.logger is visible here
                            _logger.error(
                                "apply_retrial_rules 2 excepted and needs to be investigated (%s): %s"
                                % (e, traceback.format_exc()))

                        cThr = Closer(self.taskBuffer, destDBList, job)
                        cThr.start()
                        cThr.join()
                    _logger.debug('%s end' % job.PandaID)
                    return
                # single action
                if self.single:
                    return
                # sleep
                time.sleep(60 * self.sleepTime)
        except Exception:
            errType, errValue = sys.exc_info()[:2]
            _logger.error("run() : %s %s" % (errType, errValue))
            return

Пример #12

Показать файл

Файл: Closer.py Проект: eschanet/QMonit

 def run(self):
     try:
         _logger.debug('%s Start %s' % (self.pandaID,self.job.jobStatus))
         flagComplete    = True
         topUserDsList   = []
         usingMerger     = False        
         disableNotifier = False
         firstIndvDS     = True
         finalStatusDS   = []
         for destinationDBlock in self.destinationDBlocks:
             dsList = []
             _logger.debug('%s start %s' % (self.pandaID,destinationDBlock))
             # ignore tid datasets
             if re.search('_tid[\d_]+$',destinationDBlock):
                 _logger.debug('%s skip %s' % (self.pandaID,destinationDBlock))                
                 continue
             # ignore HC datasets
             if re.search('^hc_test\.',destinationDBlock) is not None or re.search('^user\.gangarbt\.',destinationDBlock) is not None:
                 if re.search('_sub\d+$',destinationDBlock) is None and re.search('\.lib$',destinationDBlock) is None:
                     _logger.debug('%s skip HC %s' % (self.pandaID,destinationDBlock))                
                     continue
             # query dataset
             if destinationDBlock in self.datasetMap:
                 dataset = self.datasetMap[destinationDBlock]
             else:
                 dataset = self.taskBuffer.queryDatasetWithMap({'name':destinationDBlock})
             if dataset is None:
                 _logger.error('%s Not found : %s' % (self.pandaID,destinationDBlock))
                 flagComplete = False
                 continue
             # skip tobedeleted/tobeclosed 
             if dataset.status in ['cleanup','tobeclosed','completed','deleted']:
                 _logger.debug('%s skip %s due to %s' % (self.pandaID,destinationDBlock,dataset.status))
                 continue
             dsList.append(dataset)
             # sort
             dsList.sort()
             # count number of completed files
             notFinish = self.taskBuffer.countFilesWithMap({'destinationDBlock':destinationDBlock,
                                                            'status':'unknown'})
             if notFinish < 0:
                 _logger.error('%s Invalid DB return : %s' % (self.pandaID,notFinish))
                 flagComplete = False                
                 continue
             # check if completed
             _logger.debug('%s notFinish:%s' % (self.pandaID,notFinish))
             if self.job.destinationSE == 'local' and self.job.prodSourceLabel in ['user','panda']:
                 # close non-DQ2 destinationDBlock immediately
                 finalStatus = 'closed'
             elif self.job.lockedby == 'jedi' and self.isTopLevelDS(destinationDBlock):
                 # set it closed in order not to trigger DDM cleanup. It will be closed by JEDI
                 finalStatus = 'closed'
             elif self.job.prodSourceLabel in ['user'] and "--mergeOutput" in self.job.jobParameters \
                      and self.job.processingType != 'usermerge':
                 # merge output files
                 if firstIndvDS:
                     # set 'tobemerged' to only the first dataset to avoid triggering many Mergers for --individualOutDS
                     finalStatus = 'tobemerged'
                     firstIndvDS = False
                 else:
                     finalStatus = 'tobeclosed'
                 # set merging to top dataset
                 usingMerger = True
                 # disable Notifier
                 disableNotifier = True
             elif self.job.produceUnMerge():
                 finalStatus = 'doing'
             else:
                 # set status to 'tobeclosed' to trigger DQ2 closing
                 finalStatus = 'tobeclosed'
             if notFinish == 0 and EventServiceUtils.isEventServiceMerge(self.job):
                 allInJobsetFinished = self.checkSubDatasetsInJobset()
             else:
                 allInJobsetFinished = True
             if notFinish == 0 and allInJobsetFinished: 
                 _logger.debug('%s set %s to dataset : %s' % (self.pandaID,finalStatus,destinationDBlock))
                 # set status
                 dataset.status = finalStatus
                 # update dataset in DB
                 retT = self.taskBuffer.updateDatasets(dsList,withLock=True,withCriteria="status<>:crStatus AND status<>:lockStatus ",
                                                       criteriaMap={':crStatus':finalStatus,':lockStatus':'locked'})
                 if len(retT) > 0 and retT[0]==1:
                     finalStatusDS += dsList
                     # close user datasets
                     if self.job.prodSourceLabel in ['user'] and self.job.destinationDBlock.endswith('/') \
                            and (dataset.name.startswith('user') or dataset.name.startswith('group')):
                         # get top-level user dataset 
                         topUserDsName = re.sub('_sub\d+$','',dataset.name)
                         # update if it is the first attempt
                         if topUserDsName != dataset.name and not topUserDsName in topUserDsList and self.job.lockedby != 'jedi':
                             topUserDs = self.taskBuffer.queryDatasetWithMap({'name':topUserDsName})
                             if topUserDs is not None:
                                 # check status
                                 if topUserDs.status in ['completed','cleanup','tobeclosed','deleted',
                                                         'tobemerged','merging']:
                                     _logger.debug('%s skip %s due to status=%s' % (self.pandaID,topUserDsName,topUserDs.status))
                                 else:
                                     # set status
                                     if self.job.processingType.startswith('gangarobot') or \
                                            self.job.processingType.startswith('hammercloud'):
                                         # not trigger freezing for HC datasets so that files can be appended
                                         topUserDs.status = 'completed'
                                     elif not usingMerger:
                                         topUserDs.status = finalStatus
                                     else:
                                         topUserDs.status = 'merging'
                                     # append to avoid repetition
                                     topUserDsList.append(topUserDsName)
                                     # update DB
                                     retTopT = self.taskBuffer.updateDatasets([topUserDs],withLock=True,withCriteria="status<>:crStatus",
                                                                              criteriaMap={':crStatus':topUserDs.status})
                                     if len(retTopT) > 0 and retTopT[0]==1:
                                         _logger.debug('%s set %s to top dataset : %s' % (self.pandaID,topUserDs.status,topUserDsName))
                                     else:
                                         _logger.debug('%s failed to update top dataset : %s' % (self.pandaID,topUserDsName))
                         # get parent dataset for merge job
                         if self.job.processingType == 'usermerge':
                             tmpMatch = re.search('--parentDS ([^ \'\"]+)',self.job.jobParameters)
                             if tmpMatch is None:
                                 _logger.error('%s failed to extract parentDS' % self.pandaID)
                             else:
                                 unmergedDsName = tmpMatch.group(1)
                                 # update if it is the first attempt
                                 if not unmergedDsName in topUserDsList:
                                     unmergedDs = self.taskBuffer.queryDatasetWithMap({'name':unmergedDsName})
                                     if unmergedDs is None:
                                         _logger.error('%s failed to get parentDS=%s from DB' % (self.pandaID,unmergedDsName))
                                     else:
                                         # check status
                                         if unmergedDs.status in ['completed','cleanup','tobeclosed']:
                                             _logger.debug('%s skip %s due to status=%s' % (self.pandaID,unmergedDsName,unmergedDs.status))
                                         else:
                                             # set status
                                             unmergedDs.status = finalStatus
                                             # append to avoid repetition
                                             topUserDsList.append(unmergedDsName)
                                             # update DB
                                             retTopT = self.taskBuffer.updateDatasets([unmergedDs],withLock=True,withCriteria="status<>:crStatus",
                                                                                      criteriaMap={':crStatus':unmergedDs.status})
                                             if len(retTopT) > 0 and retTopT[0]==1:
                                                 _logger.debug('%s set %s to parent dataset : %s' % (self.pandaID,unmergedDs.status,unmergedDsName))
                                             else:
                                                 _logger.debug('%s failed to update parent dataset : %s' % (self.pandaID,unmergedDsName))
                     # start Activator
                     if re.search('_sub\d+$',dataset.name) is None:
                         if self.job.prodSourceLabel=='panda' and self.job.processingType in ['merge','unmerge']:
                             # don't trigger Activator for merge jobs
                             pass
                         else:
                             if self.job.jobStatus == 'finished':
                                 aThr = Activator(self.taskBuffer,dataset)
                                 aThr.start()
                                 aThr.join()
                 else:
                     # unset flag since another thread already updated 
                     #flagComplete = False
                     pass
             else:
                 # update dataset in DB
                 self.taskBuffer.updateDatasets(dsList,withLock=True,withCriteria="status<>:crStatus AND status<>:lockStatus ",
                                                criteriaMap={':crStatus':finalStatus,':lockStatus':'locked'})
                 # unset flag
                 flagComplete = False
             # end
             _logger.debug('%s end %s' % (self.pandaID,destinationDBlock))
         # special actions for vo
         if flagComplete:
             closerPluginClass = panda_config.getPlugin('closer_plugins',self.job.VO)
             if closerPluginClass is None and self.job.VO == 'atlas':
                 # use ATLAS plugin for ATLAS
                 from pandaserver.dataservice.CloserAtlasPlugin import CloserAtlasPlugin
                 closerPluginClass = CloserAtlasPlugin
             if closerPluginClass is not None:
                 closerPlugin = closerPluginClass(self.job,finalStatusDS,_logger)
                 closerPlugin.execute()
         # change pending jobs to failed
         finalizedFlag = True
         if flagComplete and self.job.prodSourceLabel=='user':
             _logger.debug('%s finalize %s %s' % (self.pandaID,self.job.prodUserName,self.job.jobDefinitionID))
             finalizedFlag = self.taskBuffer.finalizePendingJobs(self.job.prodUserName,self.job.jobDefinitionID)
             _logger.debug('%s finalized with %s' % (self.pandaID,finalizedFlag))
         # update unmerged datasets in JEDI to trigger merging
         if flagComplete and self.job.produceUnMerge() and finalStatusDS != []:
             if finalizedFlag:
                 tmpStat = self.taskBuffer.updateUnmergedDatasets(self.job,finalStatusDS)
                 _logger.debug('%s updated unmerged datasets with %s' % (self.pandaID,tmpStat))
         # start notifier
         _logger.debug('%s source:%s complete:%s' % (self.pandaID,self.job.prodSourceLabel,flagComplete))
         if (self.job.jobStatus != 'transferring') and ((flagComplete and self.job.prodSourceLabel=='user') or \
            (self.job.jobStatus=='failed' and self.job.prodSourceLabel=='panda')) and \
            self.job.lockedby != 'jedi':
             # don't send email for merge jobs
             if (not disableNotifier) and not self.job.processingType in ['merge','unmerge']:
                 useNotifier = True
                 summaryInfo = {}
                 # check all jobDefIDs in jobsetID
                 if not self.job.jobsetID in [0,None,'NULL']:
                     useNotifier,summaryInfo = self.taskBuffer.checkDatasetStatusForNotifier(self.job.jobsetID,self.job.jobDefinitionID,
                                                                                             self.job.prodUserName)
                     _logger.debug('%s useNotifier:%s' % (self.pandaID,useNotifier))
                 if useNotifier:
                     _logger.debug('%s start Notifier' % self.pandaID)
                     nThr = Notifier.Notifier(self.taskBuffer,self.job,self.destinationDBlocks,summaryInfo)
                     nThr.run()
                     _logger.debug('%s end Notifier' % self.pandaID)                    
         _logger.debug('%s End' % self.pandaID)
     except Exception:
         errType,errValue = sys.exc_info()[:2]
         _logger.error("%s %s" % (errType,errValue))

Пример #13

Показать файл

 def appendJob(self, job, siteMapperCache=None):
     """Populate ``self.data`` with the parameters describing ``job``.

     Scalar job attributes are copied into ``self.data`` one by one, and the
     per-file attributes of ``job.Files`` are flattened, in file order, into
     comma-separated strings (one string per attribute).

     Args:
         job: job specification whose attributes and ``Files`` are read.
             When ``siteMapperCache`` is given, ``job.destinationSE`` and
             the ``destinationSE`` of every file are overwritten with the
             value returned by ``resolveNucleus`` (best effort).
         siteMapperCache: optional cache exposing ``getObj()`` and
             ``releaseObj()``. The object it returns is used to look up the
             site of ``job.computingSite`` and to resolve destination SEs.

     Returns:
         None. All results are stored in ``self.data``.
     """
     # event service merge
     isEventServiceMerge = bool(EventServiceUtils.isEventServiceMerge(job))
     # PandaID
     self.data['PandaID'] = job.PandaID
     # prodSourceLabel
     self.data['prodSourceLabel'] = job.prodSourceLabel
     # swRelease
     self.data['swRelease'] = job.AtlasRelease
     # homepackage
     self.data['homepackage'] = job.homepackage
     # transformation
     self.data['transformation'] = job.transformation
     # job name
     self.data['jobName'] = job.jobName
     # job definition ID
     self.data['jobDefinitionID'] = job.jobDefinitionID
     # cloud
     self.data['cloud'] = job.cloud
     # Accumulators that add a separator only once they are non-empty.
     # NOTE: this differs from ','.join() when a leading value is an empty
     # string; it is kept unchanged to preserve the emitted format.
     strIFiles = ''
     strOFiles = ''
     strDispatch = ''
     strDisToken = ''
     strDestination = ''
     strRealDataset = ''
     strDestToken = ''
     strGUID = ''
     # Accumulators that always emit one entry per file; joined once below.
     disTokensForOutput = []
     realDatasetsIn = []
     prodDBlocks = []
     prodTokens = []
     prodTokensForOutput = []
     fSizes = []
     checkSums = []
     fileDestinationSEs = []
     scopesIn = []
     scopesOut = []
     strScopeLog = ''
     logFile = ''
     logGUID = ''
     ddmEndPointIn = []
     ddmEndPointOut = []
     noOutput = []
     siteSpec = None
     inDsLfnMap = {}
     inLFNset = set()
     if siteMapperCache is not None:
         siteMapper = siteMapperCache.getObj()
         # getObj() and releaseObj() are paired; use finally so the cached
         # object is handed back even when the site lookup raises
         try:
             siteSpec = siteMapper.getSite(job.computingSite)
             # resolve destSE (best effort: failures leave values untouched)
             try:
                 job.destinationSE = siteMapper.resolveNucleus(
                     job.destinationSE)
                 for tmpFile in job.Files:
                     tmpFile.destinationSE = siteMapper.resolveNucleus(
                         tmpFile.destinationSE)
             except Exception:
                 pass
         finally:
             siteMapperCache.releaseObj()
     for fileSpec in job.Files:
         if fileSpec.type == 'input':
             # for jumbo jobs an LFN that was already seen is not repeated
             if EventServiceUtils.isJumboJob(job) and fileSpec.lfn in inLFNset:
                 continue
             inLFNset.add(fileSpec.lfn)
             if strIFiles != '':
                 strIFiles += ','
             strIFiles += fileSpec.lfn
             if strDispatch != '':
                 strDispatch += ','
             strDispatch += fileSpec.dispatchDBlock
             if strDisToken != '':
                 strDisToken += ','
             strDisToken += fileSpec.dispatchDBlockToken
             prodDBlocks.append('%s' % fileSpec.prodDBlock)
             if not isEventServiceMerge:
                 prodTokens.append('%s' % fileSpec.prodDBlockToken)
             else:
                 # for ES merge the token is taken from job.metadata[1],
                 # which is indexed by LFN
                 prodTokens.append('%s' % job.metadata[1][fileSpec.lfn])
             if strGUID != '':
                 strGUID += ','
             strGUID += fileSpec.GUID
             realDatasetsIn.append('%s' % fileSpec.dataset)
             fSizes.append('%s' % fileSpec.fsize)
             # fall back to md5sum when no checksum is set
             if fileSpec.checksum not in ['', 'NULL', None]:
                 checkSums.append('%s' % fileSpec.checksum)
             else:
                 checkSums.append('%s' % fileSpec.md5sum)
             scopesIn.append('%s' % fileSpec.scope)
             ddmEndPointIn.append(
                 self.getDdmEndpoint(siteSpec, fileSpec.dispatchDBlockToken,
                                     'input', job.prodSourceLabel,
                                     job.job_label))
             if fileSpec.dataset not in inDsLfnMap:
                 inDsLfnMap[fileSpec.dataset] = []
             inDsLfnMap[fileSpec.dataset].append(fileSpec.lfn)
         elif fileSpec.type in ('output', 'log'):
             if strOFiles != '':
                 strOFiles += ','
             strOFiles += fileSpec.lfn
             if strDestination != '':
                 strDestination += ','
             strDestination += fileSpec.destinationDBlock
             if strRealDataset != '':
                 strRealDataset += ','
             strRealDataset += fileSpec.dataset
             fileDestinationSEs.append('%s' % fileSpec.destinationSE)
             if fileSpec.type == 'log':
                 logFile = fileSpec.lfn
                 logGUID = fileSpec.GUID
                 strScopeLog = fileSpec.scope
             else:
                 scopesOut.append('%s' % fileSpec.scope)
             # only the first comma-separated destination token is used
             firstDestToken = fileSpec.destinationDBlockToken.split(',')[0]
             if strDestToken != '':
                 strDestToken += ','
             strDestToken += re.sub('^ddd:', 'dst:', firstDestToken)
             disTokensForOutput.append('%s' % fileSpec.dispatchDBlockToken)
             prodTokensForOutput.append('%s' % fileSpec.prodDBlockToken)
             ddmEndPointOut.append(
                 self.getDdmEndpoint(siteSpec, firstDestToken, 'output',
                                     job.prodSourceLabel, job.job_label))
             if fileSpec.isAllowedNoOutput():
                 noOutput.append(fileSpec.lfn)
     # inFiles
     self.data['inFiles'] = strIFiles
     # dispatch DBlock
     self.data['dispatchDblock'] = strDispatch
     # dispatch DBlock space token
     self.data['dispatchDBlockToken'] = strDisToken
     # dispatch DBlock space token for output
     self.data['dispatchDBlockTokenForOut'] = ','.join(disTokensForOutput)
     # outFiles
     self.data['outFiles'] = strOFiles
     # destination DBlock
     self.data['destinationDblock'] = strDestination
     # destination DBlock space token
     self.data['destinationDBlockToken'] = strDestToken
     # prod DBlocks
     self.data['prodDBlocks'] = ','.join(prodDBlocks)
     # prod DBlock space token
     self.data['prodDBlockToken'] = ','.join(prodTokens)
     # real output datasets
     self.data['realDatasets'] = strRealDataset
     # real input datasets
     self.data['realDatasetsIn'] = ','.join(realDatasetsIn)
     # file's destinationSE
     self.data['fileDestinationSE'] = ','.join(fileDestinationSEs)
     # log filename
     self.data['logFile'] = logFile
     # log GUID
     self.data['logGUID'] = logGUID
     # jobPars
     self.data['jobPars'], ppSteps = job.extractMultiStepExec()
     if ppSteps is not None:
         self.data.update(ppSteps)
     if job.to_encode_job_params():
         self.data['jobPars'] = base64.b64encode(
             self.data['jobPars'].encode()).decode()
     # attempt number
     self.data['attemptNr'] = job.attemptNr
     # GUIDs
     self.data['GUID'] = strGUID
     # checksum
     self.data['checksum'] = ','.join(checkSums)
     # fsize
     self.data['fsize'] = ','.join(fSizes)
     # scope
     self.data['scopeIn'] = ','.join(scopesIn)
     self.data['scopeOut'] = ','.join(scopesOut)
     self.data['scopeLog'] = strScopeLog
     # DDM endpoints; join() raises TypeError if an endpoint is not a string
     try:
         self.data['ddmEndPointIn'] = ','.join(ddmEndPointIn)
     except TypeError:
         self.data['ddmEndPointIn'] = ''
     try:
         self.data['ddmEndPointOut'] = ','.join(ddmEndPointOut)
     except TypeError:
         self.data['ddmEndPointOut'] = ''
     # destinationSE
     self.data['destinationSE'] = job.destinationSE
     # user ID
     self.data['prodUserID'] = job.prodUserID
     # CPU count
     self.data['maxCpuCount'] = job.maxCpuCount
     # RAM count
     self.data['minRamCount'] = job.minRamCount
     # disk count
     self.data['maxDiskCount'] = job.maxDiskCount
     # cmtconfig is blanked when multi-step execution parameters are present
     if ppSteps is None:
         self.data['cmtConfig'] = job.cmtConfig
     else:
         self.data['cmtConfig'] = ''
     # processingType
     self.data['processingType'] = job.processingType
     # transferType
     self.data['transferType'] = job.transferType
     # sourceSite
     self.data['sourceSite'] = job.sourceSite
     # current priority
     self.data['currentPriority'] = job.currentPriority
     # taskID
     if job.lockedby == 'jedi':
         self.data['taskID'] = job.jediTaskID
     else:
         self.data['taskID'] = job.taskID
     # core count
     if job.coreCount in ['NULL', None]:
         self.data['coreCount'] = 1
     else:
         self.data['coreCount'] = job.coreCount
     # jobsetID
     self.data['jobsetID'] = job.jobsetID
     # nucleus
     self.data['nucleus'] = job.nucleus
     # walltime
     self.data['maxWalltime'] = job.maxWalltime
     # looping check
     if job.is_no_looping_check():
         self.data['loopingCheck'] = False
     # debug mode
     if job.specialHandling is not None and 'debug' in job.specialHandling:
         self.data['debug'] = 'True'
     # event service or job cloning
     if EventServiceUtils.isJobCloningJob(job):
         self.data['cloneJob'] = EventServiceUtils.getJobCloningType(job)
     elif EventServiceUtils.isEventServiceJob(
             job) or EventServiceUtils.isJumboJob(job):
         self.data['eventService'] = 'True'
         # prod DBlock space token for pre-merging output
         self.data['prodDBlockTokenForOutput'] = ','.join(
             prodTokensForOutput)
     # event service merge
     if isEventServiceMerge:
         self.data['eventServiceMerge'] = 'True'
         # write to file for ES merge; on failure whatever was built so far
         # is still published
         writeToFileStr = ''
         try:
             for outputName in job.metadata[0]:
                 inputList = job.metadata[0][outputName]
                 writeToFileStr += 'inputFor_{0}:'.format(outputName)
                 for tmpInput in inputList:
                     writeToFileStr += '{0},'.format(tmpInput)
                 # drop the trailing ',' before closing the entry with '^'
                 writeToFileStr = writeToFileStr[:-1]
                 writeToFileStr += '^'
             # drop the trailing '^'
             writeToFileStr = writeToFileStr[:-1]
         except Exception:
             pass
         self.data['writeToFile'] = writeToFileStr
     elif job.writeInputToFile():
         try:
             # write input to file: one 'tmpin_<dataset>:<lfn,...>' entry
             # per input dataset, entries separated by '^'
             entries = []
             for inDS in inDsLfnMap:
                 inputList = inDsLfnMap[inDS]
                 # strip a trailing '/' and any 'scope:' prefix
                 dsName = re.sub('/$', '', inDS).split(':')[-1]
                 entries.append(
                     'tmpin_{0}:'.format(dsName) + ','.join(inputList))
             self.data['writeToFile'] = '^'.join(entries)
         except Exception:
             # best effort: the key is simply not set on failure
             pass
     # replace placeholder
     # NOTE(review): when to_encode_job_params() is true, jobPars is already
     # base64 text at this point, so the placeholder presumably cannot
     # match - confirm whether that combination can occur.
     if EventServiceUtils.isJumboJob(job) or EventServiceUtils.isCoJumboJob(
             job):
         try:
             for inDS in inDsLfnMap:
                 inputList = inDsLfnMap[inDS]
                 dsName = re.sub('/$', '', inDS).split(':')[-1]
                 srcStr = 'tmpin__cnt_{0}'.format(dsName)
                 dstStr = ','.join(inputList)
                 self.data['jobPars'] = self.data['jobPars'].replace(
                     srcStr, dstStr)
         except Exception:
             pass
     # no output
     if noOutput:
         self.data['allowNoOutput'] = ','.join(noOutput)
     # alternative stage-out
     altStgOut = job.getAltStgOut()
     if altStgOut is not None:
         self.data['altStageOut'] = altStgOut
     # log to OS
     if job.putLogToOS():
         self.data['putLogToOS'] = 'True'
     # suppress execute string conversion
     if job.noExecStrCnv():
         self.data['noExecStrCnv'] = 'True'
     # in-file positional event number
     if job.inFilePosEvtNum():
         self.data['inFilePosEvtNum'] = 'True'
     # use prefetcher
     if job.usePrefetcher():
         self.data['usePrefetcher'] = 'True'
     # image name
     if job.container_name not in ['NULL', None]:
         self.data['container_name'] = job.container_name
     # IO
     self.data['ioIntensity'] = job.get_task_attribute('ioIntensity')
     self.data['ioIntensityUnit'] = job.get_task_attribute(
         'ioIntensityUnit')
     # HPO
     if job.is_hpo_workflow():
         self.data['isHPO'] = 'True'
     # VP
     if siteSpec is not None:
         scope_input, scope_output = DataServiceUtils.select_scope(
             siteSpec, job.prodSourceLabel, job.job_label)
         if siteSpec.use_vp(scope_input):
             self.data['useVP'] = 'True'

Пример #14

Показать файл

 def extractCommon(self, jediTaskID, taskParamMap, workQueueMapper,
                   splitRule):
     """Build a ``JediTaskSpec`` from the task parameters and keep it on self.

     Mandatory parameters are copied verbatim, optional ones are copied when
     present (some fall back to a default), a work queue is selected, and
     the split rules are then registered through ``self.setSplitRule``.

     Membership tests use ``in`` and the exception is raised with call
     syntax, so the method runs on both Python 2 and Python 3.

     Args:
         jediTaskID: identifier stored on the spec; also the default reqID.
         taskParamMap: mapping of task parameters (indexed by name).
         workQueueMapper: object providing ``getQueueWithSelParams``.
         splitRule: initial value for ``taskSpec.splitRule``.

     Returns:
         None. The spec is stored in ``self.taskSpec``; ``self.cloudName``
         and ``self.siteName`` are set when 'cloud' / 'site' are given.

     Raises:
         RuntimeError: if no work queue matches the task.
         KeyError: presumably, when a mandatory parameter is missing
             (assuming ``taskParamMap`` is a plain dict).
     """
     # make task spec
     taskSpec = JediTaskSpec()
     taskSpec.jediTaskID = jediTaskID
     taskSpec.taskName = taskParamMap['taskName']
     taskSpec.userName = taskParamMap['userName']
     taskSpec.vo = taskParamMap['vo']
     taskSpec.prodSourceLabel = taskParamMap['prodSourceLabel']
     taskSpec.taskPriority = taskParamMap['taskPriority']
     taskSpec.currentPriority = taskSpec.taskPriority
     taskSpec.architecture = taskParamMap['architecture']
     taskSpec.transUses = taskParamMap['transUses']
     taskSpec.transHome = taskParamMap['transHome']
     taskSpec.transPath = taskParamMap['transPath']
     taskSpec.processingType = taskParamMap['processingType']
     taskSpec.taskType = taskParamMap['taskType']
     taskSpec.splitRule = splitRule
     taskSpec.startTime = datetime.datetime.utcnow()
     # optional attributes copied only when given
     for attrName in ('workingGroup', 'countryGroup', 'ticketID',
                      'ticketSystemType'):
         if attrName in taskParamMap:
             setattr(taskSpec, attrName, taskParamMap[attrName])
     # optional attributes with defaults
     if 'reqID' in taskParamMap:
         taskSpec.reqID = taskParamMap['reqID']
     else:
         taskSpec.reqID = jediTaskID
     if 'coreCount' in taskParamMap:
         taskSpec.coreCount = taskParamMap['coreCount']
     else:
         taskSpec.coreCount = 1
     if 'walltime' in taskParamMap:
         taskSpec.walltime = taskParamMap['walltime']
     else:
         taskSpec.walltime = 0
     if 'walltimeUnit' in taskParamMap:
         taskSpec.walltimeUnit = taskParamMap['walltimeUnit']
     if 'outDiskCount' in taskParamMap:
         taskSpec.outDiskCount = taskParamMap['outDiskCount']
     else:
         taskSpec.outDiskCount = 0
     if 'outDiskUnit' in taskParamMap:
         taskSpec.outDiskUnit = taskParamMap['outDiskUnit']
     if 'workDiskCount' in taskParamMap:
         taskSpec.workDiskCount = taskParamMap['workDiskCount']
     else:
         taskSpec.workDiskCount = 0
     if 'workDiskUnit' in taskParamMap:
         taskSpec.workDiskUnit = taskParamMap['workDiskUnit']
     if 'ramCount' in taskParamMap:
         taskSpec.ramCount = taskParamMap['ramCount']
     else:
         taskSpec.ramCount = 0
     # HS06 stuff
     if 'cpuTimeUnit' in taskParamMap:
         taskSpec.cpuTimeUnit = taskParamMap['cpuTimeUnit']
     if 'cpuTime' in taskParamMap:
         taskSpec.cpuTime = taskParamMap['cpuTime']
     if 'cpuEfficiency' in taskParamMap:
         taskSpec.cpuEfficiency = taskParamMap['cpuEfficiency']
     else:
         # 90% of cpu efficiency by default
         taskSpec.cpuEfficiency = 90
     if 'baseWalltime' in taskParamMap:
         taskSpec.baseWalltime = taskParamMap['baseWalltime']
     else:
         # 10min of offset by default
         taskSpec.baseWalltime = 10 * 60
     # for merge
     if 'mergeRamCount' in taskParamMap:
         taskSpec.mergeRamCount = taskParamMap['mergeRamCount']
     if 'mergeCoreCount' in taskParamMap:
         taskSpec.mergeCoreCount = taskParamMap['mergeCoreCount']
     # scout
     if 'skipScout' not in taskParamMap and not taskSpec.isPostScout():
         taskSpec.setUseScout(True)
     # cloud
     if 'cloud' in taskParamMap:
         self.cloudName = taskParamMap['cloud']
         taskSpec.cloud = self.cloudName
     else:
         # set dummy to force update (the double assignment is deliberate)
         taskSpec.cloud = 'dummy'
         taskSpec.cloud = None
     # site
     if 'site' in taskParamMap:
         self.siteName = taskParamMap['site']
         taskSpec.site = self.siteName
     else:
         # set dummy to force update (the double assignment is deliberate)
         taskSpec.site = 'dummy'
         taskSpec.site = None
     # event service
     if 'nEventsPerWorker' in taskParamMap:
         taskSpec.eventService = 1
     else:
         taskSpec.eventService = 0
     # goal: stored as the given value times 10; values of 1000 and above
     # are stored as None
     if 'goal' in taskParamMap:
         try:
             taskSpec.goal = int(float(taskParamMap['goal']) * 10)
             if taskSpec.goal >= 1000:
                 taskSpec.goal = None
         except Exception:
             # best effort: an unparsable goal is ignored
             pass
     # campaign
     if 'campaign' in taskParamMap:
         taskSpec.campaign = taskParamMap['campaign']
     # work queue
     workQueue, tmpStr = workQueueMapper.getQueueWithSelParams(
         taskSpec.vo,
         taskSpec.prodSourceLabel,
         processingType=taskSpec.processingType,
         workingGroup=taskSpec.workingGroup,
         coreCount=taskSpec.coreCount,
         site=taskSpec.site)
     if workQueue is None:
         errStr = 'workqueue is undefined for vo={0} labal={1} '.format(
             taskSpec.vo, taskSpec.prodSourceLabel)
         errStr += 'processingType={0} workingGroup={1} coreCount={2} '.format(
             taskSpec.processingType, taskSpec.workingGroup,
             taskSpec.coreCount)
         raise RuntimeError(errStr)
     taskSpec.workQueue_ID = workQueue.queue_id
     # the spec is published on self before the split rules are registered
     self.taskSpec = taskSpec
     # set split rule
     ruleTokens = JediTaskSpec.splitRuleToken
     if 'tgtNumEventsPerJob' in taskParamMap:
         # set nEventsPerJob not respect file boundaries when nFilesPerJob is not used
         if 'nFilesPerJob' not in taskParamMap:
             self.setSplitRule(None, taskParamMap['tgtNumEventsPerJob'],
                               ruleTokens['nEventsPerJob'])
     # rules whose parameter name and token name coincide; the call order is
     # kept exactly as before
     for ruleName in ('nFilesPerJob', 'nEventsPerJob', 'nGBPerJob',
                      'nMaxFilesPerJob', 'nEventsPerWorker', 'useLocalIO',
                      'disableAutoRetry', 'nEsConsumers', 'waitInput',
                      'addNthFieldToLFN', 'scoutSuccessRate', 't1Weight',
                      'maxAttemptES', 'nSitesPerJob', 'nEventsPerMergeJob',
                      'nFilesPerMergeJob', 'nGBPerMergeJob',
                      'nMaxFilesPerMergeJob'):
         self.setSplitRule(taskParamMap, ruleName, ruleTokens[ruleName])
     if 'loadXML' in taskParamMap:
         self.setSplitRule(None, 3, ruleTokens['loadXML'])
         self.setSplitRule(None, 4, ruleTokens['groupBoundaryID'])
     # presence-only flags, registered with the value 1
     for flagName in ('pfnList', 'noWaitParent', 'respectLB',
                      'reuseSecOnDemand'):
         if flagName in taskParamMap:
             self.setSplitRule(None, 1, ruleTokens[flagName])
     if 'ddmBackEnd' in taskParamMap:
         self.taskSpec.setDdmBackEnd(taskParamMap['ddmBackEnd'])
     for flagName in ('disableReassign', 'allowPartialFinish',
                      'useExhausted', 'useRealNumEvents'):
         if flagName in taskParamMap:
             self.setSplitRule(None, 1, ruleTokens[flagName])
     if 'ipConnectivity' in taskParamMap:
         self.taskSpec.setIpConnectivity(taskParamMap['ipConnectivity'])
     for flagName in ('runUntilClosed', 'stayOutputOnSite'):
         if flagName in taskParamMap:
             self.setSplitRule(None, 1, ruleTokens[flagName])
     if 'useJobCloning' in taskParamMap:
         scValue = EventServiceUtils.getJobCloningValue(
             taskParamMap['useJobCloning'])
         self.setSplitRule(None, scValue, ruleTokens['useJobCloning'])
     # the parameter is 'failWhenGoalUnreached' but the token is named
     # 'failGoalUnreached'
     if 'failWhenGoalUnreached' in taskParamMap:
         self.setSplitRule(None, 1, ruleTokens['failGoalUnreached'])
     if 'switchEStoNormal' in taskParamMap:
         self.setSplitRule(None, 1, ruleTokens['switchEStoNormal'])
     # return
     return

Пример #15

Показать файл

Файл: TaskRefinerBase.py Проект: PanDAWMS/panda-jedi

    def extractCommon(self,jediTaskID,taskParamMap,workQueueMapper,splitRule):
        """Build the task spec from the common task parameters.

        A new JediTaskSpec is created, filled from taskParamMap and stored in
        self.taskSpec.  The split rules, work queue, global share and resource
        type of the task are set as well.

        Side effects visible in this method:
          * taskParamMap is modified in place: implicit job-cloning defaults
            (nEventsPerWorker, nSitesPerJob, nEsConsumers) are inserted when
            'useJobCloning' is given, and 'minGranularity' is copied to
            'nEventsPerRange'.
          * self.cloudName / self.siteName are set when 'cloud' / 'site' are
            present in taskParamMap.

        :param jediTaskID: identifier stored in the task spec; also used as
                           reqID when taskParamMap has no 'reqID'
        :param taskParamMap: mapping of task parameters; mandatory keys are
                             read by direct indexing (taskName, userName, vo,
                             prodSourceLabel, taskPriority, architecture,
                             transUses, transHome, transPath, processingType,
                             taskType)
        :param workQueueMapper: object providing getQueueByName() and
                                getQueueWithSelParams()
        :param splitRule: initial value of taskSpec.splitRule
        :raises RuntimeError: when no work queue can be resolved for the task
        :return: None
        """
        # make task spec
        taskSpec = JediTaskSpec()
        taskSpec.jediTaskID = jediTaskID
        taskSpec.taskName = taskParamMap['taskName']
        taskSpec.userName = taskParamMap['userName']
        taskSpec.vo = taskParamMap['vo']
        taskSpec.prodSourceLabel = taskParamMap['prodSourceLabel']
        taskSpec.taskPriority = taskParamMap['taskPriority']
        if 'currentPriority' in taskParamMap:
            taskSpec.currentPriority = taskParamMap['currentPriority']
        else:
            taskSpec.currentPriority = taskSpec.taskPriority
        taskSpec.architecture = taskParamMap['architecture']
        taskSpec.transUses = taskParamMap['transUses']
        taskSpec.transHome = taskParamMap['transHome']
        taskSpec.transPath = taskParamMap['transPath']
        taskSpec.processingType = taskParamMap['processingType']
        taskSpec.taskType = taskParamMap['taskType']
        taskSpec.splitRule = splitRule
        taskSpec.startTime = datetime.datetime.utcnow()
        if 'workingGroup' in taskParamMap:
            taskSpec.workingGroup = taskParamMap['workingGroup']
        if 'countryGroup' in taskParamMap:
            taskSpec.countryGroup = taskParamMap['countryGroup']
        if 'ticketID' in taskParamMap:
            taskSpec.ticketID = taskParamMap['ticketID']
        if 'ticketSystemType' in taskParamMap:
            taskSpec.ticketSystemType = taskParamMap['ticketSystemType']
        if 'reqID' in taskParamMap:
            taskSpec.reqID = taskParamMap['reqID']
        else:
            taskSpec.reqID = jediTaskID
        if 'coreCount' in taskParamMap:
            taskSpec.coreCount = taskParamMap['coreCount']
        else:
            taskSpec.coreCount = 1
        if 'walltime' in taskParamMap:
            taskSpec.walltime = taskParamMap['walltime']
        else:
            taskSpec.walltime = 0
        if 'walltimeUnit' not in taskParamMap:
            # force to set NULL so that retried tasks get data from scouts again
            taskSpec.forceUpdate('walltimeUnit')
        if 'outDiskCount' in taskParamMap:
            taskSpec.outDiskCount = taskParamMap['outDiskCount']
        else:
            taskSpec.outDiskCount = 0
        if 'outDiskUnit' in taskParamMap:
            taskSpec.outDiskUnit = taskParamMap['outDiskUnit']
        if 'workDiskCount' in taskParamMap:
            taskSpec.workDiskCount = taskParamMap['workDiskCount']
        else:
            taskSpec.workDiskCount = 0
        if 'workDiskUnit' in taskParamMap:
            taskSpec.workDiskUnit = taskParamMap['workDiskUnit']
        if 'ramCount' in taskParamMap:
            taskSpec.ramCount = taskParamMap['ramCount']
        else:
            taskSpec.ramCount = 0
        if 'ramUnit' in taskParamMap:
            taskSpec.ramUnit = taskParamMap['ramUnit']
        if 'baseRamCount' in taskParamMap:
            taskSpec.baseRamCount = taskParamMap['baseRamCount']
        else:
            taskSpec.baseRamCount = 0
        # IO
        if 'ioIntensity' in taskParamMap:
            taskSpec.ioIntensity = taskParamMap['ioIntensity']
        if 'ioIntensityUnit' in taskParamMap:
            taskSpec.ioIntensityUnit = taskParamMap['ioIntensityUnit']
        # HS06 stuff
        if 'cpuTimeUnit' in taskParamMap:
            taskSpec.cpuTimeUnit = taskParamMap['cpuTimeUnit']
        if 'cpuTime' in taskParamMap:
            taskSpec.cpuTime = taskParamMap['cpuTime']
        if 'cpuEfficiency' in taskParamMap:
            taskSpec.cpuEfficiency = taskParamMap['cpuEfficiency']
        else:
            # 90% of cpu efficiency by default
            taskSpec.cpuEfficiency = 90
        if 'baseWalltime' in taskParamMap:
            taskSpec.baseWalltime = taskParamMap['baseWalltime']
        else:
            # 10min of offset by default
            taskSpec.baseWalltime = 10*60
        # for merge
        if 'mergeRamCount' in taskParamMap:
            taskSpec.mergeRamCount = taskParamMap['mergeRamCount']
        if 'mergeCoreCount' in taskParamMap:
            taskSpec.mergeCoreCount = taskParamMap['mergeCoreCount']
        # scout
        if 'skipScout' not in taskParamMap and not taskSpec.isPostScout():
            taskSpec.setUseScout(True)
        # cloud
        if 'cloud' in taskParamMap:
            self.cloudName = taskParamMap['cloud']
            taskSpec.cloud = self.cloudName
        else:
            # set dummy to force update (the double assignment is deliberate)
            taskSpec.cloud = 'dummy'
            taskSpec.cloud = None
        # site
        if 'site' in taskParamMap:
            self.siteName = taskParamMap['site']
            taskSpec.site = self.siteName
        else:
            # set dummy to force update (the double assignment is deliberate)
            taskSpec.site = 'dummy'
            taskSpec.site = None
        # nucleus
        if 'nucleus' in taskParamMap:
            taskSpec.nucleus = taskParamMap['nucleus']
        # preset some parameters for job cloning
        if 'useJobCloning' in taskParamMap:
            # set implicit parameters
            if 'nEventsPerWorker' not in taskParamMap:
                taskParamMap['nEventsPerWorker'] = 1
            if 'nSitesPerJob' not in taskParamMap:
                taskParamMap['nSitesPerJob'] = 2
            if 'nEsConsumers' not in taskParamMap:
                taskParamMap['nEsConsumers'] = taskParamMap['nSitesPerJob']
        # minimum granularity
        if 'minGranularity' in taskParamMap:
            taskParamMap['nEventsPerRange'] = taskParamMap['minGranularity']
        # event service flag : 2 for job cloning, 1 for event service, 0 otherwise
        if 'useJobCloning' in taskParamMap:
            taskSpec.eventService = 2
        elif 'nEventsPerWorker' in taskParamMap:
            taskSpec.eventService = 1
        else:
            taskSpec.eventService = 0
        # OS
        if 'osInfo' in taskParamMap:
            taskSpec.termCondition = taskParamMap['osInfo']
        # ttcr: requested time to completion
        if 'ttcrTimestamp' in taskParamMap:
            try:
                # get rid of the +00:00 timezone string and parse the timestamp
                taskSpec.ttcRequested = datetime.datetime.strptime(taskParamMap['ttcrTimestamp'].split('+')[0], '%Y-%m-%d %H:%M:%S.%f')
            except (IndexError, ValueError):
                pass
        # goal : stored multiplied by 10, values above 1000 are reset to None
        if 'goal' in taskParamMap:
            try:
                taskSpec.goal = int(float(taskParamMap['goal'])*10)
                if taskSpec.goal > 1000:
                    taskSpec.goal = None
            except (TypeError, ValueError, OverflowError):
                # an unparsable goal is ignored on purpose
                pass
        # campaign
        if 'campaign' in taskParamMap:
            taskSpec.campaign = taskParamMap['campaign']
        # request type
        if 'requestType' in taskParamMap:
            taskSpec.requestType = taskParamMap['requestType']
        self.taskSpec = taskSpec
        # set split rule
        if 'tgtNumEventsPerJob' in taskParamMap:
            # set nEventsPerJob not respect file boundaries when nFilesPerJob is not used
            if 'nFilesPerJob' not in taskParamMap:
                self.setSplitRule(None,taskParamMap['tgtNumEventsPerJob'],JediTaskSpec.splitRuleToken['nEventsPerJob'])
        # rules taken straight from the task parameter of the same name;
        # the order of the calls is kept as it was
        for tmpKey in ('nFilesPerJob', 'nEventsPerJob', 'nGBPerJob', 'nMaxFilesPerJob',
                       'nEventsPerWorker', 'useLocalIO', 'disableAutoRetry', 'nEsConsumers',
                       'waitInput', 'addNthFieldToLFN', 'scoutSuccessRate', 't1Weight',
                       'maxAttemptES', 'maxAttemptEsJob', 'nSitesPerJob',
                       'nEventsPerMergeJob', 'nFilesPerMergeJob', 'nGBPerMergeJob',
                       'nMaxFilesPerMergeJob', 'maxWalltime', 'tgtMaxOutputForNG'):
            self.setSplitRule(taskParamMap,tmpKey,JediTaskSpec.splitRuleToken[tmpKey])
        if 'nJumboJobs' in taskParamMap:
            self.setSplitRule(taskParamMap,'nJumboJobs',JediTaskSpec.splitRuleToken['nJumboJobs'])
            taskSpec.useJumbo = JediTaskSpec.enum_useJumbo['waiting']
            if 'maxJumboPerSite' in taskParamMap:
                self.setSplitRule(taskParamMap,'maxJumboPerSite',JediTaskSpec.splitRuleToken['maxJumboPerSite'])
        if 'minCpuEfficiency' in taskParamMap:
            self.setSplitRule(taskParamMap,'minCpuEfficiency',JediTaskSpec.splitRuleToken['minCpuEfficiency'])
        if 'loadXML' in taskParamMap:
            self.setSplitRule(None,3,JediTaskSpec.splitRuleToken['loadXML'])
            self.setSplitRule(None,4,JediTaskSpec.splitRuleToken['groupBoundaryID'])
        if 'pfnList' in taskParamMap:
            self.setSplitRule(None,1,JediTaskSpec.splitRuleToken['pfnList'])
        if 'noWaitParent' in taskParamMap and taskParamMap['noWaitParent'] == True:
            self.setSplitRule(None,1,JediTaskSpec.splitRuleToken['noWaitParent'])
        # flags enabled by the mere presence of the key
        for tmpKey in ('respectLB', 'orderByLB', 'respectSplitRule', 'reuseSecOnDemand'):
            if tmpKey in taskParamMap:
                self.setSplitRule(None,1,JediTaskSpec.splitRuleToken[tmpKey])
        if 'ddmBackEnd' in taskParamMap:
            self.taskSpec.setDdmBackEnd(taskParamMap['ddmBackEnd'])
        for tmpKey in ('disableReassign', 'allowPartialFinish', 'useExhausted', 'useRealNumEvents'):
            if tmpKey in taskParamMap:
                self.setSplitRule(None,1,JediTaskSpec.splitRuleToken[tmpKey])
        if 'ipConnectivity' in taskParamMap:
            self.taskSpec.setIpConnectivity(taskParamMap['ipConnectivity'])
        if 'altStageOut' in taskParamMap:
            self.taskSpec.setAltStageOut(taskParamMap['altStageOut'])
        if 'allowInputLAN' in taskParamMap:
            self.taskSpec.setAllowInputLAN(taskParamMap['allowInputLAN'])
        for tmpKey in ('runUntilClosed', 'stayOutputOnSite'):
            if tmpKey in taskParamMap:
                self.setSplitRule(None,1,JediTaskSpec.splitRuleToken[tmpKey])
        if 'useJobCloning' in taskParamMap:
            scValue = EventServiceUtils.getJobCloningValue(taskParamMap['useJobCloning'])
            self.setSplitRule(None,scValue,JediTaskSpec.splitRuleToken['useJobCloning'])
        # NOTE: the parameter name and the token name differ here
        if 'failWhenGoalUnreached' in taskParamMap and taskParamMap['failWhenGoalUnreached'] == True:
            self.setSplitRule(None,1,JediTaskSpec.splitRuleToken['failGoalUnreached'])
        if 'switchEStoNormal' in taskParamMap:
            self.setSplitRule(None,1,JediTaskSpec.splitRuleToken['switchEStoNormal'])
        # NOTE: nEventsPerRange is stored under the dynamicNumEvents token
        if 'nEventsPerRange' in taskParamMap:
            self.setSplitRule(taskParamMap,'nEventsPerRange',JediTaskSpec.splitRuleToken['dynamicNumEvents'])
        # flags enabled only when the parameter compares equal to True
        for tmpKey in ('allowInputWAN', 'putLogToOS', 'mergeEsOnOS', 'writeInputToFile',
                       'useFileAsSourceLFN', 'ignoreMissingInDS', 'noExecStrCnv',
                       'inFilePosEvtNum'):
            if tmpKey in taskParamMap and taskParamMap[tmpKey] == True:
                self.setSplitRule(None,1,JediTaskSpec.splitRuleToken[tmpKey])
        if self.taskSpec.useEventService() and not taskSpec.useJobCloning():
            if 'registerEsFiles' in taskParamMap and taskParamMap['registerEsFiles'] == True:
                self.setSplitRule(None,1,JediTaskSpec.splitRuleToken['registerEsFiles'])
        for tmpKey in ('disableAutoFinish', 'resurrectConsumers', 'usePrefetcher',
                       'notDiscardEvents'):
            if tmpKey in taskParamMap and taskParamMap[tmpKey] == True:
                self.setSplitRule(None,1,JediTaskSpec.splitRuleToken[tmpKey])
        # flags enabled only when the parameter is exactly the True object
        for tmpKey in ('decAttOnFailedES', 'useZipToPin', 'osMatching'):
            if tmpKey in taskParamMap and taskParamMap[tmpKey] is True:
                self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken[tmpKey])
        # work queue
        workQueue = None
        if 'workQueueName' in taskParamMap:
            # work queue is specified
            workQueue = workQueueMapper.getQueueByName(taskSpec.vo, taskSpec.prodSourceLabel, taskParamMap['workQueueName'])
        if workQueue is None:
            # get work queue based on task attributes
            workQueue,_ = workQueueMapper.getQueueWithSelParams(taskSpec.vo,
                                                                taskSpec.prodSourceLabel,
                                                                prodSourceLabel=taskSpec.prodSourceLabel,
                                                                processingType=taskSpec.processingType,
                                                                workingGroup=taskSpec.workingGroup,
                                                                coreCount=taskSpec.coreCount,
                                                                site=taskSpec.site,
                                                                eventService=taskSpec.eventService,
                                                                splitRule=taskSpec.splitRule,
                                                                campaign=taskSpec.campaign)
        if workQueue is None:
            errStr  = 'workqueue is undefined for vo={0} label={1} '.format(taskSpec.vo,taskSpec.prodSourceLabel)
            errStr += 'processingType={0} workingGroup={1} coreCount={2} eventService={3} '.format(taskSpec.processingType,
                                                                                                   taskSpec.workingGroup,
                                                                                                   taskSpec.coreCount,
                                                                                                   taskSpec.eventService)
            errStr += 'splitRule={0} campaign={1}'.format(taskSpec.splitRule,taskSpec.campaign)
            raise RuntimeError(errStr)
        self.taskSpec.workQueue_ID = workQueue.queue_id

        # Initialize the global share
        if 'gshare' in taskParamMap and self.taskBufferIF.is_valid_share(taskParamMap['gshare']):
            # global share is specified and valid
            gshare = taskParamMap['gshare']
        else:
            # get share based on definition
            gshare = self.taskBufferIF.get_share_for_task(self.taskSpec)
            if gshare is None:
                # Should not happen. Undefined is set when no share is found
                gshare = 'Undefined'
        # assign in both branches; previously an explicitly requested share
        # was computed but never stored in the task spec
        self.taskSpec.gshare = gshare

        # Initialize the resource type
        try:
            self.taskSpec.resource_type = self.taskBufferIF.get_resource_type_task(self.taskSpec)
        except Exception:
            # best effort : fall back to a placeholder when the lookup fails
            self.taskSpec.resource_type = 'Undefined'

        # return
        return

Пример #16

Показать файл

Файл: AtlasProdPostProcessor.py Проект: PanDAWMS/panda-jedi

 def doPostProcess(self,taskSpec,tmpLog):
     """Run post-processing for a task and return a status code.

     Steps performed, in order:
       1. pre-check via self.doPreCheck(); a true result finishes immediately
       2. for each dataset of the task: remove files from output datasets
          that are not in the list of successful files, freeze output/log
          datasets, and delete transient output datasets
       3. pause the task when duplication is found for a finished/done task
       4. delete the event-service dataset when taskSpec.registerEsFiles()
       5. self.doBasicPostProcess()

     :param taskSpec: task spec providing vo, jediTaskID, datasetSpecList
                      and registerEsFiles()
     :param tmpLog: logger with debug/info/warning/error methods
     :return: self.SC_SUCCEEDED on success, self.SC_FAILED when cleaning or
              freezing a dataset fails, self.SC_FATAL when the pre-check or
              the basic post-processing raises
     """
     # pre-check
     try:
         tmpStat = self.doPreCheck(taskSpec,tmpLog)
     except Exception as tmpErr:
         tmpLog.error('doPreCheck failed with {0}:{1}'.format(type(tmpErr).__name__,tmpErr))
         return self.SC_FATAL
     else:
         if tmpStat:
             return self.SC_SUCCEEDED
     # get DDM I/F
     ddmIF = self.ddmIF.getInterface(taskSpec.vo)
     # loop over all datasets
     for datasetSpec in taskSpec.datasetSpecList:
         # skip pseudo output datasets
         if datasetSpec.type in ['output'] and datasetSpec.isPseudo():
             continue
         try:
             # remove wrong files
             if datasetSpec.type in ['output']:
                 # get successful files
                 okFiles = self.taskBufferIF.getSuccessfulFiles_JEDI(datasetSpec.jediTaskID,datasetSpec.datasetID)
                 if okFiles is None:
                     tmpLog.warning('failed to get successful files for {0}'.format(datasetSpec.datasetName))
                     return self.SC_FAILED
                 # get files in dataset
                 ddmFiles = ddmIF.getFilesInDataset(datasetSpec.datasetName,skipDuplicate=False,ignoreUnknown=True)
                 tmpLog.debug('datasetID={0}:Name={1} has {2} files in DB, {3} files in DDM'.format(datasetSpec.datasetID,
                                                                                                   datasetSpec.datasetName,
                                                                                                   len(okFiles),len(ddmFiles)))
                 # check all files ; only the attribute maps are needed, not the keys
                 toDelete = []
                 for attMap in ddmFiles.values():
                     if attMap['lfn'] not in okFiles:
                         did = {'scope':attMap['scope'], 'name':attMap['lfn']}
                         toDelete.append(did)
                         tmpLog.debug('delete {0} from {1}'.format(attMap['lfn'],datasetSpec.datasetName))
                 # delete
                 if toDelete:
                     ddmIF.deleteFilesFromDataset(datasetSpec.datasetName,toDelete)
         except Exception as tmpErr:
             tmpLog.warning('failed to remove wrong files with {0}:{1}'.format(type(tmpErr).__name__,tmpErr))
             return self.SC_FAILED
         try:
             # freeze output and log datasets
             if datasetSpec.type in ['output','log','trn_log']:
                 tmpLog.info('freezing datasetID={0}:Name={1}'.format(datasetSpec.datasetID,datasetSpec.datasetName))
                 ddmIF.freezeDataset(datasetSpec.datasetName,ignoreUnknown=True)
         except Exception as tmpErr:
             tmpLog.warning('failed to freeze datasets with {0}:{1}'.format(type(tmpErr).__name__,tmpErr))
             return self.SC_FAILED
         try:
             # delete transient datasets
             if datasetSpec.type in ['trn_output']:
                 tmpLog.debug('deleting datasetID={0}:Name={1}'.format(datasetSpec.datasetID,datasetSpec.datasetName))
                 retStr = ddmIF.deleteDataset(datasetSpec.datasetName,False,ignoreUnknown=True)
                 tmpLog.info(retStr)
         except Exception as tmpErr:
             # a failed deletion is only reported ; processing continues
             tmpLog.warning('failed to delete datasets with {0}:{1}'.format(type(tmpErr).__name__,tmpErr))
     # check duplication
     if self.getFinalTaskStatus(taskSpec) in ['finished','done']:
         nDup = self.taskBufferIF.checkDuplication_JEDI(taskSpec.jediTaskID)
         tmpLog.debug('checked duplication with {0}'.format(nDup))
         if nDup > 0:
             errStr = 'paused since {0} duplication found'.format(nDup)
             taskSpec.oldStatus = self.getFinalTaskStatus(taskSpec)
             taskSpec.status = 'paused'
             taskSpec.setErrDiag(errStr)
             tmpLog.debug(errStr)
     # delete ES datasets
     if taskSpec.registerEsFiles():
         try:
             targetName = EventServiceUtils.getEsDatasetName(taskSpec.jediTaskID)
             tmpLog.debug('deleting ES dataset name={0}'.format(targetName))
             retStr = ddmIF.deleteDataset(targetName,False,ignoreUnknown=True)
             tmpLog.debug(retStr)
         except Exception as tmpErr:
             # a failed deletion is only reported ; processing continues
             tmpLog.warning('failed to delete ES dataset with {0}:{1}'.format(type(tmpErr).__name__,tmpErr))
     try:
         self.doBasicPostProcess(taskSpec,tmpLog)
     except Exception as tmpErr:
         tmpLog.error('doBasicPostProcess failed with {0}:{1}'.format(type(tmpErr).__name__,tmpErr))
         return self.SC_FATAL
     return self.SC_SUCCEEDED