def getStatus(self, strIDs, timeout):
     """Return job status and attempt number for a set of PandaIDs.

     strIDs is a whitespace-separated ID string; the reply carries
     '+'-joined 'status' and 'attemptNr' nodes, one entry per ID.
     """
     # whitespace-separated string -> list of IDs
     pandaIDs = strIDs.split()
     # peek the jobs with a timeout guard
     timedCall = _TimedMethod(self.taskBuffer.peekJobs, timeout)
     timedCall.run(pandaIDs, False, True, True, False)
     result = timedCall.result
     # build the response
     if result == Protocol.TimeOutToken:
         # task buffer call timed out
         response = Protocol.Response(Protocol.SC_TimeOut)
     elif isinstance(result, types.ListType):
         # succeeded: collect one status/attemptNr entry per job
         response = Protocol.Response(Protocol.SC_Success)
         statusItems = []
         attemptItems = []
         for job in result:
             if job == None:
                 statusItems.append('notfound')
                 attemptItems.append('0')
             else:
                 statusItems.append('%s' % job.jobStatus)
                 attemptItems.append('%s' % job.attemptNr)
         response.appendNode('status', '+'.join(statusItems))
         response.appendNode('attemptNr', '+'.join(attemptItems))
     else:
         # lookup failed
         response = Protocol.Response(Protocol.SC_Failed)
     _logger.debug("getStatus : %s ret -> %s" % (strIDs, response.encode()))
     return response.encode()
 def getKeyPair(self, realDN, publicKeyName, privateKeyName):
     """Return the named public/private key pair to an authorized caller.

     The caller's compact DN must appear in specialDispatchParams['allowKey'];
     responds with SC_Perms (no DN or not authorized), SC_MissKey (either key
     absent on this host) or SC_Success with 'publicKey'/'privateKey' nodes.
     Always returns the encoded Protocol.Response.
     """
     tmpMsg = "getKeyPair {0}/{1} : ".format(publicKeyName, privateKeyName)
     if realDN == None:
         # cannot extract DN
         tmpMsg += "failed since DN cannot be extracted"
         _logger.debug(tmpMsg)
         response = Protocol.Response(
             Protocol.SC_Perms, 'Cannot extract DN from proxy. not HTTPS?')
     else:
         # get compact DN
         compactDN = self.taskBuffer.cleanUserID(realDN)
         # check permission; refresh the cached dispatch parameters first
         self.specialDispatchParams.update()
         if not 'allowKey' in self.specialDispatchParams:
             allowKey = []
         else:
             allowKey = self.specialDispatchParams['allowKey']
         if not compactDN in allowKey:
             # permission denied
             tmpMsg += "failed since '{0}' not in the authorized user list who have 'k' in {1}.USERS.GRIDPREF".format(
                 compactDN, panda_config.schemaMETA)
             _logger.debug(tmpMsg)
             response = Protocol.Response(Protocol.SC_Perms, tmpMsg)
         else:
             # look for key pair
             if not 'keyPair' in self.specialDispatchParams:
                 keyPair = {}
             else:
                 keyPair = self.specialDispatchParams['keyPair']
             notFound = False
             if not publicKeyName in keyPair:
                 # public key is missing
                 notFound = True
                 tmpMsg += "failed for '{2}' since {0} is missing on {1}".format(
                     publicKeyName, socket.getfqdn(), compactDN)
             elif not privateKeyName in keyPair:
                 # private key is missing
                 notFound = True
                 tmpMsg += "failed for '{2}' since {0} is missing on {1}".format(
                     privateKeyName, socket.getfqdn(), compactDN)
             if notFound:
                 # private or public key is missing
                 _logger.debug(tmpMsg)
                 response = Protocol.Response(Protocol.SC_MissKey, tmpMsg)
             else:
                 # key pair is available
                 response = Protocol.Response(Protocol.SC_Success)
                 response.appendNode('publicKey', keyPair[publicKeyName])
                 response.appendNode('privateKey', keyPair[privateKeyName])
                 tmpMsg += "sent key-pair to '{0}'".format(compactDN)
                 _logger.debug(tmpMsg)
     # return
     return response.encode()
 def updateEventRanges(self, eventRanges, timeout, acceptJson, version):
     """Update a set of event ranges via the task buffer, guarded by a timeout."""
     timedCall = _TimedMethod(self.taskBuffer.updateEventRanges, timeout)
     timedCall.run(eventRanges, version)
     result = timedCall.result
     # build the response
     if result == Protocol.TimeOutToken:
         # task buffer call timed out
         response = Protocol.Response(Protocol.SC_TimeOut)
     else:
         # succeeded: result carries (return values, command)
         response = Protocol.Response(Protocol.SC_Success)
         response.appendNode('Returns', result[0])
         response.appendNode('Command', result[1])
     _logger.debug("updateEventRanges : ret -> %s" %
                   (response.encode(acceptJson)))
     return response.encode(acceptJson)
# Example 4
    def ackCommands(self, command_ids, timeout, accept_json):
        """
        Acknowledge the commands from a list of IDs
        """
        _logger.debug("command_ids : {0}".format(command_ids))
        timed_call = _TimedMethod(self.taskBuffer.ackCommands, timeout)
        timed_call.run(command_ids)

        result = timed_call.result
        if result == Protocol.TimeOutToken:
            # the task buffer call timed out
            response = Protocol.Response(Protocol.SC_TimeOut)
        else:
            # acknowledged; relay the task buffer's return value
            response = Protocol.Response(Protocol.SC_Success)
            response.appendNode('Returns', result)

        _logger.debug("ackCommands : ret -> %s" %
                      (response.encode(accept_json)))
        return response.encode(accept_json)
# Example 5
    def getCommands(self, harvester_id, n_commands, timeout, accept_json):
        """
        Get commands for a particular harvester instance
        """
        timed_call = _TimedMethod(self.taskBuffer.getCommands, timeout)
        timed_call.run(harvester_id, n_commands)

        result = timed_call.result
        if result == Protocol.TimeOutToken:
            # the task buffer call timed out
            response = Protocol.Response(Protocol.SC_TimeOut)
        else:
            # succeeded: result carries (return value, command list)
            response = Protocol.Response(Protocol.SC_Success)
            response.appendNode('Returns', result[0])
            response.appendNode('Commands', result[1])

        _logger.debug("getCommands : ret -> %s" %
                      (response.encode(accept_json)))
        return response.encode(accept_json)
 def getEventRanges(self, pandaID, jobsetID, jediTaskID, nRanges, timeout,
                    acceptJson):
     """Fetch up to nRanges event ranges for a job, guarded by a timeout."""
     timedCall = _TimedMethod(self.taskBuffer.getEventRanges, timeout)
     timedCall.run(pandaID, jobsetID, jediTaskID, nRanges, acceptJson)
     result = timedCall.result
     # build the response
     if result == Protocol.TimeOutToken:
         # task buffer call timed out
         response = Protocol.Response(Protocol.SC_TimeOut)
     elif result != None:
         # got event ranges
         response = Protocol.Response(Protocol.SC_Success)
         response.appendNode('eventRanges', result)
     else:
         # lookup failed
         response = Protocol.Response(Protocol.SC_Failed)
     _logger.debug("getEventRanges : %s ret -> %s" %
                   (pandaID, response.encode(acceptJson)))
     return response.encode(acceptJson)
 def updateEventRange(self, eventRangeID, eventStatus, coreCount,
                      cpuConsumptionTime, objstoreID, timeout):
     """Update a single event range; relay any command the task buffer returns."""
     timedCall = _TimedMethod(self.taskBuffer.updateEventRange, timeout)
     timedCall.run(eventRangeID, eventStatus, coreCount,
                   cpuConsumptionTime, objstoreID)
     result = timedCall.result
     _logger.debug(str(result))
     # build the response
     if result == Protocol.TimeOutToken:
         # task buffer call timed out
         response = Protocol.Response(Protocol.SC_TimeOut)
     elif result[0] == True:
         # updated; result[1] may carry a command for the pilot
         response = Protocol.Response(Protocol.SC_Success)
         response.appendNode('Command', result[1])
     else:
         # update failed
         response = Protocol.Response(Protocol.SC_Failed)
     _logger.debug("updateEventRange : %s ret -> %s" %
                   (eventRangeID, response.encode()))
     return response.encode()
# Example 8
 def getProxy(self, realDN, role):
     """Return a user proxy to an authorized requester.

     Only compact DNs listed in specialDispatchParams['allowProxy'] may
     retrieve a proxy. Responds with SC_Perms (no DN or not authorized),
     SC_Success with the proxy attached by setUserProxy, or SC_Success
     overridden by an SC_ProxyError StatusCode node when retrieval fails.
     Always returns the JSON-encoded Protocol.Response.
     """
     tmpMsg = "getProxy DN={0} role={1} : ".format(realDN, role)
     if realDN == None:
         # cannot extract DN
         tmpMsg += "failed since DN cannot be extracted"
         _logger.debug(tmpMsg)
         response = Protocol.Response(
             Protocol.SC_Perms, 'Cannot extract DN from proxy. not HTTPS?')
     else:
         # get compact DN
         compactDN = self.taskBuffer.cleanUserID(realDN)
         # check permission; refresh the cached dispatch parameters first
         self.specialDispatchParams.update()
         if not 'allowProxy' in self.specialDispatchParams:
             allowProxy = []
         else:
             allowProxy = self.specialDispatchParams['allowProxy']
         if not compactDN in allowProxy:
             # permission denied
             tmpMsg += "failed since '{0}' not in the authorized user list who have 'p' in {1}.USERS.GRIDPREF ".format(
                 compactDN, panda_config.schemaMETA)
             tmpMsg += "to get proxy"
             _logger.debug(tmpMsg)
             response = Protocol.Response(Protocol.SC_Perms, tmpMsg)
         else:
             # get proxy
             response = Protocol.Response(Protocol.SC_Success, '')
             tmpStat, tmpMsg = self.setUserProxy(response, realDN, role)
             if not tmpStat:
                 # proxy retrieval failed; flag it via the StatusCode node
                 _logger.debug(tmpMsg)
                 response.appendNode('StatusCode', Protocol.SC_ProxyError)
             else:
                 tmpMsg = "sent proxy"
                 _logger.debug(tmpMsg)
     # return
     return response.encode(True)
def updateJob(req,
              jobId,
              state,
              token=None,
              transExitCode=None,
              pilotErrorCode=None,
              pilotErrorDiag=None,
              timestamp=None,
              timeout=60,
              xml='',
              node=None,
              workdir=None,
              cpuConsumptionTime=None,
              cpuConsumptionUnit=None,
              remainingSpace=None,
              schedulerID=None,
              pilotID=None,
              siteName=None,
              messageLevel=None,
              pilotLog='',
              metaData='',
              cpuConversionFactor=None,
              exeErrorCode=None,
              exeErrorDiag=None,
              pilotTiming=None,
              computingElement=None,
              startTime=None,
              endTime=None,
              nEvents=None,
              nInputFiles=None,
              batchID=None,
              attemptNr=None,
              jobMetrics=None,
              stdout='',
              jobSubStatus=None,
              coreCount=None,
              maxRSS=None,
              maxVMEM=None,
              maxSWAP=None,
              maxPSS=None,
              avgRSS=None,
              avgVMEM=None,
              avgSWAP=None,
              avgPSS=None):
    """Web entry point for pilot job-state updates (heartbeats).

    Validates the caller's production role and dispatcher token, optionally
    forwards the pilot log to the central PandaLogger, packs the supplied
    attributes into a parameter map (truncating several string fields to
    their DB column widths), parses start/end times in '%Y-%m-%d %H:%M:%S'
    format, and delegates to jobDispatcher.updateJob. Returns an encoded
    Protocol.Response; invalid role/token and unknown states short-circuit
    with SC_Role / SC_Invalid / SC_Success respectively.
    """
    tmpLog = LogWrapper(
        _logger, 'updateJob PandaID={0} PID={1}'.format(jobId, os.getpid()))
    tmpLog.debug('start')
    # get DN
    realDN = _getDN(req)
    # get FQANs
    fqans = _getFQAN(req)
    # check production role
    prodManager = _checkRole(fqans,
                             realDN,
                             jobDispatcher,
                             site=siteName,
                             hostname=req.get_remote_host())
    # check token
    validToken = _checkToken(token, jobDispatcher)
    # accept json
    acceptJson = req.acceptJson()
    _logger.debug(
        "updateJob(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,attemptNr:%s,jobSubStatus:%s,core:%s,DN:%s,role:%s,token:%s,val:%s,FQAN:%s,maxRSS=%s,maxVMEM=%s,maxSWAP=%s,maxPSS=%s,avgRSS=%s,avgVMEM=%s,avgSWAP=%s,avgPSS=%s\n==XML==\n%s\n==LOG==\n%s\n==Meta==\n%s\n==Metrics==\n%s\n==stdout==\n%s)"
        % (jobId, state, transExitCode, pilotErrorCode, pilotErrorDiag, node,
           workdir, cpuConsumptionTime, cpuConsumptionUnit, remainingSpace,
           schedulerID, pilotID, siteName, messageLevel, nEvents, nInputFiles,
           cpuConversionFactor, exeErrorCode, exeErrorDiag, pilotTiming,
           computingElement, startTime, endTime, batchID, attemptNr,
           jobSubStatus, coreCount, realDN, prodManager, token, validToken,
           str(fqans), maxRSS, maxVMEM, maxSWAP, maxPSS, avgRSS, avgVMEM,
           avgSWAP, avgPSS, xml, pilotLog, metaData, jobMetrics, stdout))
    _pilotReqLogger.info('method=updateJob,site=%s,node=%s,type=None' %
                         (siteName, node))
    # invalid role
    if not prodManager:
        _logger.warning("updateJob(%s) : invalid role" % jobId)
        return Protocol.Response(Protocol.SC_Role).encode(acceptJson)
    # invalid token
    if not validToken:
        _logger.warning("updateJob(%s) : invalid token" % jobId)
        return Protocol.Response(Protocol.SC_Invalid).encode(acceptJson)
    # aborting message
    if jobId == 'NULL':
        return Protocol.Response(Protocol.SC_Success).encode(acceptJson)
    # check status; unknown states are acknowledged but otherwise ignored
    if not state in [
            'running', 'failed', 'finished', 'holding', 'starting',
            'transferring'
    ]:
        _logger.warning("invalid state=%s for updateJob" % state)
        return Protocol.Response(Protocol.SC_Success).encode(acceptJson)
    # pilot log: forward to the central HTTP logger, best-effort
    tmpLog.debug('sending log')
    if pilotLog != '':
        try:
            # make message
            message = pilotLog
            # get logger
            _pandaLogger = PandaLogger()
            _pandaLogger.lock()
            _pandaLogger.setParams({'Type': 'pilotLog', 'PandaID': int(jobId)})
            logger = _pandaLogger.getHttpLogger(panda_config.loggername)
            # add message
            logger.info(message)
        except:
            tmpLog.debug('failed to send log')
        finally:
            tmpLog.debug('release lock')
            try:
                # release HTTP handler
                _pandaLogger.release()
            except:
                pass
    tmpLog.debug('done log')
    # create parameter map; only attributes the pilot actually supplied go in
    param = {}
    if cpuConsumptionTime != None:
        param['cpuConsumptionTime'] = cpuConsumptionTime
    if cpuConsumptionUnit != None:
        param['cpuConsumptionUnit'] = cpuConsumptionUnit
    if node != None:
        param['modificationHost'] = node[:128]
    if transExitCode != None:
        param['transExitCode'] = transExitCode
    if pilotErrorCode != None:
        param['pilotErrorCode'] = pilotErrorCode
    if pilotErrorDiag != None:
        param['pilotErrorDiag'] = pilotErrorDiag[:500]
    if jobMetrics != None:
        param['jobMetrics'] = jobMetrics[:500]
    if schedulerID != None:
        param['schedulerID'] = schedulerID
    if pilotID != None:
        param['pilotID'] = pilotID[:200]
    if batchID != None:
        param['batchID'] = batchID[:80]
    if exeErrorCode != None:
        param['exeErrorCode'] = exeErrorCode
    if exeErrorDiag != None:
        param['exeErrorDiag'] = exeErrorDiag[:500]
    if cpuConversionFactor != None:
        param['cpuConversion'] = cpuConversionFactor
    if pilotTiming != None:
        param['pilotTiming'] = pilotTiming
    if computingElement != None:
        param['computingElement'] = computingElement
    if nEvents != None:
        param['nEvents'] = nEvents
    if nInputFiles != None:
        param['nInputFiles'] = nInputFiles
    if not jobSubStatus in [None, '']:
        param['jobSubStatus'] = jobSubStatus
    if not coreCount in [None, '']:
        param['actualCoreCount'] = coreCount
    if maxRSS != None:
        param['maxRSS'] = maxRSS
    if maxVMEM != None:
        param['maxVMEM'] = maxVMEM
    if maxSWAP != None:
        param['maxSWAP'] = maxSWAP
    if maxPSS != None:
        param['maxPSS'] = maxPSS
    if avgRSS != None:
        param['avgRSS'] = avgRSS
    if avgVMEM != None:
        param['avgVMEM'] = avgVMEM
    if avgSWAP != None:
        param['avgSWAP'] = avgSWAP
    if avgPSS != None:
        param['avgPSS'] = avgPSS
    # timestamps: unparsable values are silently dropped
    if startTime != None:
        try:
            param['startTime'] = datetime.datetime(
                *time.strptime(startTime, '%Y-%m-%d %H:%M:%S')[:6])
        except:
            pass
    if endTime != None:
        try:
            param['endTime'] = datetime.datetime(
                *time.strptime(endTime, '%Y-%m-%d %H:%M:%S')[:6])
        except:
            pass
    if attemptNr != None:
        try:
            attemptNr = int(attemptNr)
        except:
            attemptNr = None
    if stdout != '':
        stdout = stdout[:2048]
    # invoke JD
    tmpLog.debug('executing')
    return jobDispatcher.updateJob(int(jobId), state, int(timeout), xml,
                                   siteName, param, metaData, attemptNr,
                                   stdout, acceptJson)
# Example 10
def getJob(req,
           siteName,
           token=None,
           timeout=60,
           cpu=None,
           mem=None,
           diskSpace=None,
           prodSourceLabel=None,
           node=None,
           computingElement=None,
           AtlasRelease=None,
           prodUserID=None,
           getProxyKey=None,
           countryGroup=None,
           workingGroup=None,
           allowOtherCountry=None,
           taskID=None,
           nJobs=None):
    """Web entry point for pilots to pull jobs for a site.

    Checks the caller's production role and dispatcher token, normalizes
    mem/diskSpace to non-negative integers (defaulting to 0 on parse
    failure), resolves getProxyKey to a bool for production roles only,
    and delegates to jobDispatcher.getJob. Returns an encoded
    Protocol.Response; SC_Role / SC_Invalid on validation failure.
    """
    _logger.debug("getJob(%s)" % siteName)
    # get DN
    realDN = _getDN(req)
    # get FQANs
    fqans = _getFQAN(req)
    # check production role
    if getProxyKey == 'True':
        # don't use /atlas to prevent normal proxy getting credname
        prodManager = _checkRole(fqans,
                                 realDN,
                                 jobDispatcher,
                                 False,
                                 site=siteName)
    else:
        prodManager = _checkRole(fqans,
                                 realDN,
                                 jobDispatcher,
                                 site=siteName,
                                 hostname=req.get_remote_host())
    # check token
    validToken = _checkToken(token, jobDispatcher)
    # set DN for non-production user
    if not prodManager:
        prodUserID = realDN
    # allow getProxyKey for production role
    if getProxyKey == 'True' and prodManager:
        getProxyKey = True
    else:
        getProxyKey = False
    # convert mem and diskSpace; bad or missing values fall back to 0
    try:
        mem = int(float(mem))
        if mem < 0:
            mem = 0
    except:
        mem = 0
    try:
        diskSpace = int(float(diskSpace))
        if diskSpace < 0:
            diskSpace = 0
    except:
        diskSpace = 0
    _logger.debug("getJob(%s,nJobs=%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,taskID=%s,DN:%s,role:%s,token:%s,val:%s,FQAN:%s,json:%s)" \
                  % (siteName,nJobs,cpu,mem,diskSpace,prodSourceLabel,node,
                     computingElement,AtlasRelease,prodUserID,getProxyKey,countryGroup,workingGroup,
                     allowOtherCountry,taskID,realDN,prodManager,token,validToken,str(fqans),req.acceptJson()))
    _pilotReqLogger.info('method=getJob,site=%s,node=%s,type=%s' %
                         (siteName, node, prodSourceLabel))
    # invalid role; analysis pilots (prodSourceLabel 'user') are exempt
    if (not prodManager) and (not prodSourceLabel in ['user']):
        _logger.warning("getJob(%s) : invalid role" % siteName)
        return Protocol.Response(Protocol.SC_Role).encode(req.acceptJson())
    # invalid token
    if not validToken:
        _logger.warning("getJob(%s) : invalid token" % siteName)
        return Protocol.Response(Protocol.SC_Invalid).encode(req.acceptJson())
    # invoke JD
    return jobDispatcher.getJob(siteName,
                                prodSourceLabel, cpu, mem, diskSpace, node,
                                int(timeout), computingElement, AtlasRelease,
                                prodUserID, getProxyKey, countryGroup,
                                workingGroup, allowOtherCountry, realDN,
                                taskID, nJobs, req.acceptJson())
# Example 11
 def updateJob(self,
               jobID,
               jobStatus,
               timeout,
               xml,
               siteName,
               param,
               metadata,
               attemptNr=None,
               stdout='',
               acceptJson=False):
     """Apply a pilot's job-state update to the task buffer.

     Failed jobs with retryable pilot error codes (negative codes,
     '1200'/'1201', or the recoverable ES-merge set) and DDM-site jobs are
     retried instead of updated. Otherwise metadata/stdout are stored,
     finished/failed states are mapped to 'holding' (with stateChangeTime
     refreshed so the Watcher skips the job), and updateJobStatus runs
     under a timeout (no timeout for 'holding'). The response relays any
     command string the task buffer returns and, for final states, spawns
     an Adder thread to register outputs. Returns the encoded response.
     """
     # recoverable error for ES merge
     recoverableEsMerge = False
     if 'pilotErrorCode' in param and param['pilotErrorCode'] in [
             '1099', '1137', '1151', '1152', '1221', '1224', '1225'
     ]:
         recoverableEsMerge = True
     # retry failed analysis job and ddm job
     if jobStatus=='failed' \
             and ((param.has_key('pilotErrorCode') and (param['pilotErrorCode'] in ['1200','1201'] \
                                                            or param['pilotErrorCode'].startswith('-') \
                                                            or recoverableEsMerge)) \
                      or (siteName != None and siteName.find('DDM') != -1)):
         # retry
         if param.has_key('pilotErrorCode') and (param['pilotErrorCode'].startswith('-') or \
                                                     recoverableEsMerge):
             # pilot retry with new PandaID. Negative codes or ESMERGERECOVERABLE
             ret = self.taskBuffer.retryJob(
                 jobID,
                 param,
                 getNewPandaID=True,
                 attemptNr=attemptNr,
                 recoverableEsMerge=recoverableEsMerge)
         else:
             # old style
             ret = self.taskBuffer.retryJob(jobID,
                                            param,
                                            attemptNr=attemptNr)
         if ret:
             # return succeed
             response = Protocol.Response(Protocol.SC_Success)
             return response.encode(acceptJson)
     # add metadata; a recorded failure still returns success to the pilot
     if metadata != '':
         ret = self.taskBuffer.addMetadata([jobID], [metadata], [jobStatus])
         if len(ret) > 0 and not ret[0]:
             _logger.debug("updateJob : %s failed to add metadata" % jobID)
             # return succeed
             response = Protocol.Response(Protocol.SC_Success)
             return response.encode(acceptJson)
     # add stdout
     if stdout != '':
         self.taskBuffer.addStdOut(jobID, stdout)
     # update
     tmpStatus = jobStatus
     updateStateChange = False
     if jobStatus == 'failed' or jobStatus == 'finished':
         tmpStatus = 'holding'
         # update stateChangeTime to prevent Watcher from finding this job
         updateStateChange = True
         param['jobDispatcherErrorDiag'] = None
     elif jobStatus in ['holding', 'transferring']:
         param[
             'jobDispatcherErrorDiag'] = 'set to {0} by the pilot at {1}'.format(
                 jobStatus,
                 datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'))
     # 'holding' updates must not be cut short, so no timeout there
     if tmpStatus == 'holding':
         tmpWrapper = _TimedMethod(self.taskBuffer.updateJobStatus, None)
     else:
         tmpWrapper = _TimedMethod(self.taskBuffer.updateJobStatus, timeout)
     tmpWrapper.run(jobID, tmpStatus, param, updateStateChange, attemptNr)
     # make response
     if tmpWrapper.result == Protocol.TimeOutToken:
         # timeout
         response = Protocol.Response(Protocol.SC_TimeOut)
     else:
         if tmpWrapper.result:
             # succeed
             response = Protocol.Response(Protocol.SC_Success)
             # set command
             if isinstance(tmpWrapper.result, types.StringType):
                 response.appendNode('command', tmpWrapper.result)
             else:
                 response.appendNode('command', 'NULL')
             # add output to dataset
             if not tmpWrapper.result in ["badattemptnr", "alreadydone"
                                          ] and (jobStatus == 'failed' or
                                                 jobStatus == 'finished'):
                 Adder(self.taskBuffer,
                       jobID,
                       xml,
                       jobStatus,
                       attemptNr=attemptNr).start()
         else:
             # failed
             response = Protocol.Response(Protocol.SC_Failed)
     _logger.debug("updateJob : %s ret -> %s" %
                   (jobID, response.encode(acceptJson)))
     return response.encode(acceptJson)
# Example 12
 def getJob(self, siteName, prodSourceLabel, cpu, mem, diskSpace, node,
            timeout, computingElement, atlasRelease, prodUserID,
            getProxyKey, countryGroup, workingGroup, allowOtherCountry,
            realDN, taskID, nJobs, acceptJson):
     """Fetch up to nJobs jobs from the task buffer and build the pilot response.

     Uses getJobsGShare when global shares are enabled, otherwise getJobs,
     both under a timeout. The returned list carries the proxy key and the
     nSent counter as its last two elements. For each job a per-job response
     is built: nSent, optional proxy key, a user proxy when glexec or proxy
     cache applies and the DN is in 'allowProxy', and a panda-proxy secret
     key for event-service jobs at configured sites. When nJobs is given the
     per-job payloads are wrapped into one bulk 'jobs' node (JSON-encoded
     for non-JSON clients). Returns SC_TimeOut / SC_NoJobs otherwise.
     """
     jobs = []
     useGLEXEC = False
     useProxyCache = False
     # nJobs arrives as a string; default to a single job on parse failure
     try:
         tmpNumJobs = int(nJobs)
     except:
         tmpNumJobs = None
     if tmpNumJobs == None:
         tmpNumJobs = 1
     # wrapper function for timeout
     if hasattr(panda_config,
                'global_shares') and panda_config.global_shares == True:
         tmpWrapper = _TimedMethod(self.taskBuffer.getJobsGShare, timeout)
     else:
         tmpWrapper = _TimedMethod(self.taskBuffer.getJobs, timeout)
     tmpWrapper.run(tmpNumJobs, siteName, prodSourceLabel, cpu, mem,
                    diskSpace, node, timeout, computingElement,
                    atlasRelease, prodUserID, getProxyKey, countryGroup,
                    workingGroup, allowOtherCountry, taskID)
     if isinstance(tmpWrapper.result, types.ListType):
         jobs = jobs + tmpWrapper.result
     # make response; the list's tail holds (nSent, proxyKey)
     if len(jobs) > 0:
         proxyKey = jobs[-1]
         nSent = jobs[-2]
         jobs = jobs[:-2]
     if len(jobs) != 0:
         # succeed
         self.siteMapperCache.update()
         responseList = []
         # append Jobs
         for tmpJob in jobs:
             response = Protocol.Response(Protocol.SC_Success)
             response.appendJob(tmpJob, self.siteMapperCache)
             # append nSent
             response.appendNode('nSent', nSent)
             # set proxy key
             if getProxyKey:
                 response.setProxyKey(proxyKey)
             # check if glexec or proxy cache is used
             if hasattr(panda_config, 'useProxyCache'
                        ) and panda_config.useProxyCache == True:
                 self.specialDispatchParams.update()
                 if not 'glexecSites' in self.specialDispatchParams:
                     glexecSites = {}
                 else:
                     glexecSites = self.specialDispatchParams['glexecSites']
                 if siteName in glexecSites:
                     if glexecSites[siteName] == 'True':
                         useGLEXEC = True
                     elif glexecSites[siteName] == 'test' and \
                             (prodSourceLabel in ['test','prod_test'] or \
                                  (tmpJob.processingType in ['gangarobot'])):
                         useGLEXEC = True
                 if not 'proxyCacheSites' in self.specialDispatchParams:
                     proxyCacheSites = {}
                 else:
                     proxyCacheSites = self.specialDispatchParams[
                         'proxyCacheSites']
                 if siteName in proxyCacheSites:
                     useProxyCache = True
             # set proxy; failures are logged but do not block the job
             if useGLEXEC or useProxyCache:
                 try:
                     #  get compact
                     compactDN = self.taskBuffer.cleanUserID(realDN)
                     # check permission
                     self.specialDispatchParams.update()
                     if not 'allowProxy' in self.specialDispatchParams:
                         allowProxy = []
                     else:
                         allowProxy = self.specialDispatchParams[
                             'allowProxy']
                     if not compactDN in allowProxy:
                         _logger.warning(
                             "getJob : %s %s '%s' no permission to retrive user proxy"
                             % (siteName, node, compactDN))
                     else:
                         if useProxyCache:
                             tmpStat, tmpOut = response.setUserProxy(
                                 proxyCacheSites[siteName]['dn'],
                                 proxyCacheSites[siteName]['role'])
                         else:
                             tmpStat, tmpOut = response.setUserProxy()
                         if not tmpStat:
                             _logger.warning(
                                 "getJob : %s %s failed to get user proxy : %s"
                                 % (siteName, node, tmpOut))
                 except:
                     errtype, errvalue = sys.exc_info()[:2]
                     _logger.warning(
                         "getJob : %s %s failed to get user proxy with %s:%s"
                         % (siteName, node, errtype.__name__, errvalue))
             # panda proxy
             if 'pandaProxySites' in self.specialDispatchParams and siteName in self.specialDispatchParams['pandaProxySites'] \
                     and (EventServiceUtils.isEventServiceJob(tmpJob) or EventServiceUtils.isEventServiceMerge(tmpJob)):
                 # get secret key
                 tmpSecretKey, tmpErrMsg = DispatcherUtils.getSecretKey(
                     tmpJob.PandaID)
                 if tmpSecretKey == None:
                     _logger.warning(
                         "getJob : PandaID=%s site=%s failed to get panda proxy secret key : %s"
                         % (tmpJob.PandaID, siteName, tmpErrMsg))
                 else:
                     # set secret key
                     _logger.debug("getJob : PandaID=%s key=%s" %
                                   (tmpJob.PandaID, tmpSecretKey))
                     response.setPandaProxySecretKey(tmpSecretKey)
             # add
             responseList.append(response.data)
         # make response for bulk
         if nJobs != None:
             response = Protocol.Response(Protocol.SC_Success)
             if not acceptJson:
                 response.appendNode('jobs', json.dumps(responseList))
             else:
                 response.appendNode('jobs', responseList)
     else:
         if tmpWrapper.result == Protocol.TimeOutToken:
             # timeout
             response = Protocol.Response(Protocol.SC_TimeOut)
         else:
             # no available jobs
             response = Protocol.Response(Protocol.SC_NoJobs)
             _pilotReqLogger.info('method=noJob,site=%s,node=%s,type=%s' %
                                  (siteName, node, prodSourceLabel))
     # return
     _logger.debug("getJob : %s %s useGLEXEC=%s ret -> %s" %
                   (siteName, node, useGLEXEC, response.encode(acceptJson)))
     return response.encode(acceptJson)
# Example 13
def updateJob(req,
              jobId,
              state,
              token=None,
              transExitCode=None,
              pilotErrorCode=None,
              pilotErrorDiag=None,
              timestamp=None,
              timeout=60,
              xml='',
              node=None,
              workdir=None,
              cpuConsumptionTime=None,
              cpuConsumptionUnit=None,
              remainingSpace=None,
              schedulerID=None,
              pilotID=None,
              siteName=None,
              messageLevel=None,
              pilotLog='',
              metaData='',
              cpuConversionFactor=None,
              exeErrorCode=None,
              exeErrorDiag=None,
              pilotTiming=None,
              computingElement=None,
              startTime=None,
              endTime=None,
              nEvents=None,
              nInputFiles=None,
              batchID=None,
              attemptNr=None,
              jobMetrics=None,
              stdout='',
              jobSubStatus=None,
              coreCount=None,
              maxRSS=None,
              maxVMEM=None,
              maxSWAP=None,
              maxPSS=None,
              avgRSS=None,
              avgVMEM=None,
              avgSWAP=None,
              avgPSS=None,
              totRCHAR=None,
              totWCHAR=None,
              totRBYTES=None,
              totWBYTES=None,
              rateRCHAR=None,
              rateWCHAR=None,
              rateRBYTES=None,
              rateWBYTES=None):
    """Web entry point for a pilot to report a job status update.

    Validates the caller (production/pilot role via VOMS FQANs, optional
    dispatcher token), converts the flat pilot-supplied keyword arguments
    into a parameter map, and hands the update to
    jobDispatcher.updateJob(). Returns the encoded Protocol.Response
    string (JSON or URL-encoded, depending on what the client accepts).

    Early-return cases: invalid role -> SC_Role, invalid token ->
    SC_Invalid, jobId == 'NULL' (aborting message) or an unrecognized
    state -> SC_Success without touching the DB.
    """
    tmpLog = LogWrapper(
        _logger, 'updateJob PandaID={0} PID={1}'.format(jobId, os.getpid()))
    tmpLog.debug('start')
    # get DN
    realDN = _getDN(req)
    # get FQANs
    fqans = _getFQAN(req)
    # check production role
    prodManager = _checkRole(fqans,
                             realDN,
                             jobDispatcher,
                             site=siteName,
                             hostname=req.get_remote_host())
    # check token
    validToken = _checkToken(token, jobDispatcher)
    # accept json
    acceptJson = req.acceptJson()
    _logger.debug(
        "updateJob(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,attemptNr:%s,jobSubStatus:%s,core:%s,DN:%s,role:%s,token:%s,val:%s,FQAN:%s,maxRSS=%s,maxVMEM=%s,maxSWAP=%s,maxPSS=%s,avgRSS=%s,avgVMEM=%s,avgSWAP=%s,avgPSS=%s,totRCHAR=%s,totWCHAR=%s,totRBYTES=%s,totWBYTES=%s,rateRCHAR=%s,rateWCHAR=%s,rateRBYTES=%s,rateWBYTES=%s\n==XML==\n%s\n==LOG==\n%s\n==Meta==\n%s\n==Metrics==\n%s\n==stdout==\n%s)"
        % (jobId, state, transExitCode, pilotErrorCode, pilotErrorDiag, node,
           workdir, cpuConsumptionTime, cpuConsumptionUnit, remainingSpace,
           schedulerID, pilotID, siteName, messageLevel, nEvents, nInputFiles,
           cpuConversionFactor, exeErrorCode, exeErrorDiag, pilotTiming,
           computingElement, startTime, endTime, batchID, attemptNr,
           jobSubStatus, coreCount, realDN, prodManager, token, validToken,
           str(fqans), maxRSS, maxVMEM, maxSWAP, maxPSS, avgRSS, avgVMEM,
           avgSWAP, avgPSS, totRCHAR, totWCHAR, totRBYTES, totWBYTES,
           rateRCHAR, rateWCHAR, rateRBYTES, rateWBYTES, xml, pilotLog[:1024],
           metaData[:1024], jobMetrics, stdout))
    _pilotReqLogger.debug('method=updateJob,site=%s,node=%s,type=None' %
                          (siteName, node))
    # invalid role
    if not prodManager:
        _logger.warning("updateJob(%s) : invalid role" % jobId)
        if acceptJson:
            tmpMsg = 'no production/pilot role in VOMS FQANs or non pilot owner'
        else:
            tmpMsg = None
        return Protocol.Response(Protocol.SC_Role, tmpMsg).encode(acceptJson)
    # invalid token
    if not validToken:
        _logger.warning("updateJob(%s) : invalid token" % jobId)
        return Protocol.Response(Protocol.SC_Invalid).encode(acceptJson)
    # aborting message
    if jobId == 'NULL':
        return Protocol.Response(Protocol.SC_Success).encode(acceptJson)
    # check status
    if state not in [
            'running', 'failed', 'finished', 'holding', 'starting',
            'transferring'
    ]:
        _logger.warning("invalid state=%s for updateJob" % state)
        return Protocol.Response(Protocol.SC_Success).encode(acceptJson)
    # create parameter map
    param = {}
    # attributes copied verbatim when the pilot supplied them
    for key, value in [('cpuConsumptionTime', cpuConsumptionTime),
                       ('cpuConsumptionUnit', cpuConsumptionUnit),
                       ('transExitCode', transExitCode),
                       ('pilotErrorCode', pilotErrorCode),
                       ('schedulerID', schedulerID),
                       ('exeErrorCode', exeErrorCode),
                       ('cpuConversion', cpuConversionFactor),
                       ('pilotTiming', pilotTiming),
                       ('computingElement', computingElement),
                       ('nEvents', nEvents),
                       ('nInputFiles', nInputFiles),
                       ('maxRSS', maxRSS),
                       ('maxVMEM', maxVMEM),
                       ('maxSWAP', maxSWAP),
                       ('maxPSS', maxPSS),
                       ('avgRSS', avgRSS),
                       ('avgVMEM', avgVMEM),
                       ('avgSWAP', avgSWAP),
                       ('avgPSS', avgPSS)]:
        if value is not None:
            param[key] = value
    # string attributes truncated to their DB column widths
    for key, value, maxLen in [('modificationHost', node, 128),
                               ('pilotErrorDiag', pilotErrorDiag, 500),
                               ('jobMetrics', jobMetrics, 500),
                               ('pilotID', pilotID, 200),
                               ('batchID', batchID, 80),
                               ('exeErrorDiag', exeErrorDiag, 500)]:
        if value is not None:
            param[key] = value[:maxLen]
    if jobSubStatus not in [None, '']:
        param['jobSubStatus'] = jobSubStatus
    if coreCount not in [None, '']:
        param['actualCoreCount'] = coreCount
    # I/O totals: convert bytes to kByte and cap at 10 digits.
    # NOTE: use floor division (//) — plain / produced a float under
    # Python 3 for a field that must stay a 10-digit integer.
    for key, value in [('totRCHAR', totRCHAR),
                       ('totWCHAR', totWCHAR),
                       ('totRBYTES', totRBYTES),
                       ('totWBYTES', totWBYTES)]:
        if value is not None:
            param[key] = min(10**10 - 1, int(value) // 1024)
    # I/O rates: cap at 10 digits
    for key, value in [('rateRCHAR', rateRCHAR),
                       ('rateWCHAR', rateWCHAR),
                       ('rateRBYTES', rateRBYTES),
                       ('rateWBYTES', rateWBYTES)]:
        if value is not None:
            param[key] = min(10**10 - 1, int(value))
    # timestamps come as 'YYYY-MM-DD HH:MM:SS'; malformed values from the
    # pilot are silently dropped (best-effort), but only parse errors are
    # swallowed, not arbitrary exceptions
    for key, value in [('startTime', startTime), ('endTime', endTime)]:
        if value is not None:
            try:
                param[key] = datetime.datetime(
                    *time.strptime(value, '%Y-%m-%d %H:%M:%S')[:6])
            except (ValueError, TypeError):
                pass
    if attemptNr is not None:
        try:
            attemptNr = int(attemptNr)
        except (ValueError, TypeError):
            # non-numeric attempt number -> treat as unspecified
            attemptNr = None
    if stdout != '':
        stdout = stdout[:2048]
    # invoke JD
    tmpLog.debug('executing')
    return jobDispatcher.updateJob(int(jobId), state, int(timeout), xml,
                                   siteName, param, metaData, pilotLog,
                                   attemptNr, stdout, acceptJson)