Example #1
  def submitJob( self, executableFile, proxy, numberOfJobs = 1, processors = 1 ):
    """ Method to submit job
    """

    self.log.verbose( "Executable file path: %s" % executableFile )
    if not os.access( executableFile, 5 ):
      os.chmod( executableFile, stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH )

    batchIDList = []
    stampDict = {}
    if numberOfJobs == 1:
      jdlName, diracStamp = self.__writeJDL( executableFile, processors = processors )
      cmd = ['glite-ce-job-submit', '-n', '-a', '-N', '-r',
             '%s/%s' % ( self.ceName, self.queue ),
             '%s' % jdlName ]

      result = executeGridCommand( self.proxy, cmd, self.gridEnv )
      os.unlink( jdlName )
      if result['OK']:
        if result['Value'][0]:
          # We have got a non-zero status code
          errorString = '\n'.join( result['Value'][1:] ).strip()
          return S_ERROR( 'Pilot submission failed with error: %s ' % errorString )
        pilotJobReference = result['Value'][1].strip()
        if not pilotJobReference:
          return S_ERROR( 'No pilot reference returned from the glite job submission command' )
        if not pilotJobReference.startswith( 'https' ):
          return S_ERROR( 'Invalid pilot reference %s' % pilotJobReference )
        batchIDList.append( pilotJobReference )
        stampDict[pilotJobReference] = diracStamp
    else:
      delegationID = makeGuid()
      cmd = [ 'glite-ce-delegate-proxy', '-e', '%s' % self.ceName, '%s' % delegationID ]
      result = executeGridCommand( self.proxy, cmd, self.gridEnv )
      if not result['OK']:
        self.log.error( 'Failed to delegate proxy', result['Message'] )
        return result
      for _i in range( numberOfJobs ):
        jdlName, diracStamp = self.__writeJDL( executableFile, processors = processors )
        cmd = ['glite-ce-job-submit', '-n', '-N', '-r',
               '%s/%s' % ( self.ceName, self.queue ),
               '-D', '%s' % delegationID, '%s' % jdlName ]
        result = executeGridCommand( self.proxy, cmd, self.gridEnv )
        os.unlink( jdlName )
        if not result['OK']:
          break
        if result['Value'][0] != 0:
          break
        pilotJobReference = result['Value'][1].strip()
        if pilotJobReference and pilotJobReference.startswith( 'https' ):
          batchIDList.append( pilotJobReference )
          stampDict[pilotJobReference] = diracStamp
        else:
          break
    if batchIDList:
      result = S_OK( batchIDList )
      result['PilotStampDict'] = stampDict
    else:
      result = S_ERROR( 'No pilot references obtained from the glite job submission' )
    return result
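Every example on this page follows the same return convention: executeGridCommand wraps the command outcome in a DIRAC result structure whose 'Value' is a (returnCode, stdout, stderr) tuple, while a failure to launch the command at all comes back as S_ERROR. The short sketch below distils that shared handling pattern; the wrapper name runGridCommand is hypothetical, and the import location DIRAC.Core.Utilities.Grid is assumed.

# A minimal sketch of the result-handling pattern shared by the examples on this page,
# assuming the usual DIRAC helpers; the wrapper name runGridCommand is hypothetical.
from DIRAC import S_OK, S_ERROR
from DIRAC.Core.Utilities.Grid import executeGridCommand

def runGridCommand(proxy, cmd, gridEnv):
    # executeGridCommand returns S_OK((returnCode, stdout, stderr)) when the
    # command could be started, or S_ERROR if it could not be executed at all
    result = executeGridCommand(proxy, cmd, gridEnv)
    if not result['OK']:
        return result
    returnCode, stdout, stderr = result['Value']
    if returnCode != 0:
        # the command ran but reported a failure
        return S_ERROR(stderr.strip() or stdout.strip())
    return S_OK(stdout)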
Example #2
  def getJobStatus( self, jobIDList ):
    """ Get the status information for the given list of jobs
    """
    if self.proxyRenewal % 60 == 0:
      self.proxyRenewal += 1
      statusList = ['REGISTERED', 'PENDING', 'IDLE', 'RUNNING', 'REALLY-RUNNING']
      cmd = ['glite-ce-job-status', '-L', '2', '--all', '-e',
             '%s' % self.ceName, '-s',
             '%s' % ':'.join( statusList ) ]
      result = executeGridCommand( self.proxy, cmd, self.gridEnv )
      if result['OK']:
        delegationIDs = []
        for line in result['Value'][1].split( '\n' ):
          if line.find( 'Deleg Proxy ID' ) != -1:
            delegationID = line.split()[-1].replace( '[', '' ).replace( ']', '' )
            if delegationID not in delegationIDs:
              delegationIDs.append( delegationID )
        if delegationIDs:
          cmd = ['glite-ce-proxy-renew', '-e', self.ceName ]
          cmd.extend( delegationIDs )
          self.log.info( 'Refreshing proxy for:', ' '.join( delegationIDs ) )
          result = executeGridCommand( self.proxy, cmd, self.gridEnv )

    workingDirectory = self.ceParameters['WorkingDirectory']
    fd, idFileName = tempfile.mkstemp( suffix = '.ids', prefix = 'CREAM_', dir = workingDirectory )
    idFile = os.fdopen( fd, 'w' )
    idFile.write( '##CREAMJOBS##' )
    for id_ in jobIDList:
      if ":::" in id_:
        ref, stamp = id_.split( ':::' )
      else:
        ref = id_
      idFile.write( '\n' + ref )
    idFile.close()

    cmd = ['glite-ce-job-status', '-n', '-i', '%s' % idFileName ]
    result = executeGridCommand( self.proxy, cmd, self.gridEnv )
    os.unlink( idFileName )
    resultDict = {}
    if not result['OK']:
      self.log.error( 'Failed to get job status', result['Message'] )
      return result
    if result['Value'][0]:
      if result['Value'][2]:
        return S_ERROR( result['Value'][2] )
      else:
        return S_ERROR( 'Error while interrogating job statuses' )
    if result['Value'][1]:
      resultDict = self.__parseJobStatus( result['Value'][1] )

    if not resultDict:
      return  S_ERROR( 'No job statuses returned' )

    # If CE does not know about a job, set the status to Unknown
    for job in jobIDList:
      if not resultDict.has_key( job ):
        resultDict[job] = 'Unknown'

    return S_OK( resultDict )
Example #3
  def submitJob( self, executableFile, proxy, numberOfJobs = 1 ):
    """ Method to submit job
    """

    self.log.verbose( "Executable file path: %s" % executableFile )
    if not os.access( executableFile, 5 ):
      os.chmod( executableFile, 0755 )

    batchIDList = []
    stampDict = {}
    if numberOfJobs == 1:
      jdlName, diracStamp = self.__writeJDL( executableFile )
      cmd = ['glite-ce-job-submit', '-n', '-a', '-N', '-r',
             '%s/%s' % ( self.ceName, self.queue ),
             '%s' % jdlName ]
      result = executeGridCommand( self.proxy, cmd, self.gridEnv )

      if result['OK']:
        pilotJobReference = result['Value'][1].strip()
        batchIDList.append( pilotJobReference )
        stampDict[pilotJobReference] = diracStamp
      os.unlink( jdlName )
    else:
      delegationID = makeGuid()
      cmd = [ 'glite-ce-delegate-proxy', '-e', '%s' % self.ceName, '%s' % delegationID ]
      result = executeGridCommand( self.proxy, cmd, self.gridEnv )
      if not result['OK']:
        self.log.error('Failed to delegate proxy: %s' % result['Message'])
        return result
      for i in range( numberOfJobs ):
        jdlName, diracStamp = self.__writeJDL( executableFile )
        cmd = ['glite-ce-job-submit', '-n', '-N', '-r',
               '%s/%s' % ( self.ceName, self.queue ),
               '-D', '%s' % delegationID, '%s' % jdlName ]
        result = executeGridCommand( self.proxy, cmd, self.gridEnv )
        # remove the temporary JDL right away so it is not leaked when a break occurs below
        os.unlink( jdlName )
        if not result['OK']:
          break
        if result['Value'][0] != 0:
          break
        pilotJobReference = result['Value'][1].strip()
        if pilotJobReference:
          batchIDList.append( pilotJobReference )
          stampDict[pilotJobReference] = diracStamp
        else:
          break

    os.unlink( executableFile )
    if batchIDList:
      result = S_OK( batchIDList )
      result['PilotStampDict'] = stampDict
    else:
      result = S_ERROR('No pilot references obtained from the glite job submission')  
    return result
Example #4
  def getJobOutput( self, jobID, localDir = None ):
    """ Get the specified job standard output and error files. If the localDir is provided,
        the output is returned as file in this directory. Otherwise, the output is returned 
        as strings. 
    """
    if jobID.find( ':::' ) != -1:
      pilotRef, stamp = jobID.split( ':::' )
    else:
      pilotRef = jobID
      stamp = ''
    if not stamp:
      return S_ERROR( 'Pilot stamp not defined for %s' % pilotRef )

    outURL = self.ceParameters.get( 'OutputURL', 'gsiftp://localhost' )
    if outURL == 'gsiftp://localhost':
      result = self.__resolveOutputURL( pilotRef )
      if not result['OK']:
        return result
      outURL = result['Value']

    outputURL = os.path.join( outURL, '%s.out' % stamp )
    errorURL = os.path.join( outURL, '%s.err' % stamp )
    workingDirectory = self.ceParameters['WorkingDirectory']
    outFileName = os.path.join( workingDirectory, os.path.basename( outputURL ) )
    errFileName = os.path.join( workingDirectory, os.path.basename( errorURL ) )

    cmd = ['globus-url-copy', '%s' % outputURL, 'file://%s' % outFileName ]
    result = executeGridCommand( self.proxy, cmd, self.gridEnv )
    output = ''
    if result['OK']:
      if not result['Value'][0]:
        outFile = open( outFileName, 'r' )
        output = outFile.read()
        outFile.close()
        os.unlink( outFileName )
      else:
        error = '\n'.join( result['Value'][1:] )
        return S_ERROR( error )  
    else:
      return S_ERROR( 'Failed to retrieve output for %s' % jobID )

    cmd = ['globus-url-copy', '%s' % errorURL, '%s' % errFileName ]
    result = executeGridCommand( self.proxy, cmd, self.gridEnv )
    error = ''
    if result['OK']:
      if not result['Value'][0]:
        errFile = open( errFileName, 'r' )
        error = errFile.read()
        errFile.close()
        os.unlink( errFileName )
    else:
      return S_ERROR( 'Failed to retrieve error for %s' % jobID )

    return S_OK( ( output, error ) )
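Several examples above and below split the job identifier on ':::' to separate the pilot reference from the DIRAC stamp before contacting the CE. A minimal helper capturing that convention is sketched here; the function name splitJobID is hypothetical and not part of the examples.

def splitJobID(jobID):
    # "pilotRef:::stamp" -> (pilotRef, stamp); references submitted without a
    # stamp come back with an empty stamp, mirroring the checks in getJobOutput
    if ':::' in jobID:
        pilotRef, stamp = jobID.split(':::', 1)
        return pilotRef, stamp
    return jobID, ''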
Example #5
    def getJobOutput(self, jobID, localDir=None):
        """ Get the specified job standard output and error files. If the localDir is provided,
        the output is returned as file in this directory. Otherwise, the output is returned 
        as strings. 
    """
        if jobID.find(":::") != -1:
            pilotRef, stamp = jobID.split(":::")
        else:
            pilotRef = jobID
            stamp = ""
        if not stamp:
            return S_ERROR("Pilot stamp not defined for %s" % pilotRef)

        outURL = self.ceParameters.get("OutputURL", "gsiftp://localhost")
        if outURL == "gsiftp://localhost":
            result = self.__resolveOutputURL(pilotRef)
            if not result["OK"]:
                return result
            outURL = result["Value"]

        outputURL = os.path.join(outURL, "%s.out" % stamp)
        errorURL = os.path.join(outURL, "%s.err" % stamp)
        workingDirectory = self.ceParameters["WorkingDirectory"]
        outFileName = os.path.join(workingDirectory, os.path.basename(outputURL))
        errFileName = os.path.join(workingDirectory, os.path.basename(errorURL))

        cmd = ["globus-url-copy", "%s" % outputURL, "file://%s" % outFileName]
        result = executeGridCommand(self.proxy, cmd, self.gridEnv)
        output = ""
        if result["OK"]:
            if not result["Value"][0]:
                outFile = open(outFileName, "r")
                output = outFile.read()
                outFile.close()
                os.unlink(outFileName)
        else:
            return S_ERROR("Failed to retrieve output for %s" % jobID)

        cmd = ["globus-url-copy", "%s" % errorURL, "%s" % errFileName]
        result = executeGridCommand(self.proxy, cmd, self.gridEnv)
        error = ""
        if result["OK"]:
            if not result["Value"][0]:
                errFile = open(errFileName, "r")
                error = errFile.read()
                errFile.close()
                os.unlink(errFileName)
        else:
            return S_ERROR("Failed to retrieve error for %s" % jobID)

        return S_OK((output, error))
Example #6
def getPilotLoggingInfo( grid, pilotRef ):
  """
   Get LoggingInfo of a GRID job
  """
  if grid == 'gLite':
    cmd = [ 'glite-wms-job-logging-info', '-v', '3', '--noint', pilotRef ]
  elif grid == 'CREAM':
    cmd = [ 'glite-ce-job-status', '-L', '2', '%s' % pilotRef ]
  elif grid == 'HTCondorCE':
    ## need to import here, otherwise import errors happen
    from DIRAC.Resources.Computing.HTCondorCEComputingElement import getCondorLogFile
    resLog = getCondorLogFile( pilotRef )
    if not resLog['OK']:
      return resLog
    logFile = resLog['Value']
    cmd = [ 'cat', " ".join(logFile) ]
  else:
    return S_ERROR( 'Pilot logging not available for %s CEs' % grid )

  gridEnv = getGridEnv()
  ret = executeGridCommand( '', cmd, gridEnv )
  if not ret['OK']:
    return ret

  status, output, error = ret['Value']
  if status:
    return S_ERROR( error )

  return S_OK( output )
Example #7
 def __getSummary( self ):
   res = self.__isSummaryValid()
   if not res['OK']:
     return res
   comm = ['glite-transfer-status', '--verbose', '-s', self.ftsServer, self.ftsGUID]
   res = executeGridCommand( '', comm, self.gridEnv )
   if not res['OK']:
     return res
   returnCode, output, errStr = res['Value']
   # Returns a non zero status if error
   if not returnCode == 0:
     return S_ERROR( errStr )
   # Parse the output to get a summary dictionary
   lines = output.splitlines()
   summaryDict = {}
   for line in lines:
     line = line.split( ':\t' )
     key = line[0].replace( '\t', '' )
     value = line[1].replace( '\t', '' )
     summaryDict[key] = value
   self.requestStatus = summaryDict['Status']
   self.submitTime = summaryDict['Submit time']
   self.statusSummary = {}
   for status in self.fileStates:
     if summaryDict[status] != '0':
       self.statusSummary[status] = int( summaryDict[status] )
   return S_OK()
Example #8
  def getJobStatus( self, jobIDList ):
    """ Get the status information for the given list of jobs
    """

    workingDirectory = self.ceParameters['WorkingDirectory']
    fd, idFileName = tempfile.mkstemp( suffix = '.ids', prefix = 'CREAM_', dir = workingDirectory )
    idFile = os.fdopen( fd, 'w' )
    idFile.write( '##CREAMJOBS##' )
    for id in jobIDList:
      idFile.write( '\n' + id )
    idFile.close()

    cmd = ['glite-ce-job-status', '-n', '-i', '%s' % idFileName ]
    result = executeGridCommand( self.proxy, cmd, self.gridEnv )
    os.unlink( idFileName )
    resultDict = {}
    if not result['OK']:
      self.log.error('Failed to get job status',result['Message'])
      return result
    if result['Value'][1]:
      resultDict = self.__parseJobStatus( result['Value'][1] )

    # If CE does not know about a job, set the status to Unknown
    for job in jobIDList:
      if not resultDict.has_key( job ):
        resultDict[job] = 'Unknown'

    return S_OK( resultDict )
Example #9
    def getCEStatus(self):
        """ Method to return information on running and pending jobs.
    """
        statusList = ["REGISTERED", "PENDING", "IDLE", "RUNNING", "REALLY-RUNNING"]
        cmd = ["glite-ce-job-status", "-n", "-a", "-e", "%s" % self.ceName, "-s", "%s" % ":".join(statusList)]
        result = executeGridCommand(self.proxy, cmd, self.gridEnv)
        resultDict = {}
        if not result["OK"]:
            return result
        if result["Value"][0]:
            if result["Value"][2]:
                return S_ERROR(result["Value"][2])
            else:
                return S_ERROR("Error while interrogating CE status")
        if result["Value"][1]:
            resultDict = self.__parseJobStatus(result["Value"][1])

        running = 0
        waiting = 0
        for ref, status in resultDict.items():
            if status == "Scheduled":
                waiting += 1
            if status == "Running":
                running += 1

        result = S_OK()
        result["RunningJobs"] = running
        result["WaitingJobs"] = waiting
        result["SubmittedJobs"] = 0
        return result
Example #10
  def getCEStatus( self ):
    """ Method to return information on running and pending jobs.
    """
    statusList = ['REGISTERED', 'PENDING', 'IDLE', 'RUNNING', 'REALLY-RUNNING']
    cmd = ['glite-ce-job-status', '-n', '-a', '-e',
           '%s' % self.ceName, '-s',
           '%s' % ':'.join( statusList ) ]
    result = executeGridCommand( self.proxy, cmd, self.gridEnv )
    resultDict = {}
    if not result['OK']:
      return result
    if result['Value'][0]:
      if result['Value'][2]:
        return S_ERROR(result['Value'][2])
      else:
        return S_ERROR('Error while interrogating CE status')
    if result['Value'][1]:
      resultDict = self.__parseJobStatus( result['Value'][1] )

    running = 0
    waiting = 0
    for ref, status in resultDict.items():
      if status == 'Scheduled':
        waiting += 1
      if status == 'Running':
        running += 1

    result = S_OK()
    result['RunningJobs'] = running
    result['WaitingJobs'] = waiting
    result['SubmittedJobs'] = 0
    return result
Example #11
    def submitFTS2(self, stageFiles=False):
        """ submit fts job using FTS2 client """
        if self.FTSGUID:
            return S_ERROR("FTSJob already has been submitted")
        surls = self._surlPairs()
        if not surls:
            return S_ERROR("No files to submit")
        fd, fileName = tempfile.mkstemp()
        surlFile = os.fdopen(fd, "w")
        surlFile.write(surls)
        surlFile.close()
        submitCommand = ["glite-transfer-submit", "-s", self.FTSServer, "-f", fileName, "-o", "--compare-checksums"]
        # pass each flag and its value as separate arguments, as the command is not run through a shell
        if self.TargetToken:
            submitCommand += ["-t", self.TargetToken]
        if self.SourceToken:
            submitCommand += ["-S", self.SourceToken]
        if stageFiles:
            submitCommand += ["--copy-pin-lifetime", "86400"]

        submit = executeGridCommand("", submitCommand)
        os.remove(fileName)
        if not submit["OK"]:
            return submit
        returnCode, output, errStr = submit["Value"]
        if not returnCode == 0:
            return S_ERROR(errStr)
        self.FTSGUID = output.replace("\n", "")
        self.Status = "Submitted"
        for ftsFile in self:
            ftsFile.FTSGUID = self.FTSGUID
            ftsFile.Status = "Submitted"
        return S_OK()
Example #12
def getPilotLoggingInfo( proxy, grid, pilotRef ):
  """
   Get LoggingInfo of a GRID job
  """
  if grid == 'LCG':
    cmd = [ 'edg-job-get-logging-info', '-v', '2' ]
  elif grid == 'gLite':
    cmd = [ 'glite-wms-job-logging-info', '-v', '3' ]
  else:
    return S_ERROR( 'Unknown GRID %s' % grid )

  cmd.extend( ['--noint', pilotRef] )

  gridEnv = ''
  setup = gConfig.getValue( '/DIRAC/Setup', '' )
  if setup:
    instance = gConfig.getValue( '/DIRAC/Setups/%s/WorkloadManagement' % setup, '' )
    if instance:
      gridEnv = gConfig.getValue( '/Systems/WorkloadManagement/%s/GridEnv' % instance, '' )

  ret = executeGridCommand( proxy, cmd, gridEnv )
  if not ret['OK']:
    return ret

  status, output, error = ret['Value']
  if status:
    return S_ERROR( error )

  return S_OK( output )
Example #13
  def getCEStatus( self ):
    """ Method to return information on running and pending jobs.
    """
    cmd = ['arcstat', '-c', self.ceHost, '-j', self.ceParameters['JobListFile'] ]
    result = executeGridCommand( self.proxy, cmd, self.gridEnv )
    resultDict = {}
    if not result['OK']:
      return result
    if result['Value'][0]:
      if result['Value'][2]:
        return S_ERROR(result['Value'][2])
      else:
        return S_ERROR('Error while interrogating CE status')
    if result['Value'][1]:
      resultDict = self.__parseJobStatus( result['Value'][1] )

    running = 0
    waiting = 0
    for ref in resultDict:
      status = resultDict[ref]
      if status == 'Scheduled':
        waiting += 1
      if status == 'Running':
        running += 1

    result = S_OK()
    result['RunningJobs'] = running
    result['WaitingJobs'] = waiting
    result['SubmittedJobs'] = 0
    return result
Example #14
  def __submitFTSTransfer( self ):
    """ create and execute glite-transfer-submit CLI command

    :param self: self reference
    """
    comm = [ 'glite-transfer-submit', '-s', self.ftsServer, '-f', self.surlFile, '-o' ]
    if self.targetToken:
      comm += [ '-t', self.targetToken ]
    if self.sourceToken:
      comm += [ '-S', self.sourceToken ]
    if self.__cksmTest:
      comm.append( "--compare-checksums" )
    gLogger.verbose( 'Executing %s' % ' '.join( comm ) )
    res = executeGridCommand( '', comm )
    os.remove( self.surlFile )
    if not res['OK']:
      return res
    returnCode, output, errStr = res['Value']
    if not returnCode == 0:
      return S_ERROR( errStr )
    guid = output.replace( '\n', '' )
    if not checkGuid( guid ):
      return S_ERROR( 'Wrong GUID format returned' )
    self.ftsGUID = guid
    # if self.priority != 3:
    #  comm = ['glite-transfer-setpriority','-s', self.ftsServer,self.ftsGUID,str(self.priority)]
    #  executeGridCommand('',comm)
    return res
Example #15
 def __getFullOutput( self ):
   comm = ['glite-transfer-status', '-s', self.ftsServer, '-l', self.ftsGUID]
   res = executeGridCommand( '', comm, self.gridEnv )
   if not res['OK']:
     return res
   returnCode, output, errStr = res['Value']
   # Returns a non zero status if error
   if not returnCode == 0:
     return S_ERROR( errStr )
   statusExp = re.compile( "^(\S+)" )
   self.requestStatus = re.search( statusExp, output ).group( 1 )
   output = output.replace( "%s\n" % self.requestStatus, "", 1 )
   toRemove = ["'", "<", ">"]
   for char in toRemove:
     output = output.replace( char, '' )
   regExp = re.compile( "[ ]+Source:[ ]+(\S+)\n[ ]+Destination:[ ]+(\S+)\n[ ]+State:[ ]+(\S+)\n[ ]+Retries:[ ]+(\d+)\n[ ]+Reason:[ ]+([\S ]+).+?[ ]+Duration:[ ]+(\d+)", re.S )
   fileInfo = re.findall( regExp, output )
   for source, target, status, retries, reason, duration in fileInfo:
     lfn = ''
     for candidate in sortList( self.fileDict.keys() ):
       if re.search( candidate, source ):
         lfn = candidate
     if not lfn:
       continue
     self.__setFileParameter( lfn, 'Source', source )
     self.__setFileParameter( lfn, 'Target', target )
     self.__setFileParameter( lfn, 'Status', status )
     if reason == '(null)':
       reason = ''
     self.__setFileParameter( lfn, 'Reason', reason.replace( "\n", " " ) )
     self.__setFileParameter( lfn, 'Duration', int( duration ) )
   return S_OK()
Example #16
  def getCEStatus( self, jobIDList = None ):
    """ Method to return information on running and pending jobs.
    
        :param list jobIDList: list of job IDs to be considered
    """
    statusList = ['REGISTERED', 'PENDING', 'IDLE', 'RUNNING', 'REALLY-RUNNING']
    cmd = ['glite-ce-job-status', '-n', '-a', '-e',
           '%s' % self.ceName, '-s',
           '%s' % ':'.join( statusList ) ]
    result = executeGridCommand( self.proxy, cmd, self.gridEnv )
    resultDict = {}
    if not result['OK']:
      return result
    if result['Value'][0]:
      if result['Value'][0] == 11:
        return S_ERROR( 'Segmentation fault while calling glite-ce-job-status' )
      elif result['Value'][2]:
        return S_ERROR( result['Value'][2] )
      elif "Authorization error" in result['Value'][1]:
        return S_ERROR( "Authorization error" )
      elif "FaultString" in result['Value'][1]:
        res = re.search( 'FaultString=\[([\w\s]+)\]', result['Value'][1] )
        fault = ''
        if res:
          fault = res.group( 1 )
        detail = ''
        res = re.search( 'FaultDetail=\[([\w\s]+)\]', result['Value'][1] )  
        if res:
          detail = res.group( 1 )
          return S_ERROR( "Error: %s:%s" % (fault,detail) )
      else:
        return S_ERROR( 'Error while interrogating CE status' )
    if result['Value'][1]:
      resultDict = self.__parseJobStatus( result['Value'][1] )

    running = 0
    waiting = 0
    statusDict = {}
    for ref, status in resultDict.items():
      if jobIDList is not None and not ref in jobIDList:
        continue
      if status == 'Scheduled':
        waiting += 1
      if status == 'Running':
        running += 1
      statusDict[ref] = status  

    result = S_OK()
    result['RunningJobs'] = running
    result['WaitingJobs'] = waiting
    result['SubmittedJobs'] = 0
    result['JobStatusDict'] = statusDict
    return result
Example #17
  def __parseOutput( self, full = False ):
    """ execute glite-transfer-status command and parse its output

    :param self: self reference
    :param bool full: glite-transfer-status verbosity level, when set, collect information of files as well
    """
    if full:
      res = self.__isMonitorValid()
    else:
      res = self.__isSummaryValid()
    if not res['OK']:
      return res
    comm = [ 'glite-transfer-status', '--verbose', '-s', self.ftsServer, self.ftsGUID ]
    if full:
      comm.append( '-l' )
    res = executeGridCommand( '', comm )
    if not res['OK']:
      return res
    returnCode, output, errStr = res['Value']
    # Returns a non zero status if error
    if not returnCode == 0:
      return S_ERROR( errStr )
    toRemove = ["'", "<", ">"]
    for char in toRemove:
      output = output.replace( char, '' )
    regExp = re.compile( "Status:\s+(\S+)" )
    self.requestStatus = re.search( regExp, output ).group( 1 )
    regExp = re.compile( "Submit time:\s+(\S+ \S+)" )
    self.submitTime = re.search( regExp, output ).group( 1 )
    self.statusSummary = {}
    for state in self.fileStates:
      regExp = re.compile( "\s+%s:\s+(\d+)" % state )
      self.statusSummary[state] = int( re.search( regExp, output ).group( 1 ) )
    if not full:
      return S_OK()
    regExp = re.compile( "[ ]+Source:[ ]+(\S+)\n[ ]+Destination:[ ]+(\S+)\n[ ]+State:[ ]+(\S+)\n[ ]+Retries:[ ]+(\d+)\n[ ]+Reason:[ ]+([\S ]+).+?[ ]+Duration:[ ]+(\d+)", re.S )
    fileInfo = re.findall( regExp, output )
    for source, target, status, retries, reason, duration in fileInfo:
      lfn = ''
      for candidate in sorted( self.fileDict ):
        if re.search( candidate, source ):
          lfn = candidate
      if not lfn:
        continue
      self.__setFileParameter( lfn, 'Source', source )
      self.__setFileParameter( lfn, 'Target', target )
      self.__setFileParameter( lfn, 'Status', status )
      if reason == '(null)':
        reason = ''
      self.__setFileParameter( lfn, 'Reason', reason.replace( "\n", " " ) )
      self.__setFileParameter( lfn, 'Duration', int( duration ) )
    return S_OK()
Example #18
    def getCEStatus(self, jobIDList=None):
        """ Method to return information on running and pending jobs.
    
        :param list jobIDList: list of job IDs to be considered
    """
        statusList = ["REGISTERED", "PENDING", "IDLE", "RUNNING", "REALLY-RUNNING"]
        cmd = ["glite-ce-job-status", "-n", "-a", "-e", "%s" % self.ceName, "-s", "%s" % ":".join(statusList)]
        result = executeGridCommand(self.proxy, cmd, self.gridEnv)
        resultDict = {}
        if not result["OK"]:
            return result
        if result["Value"][0]:
            if result["Value"][0] == 11:
                return S_ERROR("Segmentation fault while calling glite-ce-job-status")
            elif result["Value"][2]:
                return S_ERROR(result["Value"][2])
            elif "Authorization error" in result["Value"][1]:
                return S_ERROR("Authorization error")
            elif "FaultString" in result["Value"][1]:
                res = re.search("FaultString=\[([\w\s]+)\]", result["Value"][1])
                fault = ""
                if res:
                    fault = res.group(1)
                detail = ""
                res = re.search("FaultDetail=\[([\w\s]+)\]", result["Value"][1])
                if res:
                    detail = res.group(1)
                    return S_ERROR("Error: %s:%s" % (fault, detail))
            else:
                return S_ERROR("Error while interrogating CE status")
        if result["Value"][1]:
            resultDict = self.__parseJobStatus(result["Value"][1])

        running = 0
        waiting = 0
        statusDict = {}
        for ref, status in resultDict.items():
            if jobIDList is not None and not ref in jobIDList:
                continue
            if status == "Scheduled":
                waiting += 1
            if status == "Running":
                running += 1
            statusDict[ref] = status

        result = S_OK()
        result["RunningJobs"] = running
        result["WaitingJobs"] = waiting
        result["SubmittedJobs"] = 0
        result["JobStatusDict"] = statusDict
        return result
Example #19
def getWMSPilotOutput( pilotRef ):
  """
   Get Output of a GRID job
  """
  tmp_dir = mkdtemp()
  cmd = [ 'glite-wms-job-output', '--noint', '--dir', tmp_dir, pilotRef]

  gridEnv = getGridEnv()

  ret = executeGridCommand( '', cmd, gridEnv )
  if not ret['OK']:
    shutil.rmtree( tmp_dir )
    return ret

  status, output, error = ret['Value']

  for errorString in [ 'already retrieved',
                       'Output not yet Ready',
                       'not yet ready',
                       'the status is ABORTED',
                       'No output files' ]:
    if errorString in error:
      shutil.rmtree( tmp_dir )
      return S_ERROR( error )
    if errorString in output:
      shutil.rmtree( tmp_dir )
      return S_ERROR( output )

  if status:
    shutil.rmtree( tmp_dir )
    return S_ERROR( error )

  # Get the list of files from the single subdirectory created by glite-wms-job-output
  tmp_dir = os.path.join( tmp_dir, os.listdir( tmp_dir )[0] )
  # outputSandboxFiles is not defined in this snippet; assume it is the content
  # of the retrieved output directory
  outputSandboxFiles = os.listdir( tmp_dir )

  result = S_OK()
  result['FileList'] = outputSandboxFiles

  for filename in outputSandboxFiles:
    tmpname = os.path.join( tmp_dir, filename )
    if os.path.exists( tmpname ):
      myfile = file( tmpname, 'r' )
      f = myfile.read()
      myfile.close()
    else:
      f = ''
    result[filename] = f

  shutil.rmtree( tmp_dir )
  return result
Example #20
    def killJob(self, jobIDList):
        """ Kill the specified jobs
    """
        jobList = list(jobIDList)
        if type(jobIDList) in StringTypes:
            jobList = [jobIDList]

        cmd = ["glite-ce-job-cancel", "-n", "-N"] + jobList
        result = executeGridCommand(self.proxy, cmd, self.gridEnv)
        if not result["OK"]:
            return result
        if result["Value"][0] != 0:
            return S_ERROR("Failed kill job: %s" % result["Value"][0][1])

        return S_OK()
Example #21
 def killJob( self, jobIDList ):
   """ Kill the specified jobs
   """
   jobList = list( jobIDList )
   if type( jobIDList ) in StringTypes:
     jobList = [ jobIDList ]
     
   cmd = ['glite-ce-job-cancel','-n','-N']+jobList
   result = executeGridCommand( self.proxy, cmd, self.gridEnv )
   if not result['OK']:
     return result
   if result['Value'][0] != 0:
     errorString = '\n'.join( result['Value'][1:] ).strip()
     return S_ERROR( 'Failed kill job: %s' % errorString )
     
   return S_OK()
Example #22
  def killJob( self, jobIDList ):
    """ Kill the specified jobs
    """
    jobList = list( jobIDList )
    if isinstance(jobIDList, basestring):
      jobList = [ jobIDList ]
    for jobID in jobList:
      cmd = ['globus-job-clean', '-f', jobID]
      result = executeGridCommand( self.proxy, cmd, self.gridEnv )
      if not result['OK']:
        return result
      if result['Value'][0] != 0:
        return S_ERROR( 'Failed kill job: %s' % result['Value'][1].strip() )

    return S_OK()
Example #23
    def killJob(self, jobIDList):
        """ Kill the specified jobs
    """
        jobList = list(jobIDList)
        if isinstance(jobIDList, basestring):
            jobList = [jobIDList]

        cmd = ['glite-ce-job-cancel', '-n', '-N'] + jobList
        result = executeGridCommand(self.proxy, cmd, self.gridEnv)
        if not result['OK']:
            return result
        if result['Value'][0] != 0:
            errorString = '\n'.join(result['Value'][1:]).strip()
            return S_ERROR('Failed kill job: %s' % errorString)

        return S_OK()
Example #24
    def killJob(self, jobIDList):
        """ Kill the specified jobs
        #FIXME: Needs to be tested
        """
        jobList = list(jobIDList)
        if isinstance(jobIDList, basestring):
            jobList = [jobIDList]
        for jobID in jobList:
            cmd = ['globus-job-clean', jobID]
            result = executeGridCommand(self.proxy, cmd, self.gridEnv)
            if not result['OK']:
                return result
            if result['Value'][0] != 0:
                errorString = '\n'.join(result['Value'][1:]).strip()
                return S_ERROR('Failed kill job: %s' % errorString)

        return S_OK()
Example #25
  def killJob( self, jobIDList ):
    """ Kill the specified jobs
    """
    jobList = list( jobIDList )
    if isinstance( jobIDList, basestring ):
      jobList = [ jobIDList ]

    cmd = ['glite-ce-job-cancel', '-n', '-N'] + jobList
    result = executeGridCommand( self.proxy, cmd, self.gridEnv )
    if not result['OK']:
      return result
    if result['Value'][0] != 0:
      errorString = '\n'.join( result['Value'][1:] ).strip()
      return S_ERROR( 'Failed kill job: %s' % errorString )

    return S_OK()
Example #26
    def killJob(self, jobIDList):
        """ Kill the specified jobs
    """
        jobList = list(jobIDList)
        if isinstance(jobIDList, six.string_types):
            jobList = [jobIDList]
        for jobID in jobList:
            cmd = ['globus-job-clean', '-f', jobID]
            result = executeGridCommand(self.proxy, cmd, self.gridEnv)
            if not result['OK']:
                return result
            if result['Value'][0] != 0:
                return S_ERROR('Failed kill job: %s' %
                               result['Value'][1].strip())

        return S_OK()
Example #27
  def getJobOutput(self, jobID, _localDir=None):
    """ TODO: condor can copy the output automatically back to the
    submission, so we just need to pick it up from the proper folder
    """
    self.log.verbose("Getting job output for jobID: %s " % jobID)
    _job, condorID = condorIDFromJobRef(jobID)
    # FIXME: the WMSAdministrator does not know about the
    # SiteDirector WorkingDirectory, it might not even run on the
    # same machine
    #workingDirectory = self.ceParameters.get( 'WorkingDirectory', DEFAULT_WORKINGDIRECTORY )

    if not self.useLocalSchedd:
      cmd = ['condor_transfer_data', '-pool', '%s:9619' % self.ceName, '-name', self.ceName, condorID]
      result = executeGridCommand(self.proxy, cmd, self.gridEnv)
      self.log.verbose(result)
      if not result['OK']:
        self.log.error("Failed to get job output from htcondor", result['Message'])
        return result

    output = ''
    error = ''
    resOut = findFile(self.workingDirectory, '%s.out' % condorID)
    if not resOut['OK']:
      self.log.error("Failed to find output file for condor job", jobID)
      return resOut
    outputfilename = resOut['Value'][0]

    resErr = findFile(self.workingDirectory, '%s.err' % condorID)
    if not resErr['OK']:
      self.log.error("Failed to find error file for condor job", jobID)
      return resErr
    errorfilename = resErr['Value'][0]

    try:
      with open(outputfilename) as outputfile:
        output = outputfile.read()
    except IOError as e:
      self.log.error("Failed to open outputfile", str(e))
      return S_ERROR("Failed to get pilot output")
    try:
      with open(errorfilename) as errorfile:
        error = errorfile.read()
    except IOError as e:
      self.log.error("Failed to open errorfile", str(e))
      return S_ERROR("Failed to get pilot error")

    return S_OK((output, error))
Example #28
  def getJobStatus( self, jobIDList ):
    """ Get the status information for the given list of jobs
    """

    workingDirectory = self.ceParameters['WorkingDirectory']
    fd, name = tempfile.mkstemp( suffix = '.list', prefix = 'StatJobs_', dir = workingDirectory )
    jobListFile = os.fdopen( fd, 'w' )
    
    jobTmpList = list( jobIDList )
    if type( jobIDList ) in StringTypes:
      jobTmpList = [ jobIDList ]


    jobList = []
    for j in jobTmpList:
      if ":::" in j:
        job = j.split(":::")[0] 
      else:
        job = j
      jobList.append( job )
      jobListFile.write( job+'\n' )  
      
    cmd = ['arcstat','-c',self.ceHost,'-i',name,'-j',self.ceParameters['JobListFile']]
    result = executeGridCommand( self.proxy, cmd, self.gridEnv )
    os.unlink( name )
    
    resultDict = {}
    if not result['OK']:
      self.log.error( 'Failed to get job status', result['Message'] )
      return result
    if result['Value'][0]:
      if result['Value'][2]:
        return S_ERROR(result['Value'][2])
      else:
        return S_ERROR('Error while interrogating job statuses')
    if result['Value'][1]:
      resultDict = self.__parseJobStatus( result['Value'][1] )
     
    if not resultDict:
      return  S_ERROR('No job statuses returned')

    # If CE does not know about a job, set the status to Unknown
    for job in jobList:
      if not resultDict.has_key( job ):
        resultDict[job] = 'Unknown'
    return S_OK( resultDict )
Example #29
    def submitJob(self, executableFile, proxy, numberOfJobs=1):
        """ Method to submit job
    """

        self.log.verbose("Executable file path: %s" % executableFile)
        if not os.access(executableFile, 5):
            os.chmod(
                executableFile,
                stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)

        batchIDList = []
        stampDict = {}

        i = 0
        while i < numberOfJobs:
            i += 1
            xrslName, diracStamp = self.__writeXRSL(executableFile)
            cmd = [
                'arcsub', '-j', self.ceParameters['JobListFile'], '-c',
                '%s' % self.ceHost,
                '%s' % xrslName
            ]
            result = executeGridCommand(self.proxy, cmd, self.gridEnv)
            os.unlink(xrslName)
            if not result['OK']:
                break
            if result['Value'][0] != 0:
                break
            pilotJobReference = result['Value'][1].strip()
            if pilotJobReference and pilotJobReference.startswith(
                    'Job submitted with jobid:'):
                pilotJobReference = pilotJobReference.replace(
                    'Job submitted with jobid:', '').strip()
                batchIDList.append(pilotJobReference)
                stampDict[pilotJobReference] = diracStamp
            else:
                break

        #os.unlink( executableFile )
        if batchIDList:
            result = S_OK(batchIDList)
            result['PilotStampDict'] = stampDict
        else:
            result = S_ERROR(
                'No pilot references obtained from the glite job submission')
        return result
Example #30
    def parseJobSubmitStdout(self, proxy, cmd, taskQueueID, rb):
        """
        Parse Job Submit stdout to return pilot reference
        """
        start = time.time()
        self.log.verbose('Executing Job Submit for TaskQueue', taskQueueID)

        ret = executeGridCommand(proxy, cmd, self.gridEnv)

        if not ret['OK']:
            self.log.error('Failed to execute Job Submit:', ret['Message'])
            self.__sendErrorMail(rb, 'Job Submit', cmd, ret, proxy)
            return False
        if ret['Value'][0] != 0:
            self.log.error('Error executing Job Submit:',
                           str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3]))
            self.__sendErrorMail(rb, 'Job Submit', cmd, ret, proxy)
            return False
        self.log.info('Job Submit Execution Time: %.2f for TaskQueue %d' %
                      ((time.time() - start), taskQueueID))

        stdout = ret['Value'][1]
        stderr = ret['Value'][2]

        submittedPilot = None

        failed = 1
        rb = ''
        for line in List.fromChar(stdout, '\n'):
            m = re.search("(https:\S+)", line)
            if (m):
                glite_id = m.group(1)
                submittedPilot = glite_id
                if not rb:
                    m = re.search("https://(.+):.+", glite_id)
                    rb = m.group(1)
                failed = 0
        if failed:
            self.log.error('Job Submit returns no Reference:',
                           str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3]))
            return False

        self.log.info('Reference %s for TaskQueue %s' %
                      (glite_id, taskQueueID))

        return glite_id, rb
Example #31
    def getJobStatus(self, jobIDList):
        """ Get the status information for the given list of jobs
    """

        workingDirectory = self.ceParameters["WorkingDirectory"]
        fd, name = tempfile.mkstemp(suffix=".list", prefix="StatJobs_", dir=workingDirectory)
        jobListFile = os.fdopen(fd, "w")

        jobTmpList = list(jobIDList)
        if type(jobIDList) in StringTypes:
            jobTmpList = [jobIDList]

        jobList = []
        for j in jobTmpList:
            if ":::" in j:
                job = j.split(":::")[0]
            else:
                job = j
            jobList.append(job)
            jobListFile.write(job + "\n")

        cmd = ["arcstat", "-c", self.ceHost, "-i", name, "-j", self.ceParameters["JobListFile"]]
        result = executeGridCommand(self.proxy, cmd, self.gridEnv)
        os.unlink(name)

        resultDict = {}
        if not result["OK"]:
            self.log.error("Failed to get job status", result["Message"])
            return result
        if result["Value"][0]:
            if result["Value"][2]:
                return S_ERROR(result["Value"][2])
            else:
                return S_ERROR("Error while interrogating job statuses")
        if result["Value"][1]:
            resultDict = self.__parseJobStatus(result["Value"][1])

        if not resultDict:
            return S_ERROR("No job statuses returned")

        # If CE does not know about a job, set the status to Unknown
        for job in jobList:
            if not resultDict.has_key(job):
                resultDict[job] = "Unknown"
        return S_OK(resultDict)
Example #32
  def submitJob(self, executableFile, proxy, numberOfJobs=1):
    """ Method to submit job
    """

    self.log.verbose("Executable file path: %s" % executableFile)
    if not os.access(executableFile, 5):
      os.chmod(executableFile, 0o755)

    subName = self.__writeSub(executableFile, numberOfJobs)

    jobStamps = []
    for _i in range(numberOfJobs):
      jobStamps.append(makeGuid()[:8])

    cmd = ['condor_submit', '-terse', subName]
    # the options for submit to remote are different than the other remoteScheddOptions
    scheddOptions = [] if self.useLocalSchedd else ['-pool', '%s:9619' % self.ceName, '-remote', self.ceName]
    for op in scheddOptions:
      cmd.insert(-1, op)

    result = executeGridCommand(self.proxy, cmd, self.gridEnv)
    self.log.verbose(result)
    os.unlink(subName)
    if not result['OK']:
      self.log.error("Failed to submit jobs to htcondor", result['Message'])
      return result

    if result['Value'][0]:
      # We have got a non-zero status code
      errorString = result['Value'][2] if result['Value'][2] else result['Value'][1]
      return S_ERROR('Pilot submission failed with error: %s ' % errorString.strip())

    pilotJobReferences = self.__getPilotReferences(result['Value'][1].strip())
    if not pilotJobReferences['OK']:
      return pilotJobReferences
    pilotJobReferences = pilotJobReferences['Value']

    self.log.verbose("JobStamps: %s " % jobStamps)
    self.log.verbose("pilotRefs: %s " % pilotJobReferences)

    result = S_OK(pilotJobReferences)
    result['PilotStampDict'] = dict(zip(pilotJobReferences, jobStamps))
    self.log.verbose("Result for submission: %s " % result)
    return result
Example #33
    def parseListMatchStdout(self, proxy, cmd, taskQueueID, rb):
        """
        Parse List Match stdout to return list of matched CE's
        """
        self.log.verbose('Executing List Match for TaskQueue', taskQueueID)

        start = time.time()
        ret = executeGridCommand(proxy, cmd, self.gridEnv)

        if not ret['OK']:
            self.log.error('Failed to execute List Match:', ret['Message'])
            self.__sendErrorMail(rb, 'List Match', cmd, ret, proxy)
            return False
        if ret['Value'][0] != 0:
            self.log.error('Error executing List Match:',
                           str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3]))
            self.__sendErrorMail(rb, 'List Match', cmd, ret, proxy)
            return False
        self.log.info('List Match Execution Time: %.2f for TaskQueue %d' %
                      ((time.time() - start), taskQueueID))

        stdout = ret['Value'][1]
        stderr = ret['Value'][2]
        availableCEs = []
        # Parse std.out
        for line in List.fromChar(stdout, '\n'):
            if re.search('/jobmanager-', line) or re.search('/cream-', line):
                # TODO: the line has to be stripped from extra info
                availableCEs.append(line)

        if not availableCEs:
            self.log.info('List-Match failed to find CEs for TaskQueue',
                          taskQueueID)
            self.log.info(stdout)
            self.log.info(stderr)
        else:
            self.log.debug('List-Match returns:',
                           str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3]))
            self.log.info(
                'List-Match found %s CEs for TaskQueue' % len(availableCEs),
                taskQueueID)
            self.log.verbose(', '.join(availableCEs))

        return availableCEs
Example #34
    def submitJob(self, executableFile, proxy, numberOfJobs=1):
        """ Method to submit job
    """

        self.log.verbose("Executable file path: %s" % executableFile)
        if not os.access(executableFile, 5):
            os.chmod(executableFile, 0755)

        batchIDList = []
        stampDict = {}
        for _i in xrange(numberOfJobs):
            _jdlName, diracStamp = self.__writeRSL(executableFile)
            queueName = '%s/%s' % (self.ceName, self.queue)
            cmd = ['globus-job-submit', queueName, "-s", executableFile]
            #cmd = ['globus-job-submit', '-r %s' % queueName, '-f %s' % jdlName ]
            result = executeGridCommand(self.proxy, cmd, self.gridEnv)
            self.log.verbose(result)
            #os.unlink( jdlName )
            if result['OK']:
                if result['Value'][0]:
                    # We have got a non-zero status code
                    errorString = result['Value'][2] if result['Value'][
                        2] else result['Value'][1]
                    return S_ERROR('Pilot submission failed with error: %s ' %
                                   errorString.strip())
                pilotJobReference = result['Value'][1].strip()
                if not pilotJobReference:
                    return S_ERROR(
                        'No pilot reference returned from the glite job submission command'
                    )
                if not pilotJobReference.startswith('https'):
                    return S_ERROR('Invalid pilot reference %s' %
                                   pilotJobReference)
                batchIDList.append(pilotJobReference)
                stampDict[pilotJobReference] = diracStamp

        if batchIDList:
            result = S_OK(batchIDList)
            result['PilotStampDict'] = stampDict
        else:
            result = S_ERROR(
                'No pilot references obtained from the glite job submission')
        return result
Example #35
    def submitJob(self, executableFile, proxy, numberOfJobs=1):
        """ Method to submit job
    """

        self.log.verbose("Executable file path: %s" % executableFile)
        if not os.access(executableFile, 5):
            os.chmod(executableFile, 0755)

        subName = self.__writeSub(executableFile, numberOfJobs)

        jobStamps = []
        for _i in range(numberOfJobs):
            jobStamps.append(makeGuid()[:8])

        cmd = ['condor_submit', '-terse', subName]
        result = executeGridCommand(self.proxy, cmd, self.gridEnv)
        self.log.verbose(result)
        os.unlink(subName)
        if not result['OK']:
            self.log.error("Failed to submit jobs to htcondor",
                           result['Message'])
            return result

        if result['Value'][0]:
            # We have got a non-zero status code
            errorString = result['Value'][2] if result['Value'][2] else result[
                'Value'][1]
            return S_ERROR('Pilot submission failed with error: %s ' %
                           errorString.strip())

        pilotJobReferences = self.__getPilotReferences(
            result['Value'][1].strip())
        if not pilotJobReferences['OK']:
            return pilotJobReferences
        pilotJobReferences = pilotJobReferences['Value']

        self.log.verbose("JobStamps: %s " % jobStamps)
        self.log.verbose("pilotRefs: %s " % pilotJobReferences)

        result = S_OK(pilotJobReferences)
        result['PilotStampDict'] = dict(zip(pilotJobReferences, jobStamps))
        self.log.verbose("Result for submission: %s " % result)
        return result
Example #36
    def getJobLog(self, jobID):
        """Get pilot job logging info

        :param str jobID: pilot job identifier
        :return: string representing the logging info of a given pilot job
        """
        # pilotRef may integrate the pilot stamp
        # it has to be removed before being passed in parameter
        jobID = jobID.split(":::")[0]
        cmd = ["glite-ce-job-status", "-L", "2", "%s" % jobID]
        ret = executeGridCommand("", cmd, self.gridEnv)
        if not ret["OK"]:
            return ret

        status, output, error = ret["Value"]
        if status:
            return S_ERROR(error)

        return S_OK(output)
Example #37
    def parseJobSubmitStdout(self, proxy, cmd, taskQueueID, rb):
        """
        Parse Job Submit stdout to return pilot reference
        """
        start = time.time()
        self.log.verbose("Executing Job Submit for TaskQueue", taskQueueID)

        ret = executeGridCommand(proxy, cmd, self.gridEnv)

        if not ret["OK"]:
            self.log.error("Failed to execute Job Submit:", ret["Message"])
            self.__sendErrorMail(rb, "Job Submit", cmd, ret, proxy)
            return False
        if ret["Value"][0] != 0:
            self.log.error("Error executing Job Submit:", str(ret["Value"][0]) + "\n".join(ret["Value"][1:3]))
            self.__sendErrorMail(rb, "Job Submit", cmd, ret, proxy)
            return False
        self.log.info("Job Submit Execution Time: %.2f for TaskQueue %d" % ((time.time() - start), taskQueueID))

        stdout = ret["Value"][1]
        stderr = ret["Value"][2]

        submittedPilot = None

        failed = 1
        rb = ""
        for line in List.fromChar(stdout, "\n"):
            m = re.search("(https:\S+)", line)
            if m:
                glite_id = m.group(1)
                submittedPilot = glite_id
                if not rb:
                    m = re.search("https://(.+):.+", glite_id)
                    rb = m.group(1)
                failed = 0
        if failed:
            self.log.error("Job Submit returns no Reference:", str(ret["Value"][0]) + "\n".join(ret["Value"][1:3]))
            return False

        self.log.info("Reference %s for TaskQueue %s" % (glite_id, taskQueueID))

        return glite_id, rb
Example #38
  def parseJobSubmitStdout( self, proxy, cmd, taskQueueID, rb ):
    """
      Parse Job Submit stdout to return pilot reference
    """
    start = time.time()
    self.log.verbose( 'Executing Job Submit for TaskQueue', taskQueueID )

    ret = executeGridCommand( proxy, cmd, self.gridEnv )

    if not ret['OK']:
      self.log.error( 'Failed to execute Job Submit:', ret['Message'] )
      self.__sendErrorMail( rb, 'Job Submit', cmd, ret, proxy )
      return False
    if ret['Value'][0] != 0:
      self.log.error( 'Error executing Job Submit:', str( ret['Value'][0] ) + '\n'.join( ret['Value'][1:3] ) )
      self.__sendErrorMail( rb, 'Job Submit', cmd, ret, proxy )
      return False
    self.log.info( 'Job Submit Execution Time: %.2f for TaskQueue %d' % ( ( time.time() - start ), taskQueueID ) )

    stdout = ret['Value'][1]
    stderr = ret['Value'][2]

    submittedPilot = None

    failed = 1
    rb = ''
    for line in List.fromChar( stdout, '\n' ):
      m = re.search( "(https:\S+)", line )
      if ( m ):
        glite_id = m.group( 1 )
        submittedPilot = glite_id
        if not rb:
          m = re.search( "https://(.+):.+", glite_id )
          rb = m.group( 1 )
        failed = 0
    if failed:
      self.log.error( 'Job Submit returns no Reference:', str( ret['Value'][0] ) + '\n'.join( ret['Value'][1:3] ) )
      return False

    self.log.info( 'Reference %s for TaskQueue %s' % ( glite_id, taskQueueID ) )

    return glite_id, rb
Example #39
    def getJobStatus(self, jobIDList):
        """ Get the status information for the given list of jobs
    """

        workingDirectory = self.ceParameters['WorkingDirectory']
        fd, idFileName = tempfile.mkstemp(suffix='.ids',
                                          prefix='CREAM_',
                                          dir=workingDirectory)
        idFile = os.fdopen(fd, 'w')
        idFile.write('##CREAMJOBS##')
        for id_ in jobIDList:
            if ":::" in id_:
                ref, stamp = id_.split(':::')
            else:
                ref = id_
            idFile.write('\n' + ref)
        idFile.close()

        cmd = ['glite-ce-job-status', '-n', '-i', '%s' % idFileName]
        result = executeGridCommand(self.proxy, cmd, self.gridEnv)
        os.unlink(idFileName)
        resultDict = {}
        if not result['OK']:
            self.log.error('Failed to get job status', result['Message'])
            return result
        if result['Value'][0]:
            if result['Value'][2]:
                return S_ERROR(result['Value'][2])
            else:
                return S_ERROR('Error while interrogating job statuses')
        if result['Value'][1]:
            resultDict = self.__parseJobStatus(result['Value'][1])

        if not resultDict:
            return S_ERROR('No job statuses returned')

        # If CE does not know about a job, set the status to Unknown
        for job in jobIDList:
            if not resultDict.has_key(job):
                resultDict[job] = 'Unknown'

        return S_OK(resultDict)
Example #40
    def getJobOutput(self, jobID, localDir=None):
        """ Get the specified job standard output and error files. If the localDir is provided,
        the output is returned as file in this directory. Otherwise, the output is returned 
        as strings. 
    """
        if jobID.find(':::') != -1:
            pilotRef, stamp = jobID.split(':::')
        else:
            pilotRef = jobID
            stamp = ''
        if not stamp:
            return S_ERROR('Pilot stamp not defined for %s' % pilotRef)

        arcID = os.path.basename(pilotRef)
        if "WorkingDirectory" in self.ceParameters:
            workingDirectory = os.path.join(
                self.ceParameters['WorkingDirectory'], arcID)
        else:
            workingDirectory = arcID
        outFileName = os.path.join(workingDirectory, '%s.out' % stamp)
        errFileName = os.path.join(workingDirectory, '%s.err' % stamp)

        cmd = ['arcget', '-j', self.ceParameters['JobListFile'], pilotRef]
        result = executeGridCommand(self.proxy, cmd, self.gridEnv)
        output = ''
        if result['OK']:
            if not result['Value'][0]:
                outFile = open(outFileName, 'r')
                output = outFile.read()
                outFile.close()
                os.unlink(outFileName)
                errFile = open(errFileName, 'r')
                error = errFile.read()
                errFile.close()
                os.unlink(errFileName)
            else:
                error = '\n'.join(result['Value'][1:])
                return S_ERROR(error)
        else:
            return S_ERROR('Failed to retrieve output for %s' % jobID)

        return S_OK((output, error))
Example #41
    def __resolveOutputURL(self, pilotRef):
        """Resolve the URL of the pilot output files"""

        cmd = ["glite-ce-job-status", "-L", "2", "%s" % pilotRef, "| grep -i osb"]
        result = executeGridCommand(self.proxy, cmd, self.gridEnv)
        url = ""
        if result["OK"]:
            if not result["Value"][0]:
                output = result["Value"][1]
                for line in output.split("\n"):
                    line = line.strip()
                    if line.find("OSB") != -1:
                        match = re.search(r"\[(.*)\]", line)
                        if match:
                            url = match.group(1)
            if url:
                return S_OK(url)
            return S_ERROR("output URL not found for %s" % pilotRef)
        else:
            return S_ERROR("Failed to retrieve long status for %s" % pilotRef)
Example #42
  def getJobOutput( self, jobID, localDir = None ):
    """ Get the specified job standard output and error files. If the localDir is provided,
        the output is returned as file in this directory. Otherwise, the output is returned 
        as strings. 
    """
    if jobID.find( ':::' ) != -1:
      pilotRef, stamp = jobID.split( ':::' )
    else:
      pilotRef = jobID
      stamp = ''
    if not stamp:
      return S_ERROR( 'Pilot stamp not defined for %s' % pilotRef )

    arcID = os.path.basename(pilotRef)
    if "WorkingDirectory" in self.ceParameters:    
      workingDirectory = os.path.join( self.ceParameters['WorkingDirectory'], arcID )
    else:
      workingDirectory = arcID  
    outFileName = os.path.join( workingDirectory, '%s.out' % stamp )
    errFileName = os.path.join( workingDirectory, '%s.err' % stamp )

    cmd = ['arcget', '-j', self.ceParameters['JobListFile'], pilotRef ]
    result = executeGridCommand( self.proxy, cmd, self.gridEnv )
    output = ''
    if result['OK']:
      if not result['Value'][0]:
        outFile = open( outFileName, 'r' )
        output = outFile.read()
        outFile.close()
        os.unlink( outFileName )
        errFile = open( errFileName, 'r' )
        error = errFile.read()
        errFile.close()
        os.unlink( errFileName )
      else:
        error = '\n'.join( result['Value'][1:] )
        return S_ERROR( error )  
    else:
      return S_ERROR( 'Failed to retrieve output for %s' % jobID )

    return S_OK( ( output, error ) )
Пример #43
0
    def getCEStatus(self):
        """ Method to return information on running and pending jobs.
    """
        cmd = [
            'arcstat', '-c', self.ceHost, '-j',
            self.ceParameters['JobListFile']
        ]
        result = executeGridCommand(self.proxy, cmd, self.gridEnv)
        resultDict = {}
        if not result['OK']:
            return result

        if result['Value'][0] == 1 and result['Value'][1] == "No jobs\n":
            result = S_OK()
            result['RunningJobs'] = 0
            result['WaitingJobs'] = 0
            result['SubmittedJobs'] = 0
            return result

        if result['Value'][0]:
            if result['Value'][2]:
                return S_ERROR(result['Value'][2])
            else:
                return S_ERROR('Error while interrogating CE status')
        if result['Value'][1]:
            resultDict = self.__parseJobStatus(result['Value'][1])

        running = 0
        waiting = 0
        for ref in resultDict:
            status = resultDict[ref]
            if status == 'Scheduled':
                waiting += 1
            if status == 'Running':
                running += 1

        result = S_OK()
        result['RunningJobs'] = running
        result['WaitingJobs'] = waiting
        result['SubmittedJobs'] = 0
        return result
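
The running/waiting tally over resultDict can also be expressed with collections.Counter; a minimal sketch, assuming resultDict maps job references to status strings such as 'Running' and 'Scheduled' as produced by __parseJobStatus:

from collections import Counter

def countCEJobs(resultDict):
    # resultDict: {jobReference: status string}
    counts = Counter(resultDict.values())
    return {'RunningJobs': counts.get('Running', 0),
            'WaitingJobs': counts.get('Scheduled', 0),
            'SubmittedJobs': 0}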
Пример #44
0
    def submitFTS2(self, command='glite-transfer-submit', pinTime=False):
        """ submit fts job using FTS2 client """
        if self.FTSGUID:
            return S_ERROR("FTSJob has already been submitted")
        surls = self._surlPairs()
        if not surls:
            return S_ERROR("No files to submit")
        fd, fileName = tempfile.mkstemp()
        surlFile = os.fdopen(fd, 'w')
        surlFile.write(surls)
        surlFile.close()
        submitCommand = command.split() + \
                         [ "-s",
                         self.FTSServer,
                         "-f",
                         fileName,
                         "-o",
                         "-K" ]
        if self.TargetToken:
            submitCommand += ["-t", self.TargetToken]
        if self.SourceToken:
            submitCommand += ["-S", self.SourceToken]
        if pinTime:
            submitCommand += [
                "--copy-pin-lifetime",
                "%d" % pinTime, "--bring-online", '86400'
            ]

        submit = executeGridCommand("", submitCommand)
        os.remove(fileName)
        if not submit["OK"]:
            return submit
        returnCode, output, errStr = submit["Value"]
        if returnCode != 0:
            return S_ERROR(errStr if errStr else output)
        self.FTSGUID = output.replace("\n", "")
        self.Status = "Submitted"
        for ftsFile in self:
            ftsFile.FTSGUID = self.FTSGUID
            ftsFile.Status = "Submitted"
        return S_OK()
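
The temporary file handed to glite-transfer-submit -f holds one source/target SURL pair per line; a hedged sketch of producing it, assuming a list of (sourceSURL, targetSURL) tuples (the real _surlPairs helper is not shown in these examples):

import os
import tempfile

def writeSurlFile(surlPairs):
    # surlPairs: iterable of (sourceSURL, targetSURL) tuples (assumption)
    fd, fileName = tempfile.mkstemp()
    with os.fdopen(fd, 'w') as surlFile:
        for source, target in surlPairs:
            surlFile.write('%s %s\n' % (source, target))
    return fileName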
Пример #45
0
def getPilotLoggingInfo(grid, pilotRef):
    """
   Get LoggingInfo of a GRID job
  """
    if grid == 'gLite':
        cmd = ['glite-wms-job-logging-info', '-v', '3', '--noint', pilotRef]
    elif grid == 'CREAM':
        cmd = ['glite-ce-job-status', '-L', '2', '%s' % pilotRef]
    else:
        return S_ERROR('Pilot logging not available for %s CEs' % grid)

    gridEnv = getGridEnv()
    ret = executeGridCommand('', cmd, gridEnv)
    if not ret['OK']:
        return ret

    status, output, error = ret['Value']
    if status:
        return S_ERROR(error)

    return S_OK(output)
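
A minimal usage sketch, with a placeholder pilot reference (getGridEnv and executeGridCommand as in the examples above):

result = getPilotLoggingInfo('CREAM', 'https://cream.example.org:8443/CREAM123456')  # placeholder reference
if result['OK']:
    print(result['Value'])
else:
    print('Failed: %s' % result['Message'])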
Пример #46
0
    def _getChildrenReferences(self, proxy, parentReference, taskQueueID):
        """
     Get reference for all Children
    """
        cmd = ['glite-wms-job-status', parentReference]

        start = time.time()
        self.log.verbose('Executing Job Status for TaskQueue', taskQueueID)

        ret = executeGridCommand(proxy, cmd, self.gridEnv)

        if not ret['OK']:
            self.log.error('Failed to execute Job Status', ret['Message'])
            return []
        if ret['Value'][0] != 0:
            self.log.error('Error executing Job Status:',
                           str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3]))
            return []
        self.log.info('Job Status Execution Time: %.2f' %
                      (time.time() - start))

        stdout = ret['Value'][1]
        # stderr = ret['Value'][2]

        references = []

        failed = 1
        for line in List.fromChar(stdout, '\n'):
            match = re.search("Status info for the Job : (https:\S+)", line)
            if (match):
                glite_id = match.group(1)
                if glite_id not in references and glite_id != parentReference:
                    references.append(glite_id)
                failed = 0
        if failed:
            error = str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3])
            self.log.error('Job Status returns no Child Reference:', error)
            return [parentReference]

        return references
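
For illustration, how the child-reference extraction behaves on an assumed fragment of glite-wms-job-status output (real output contains much more detail):

import re

sample = """Status info for the Job : https://wms.example.org:9000/parentID
Status info for the Job : https://wms.example.org:9000/childID1
Status info for the Job : https://wms.example.org:9000/childID2"""

refs = re.findall(r"Status info for the Job : (https:\S+)", sample)
print(refs[1:])  # the first match is the parent itself and is filtered out above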
Пример #47
0
  def __resolveOutputURL(self, pilotRef):
    """ Resolve the URL of the pilot output files
    """

    cmd = ['glite-ce-job-status', '-L', '2', '%s' % pilotRef,
           '| grep -i osb']
    result = executeGridCommand(self.proxy, cmd, self.gridEnv)
    url = ''
    if result['OK']:
      if not result['Value'][0]:
        output = result['Value'][1]
        for line in output.split('\n'):
          line = line.strip()
          if line.find('OSB') != -1:
            match = re.search(r'\[(.*)\]', line)
            if match:
              url = match.group(1)
      if url:
        return S_OK(url)
      return S_ERROR('output URL not found for %s' % pilotRef)
    else:
      return S_ERROR('Failed to retrieve long status for %s' % pilotRef)
Пример #48
0
  def getJobStatus(self, jobIDList):
    """ Get the status information for the given list of jobs
    """

    resultDict = {}
    self.log.verbose("JobIDList: %s" % jobIDList)
    for jobInfo in jobIDList:
      jobID = jobInfo.split(":::")[0]
      #jobRef = jobInfo.split(":::")[1]

      cmd = ['globus-job-status', jobID]
      result = executeGridCommand(self.proxy, cmd, self.gridEnv)

      self.log.info("Result from globus-job-status %s " % str(result))

      if not result['OK']:
        self.log.error('Failed to get job status for jobID', jobID)
        continue
      if result['Value'][0]:
        if result['Value'][2]:
          return S_ERROR(result['Value'][2])
        else:
          return S_ERROR('Error while interrogating job statuses')

      if result['Value'][1]:
        resultDict[jobID] = self.__parseJobStatus(result['Value'][1])

    if not resultDict:
      return S_ERROR('No job statuses returned')

    # If CE does not know about a job, set the status to Unknown
    for jobInfo in jobIDList:
      jobID = jobInfo.split(":::")[0]
      if jobID not in resultDict:
        resultDict[jobInfo] = 'Unknown'

    return S_OK(resultDict)
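
The jobInfo strings handled here follow the '<jobID>:::<stamp>' convention used throughout these examples; a small helper sketch (splitJobInfo is a hypothetical name) for splitting them:

def splitJobInfo(jobInfo):
    # Returns (jobID, stamp); stamp is '' when no ':::' separator is present
    if ':::' in jobInfo:
        jobID, stamp = jobInfo.split(':::', 1)
        return jobID, stamp
    return jobInfo, ''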
Пример #49
0
    def getCEStatus(self):
        """ Method to return information on running and pending jobs.
    """
        statusList = [
            'REGISTERED', 'PENDING', 'IDLE', 'RUNNING', 'REALLY-RUNNING'
        ]
        cmd = [
            'glite-ce-job-status', '-n', '-a', '-e',
            '%s' % self.ceName, '-s',
            '%s' % ':'.join(statusList)
        ]
        result = executeGridCommand(self.proxy, cmd, self.gridEnv)
        resultDict = {}
        if not result['OK']:
            return result
        if result['Value'][0]:
            if result['Value'][2]:
                return S_ERROR(result['Value'][2])
            else:
                return S_ERROR('Error while interrogating CE status')
        if result['Value'][1]:
            resultDict = self.__parseJobStatus(result['Value'][1])

        running = 0
        waiting = 0
        for ref, status in resultDict.items():
            if status == 'Scheduled':
                waiting += 1
            if status == 'Running':
                running += 1

        result = S_OK()
        result['RunningJobs'] = running
        result['WaitingJobs'] = waiting
        result['SubmittedJobs'] = 0
        return result
Пример #50
0
    def __getFullOutput(self):
        comm = [
            'glite-transfer-status', '-s', self.ftsServer, '-l', self.ftsGUID
        ]
        res = executeGridCommand('', comm, self.gridEnv)
        if not res['OK']:
            return res
        returnCode, output, errStr = res['Value']
        # Returns a non-zero status if error
        if returnCode != 0:
            return S_ERROR(errStr)
        statusExp = re.compile(r"^(\S+)")
        self.requestStatus = re.search(statusExp, output).group(1)
        output = output.replace("%s\n" % self.requestStatus, "", 1)
        toRemove = ["'", "<", ">"]
        for char in toRemove:
            output = output.replace(char, '')
        regExp = re.compile(
            r"[ ]+Source:[ ]+(\S+)\n[ ]+Destination:[ ]+(\S+)\n[ ]+State:[ ]+(\S+)\n[ ]+Retries:[ ]+(\d+)\n[ ]+Reason:[ ]+([\S ]+).+?[ ]+Duration:[ ]+(\d+)",
            re.S)
        fileInfo = re.findall(regExp, output)
        for source, target, status, retries, reason, duration in fileInfo:
            lfn = ''
            for candidate in sortList(self.fileDict.keys()):
                if re.search(candidate, source):
                    lfn = candidate
            if not lfn:
                continue
            self.__setFileParameter(lfn, 'Source', source)
            self.__setFileParameter(lfn, 'Target', target)
            self.__setFileParameter(lfn, 'Status', status)
            if reason == '(null)':
                reason = ''
            self.__setFileParameter(lfn, 'Reason', reason.replace("\n", " "))
            self.__setFileParameter(lfn, 'Duration', int(duration))
        return S_OK()
Пример #51
0
    def getJobOutput(self, jobID, localDir=None):
        """ Get the specified job standard output and error files. If the localDir is provided,
        the output is returned as file in this directory. Otherwise, the output is returned
        as strings.
    """
        if jobID.find(':::') != -1:
            pilotRef, stamp = jobID.split(':::')
        else:
            pilotRef = jobID
            stamp = ''
        if not stamp:
            return S_ERROR('Pilot stamp not defined for %s' % pilotRef)

        outURL = self.ceParameters.get('OutputURL', 'gsiftp://localhost')
        if outURL == 'gsiftp://localhost':
            result = self.__resolveOutputURL(pilotRef)
            if not result['OK']:
                return result
            outURL = result['Value']

        outputURL = os.path.join(outURL, '%s.out' % stamp)
        errorURL = os.path.join(outURL, '%s.err' % stamp)
        workingDirectory = self.ceParameters['WorkingDirectory']
        outFileName = os.path.join(workingDirectory,
                                   os.path.basename(outputURL))
        errFileName = os.path.join(workingDirectory,
                                   os.path.basename(errorURL))

        cmd = ['globus-url-copy', '%s' % outputURL, 'file://%s' % outFileName]
        result = executeGridCommand(self.proxy, cmd, self.gridEnv)
        output = ''
        if result['OK']:
            if not result['Value'][0]:
                outFile = open(outFileName, 'r')
                output = outFile.read()
                outFile.close()
                os.unlink(outFileName)
            elif result['Value'][0] == 1 and "No such file or directory" in result['Value'][2]:
                output = "Standard Output is not available on the CREAM service"
                if os.path.exists(outFileName):
                    os.unlink(outFileName)
                return S_ERROR(output)
            else:
                error = '\n'.join(result['Value'][1:])
                return S_ERROR(error)
        else:
            return S_ERROR('Failed to retrieve output for %s' % jobID)

        cmd = ['globus-url-copy', '%s' % errorURL, '%s' % errFileName]
        result = executeGridCommand(self.proxy, cmd, self.gridEnv)
        error = ''
        if result['OK']:
            if not result['Value'][0]:
                errFile = open(errFileName, 'r')
                error = errFile.read()
                errFile.close()
                os.unlink(errFileName)
            elif result['Value'][0] == 1 and "No such file or directory" in result['Value'][2]:
                error = "Standard Error is not available on the CREAM service"
                if os.path.exists(errFileName):
                    os.unlink(errFileName)
                return S_ERROR(error)
        else:
            return S_ERROR('Failed to retrieve error for %s' % jobID)

        return S_OK((output, error))
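
The output and error URLs are simply the stamp-based file names joined onto the resolved sandbox URL; an illustration with placeholder values:

import os

outURL = 'gsiftp://ce.example.org/var/cream_sandbox/pilot/OSB'  # placeholder, as returned by __resolveOutputURL
stamp = 'abc123'                                                # placeholder DIRAC stamp
print(os.path.join(outURL, '%s.out' % stamp))  # .../OSB/abc123.out
print(os.path.join(outURL, '%s.err' % stamp))  # .../OSB/abc123.err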
Пример #52
0
    def getJobStatus(self, jobIDList):
        """ Get the status information for the given list of jobs
    """
        if self.proxyRenewal % 60 == 0:
            self.proxyRenewal += 1
            statusList = [
                'REGISTERED', 'PENDING', 'IDLE', 'RUNNING', 'REALLY-RUNNING'
            ]
            cmd = [
                'glite-ce-job-status', '-L', '2', '--all', '-e',
                '%s' % self.ceName, '-s',
                '%s' % ':'.join(statusList)
            ]
            result = executeGridCommand(self.proxy, cmd, self.gridEnv)
            if result['OK']:
                delegationIDs = []
                for line in result['Value'][1].split('\n'):
                    if line.find('Deleg Proxy ID') != -1:
                        delegationID = line.split()[-1].replace('[', '').replace(']', '')
                        if delegationID not in delegationIDs:
                            delegationIDs.append(delegationID)
                if delegationIDs:
                    # Renew proxies in batches to avoid timeouts
                    chunkSize = 10
                    for i in xrange(0, len(delegationIDs), chunkSize):
                        chunk = delegationIDs[i:i + chunkSize]
                        cmd = ['glite-ce-proxy-renew', '-e', self.ceName]
                        cmd.extend(chunk)
                        self.log.info('Refreshing proxy for:', ' '.join(chunk))
                        result = executeGridCommand(self.proxy, cmd,
                                                    self.gridEnv)
                        if result['OK']:
                            status, output, error = result['Value']
                            if status:
                                self.log.error(
                                    "Failed to renew proxy delegation",
                                    'Output:\n' + output + '\nError:\n' +
                                    error)

        workingDirectory = self.ceParameters['WorkingDirectory']
        fd, idFileName = tempfile.mkstemp(suffix='.ids',
                                          prefix='CREAM_',
                                          dir=workingDirectory)
        idFile = os.fdopen(fd, 'w')
        idFile.write('##CREAMJOBS##')
        for id_ in jobIDList:
            if ":::" in id_:
                ref, _stamp = id_.split(':::')
            else:
                ref = id_
            idFile.write('\n' + ref)
        idFile.close()

        cmd = ['glite-ce-job-status', '-n', '-i', '%s' % idFileName]
        result = executeGridCommand(self.proxy, cmd, self.gridEnv)
        os.unlink(idFileName)
        resultDict = {}
        if not result['OK']:
            self.log.error('Failed to get job status', result['Message'])
            return result
        if result['Value'][0]:
            if result['Value'][2]:
                return S_ERROR(result['Value'][2])
            return S_ERROR('Error while interrogating job statuses')
        if result['Value'][1]:
            resultDict = self.__parseJobStatus(result['Value'][1])

        if not resultDict:
            return S_ERROR('No job statuses returned')

        # If CE does not know about a job, set the status to Unknown
        for job in jobIDList:
            if job not in resultDict:
                resultDict[job] = 'Unknown'

        return S_OK(resultDict)
Пример #53
0
    def getCEStatus(self, jobIDList=None):
        """ Method to return information on running and pending jobs.

        :param jobIDList: list of job IDs to be considered
        :type jobIDList: python:list
    """
        statusList = [
            'REGISTERED', 'PENDING', 'IDLE', 'RUNNING', 'REALLY-RUNNING'
        ]
        cmd = [
            'glite-ce-job-status', '-n', '-a', '-e',
            '%s' % self.ceName, '-s',
            '%s' % ':'.join(statusList)
        ]
        result = executeGridCommand(self.proxy, cmd, self.gridEnv)
        resultDict = {}
        if not result['OK']:
            return result
        if result['Value'][0]:
            if result['Value'][0] == 11:
                return S_ERROR(
                    'Segmentation fault while calling glite-ce-job-status')
            elif result['Value'][2]:
                return S_ERROR(result['Value'][2])
            elif "Authorization error" in result['Value'][1]:
                return S_ERROR("Authorization error")
            elif "FaultString" in result['Value'][1]:
                res = re.search(r'FaultString=\[([\w\s]+)\]',
                                result['Value'][1])
                fault = ''
                if res:
                    fault = res.group(1)
                detail = ''
                res = re.search(r'FaultDetail=\[([\w\s]+)\]',
                                result['Value'][1])
                if res:
                    detail = res.group(1)
                    return S_ERROR("Error: %s:%s" % (fault, detail))
            else:
                return S_ERROR('Error while interrogating CE status')
        if result['Value'][1]:
            resultDict = self.__parseJobStatus(result['Value'][1])

        running = 0
        waiting = 0
        statusDict = {}
        for ref, status in resultDict.iteritems():
            if jobIDList is not None and ref not in jobIDList:
                continue
            if status == 'Scheduled':
                waiting += 1
            if status == 'Running':
                running += 1
            statusDict[ref] = status

        result = S_OK()
        result['RunningJobs'] = running
        result['WaitingJobs'] = waiting
        result['SubmittedJobs'] = 0
        result['JobStatusDict'] = statusDict
        return result
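
For illustration, how the FaultString/FaultDetail extraction above behaves on an assumed error payload (real CREAM fault messages may differ):

import re

sample = 'FaultString=[authentication failed] FaultDetail=[proxy expired]'  # assumed sample
fault = re.search(r'FaultString=\[([\w\s]+)\]', sample)
detail = re.search(r'FaultDetail=\[([\w\s]+)\]', sample)
if fault and detail:
    print("Error: %s:%s" % (fault.group(1), detail.group(1)))
    # Error: authentication failed:proxy expired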
Пример #54
0
    def submitJob(self, executableFile, proxy, numberOfJobs=1, processors=1):
        """ Method to submit job
    """

        self.log.verbose("Executable file path: %s" % executableFile)
        if not os.access(executableFile, 5):
            os.chmod(
                executableFile, stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP
                | stat.S_IROTH | stat.S_IXOTH)

        batchIDList = []
        stampDict = {}
        if numberOfJobs == 1:
            jdlName, diracStamp = self.__writeJDL(executableFile,
                                                  processors=processors)
            cmd = [
                'glite-ce-job-submit', '-n', '-a', '-N', '-r',
                '%s/%s' % (self.ceName, self.queue),
                '%s' % jdlName
            ]

            result = executeGridCommand(self.proxy, cmd, self.gridEnv)
            os.unlink(jdlName)
            if result['OK']:
                if result['Value'][0]:
                    # We have got a non-zero status code
                    errorString = '\n'.join(result['Value'][1:]).strip()
                    return S_ERROR('Pilot submission failed with error: %s ' %
                                   errorString)
                pilotJobReference = result['Value'][1].strip()
                if not pilotJobReference:
                    return S_ERROR(
                        'No pilot reference returned from the glite job submission command'
                    )
                if not pilotJobReference.startswith('https'):
                    return S_ERROR('Invalid pilot reference %s' %
                                   pilotJobReference)
                batchIDList.append(pilotJobReference)
                stampDict[pilotJobReference] = diracStamp
        else:
            delegationID = makeGuid()
            cmd = [
                'glite-ce-delegate-proxy', '-e',
                '%s' % self.ceName,
                '%s' % delegationID
            ]
            result = executeGridCommand(self.proxy, cmd, self.gridEnv)
            if not result['OK']:
                self.log.error('Failed to delegate proxy', result['Message'])
                return result
            for _i in range(numberOfJobs):
                jdlName, diracStamp = self.__writeJDL(executableFile,
                                                      processors=processors)
                cmd = [
                    'glite-ce-job-submit', '-n', '-N', '-r',
                    '%s/%s' % (self.ceName, self.queue), '-D',
                    '%s' % delegationID,
                    '%s' % jdlName
                ]
                result = executeGridCommand(self.proxy, cmd, self.gridEnv)
                os.unlink(jdlName)
                if not result['OK']:
                    self.log.error(
                        "General error in execution of glite-ce-job-submit command"
                    )
                    break
                if result['Value'][0] != 0:
                    self.log.error("Error in glite-ce-job-submit command",
                                   result['Value'][1] + result['Value'][2])
                    break
                pilotJobReference = result['Value'][1].strip()
                if pilotJobReference and pilotJobReference.startswith('https'):
                    batchIDList.append(pilotJobReference)
                    stampDict[pilotJobReference] = diracStamp
                else:
                    break
        if batchIDList:
            result = S_OK(batchIDList)
            result['PilotStampDict'] = stampDict
        else:
            result = S_ERROR(
                'No pilot references obtained from the glite job submission')
        return result
Пример #55
0
    def getJobStatus(self, jobIDList):
        """Get the status information for the given list of jobs"""
        if self.proxyRenewal % 60 == 0:
            self.proxyRenewal += 1
            statusList = ["REGISTERED", "PENDING", "IDLE", "RUNNING", "REALLY-RUNNING"]
            cmd = [
                "glite-ce-job-status",
                "-L",
                "2",
                "--all",
                "-e",
                "%s" % self.ceName,
                "-s",
                "%s" % ":".join(statusList),
            ]
            result = executeGridCommand(self.proxy, cmd, self.gridEnv)
            if result["OK"]:
                delegationIDs = []
                for line in result["Value"][1].split("\n"):
                    if line.find("Deleg Proxy ID") != -1:
                        delegationID = line.split()[-1].replace("[", "").replace("]", "")
                        if delegationID not in delegationIDs:
                            delegationIDs.append(delegationID)
                if delegationIDs:
                    # Renew proxies in batches to avoid timeouts
                    chunkSize = 10
                    for i in range(0, len(delegationIDs), chunkSize):
                        chunk = delegationIDs[i : i + chunkSize]
                        cmd = ["glite-ce-proxy-renew", "-e", self.ceName]
                        cmd.extend(chunk)
                        self.log.info("Refreshing proxy for:", " ".join(chunk))
                        result = executeGridCommand(self.proxy, cmd, self.gridEnv)
                        if result["OK"]:
                            status, output, error = result["Value"]
                            if status:
                                self.log.error(
                                    "Failed to renew proxy delegation", "Output:\n" + output + "\nError:\n" + error
                                )

        workingDirectory = self.ceParameters["WorkingDirectory"]
        fd, idFileName = tempfile.mkstemp(suffix=".ids", prefix="CREAM_", dir=workingDirectory)
        idFile = os.fdopen(fd, "w")
        idFile.write("##CREAMJOBS##")
        for id_ in jobIDList:
            if ":::" in id_:
                ref, _stamp = id_.split(":::")
            else:
                ref = id_
            idFile.write("\n" + ref)
        idFile.close()

        cmd = ["glite-ce-job-status", "-n", "-i", "%s" % idFileName]
        result = executeGridCommand(self.proxy, cmd, self.gridEnv)
        os.unlink(idFileName)
        resultDict = {}
        if not result["OK"]:
            self.log.error("Failed to get job status", result["Message"])
            return result
        if result["Value"][0]:
            if result["Value"][2]:
                return S_ERROR(result["Value"][2])
            return S_ERROR("Error while interrogating job statuses")
        if result["Value"][1]:
            resultDict = self.__parseJobStatus(result["Value"][1])

        if not resultDict:
            return S_ERROR("No job statuses returned")

        # If CE does not know about a job, set the status to Unknown
        for job in jobIDList:
            if job not in resultDict:
                resultDict[job] = PilotStatus.UNKNOWN

        return S_OK(resultDict)
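
The proxy renewal above walks the delegation IDs in fixed-size chunks; the slicing pattern in isolation, as a small sketch:

def chunks(items, size):
    # Yield successive slices of at most `size` elements
    for i in range(0, len(items), size):
        yield items[i:i + size]

# for chunk in chunks(delegationIDs, 10):
#     build and run one glite-ce-proxy-renew command per chunk, as above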
Пример #56
0
def getWMSPilotOutput( proxy, grid, pilotRef ):
  """
   Get Output of a GRID job
  """
  tmp_dir = mkdtemp()
  if grid == 'LCG':
    cmd = [ 'edg-job-get-output' ]
  elif grid == 'gLite':
    cmd = [ 'glite-wms-job-output' ]
  else:
    return S_ERROR( 'Unknown GRID %s' % grid )

  cmd.extend( ['--noint', '--dir', tmp_dir, pilotRef] )

  gridEnv = getGridEnv()

  ret = executeGridCommand( proxy, cmd, gridEnv )
  if not ret['OK']:
    shutil.rmtree( tmp_dir )
    return ret

  status, output, error = ret['Value']

  for errorString in [ 'already retrieved',
                       'Output not yet Ready',
                       'not yet ready',
                       'the status is ABORTED' ]:
    if error.find( errorString ) != -1:
      shutil.rmtree( tmp_dir )
      return S_ERROR( error )

  if status:
    shutil.rmtree( tmp_dir )
    return S_ERROR( error )

  # Get the list of files

  # LCG always creates a unique sub-directory
  # gLite does it too now
  result = executeGridCommand( proxy, ['glite-version'], gridEnv )
  if not result['OK']:
    shutil.rmtree( tmp_dir )
    return result
  status, output, error = result['Value']
  if output.find( '3.2' ) != -1:
    tmp_dir = os.path.join( tmp_dir, os.listdir( tmp_dir )[0] )

  # The returned sandbox files are the contents of the retrieved directory
  outputSandboxFiles = os.listdir( tmp_dir )

  result = S_OK()
  result['FileList'] = outputSandboxFiles

  for filename in outputSandboxFiles:
    tmpname = os.path.join( tmp_dir, filename )
    if os.path.exists( tmpname ):
      myfile = file( tmpname, 'r' )
      f = myfile.read()
      myfile.close()
    else:
      f = ''
    # HACK: removed after the current scheme has been in production for at least 1 week
    if filename == 'std.out' and f:
      filename = 'StdOut'
    if filename == 'std.err' and f:
      filename = 'StdErr'
    result[filename] = f

  shutil.rmtree( tmp_dir )
  return result
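
A hedged usage sketch of the structure returned above (the pilot reference is a placeholder; the 'StdOut'/'StdErr' keys are only present when std.out/std.err were retrieved and non-empty):

result = getWMSPilotOutput(proxy, 'gLite', 'https://wms.example.org:9000/pilotID')  # placeholder reference
if result['OK']:
    print(result['FileList'])        # names of the retrieved sandbox files
    print(result.get('StdOut', ''))  # content of std.out, if any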
Пример #57
0
    def monitorFTS2(self, full=False):
        """ monitor fts job """
        if not self.FTSGUID:
            return S_ERROR("FTSGUID not set, FTS job not submitted?")

        monitorCommand = [
            "glite-transfer-status", "--verbose", "-s", self.FTSServer,
            self.FTSGUID
        ]

        if full:
            monitorCommand.append("-l")

        monitor = executeGridCommand("", monitorCommand)
        if not monitor["OK"]:
            return monitor
        returnCode, outputStr, errStr = monitor["Value"]

        # Returns a non zero status if error
        if returnCode != 0:
            return S_ERROR(errStr)

        outputStr = outputStr.replace("'", "").replace("<",
                                                       "").replace(">", "")

        # # set FTS job status
        regExp = re.compile("Status:\s+(\S+)")

        self.Status = re.search(regExp, outputStr).group(1)

        statusSummary = {}
        for state in FTSFile.ALL_STATES:
            regExp = re.compile("\s+%s:\s+(\d+)" % state)
            if regExp.search(outputStr):
                statusSummary[state] = int(
                    re.search(regExp, outputStr).group(1))

        total = sum(statusSummary.values())
        completed = sum(
            [statusSummary.get(state, 0) for state in FTSFile.FINAL_STATES])
        self.Completeness = 100 * completed / total if total else 0

        if not full:
            return S_OK(statusSummary)

        regExp = re.compile(
            r"[ ]+Source:[ ]+(\S+)\n[ ]+Destination:[ ]+(\S+)\n[ ]+State:[ ]+(\S+)\n[ ]+Retries:[ ]+(\d+)\n[ ]+Reason:[ ]+([\S ]+).+?[ ]+Duration:[ ]+(\d+)",
            re.S)
        fileInfo = re.findall(regExp, outputStr)
        for sourceURL, targetURL, fileStatus, retries, reason, duration in fileInfo:
            candidateFile = None
            for ftsFile in self:
                if ftsFile.SourceSURL == sourceURL:
                    candidateFile = ftsFile
                    break
            if not candidateFile:
                continue
            candidateFile.Status = fileStatus
            candidateFile.Error = reason

            if candidateFile.Status == "Failed":
                for missingSource in self.missingSourceErrors:
                    if missingSource.match(reason):
                        candidateFile.Error = "MissingSource"

        # # register successful files
        if self.Status in FTSJob.FINALSTATES:
            return self.finalize()

        return S_OK()
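
The completeness figure is plain percentage arithmetic over the per-state counters; a worked sketch with assumed counts (FINAL_STATES below is a stand-in for FTSFile.FINAL_STATES, whose exact contents are not shown in these examples):

statusSummary = {'Done': 8, 'Failed': 1, 'Active': 1}   # assumed example counts
FINAL_STATES = ('Done', 'Failed', 'Canceled')           # stand-in value

total = sum(statusSummary.values())                                     # 10
completed = sum(statusSummary.get(state, 0) for state in FINAL_STATES)  # 9
completeness = 100.0 * completed / total if total else 0                # 90.0
print(completeness)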
Пример #58
0
    def getJobOutput(self, jobID):
        """Get the specified job standard output and error files. The output is returned
        as strings.
        """
        if jobID.find(":::") != -1:
            pilotRef, stamp = jobID.split(":::")
        else:
            pilotRef = jobID
            stamp = ""
        if not stamp:
            return S_ERROR("Pilot stamp not defined for %s" % pilotRef)

        outURL = self.ceParameters.get("OutputURL", "gsiftp://localhost")
        if outURL == "gsiftp://localhost":
            result = self.__resolveOutputURL(pilotRef)
            if not result["OK"]:
                return result
            outURL = result["Value"]

        outputURL = os.path.join(outURL, "%s.out" % stamp)
        errorURL = os.path.join(outURL, "%s.err" % stamp)
        workingDirectory = self.ceParameters["WorkingDirectory"]
        outFileName = os.path.join(workingDirectory, os.path.basename(outputURL))
        errFileName = os.path.join(workingDirectory, os.path.basename(errorURL))

        cmd = ["globus-url-copy", "%s" % outputURL, "file://%s" % outFileName]
        result = executeGridCommand(self.proxy, cmd, self.gridEnv)
        output = ""
        if result["OK"]:
            if not result["Value"][0]:
                outFile = open(outFileName, "r")
                output = outFile.read()
                outFile.close()
                os.unlink(outFileName)
            elif result["Value"][0] == 1 and "No such file or directory" in result["Value"][2]:
                output = "Standard Output is not available on the CREAM service"
                if os.path.exists(outFileName):
                    os.unlink(outFileName)
                return S_ERROR(output)
            else:
                error = "\n".join(result["Value"][1:])
                return S_ERROR(error)
        else:
            return S_ERROR("Failed to retrieve output for %s" % jobID)

        cmd = ["globus-url-copy", "%s" % errorURL, "%s" % errFileName]
        result = executeGridCommand(self.proxy, cmd, self.gridEnv)
        error = ""
        if result["OK"]:
            if not result["Value"][0]:
                errFile = open(errFileName, "r")
                error = errFile.read()
                errFile.close()
                os.unlink(errFileName)
        elif result["Value"][0] == 1 and "No such file or directory" in result["Value"][2]:
            error = "Standard Error is not available on the CREAM service"
            if os.path.exists(errFileName):
                os.unlink(errFileName)
            return S_ERROR(error)
        else:
            return S_ERROR("Failed to retrieve error for %s" % jobID)

        return S_OK((output, error))
Пример #59
0
    def submitJob(self, executableFile, proxy, numberOfJobs=1):
        """ Method to submit job
    """

        self.log.verbose("Executable file path: %s" % executableFile)
        if not os.access(executableFile, 5):
            os.chmod(executableFile, 0o755)

        batchIDList = []
        stampDict = {}
        if numberOfJobs == 1:
            jdlName, diracStamp = self.__writeJDL(executableFile)
            cmd = [
                'glite-ce-job-submit', '-n', '-a', '-N', '-r',
                '%s/%s' % (self.ceName, self.queue),
                '%s' % jdlName
            ]
            result = executeGridCommand(self.proxy, cmd, self.gridEnv)

            if result['OK']:
                if result['Value'][0]:
                    # We have got a non-zero status code
                    return S_ERROR('Pilot submission failed with error: %s ' %
                                   result['Value'][2].strip())
                pilotJobReference = result['Value'][1].strip()
                if not pilotJobReference:
                    return S_ERROR(
                        'No pilot reference returned from the glite job submission command'
                    )
                batchIDList.append(pilotJobReference)
                stampDict[pilotJobReference] = diracStamp
            os.unlink(jdlName)
        else:
            delegationID = makeGuid()
            cmd = [
                'glite-ce-delegate-proxy', '-e',
                '%s' % self.ceName,
                '%s' % delegationID
            ]
            result = executeGridCommand(self.proxy, cmd, self.gridEnv)
            if not result['OK']:
                self.log.error('Failed to delegate proxy: %s' %
                               result['Message'])
                return result
            for i in range(numberOfJobs):
                jdlName, diracStamp = self.__writeJDL(executableFile)
                cmd = [
                    'glite-ce-job-submit', '-n', '-N', '-r',
                    '%s/%s' % (self.ceName, self.queue), '-D',
                    '%s' % delegationID,
                    '%s' % jdlName
                ]
                result = executeGridCommand(self.proxy, cmd, self.gridEnv)
                os.unlink(jdlName)
                if not result['OK']:
                    break
                if result['Value'][0] != 0:
                    break
                pilotJobReference = result['Value'][1].strip()
                if pilotJobReference:
                    batchIDList.append(pilotJobReference)
                    stampDict[pilotJobReference] = diracStamp
                else:
                    break

        os.unlink(executableFile)
        if batchIDList:
            result = S_OK(batchIDList)
            result['PilotStampDict'] = stampDict
        else:
            result = S_ERROR(
                'No pilot references obtained from the glite job submission')
        return result
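
The numeric mode used above grants the same permissions as the stat flag combination in the other submitJob variants; a quick equivalence check:

import stat

mode = stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH
assert mode == 0o755  # rwxr-xr-x in both spellings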
Пример #60
0
    def monitorFTS2(self, command="glite-transfer-status", full=False):
        """ monitor fts job """
        if not self.FTSGUID:
            return S_ERROR("FTSGUID not set, FTS job not submitted?")

        monitorCommand = command.split() + \
                           ["--verbose",
                           "-s",
                           self.FTSServer,
                           self.FTSGUID ]

        if full:
            monitorCommand.append("-l")

        monitor = executeGridCommand("", monitorCommand)
        if not monitor["OK"]:
            return monitor
        returnCode, outputStr, errStr = monitor["Value"]

        # Returns a non zero status if error
        if returnCode != 0:
            if 'was not found' in outputStr and not errStr:
                errStr = 'Job was not found'
            return S_ERROR(errStr)

        outputStr = outputStr.replace("'", "").replace("<",
                                                       "").replace(">", "")

        # # set FTS job status
        regExp = re.compile("Status:\\s+(\\S+)")

        # with FTS3 this can be uppercase
        self.Status = re.search(regExp, outputStr).group(1)

        statusSummary = {}
        # This is capitalized, even in FTS3!
        for state in FTSFile.ALL_STATES:
            regExp = re.compile("\\s+%s:\\s+(\\d+)" % state)
            if regExp.search(outputStr):
                statusSummary[state] = int(
                    re.search(regExp, outputStr).group(1))

        total = sum(statusSummary.values())
        completed = sum(
            [statusSummary.get(state, 0) for state in FTSFile.FINAL_STATES])
        self.Completeness = 100 * completed / total if total else 0

        if not full:
            return S_OK(statusSummary)

        # The order of the fields is not the same for the glite- and fts- clients!
        # In order: new fts-, old fts-, glite-
        iExptr = None
        for iExptr, exptr in enumerate(
            ('[ ]+Source:[ ]+(\\S+)\n[ ]+Destination:[ ]+(\\S+)\n[ ]+State:[ ]+(\\S+)\n[ ]+Reason:[ ]+([\\S ]+).+?[ ]+Duration:[ ]+(\\d+)\n[ ]+Staging:[ ]+(\\d+)\n[ ]+Retries:[ ]+(\\d+)',
             '[ ]+Source:[ ]+(\\S+)\n[ ]+Destination:[ ]+(\\S+)\n[ ]+State:[ ]+(\\S+)\n[ ]+Reason:[ ]+([\\S ]+).+?[ ]+Duration:[ ]+(\\d+)\n[ ]+Retries:[ ]+(\\d+)',
             '[ ]+Source:[ ]+(\\S+)\n[ ]+Destination:[ ]+(\\S+)\n[ ]+State:[ ]+(\\S+)\n[ ]+Retries:[ ]+(\\d+)\n[ ]+Reason:[ ]+([\\S ]+).+?[ ]+Duration:[ ]+(\\d+)'
             )):
            regExp = re.compile(exptr, re.S)
            fileInfo = re.findall(regExp, outputStr)
            if fileInfo:
                break
        if not fileInfo:
            return S_ERROR("Error monitoring job (no regexp match)")
        for info in fileInfo:
            if iExptr == 0:
                # version >= 3.2.30
                sourceURL, targetURL, fileStatus, reason, duration, _retries, _staging = info
            elif iExptr == 1:
                # version FTS3 < 3.2.30
                sourceURL, targetURL, fileStatus, reason, duration, _retries = info
            elif iExptr == 2:
                # version FTS2
                sourceURL, targetURL, fileStatus, _retries, reason, duration = info
            else:
                return S_ERROR('Error monitoring job (implement match %d)' %
                               iExptr)
            candidateFile = None
            for ftsFile in self:
                if ftsFile.SourceSURL == sourceURL:
                    candidateFile = ftsFile
                    break
            if not candidateFile:
                continue
            # Can be uppercase for FTS3
            if not candidateFile.TargetSURL:
                candidateFile.TargetSURL = targetURL
            candidateFile.Status = fileStatus
            candidateFile.Error = reason
            candidateFile._duration = duration

            if candidateFile.Status == "Failed":
                for missingSource in self.missingSourceErrors:
                    if missingSource.match(reason):
                        candidateFile.Error = "MissingSource"
            # If the staging info was present, record it
            if len(info) > 6:
                candidateFile._staging = info[6]
        # # register successful files
        if self.Status in FTSJob.FINALSTATES:
            return self.finalize()

        return S_OK()