示例#1
0
class FTSMonitorAgent( AgentModule ):
  """
  .. class:: FTSMonitorAgent

  """
  ## transfer DB handle
  transferDB = None

  def initialize( self ):
    """ agent's initialisation """
    self.transferDB = TransferDB()
    # This sets the Default Proxy to used as that defined under 
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )
    return S_OK()

  def execute( self ):
    """ execution in one cycle """

    #########################################################################
    #  Get the details for all active FTS requests
    self.log.info( 'Obtaining requests to monitor' )
    res = self.transferDB.getFTSReq()
    if not res['OK']:
      self.log.error( "Failed to get FTS requests", res['Message'] )
      return res
    if not res['Value']:
      self.log.info( "FTSMonitorAgent. No FTS requests found to monitor." )
      return S_OK()
    ftsReqs = res['Value']
    self.log.info( 'Found %s FTS jobs' % len( ftsReqs ) )

    #######################################################################
    # Check them all....
    i = 1
    for ftsReqDict in ftsReqs:
      infoStr = "\n\n##################################################################################\n\n"
      infoStr = "%sStarting monitoring loop %s of %s\n\n" % ( infoStr, i, len( ftsReqs ) )
      self.log.info( infoStr )
      res = self.monitorTransfer( ftsReqDict )
      i += 1
    return S_OK()

  def monitorTransfer( self, ftsReqDict ):
    """ monitors transfer  obtained from TransferDB """

    ftsReqID = ftsReqDict['FTSReqID']
    ftsGUID = ftsReqDict['FTSGuid']
    ftsServer = ftsReqDict['FTSServer']
    channelID = ftsReqDict['ChannelID']
    sourceSE = ftsReqDict['SourceSE']
    targetSE = ftsReqDict['TargetSE']

    oFTSRequest = FTSRequest()
    oFTSRequest.setFTSServer( ftsServer )
    oFTSRequest.setFTSGUID( ftsGUID )
    oFTSRequest.setSourceSE( sourceSE )
    oFTSRequest.setTargetSE( targetSE )

    #########################################################################
    # Perform summary update of the FTS Request and update FTSReq entries.
    self.log.info( 'Perform summary update of the FTS Request' )
    infoStr = "Monitoring FTS Job:\n\n"
    infoStr = "%sglite-transfer-status -s %s -l %s\n" % ( infoStr, ftsServer, ftsGUID )
    infoStr = "%s%s%s\n" % ( infoStr, 'FTS GUID:'.ljust( 20 ), ftsGUID )
    infoStr = "%s%s%s\n\n" % ( infoStr, 'FTS Server:'.ljust( 20 ), ftsServer )
    self.log.info( infoStr )
    res = oFTSRequest.summary()
    self.transferDB.setFTSReqLastMonitor( ftsReqID )
    if not res['OK']:
      self.log.error( "Failed to update the FTS request summary", res['Message'] )
      if "getTransferJobSummary2: Not authorised to query request" in res["Message"]:
        self.log.error("FTS job is not existing at the FTS server anymore, will clean it up on TransferDB side")

        ## get fileIDs
        fileIDs = self.transferDB.getFTSReqFileIDs( ftsReqID )
        if not fileIDs["OK"]:
          self.log.error("Unable to retrieve FileIDs associated to %s request" % ftsReqID )
          return fileIDs
        fileIDs = fileIDs["Value"]
      
        ## update FileToFTS table, this is just a clean up, no worry if somethings goes wrong
        for fileID in fileIDs:
          fileStatus = self.transferDB.setFileToFTSFileAttribute( ftsReqID, fileID, 
                                                                  "Status", "Failed" )
          if not fileStatus["OK"]:
            self.log.error("Unable to set FileToFTS status to Failed for FileID %s: %s" % ( fileID, 
                                                                                           fileStatus["Message"] ) )
                          
          failReason = self.transferDB.setFileToFTSFileAttribute( ftsReqID, fileID, 
                                                                  "Reason", "FTS job expired on server" )
          if not failReason["OK"]:
            self.log.error("Unable to set FileToFTS reason for FileID %s: %s" % ( fileID, 
                                                                                 failReason["Message"] ) )

        ## update Channel table
        resetChannels = self.transferDB.resetFileChannelStatus( channelID, fileIDs )
        if not resetChannels["OK"]:
          self.log.error("Failed to reset Channel table for files to retry")
          return resetChannels   

        ## update FTSReq table
        self.log.info( 'Setting FTS request status to Finished' )
        ftsReqStatus = self.transferDB.setFTSReqStatus( ftsReqID, 'Finished' )
        if not ftsReqStatus['OK']:
          self.log.error( 'Failed update FTS Request status', ftsReqStatus['Message'] )
          return ftsReqStatus
        ## if we land here, everything should be OK
        return S_OK()

      return res
    res = oFTSRequest.dumpSummary()
    if not res['OK']:
      self.log.error( "Failed to get FTS request summary", res['Message'] )
      return res
    self.log.info( res['Value'] )
    res = oFTSRequest.getPercentageComplete()
    if not res['OK']:
      self.log.error( "Failed to get FTS percentage complete", res['Message'] )
      return res
    self.log.info( 'FTS Request found to be %.1f percent complete' % res['Value'] )
    self.transferDB.setFTSReqAttribute( ftsReqID, 'PercentageComplete', res['Value'] )
    self.transferDB.addLoggingEvent( ftsReqID, res['Value'] )

    #########################################################################
    # Update the information in the TransferDB if the transfer is terminal.
    res = oFTSRequest.isRequestTerminal()
    if not res['OK']:
      self.log.error( "Failed to determine whether FTS request terminal", res['Message'] )
      return res
    if not res['Value']:
      return S_OK()
    self.log.info( 'FTS Request found to be terminal, updating file states' )

    #########################################################################
    # Get the LFNS associated to the FTS request
    self.log.info( 'Obtaining the LFNs associated to this request' )
    res = self.transferDB.getFTSReqLFNs( ftsReqID, channelID, sourceSE )
    if not res['OK']:
      self.log.error( "Failed to obtain FTS request LFNs", res['Message'] )
      return res
    files = res['Value']
    if not files:
      self.log.error( 'No files present for transfer' )
      return S_ERROR( 'No files were found in the DB' )
    lfns = files.keys()
    self.log.info( 'Obtained %s files' % len( lfns ) )
    for lfn in lfns:
      oFTSRequest.setLFN( lfn )

    res = oFTSRequest.monitor()
    if not res['OK']:
      self.log.error( "Failed to perform detailed monitoring of FTS request", res['Message'] )
      return res
    res = oFTSRequest.getFailed()
    if not res['OK']:
      self.log.error( "Failed to obtained failed files for FTS request", res['Message'] )
      return res
    failedFiles = res['Value']
    res = oFTSRequest.getDone()
    if not res['OK']:
      self.log.error( "Failed to obtained successful files for FTS request", res['Message'] )
      return res
    completedFiles = res['Value']

    # An LFN can be included more than once if it was entered into more than one Request. 
    # FTS will only do the transfer once. We need to identify all FileIDs
    res = self.transferDB.getFTSReqFileIDs( ftsReqID )
    if not res['OK']:
      self.log.error( 'Failed to get FileIDs associated to FTS Request', res['Message'] )
      return res
    fileIDs = res['Value']
    res = self.transferDB.getAttributesForFilesList( fileIDs, ['LFN'] )
    if not res['OK']:
      self.log.error( 'Failed to get LFNs associated to FTS Request', res['Message'] )
      return res
    fileIDDict = res['Value']

    fileToFTSUpdates = []
    completedFileIDs = []

    filesToRetry = []
    filesToFail = []

    for fileID, fileDict in fileIDDict.items():
      lfn = fileDict['LFN']
      if lfn in completedFiles:
        completedFileIDs.append( fileID )
        transferTime = 0
        res = oFTSRequest.getTransferTime( lfn )
        if res['OK']:
          transferTime = res['Value']
        fileToFTSUpdates.append( ( fileID, 'Completed', '', 0, transferTime ) )

      if lfn in failedFiles:
        failReason = ''
        res = oFTSRequest.getFailReason( lfn )
        if res['OK']:
          failReason = res['Value']
        if self.missingSource( failReason ):
          self.log.error( 'The source SURL does not exist.', '%s %s' % ( lfn, oFTSRequest.getSourceSURL( lfn ) ) )
          filesToFail.append( fileID )
        else:
          filesToRetry.append( fileID )
        self.log.error( 'Failed to replicate file on channel.', "%s %s" % ( channelID, failReason ) )
        fileToFTSUpdates.append( ( fileID, 'Failed', failReason, 0, 0 ) )

    allUpdated = True
    if filesToRetry:
      self.log.info( 'Updating the Channel table for files to retry' )
      res = self.transferDB.resetFileChannelStatus( channelID, filesToRetry )
      if not res['OK']:
        self.log.error( 'Failed to update the Channel table for file to retry.', res['Message'] )
        allUpdated = False
    for fileID in filesToFail:
      self.log.info( 'Updating the Channel table for files to reschedule' )
      res = self.transferDB.setFileChannelStatus( channelID, fileID, 'Failed' )
      if not res['OK']:
        self.log.error( 'Failed to update Channel table for failed files.', res['Message'] )
        allUpdated = False

    if completedFileIDs:
      self.log.info( 'Updating the Channel table for successful files' )
      res = self.transferDB.updateCompletedChannelStatus( channelID, completedFileIDs )
      if not res['OK']:
        self.log.error( 'Failed to update the Channel table for successful files.', res['Message'] )
        allUpdated = False
      self.log.info( 'Updating the Channel table for ancestors of successful files' )
      res = self.transferDB.updateAncestorChannelStatus( channelID, completedFileIDs )
      if not res['OK']:
        self.log.error( 'Failed to update the Channel table for ancestors of successful files.', res['Message'] )
        allUpdated = False

    if fileToFTSUpdates:
      self.log.info( 'Updating the FileToFTS table for files' )
      res = self.transferDB.setFileToFTSFileAttributes( ftsReqID, channelID, fileToFTSUpdates )
      if not res['OK']:
        self.log.error( 'Failed to update the FileToFTS table for files.', res['Message'] )
        allUpdated = False

    if allUpdated:
      res = oFTSRequest.finalize()
      if not res['OK']:
        self.log.error( "Failed to perform the finalization for the FTS request", res['Message'] )
        return res

      self.log.info( 'Adding logging event for FTS request' )
      # Now set the FTSReq status to terminal so that it is not monitored again
      res = self.transferDB.addLoggingEvent( ftsReqID, 'Finished' )
      if not res['OK']:
        self.log.error( 'Failed to add logging event for FTS Request', res['Message'] )

      res = oFTSRequest.getFailedRegistrations()
      failedRegistrations = res['Value']
      regFailedFileIDs = []
      regDoneFileIDs = []
      regForgetFileIDs = []
      for fileID, fileDict in fileIDDict.items():
        lfn = fileDict['LFN']
        if lfn in failedRegistrations:
          self.log.info( 'Setting failed registration in FileToCat back to Waiting', lfn )
          regFailedFileIDs.append( fileID )
          # if the LFN appears more than once, FileToCat needs to be reset only once 
          del failedRegistrations[lfn]
        elif lfn in completedFiles:
          regDoneFileIDs.append( fileID )
        elif fileID in filesToFail:
          regForgetFileIDs.append( fileID )


      if regFailedFileIDs:
        res = self.transferDB.setRegistrationWaiting( channelID, regFailedFileIDs )
        if not res['OK']:
          self.log.error( 'Failed to reset entries in FileToCat', res['Message'] )
          return res

      if regDoneFileIDs:
        res = self.transferDB.setRegistrationDone( channelID, regDoneFileIDs )
        if not res['OK']:
          self.log.error( 'Failed to set entries Done in FileToCat', res['Message'] )
          return res

      if regForgetFileIDs:
        # This entries could also be set to Failed, but currently there is no method to do so.
        res = self.transferDB.setRegistrationDone( channelID, regForgetFileIDs )
        if not res['OK']:
          self.log.error( 'Failed to set entries Done in FileToCat', res['Message'] )
          return res

      self.log.info( 'Updating FTS request status' )
      res = self.transferDB.setFTSReqStatus( ftsReqID, 'Finished' )
      if not res['OK']:
        self.log.error( 'Failed update FTS Request status', res['Message'] )
    return S_OK()

  def missingSource( self, failReason ):
    """ check if message sent by FTS server is concering missing source file 
    
    :param self: self reference
    :param str failReason: message sent by FTS server
    """
    missingSourceErrors = [ 
      'SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] Failed',
      'SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] No such file or directory',
      'SOURCE error during PREPARATION phase: \[INVALID_PATH\] Failed',
      'SOURCE error during PREPARATION phase: \[INVALID_PATH\] The requested file either does not exist',
      'TRANSFER error during TRANSFER phase: \[INVALID_PATH\] the server sent an error response: 500 500 Command failed. : open error: No such file or directory',
      'SOURCE error during TRANSFER_PREPARATION phase: \[USER_ERROR\] source file doesnt exist' ]
    for error in missingSourceErrors:
      if re.search( error, failReason ):
        return 1
    return 0
示例#2
0
class FTSMonitorAgent(AgentModule):
    def initialize(self):
        self.TransferDB = TransferDB()

        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/DataManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')

        return S_OK()

    def execute(self):

        #########################################################################
        #  Get the details for all active FTS requests
        gLogger.info('Obtaining requests to monitor')
        res = self.TransferDB.getFTSReq()
        if not res['OK']:
            gLogger.error("Failed to get FTS requests", res['Message'])
            return res
        if not res['Value']:
            gLogger.info("FTSMonitorAgent. No FTS requests found to monitor.")
            return S_OK()
        ftsReqs = res['Value']
        gLogger.info('Found %s FTS jobs' % len(ftsReqs))

        #######################################################################
        # Check them all....
        i = 1
        for ftsReqDict in ftsReqs:
            infoStr = "\n\n##################################################################################\n\n"
            infoStr = "%sStarting monitoring loop %s of %s\n\n" % (
                infoStr, i, len(ftsReqs))
            gLogger.info(infoStr)
            res = self.monitorTransfer(ftsReqDict)
            i += 1
        return S_OK()

    def monitorTransfer(self, ftsReqDict):
        """ Monitors transfer  obtained from TransferDB
    """

        ftsReqID = ftsReqDict['FTSReqID']
        ftsGUID = ftsReqDict['FTSGuid']
        ftsServer = ftsReqDict['FTSServer']
        channelID = ftsReqDict['ChannelID']
        sourceSE = ftsReqDict['SourceSE']
        targetSE = ftsReqDict['TargetSE']

        oFTSRequest = FTSRequest()
        oFTSRequest.setFTSServer(ftsServer)
        oFTSRequest.setFTSGUID(ftsGUID)
        oFTSRequest.setSourceSE(sourceSE)
        oFTSRequest.setTargetSE(targetSE)

        #########################################################################
        # Perform summary update of the FTS Request and update FTSReq entries.
        gLogger.info('Perform summary update of the FTS Request')
        infoStr = "Monitoring FTS Job:\n\n"
        infoStr = "%sglite-transfer-status -s %s -l %s\n" % (
            infoStr, ftsServer, ftsGUID)
        infoStr = "%s%s%s\n" % (infoStr, 'FTS GUID:'.ljust(20), ftsGUID)
        infoStr = "%s%s%s\n\n" % (infoStr, 'FTS Server:'.ljust(20), ftsServer)
        gLogger.info(infoStr)
        res = oFTSRequest.summary()
        self.TransferDB.setFTSReqLastMonitor(ftsReqID)
        if not res['OK']:
            gLogger.error("Failed to update the FTS request summary",
                          res['Message'])
            return res
        res = oFTSRequest.dumpSummary()
        if not res['OK']:
            gLogger.error("Failed to get FTS request summary", res['Message'])
            return res
        gLogger.info(res['Value'])
        res = oFTSRequest.getPercentageComplete()
        if not res['OK']:
            gLogger.error("Failed to get FTS percentage complete",
                          res['Message'])
            return res
        gLogger.info('FTS Request found to be %.1f percent complete' %
                     res['Value'])
        self.TransferDB.setFTSReqAttribute(ftsReqID, 'PercentageComplete',
                                           res['Value'])
        self.TransferDB.addLoggingEvent(ftsReqID, res['Value'])

        #########################################################################
        # Update the information in the TransferDB if the transfer is terminal.
        res = oFTSRequest.isRequestTerminal()
        if not res['OK']:
            gLogger.error("Failed to determine whether FTS request terminal",
                          res['Message'])
            return res
        if not res['Value']:
            return S_OK()
        gLogger.info('FTS Request found to be terminal, updating file states')

        #########################################################################
        # Get the LFNS associated to the FTS request
        gLogger.info('Obtaining the LFNs associated to this request')
        res = self.TransferDB.getFTSReqLFNs(ftsReqID)
        if not res['OK']:
            gLogger.error("Failed to obtain FTS request LFNs", res['Message'])
            return res
        files = res['Value']
        if not files:
            gLogger.error('No files present for transfer')
            return S_ERROR('No files were found in the DB')
        lfns = files.keys()
        gLogger.info('Obtained %s files' % len(lfns))
        for lfn in lfns:
            oFTSRequest.setLFN(lfn)

        res = oFTSRequest.monitor()
        if not res['OK']:
            gLogger.error(
                "Failed to perform detailed monitoring of FTS request",
                res['Message'])
            return res
        res = oFTSRequest.getFailed()
        if not res['OK']:
            gLogger.error("Failed to obtained failed files for FTS request",
                          res['Message'])
            return res
        failedFiles = res['Value']
        res = oFTSRequest.getDone()
        if not res['OK']:
            gLogger.error(
                "Failed to obtained successful files for FTS request",
                res['Message'])
            return res
        completedFiles = res['Value']

        fileToFTSUpdates = []
        completedFileIDs = []
        for lfn in completedFiles:
            fileID = files[lfn]
            completedFileIDs.append(fileID)
            transferTime = 0
            res = oFTSRequest.getTransferTime(lfn)
            if res['OK']:
                transferTime = res['Value']
            fileToFTSUpdates.append((fileID, 'Completed', '', 0, transferTime))

        filesToRetry = []
        filesToReschedule = []
        for lfn in failedFiles:
            fileID = files[lfn]
            failReason = ''
            res = oFTSRequest.getFailReason(lfn)
            if res['OK']:
                failReason = res['Value']
            if self.missingSource(failReason):
                gLogger.error('The source SURL does not exist.',
                              '%s %s' % (lfn, oFTSRequest.getSourceSURL(lfn)))
                filesToReschedule.append(fileID)
            else:
                filesToRetry.append(fileID)
            gLogger.error('Failed to replicate file on channel.',
                          "%s %s" % (channelID, failReason))
            fileToFTSUpdates.append((fileID, 'Failed', failReason, 0, 0))

        allUpdated = True
        if filesToRetry:
            gLogger.info('Updating the Channel table for files to retry')
            res = self.TransferDB.resetFileChannelStatus(
                channelID, filesToRetry)
            if not res['OK']:
                gLogger.error(
                    'Failed to update the Channel table for file to retry.',
                    res['Message'])
                allUpdated = False
        for fileID in filesToReschedule:
            gLogger.info('Updating the Channel table for files to reschedule')
            res = self.TransferDB.setFileChannelStatus(channelID, fileID,
                                                       'Failed')
            if not res['OK']:
                gLogger.error(
                    'Failed to update Channel table for failed files.',
                    res['Message'])
                allUpdated = False

        if completedFileIDs:
            gLogger.info('Updating the Channel table for successful files')
            res = self.TransferDB.updateCompletedChannelStatus(
                channelID, completedFileIDs)
            if not res['OK']:
                gLogger.error(
                    'Failed to update the Channel table for successful files.',
                    res['Message'])
                allUpdated = False
            gLogger.info(
                'Updating the Channel table for ancestors of successful files')
            res = self.TransferDB.updateAncestorChannelStatus(
                channelID, completedFileIDs)
            if not res['OK']:
                gLogger.error(
                    'Failed to update the Channel table for ancestors of successful files.',
                    res['Message'])
                allUpdated = False

            #
            #  cibak: temporary switch off of regitstration in FileToCat
            #
            #gLogger.info( 'Updating the FileToCat table for successful files' )
            #res = self.TransferDB.setRegistrationWaiting( channelID, completedFileIDs )
            #if not res['OK']:
            #  gLogger.error( 'Failed to update the FileToCat table for successful files.', res['Message'] )
            #  allUpdated = False

        if fileToFTSUpdates:
            gLogger.info('Updating the FileToFTS table for files')
            res = self.TransferDB.setFileToFTSFileAttributes(
                ftsReqID, channelID, fileToFTSUpdates)
            if not res['OK']:
                gLogger.error(
                    'Failed to update the FileToFTS table for files.',
                    res['Message'])
                allUpdated = False

        if allUpdated:
            res = oFTSRequest.finalize()
            if not res['OK']:
                gLogger.error(
                    "Failed to perform the finalization for the FTS request",
                    res['Message'])
                return res

            gLogger.info('Adding logging event for FTS request')
            # Now set the FTSReq status to terminal so that it is not monitored again
            res = self.TransferDB.addLoggingEvent(ftsReqID, 'Finished')
            if not res['OK']:
                gLogger.error('Failed to add logging event for FTS Request',
                              res['Message'])

            gLogger.info('Updating FTS request status')
            res = self.TransferDB.setFTSReqStatus(ftsReqID, 'Finished')
            if not res['OK']:
                gLogger.error('Failed update FTS Request status',
                              res['Message'])
        return S_OK()

    def missingSource(self, failReason):
        missingSourceErrors = [
            'SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] Failed',
            'SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] No such file or directory',
            'SOURCE error during PREPARATION phase: \[INVALID_PATH\] Failed',
            'SOURCE error during PREPARATION phase: \[INVALID_PATH\] The requested file either does not exist',
            'TRANSFER error during TRANSFER phase: \[INVALID_PATH\] the server sent an error response: 500 500 Command failed. : open error: No such file or directory',
            'SOURCE error during TRANSFER_PREPARATION phase: \[USER_ERROR\] source file doesnt exist'
        ]
        for error in missingSourceErrors:
            if re.search(error, failReason):
                return 1
        return 0
示例#3
0
class FTSMonitorAgent(AgentModule):
    """
  .. class:: FTSMonitorAgent

  Monitor submitted FTS jobs.
  """
    # # transfer DB handle
    transferDB = None
    # # thread pool
    threadPool = None
    # # min threads
    minThreads = 1
    # # max threads
    maxThreads = 10

    # # missing source regexp patterns
    missingSourceErrors = [
      re.compile( r"SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] Failed" ),
      re.compile( r"SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] No such file or directory" ),
      re.compile( r"SOURCE error during PREPARATION phase: \[INVALID_PATH\] Failed" ),
      re.compile( r"SOURCE error during PREPARATION phase: \[INVALID_PATH\] The requested file either does not exist" ),
      re.compile( r"TRANSFER error during TRANSFER phase: \[INVALID_PATH\] the server sent an error response: 500 500"\
                 " Command failed. : open error: No such file or directory" ),
      re.compile( r"SOURCE error during TRANSFER_PREPARATION phase: \[USER_ERROR\] source file doesnt exist" ) ]

    def initialize(self):
        """ agent's initialisation """
        self.transferDB = TransferDB()
        self.am_setOption("shifterProxy", "DataManager")
        self.minThreads = self.am_getOption("MinThreads", self.minThreads)
        self.maxThreads = self.am_getOption("MaxThreads", self.maxThreads)
        minmax = (abs(self.minThreads), abs(self.maxThreads))
        self.minThreads, self.maxThreads = min(minmax), max(minmax)
        self.log.info("ThreadPool min threads = %s" % self.minThreads)
        self.log.info("ThreadPool max threads = %s" % self.maxThreads)
        self.threadPool = ThreadPool(self.minThreads, self.maxThreads)
        self.threadPool.daemonize()
        return S_OK()

    def execute(self):
        """ push jobs to the thread pool """
        self.log.info("Obtaining requests to monitor")
        res = self.transferDB.getFTSReq()
        if not res["OK"]:
            self.log.error("Failed to get FTS requests", res['Message'])
            return res
        if not res["Value"]:
            self.log.info("No FTS requests found to monitor.")
            return S_OK()
        ftsReqs = res["Value"]
        self.log.info("Found %s FTS jobs" % len(ftsReqs))
        i = 1
        for ftsJob in ftsReqs:
            while True:
                self.log.debug("submitting FTS Job %s FTSReqID=%s to monitor" %
                               (i, ftsJob["FTSReqID"]))
                ret = self.threadPool.generateJobAndQueueIt(
                    self.monitorTransfer,
                    args=(ftsJob, ),
                )
                if ret["OK"]:
                    i += 1
                    break
                # # sleep 1 second to proceed
                time.sleep(1)

        self.threadPool.processAllResults()
        return S_OK()

    def ftsJobExpired(self, ftsReqID, channelID):
        """ clean up when FTS job had expired on the server side

    :param int ftsReqID: FTSReq.FTSReqID
    :param int channelID: FTSReq.ChannelID
    """
        log = gLogger.getSubLogger("@%s" % str(ftsReqID))
        fileIDs = self.transferDB.getFTSReqFileIDs(ftsReqID)
        if not fileIDs["OK"]:
            log.error("Unable to retrieve FileIDs associated to %s request" %
                      ftsReqID)
            return fileIDs
        fileIDs = fileIDs["Value"]

        # # update FileToFTS table, this is just a clean up, no worry if somethings goes wrong
        for fileID in fileIDs:
            fileStatus = self.transferDB.setFileToFTSFileAttribute(
                ftsReqID, fileID, "Status", "Failed")
            if not fileStatus["OK"]:
                log.error(
                    "Unable to set FileToFTS status to 'Failed' for FileID %s: %s"
                    % (fileID, fileStatus["Message"]))

            failReason = self.transferDB.setFileToFTSFileAttribute(
                ftsReqID, fileID, "Reason", "FTS job expired on server")
            if not failReason["OK"]:
                log.error("Unable to set FileToFTS reason for FileID %s: %s" %
                          (fileID, failReason["Message"]))
        # # update Channel table
        resetChannels = self.transferDB.resetFileChannelStatus(
            channelID, fileIDs)
        if not resetChannels["OK"]:
            log.error("Failed to reset Channel table for files to retry")
            return resetChannels

        # # update FTSReq table
        log.info("Setting FTS request status to 'Finished'")
        ftsReqStatus = self.transferDB.setFTSReqStatus(ftsReqID, "Finished")
        if not ftsReqStatus["OK"]:
            log.error("Failed update FTS Request status",
                      ftsReqStatus["Message"])
            return ftsReqStatus

        # # if we land here, everything should be OK
        return S_OK()

    def monitorTransfer(self, ftsReqDict):
        """ monitors transfer obtained from TransferDB

    :param dict ftsReqDict: FTS job dictionary
    """
        ftsReqID = ftsReqDict.get("FTSReqID")
        ftsGUID = ftsReqDict.get("FTSGuid")
        ftsServer = ftsReqDict.get("FTSServer")
        channelID = ftsReqDict.get("ChannelID")
        sourceSE = ftsReqDict.get("SourceSE")
        targetSE = ftsReqDict.get("TargetSE")

        oFTSRequest = FTSRequest()
        oFTSRequest.setFTSServer(ftsServer)
        oFTSRequest.setFTSGUID(ftsGUID)
        oFTSRequest.setSourceSE(sourceSE)
        oFTSRequest.setTargetSE(targetSE)

        log = gLogger.getSubLogger("@%s" % str(ftsReqID))

        #########################################################################
        # Perform summary update of the FTS Request and update FTSReq entries.
        log.info("Perform summary update of the FTS Request")
        infoStr = ["glite-transfer-status -s %s -l %s" % (ftsServer, ftsGUID)]
        infoStr.append("FTS GUID:   %s" % ftsGUID)
        infoStr.append("FTS Server: %s" % ftsServer)
        log.info("\n".join(infoStr))
        res = oFTSRequest.summary()
        self.transferDB.setFTSReqLastMonitor(ftsReqID)
        if not res["OK"]:
            log.error("Failed to update the FTS request summary",
                      res["Message"])
            if "getTransferJobSummary2: Not authorised to query request" in res[
                    "Message"]:
                log.error(
                    "FTS job is not existing at the FTS server anymore, will clean it up on TransferDB side"
                )
                cleanUp = self.ftsJobExpired(ftsReqID, channelID)
                if not cleanUp["OK"]:
                    log.error(cleanUp["Message"])
                return cleanUp
            return res

        res = oFTSRequest.dumpSummary()
        if not res['OK']:
            log.error("Failed to get FTS request summary", res["Message"])
            return res
        log.info(res['Value'])
        res = oFTSRequest.getPercentageComplete()
        if not res['OK']:
            log.error("Failed to get FTS percentage complete", res["Message"])
            return res
        log.info('FTS Request found to be %.1f percent complete' %
                 res["Value"])
        self.transferDB.setFTSReqAttribute(ftsReqID, "PercentageComplete",
                                           res["Value"])
        self.transferDB.addLoggingEvent(ftsReqID, res["Value"])

        #########################################################################
        # Update the information in the TransferDB if the transfer is terminal.
        res = oFTSRequest.isRequestTerminal()
        if not res["OK"]:
            log.error("Failed to determine whether FTS request terminal",
                      res["Message"])
            return res
        if not res["Value"]:
            return S_OK()
        # # request is terminal
        return self.terminalRequest(oFTSRequest, ftsReqID, channelID, sourceSE)

    def terminalRequest(self, oFTSRequest, ftsReqID, channelID, sourceSE):
        """ process terminal FTS job

    :param FTSRequest oFTSRequest: FTSRequest instance
    :param int ftsReqID: FTSReq.FTSReqID
    :param int channelID: FTSReq.ChannelID
    :param str sourceSE: FTSReq.SourceSE
    """
        log = gLogger.getSubLogger("@%s" % ftsReqID)

        log.info("FTS Request found to be terminal, updating file states")
        #########################################################################
        # Get the LFNS associated to the FTS request
        log.info("Obtaining the LFNs associated to this request")
        res = self.transferDB.getFTSReqLFNs(ftsReqID, channelID, sourceSE)
        if not res["OK"]:
            log.error("Failed to obtain FTS request LFNs", res['Message'])
            return res
        files = res["Value"]
        if not files:
            log.error("No files present for transfer")
            return S_ERROR("No files were found in the DB")

        lfns = files.keys()
        log.debug("Obtained %s files" % len(lfns))
        for lfn in lfns:
            oFTSRequest.setLFN(lfn)

        res = oFTSRequest.monitor()
        if not res["OK"]:
            log.error("Failed to perform detailed monitoring of FTS request",
                      res["Message"])
            return res
        res = oFTSRequest.getFailed()
        if not res["OK"]:
            log.error("Failed to obtained failed files for FTS request",
                      res["Message"])
            return res
        failedFiles = res["Value"]
        res = oFTSRequest.getDone()
        if not res["OK"]:
            log.error("Failed to obtained successful files for FTS request",
                      res["Message"])
            return res
        completedFiles = res["Value"]

        # An LFN can be included more than once if it was entered into more than one Request.
        # FTS will only do the transfer once. We need to identify all FileIDs
        res = self.transferDB.getFTSReqFileIDs(ftsReqID)
        if not res["OK"]:
            log.error("Failed to get FileIDs associated to FTS Request",
                      res["Message"])
            return res
        fileIDs = res["Value"]
        res = self.transferDB.getAttributesForFilesList(fileIDs, ["LFN"])
        if not res["OK"]:
            log.error("Failed to get LFNs associated to FTS Request",
                      res["Message"])
            return res
        fileIDDict = res["Value"]

        fileToFTSUpdates = []
        completedFileIDs = []
        filesToRetry = []
        filesToFail = []

        for fileID, fileDict in fileIDDict.items():
            lfn = fileDict['LFN']
            if lfn in completedFiles:
                completedFileIDs.append(fileID)
                transferTime = 0
                res = oFTSRequest.getTransferTime(lfn)
                if res["OK"]:
                    transferTime = res["Value"]
                fileToFTSUpdates.append(
                    (fileID, "Completed", "", 0, transferTime))

            if lfn in failedFiles:
                failReason = ""
                res = oFTSRequest.getFailReason(lfn)
                if res["OK"]:
                    failReason = res["Value"]
                if "Source file/user checksum mismatch" in failReason:
                    filesToFail.append(fileID)
                    continue
                if self.missingSource(failReason):
                    log.error("The source SURL does not exist.",
                              "%s %s" % (lfn, oFTSRequest.getSourceSURL(lfn)))
                    filesToFail.append(fileID)
                else:
                    filesToRetry.append(fileID)
                log.error("Failed to replicate file on channel.",
                          "%s %s" % (channelID, failReason))
                fileToFTSUpdates.append((fileID, "Failed", failReason, 0, 0))

        # # update TransferDB.FileToFTS table
        updateFileToFTS = self.updateFileToFTS(ftsReqID, channelID,
                                               filesToRetry, filesToFail,
                                               completedFileIDs,
                                               fileToFTSUpdates)

        if updateFileToFTS["OK"] and updateFileToFTS["Value"]:
            res = oFTSRequest.finalize()
            if not res["OK"]:
                log.error(
                    "Failed to perform the finalization for the FTS request",
                    res["Message"])
                return res

            log.info('Adding logging event for FTS request')
            # Now set the FTSReq status to terminal so that it is not monitored again
            res = self.transferDB.addLoggingEvent(ftsReqID, 'Finished')
            if not res['OK']:
                log.error('Failed to add logging event for FTS Request',
                          res['Message'])

            # update TransferDB.FileToCat table
            updateFileToCat = self.updateFileToCat(oFTSRequest, channelID,
                                                   fileIDDict, completedFiles,
                                                   filesToFail)
            if not updateFileToCat["OK"]:
                log.error(updateFileToCat["Message"])

            log.debug("Updating FTS request status")
            res = self.transferDB.setFTSReqStatus(ftsReqID, 'Finished')
            if not res['OK']:
                log.error('Failed update FTS Request status', res['Message'])
        return S_OK()

    def updateFileToFTS(self, ftsReqID, channelID, filesToRetry, filesToFail,
                        completedFileIDs, fileToFTSUpdates):
        """ update TransferDB.FileToFTS table for finished request

    :param int ftsReqID: FTSReq.FTSReqID
    :param int channelID: FTSReq.ChannelID
    :param list filesToRetry: FileIDs to retry
    :param list filesToFail: FileIDs for failed files
    :param list completedFileIDs: files completed
    :param list fileToFTSUpdates: ???
    """
        log = gLogger.getSubLogger("@%s" % ftsReqID)

        allUpdated = True

        res = self.transferDB.resetFileChannelStatus(
            channelID, filesToRetry) if filesToRetry else S_OK()
        if not res["OK"]:
            log.error("Failed to update the Channel table for file to retry.",
                      res["Message"])
            allUpdated = False

        for fileID in filesToFail:
            log.info("Updating the Channel table for files to reschedule")
            res = self.transferDB.setFileToReschedule(fileID)
            if not res["OK"]:
                log.error("Failed to update Channel table for failed files.",
                          res["Message"])
                allUpdated = False
            elif res["Value"] == "max reschedule attempt reached":
                log.error("setting Channel status to 'Failed' : " %
                          res["Value"])
                res = self.transferDB.setFileChannelStatus(
                    channelID, fileID, 'Failed')
                if not res["OK"]:
                    log.error(
                        "Failed to update Channel table for failed files.",
                        res["Message"])
                    allUpdated = False

        if completedFileIDs:
            res = self.transferDB.updateCompletedChannelStatus(
                channelID, completedFileIDs)
            if not res["OK"]:
                log.error(
                    "Failed to update the Channel table for successful files.",
                    res["Message"])
                allUpdated = False
            res = self.transferDB.updateAncestorChannelStatus(
                channelID, completedFileIDs)
            if not res["OK"]:
                log.error(
                    'Failed to update the Channel table for ancestors of successful files.',
                    res['Message'])
                allUpdated = False

        if fileToFTSUpdates:
            res = self.transferDB.setFileToFTSFileAttributes(
                ftsReqID, channelID, fileToFTSUpdates)
            if not res["OK"]:
                log.error("Failed to update the FileToFTS table for files.",
                          res["Message"])
                allUpdated = False

        return S_OK(allUpdated)

    def updateFileToCat(self, oFTSRequest, channelID, fileIDDict,
                        completedFiles, filesToFail):
        """ update TransferDB.FileToCat table for finished request

    :param FTSRequest oFTSRequest: FTSRequest instance
    :param int ftsReqID: FTSReq.FTSReqID
    :param dict fileIDDict: fileIDs dictionary
    :param int channelID: FTSReq.ChannelID
    """
        res = oFTSRequest.getFailedRegistrations()
        failedRegistrations = res["Value"]
        regFailedFileIDs = []
        regDoneFileIDs = []
        regForgetFileIDs = []
        for fileID, fileDict in fileIDDict.items():
            lfn = fileDict['LFN']

            if lfn in failedRegistrations:
                regFailedFileIDs.append(fileID)
                # if the LFN appears more than once, FileToCat needs to be reset only once
                del failedRegistrations[lfn]
            elif lfn in completedFiles:
                regDoneFileIDs.append(fileID)
            elif fileID in filesToFail:
                regForgetFileIDs.append(fileID)

        res = self.transferDB.setRegistrationWaiting(
            channelID, regFailedFileIDs) if regFailedFileIDs else S_OK()
        if not res["OK"]:
            res["Message"] = "Failed to reset entries in FileToCat: %s" % res[
                "Message"]
            return res

        res = self.transferDB.setRegistrationDone(
            channelID, regDoneFileIDs) if regDoneFileIDs else S_OK()
        if not res["OK"]:
            res["Message"] = "Failed to set entries Done in FileToCat: %s" % res[
                "Message"]
            return res

        # This entries could also be set to Failed, but currently there is no method to do so.
        res = self.transferDB.setRegistrationDone(
            channelID, regForgetFileIDs) if regForgetFileIDs else S_OK()
        if not res["OK"]:
            res["Message"] = "Failed to set entries Done in FileToCat: %s" % res[
                "Message"]
            return res

        return S_OK()

    @classmethod
    def missingSource(cls, failReason):
        """ check if message sent by FTS server is concering missing source file

    :param str failReason: message sent by FTS server
    """
        for error in cls.missingSourceErrors:
            if error.search(failReason):
                return 1
        return 0
示例#4
0
class FTSMonitorAgent( AgentModule ):

  def initialize( self ):
    self.TransferDB = TransferDB()

    # This sets the Default Proxy to used as that defined under 
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    return S_OK()

  def execute( self ):

    #########################################################################
    #  Get the details for all active FTS requests
    gLogger.info( 'Obtaining requests to monitor' )
    res = self.TransferDB.getFTSReq()
    if not res['OK']:
      gLogger.error( "Failed to get FTS requests", res['Message'] )
      return res
    if not res['Value']:
      gLogger.info( "FTSMonitorAgent. No FTS requests found to monitor." )
      return S_OK()
    ftsReqs = res['Value']
    gLogger.info( 'Found %s FTS jobs' % len( ftsReqs ) )

    #######################################################################
    # Check them all....
    i = 1
    for ftsReqDict in ftsReqs:
      infoStr = "\n\n##################################################################################\n\n"
      infoStr = "%sStarting monitoring loop %s of %s\n\n" % ( infoStr, i, len( ftsReqs ) )
      gLogger.info( infoStr )
      res = self.monitorTransfer( ftsReqDict )
      i += 1
    return S_OK()

  def monitorTransfer( self, ftsReqDict ):
    """ Monitors transfer  obtained from TransferDB
    """

    ftsReqID = ftsReqDict['FTSReqID']
    ftsGUID = ftsReqDict['FTSGuid']
    ftsServer = ftsReqDict['FTSServer']
    channelID = ftsReqDict['ChannelID']
    sourceSE = ftsReqDict['SourceSE']
    targetSE = ftsReqDict['TargetSE']

    oFTSRequest = FTSRequest()
    oFTSRequest.setFTSServer( ftsServer )
    oFTSRequest.setFTSGUID( ftsGUID )
    oFTSRequest.setSourceSE( sourceSE )
    oFTSRequest.setTargetSE( targetSE )

    #########################################################################
    # Perform summary update of the FTS Request and update FTSReq entries.
    gLogger.info( 'Perform summary update of the FTS Request' )
    infoStr = "Monitoring FTS Job:\n\n"
    infoStr = "%sglite-transfer-status -s %s -l %s\n" % ( infoStr, ftsServer, ftsGUID )
    infoStr = "%s%s%s\n" % ( infoStr, 'FTS GUID:'.ljust( 20 ), ftsGUID )
    infoStr = "%s%s%s\n\n" % ( infoStr, 'FTS Server:'.ljust( 20 ), ftsServer )
    gLogger.info( infoStr )
    res = oFTSRequest.summary()
    self.TransferDB.setFTSReqLastMonitor( ftsReqID )
    if not res['OK']:
      gLogger.error( "Failed to update the FTS request summary", res['Message'] )
      return res
    res = oFTSRequest.dumpSummary()
    if not res['OK']:
      gLogger.error( "Failed to get FTS request summary", res['Message'] )
      return res
    gLogger.info( res['Value'] )
    res = oFTSRequest.getPercentageComplete()
    if not res['OK']:
      gLogger.error( "Failed to get FTS percentage complete", res['Message'] )
      return res
    gLogger.info( 'FTS Request found to be %.1f percent complete' % res['Value'] )
    self.TransferDB.setFTSReqAttribute( ftsReqID, 'PercentageComplete', res['Value'] )
    self.TransferDB.addLoggingEvent( ftsReqID, res['Value'] )

    #########################################################################
    # Update the information in the TransferDB if the transfer is terminal.
    res = oFTSRequest.isRequestTerminal()
    if not res['OK']:
      gLogger.error( "Failed to determine whether FTS request terminal", res['Message'] )
      return res
    if not res['Value']:
      return S_OK()
    gLogger.info( 'FTS Request found to be terminal, updating file states' )

    #########################################################################
    # Get the LFNS associated to the FTS request
    gLogger.info( 'Obtaining the LFNs associated to this request' )
    res = self.TransferDB.getFTSReqLFNs( ftsReqID )
    if not res['OK']:
      gLogger.error( "Failed to obtain FTS request LFNs", res['Message'] )
      return res
    files = res['Value']
    if not files:
      gLogger.error( 'No files present for transfer' )
      return S_ERROR( 'No files were found in the DB' )
    lfns = files.keys()
    gLogger.info( 'Obtained %s files' % len( lfns ) )
    for lfn in lfns:
      oFTSRequest.setLFN( lfn )

    res = oFTSRequest.monitor()
    if not res['OK']:
      gLogger.error( "Failed to perform detailed monitoring of FTS request", res['Message'] )
      return res
    res = oFTSRequest.getFailed()
    if not res['OK']:
      gLogger.error( "Failed to obtained failed files for FTS request", res['Message'] )
      return res
    failedFiles = res['Value']
    res = oFTSRequest.getDone()
    if not res['OK']:
      gLogger.error( "Failed to obtained successful files for FTS request", res['Message'] )
      return res
    completedFiles = res['Value']

    fileToFTSUpdates = []
    completedFileIDs = []
    for lfn in completedFiles:
      fileID = files[lfn]
      completedFileIDs.append( fileID )
      transferTime = 0
      res = oFTSRequest.getTransferTime( lfn )
      if res['OK']:
        transferTime = res['Value']
      fileToFTSUpdates.append( ( fileID, 'Completed', '', 0, transferTime ) )

    filesToRetry = []
    filesToReschedule = []
    for lfn in failedFiles:
      fileID = files[lfn]
      failReason = ''
      res = oFTSRequest.getFailReason( lfn )
      if res['OK']:
        failReason = res['Value']
      if self.missingSource( failReason ):
        gLogger.error( 'The source SURL does not exist.', '%s %s' % ( lfn, oFTSRequest.getSourceSURL( lfn ) ) )
        filesToReschedule.append( fileID )
      else:
        filesToRetry.append( fileID )
      gLogger.error( 'Failed to replicate file on channel.', "%s %s" % ( channelID, failReason ) )
      fileToFTSUpdates.append( ( fileID, 'Failed', failReason, 0, 0 ) )

    allUpdated = True
    if filesToRetry:
      gLogger.info( 'Updating the Channel table for files to retry' )
      res = self.TransferDB.resetFileChannelStatus( channelID, filesToRetry )
      if not res['OK']:
        gLogger.error( 'Failed to update the Channel table for file to retry.', res['Message'] )
        allUpdated = False
    for fileID in filesToReschedule:
      gLogger.info( 'Updating the Channel table for files to reschedule' )
      res = self.TransferDB.setFileChannelStatus( channelID, fileID, 'Failed' )
      if not res['OK']:
        gLogger.error( 'Failed to update Channel table for failed files.', res['Message'] )
        allUpdated = False

    if completedFileIDs:
      gLogger.info( 'Updating the Channel table for successful files' )
      res = self.TransferDB.updateCompletedChannelStatus( channelID, completedFileIDs )
      if not res['OK']:
        gLogger.error( 'Failed to update the Channel table for successful files.', res['Message'] )
        allUpdated = False
      gLogger.info( 'Updating the Channel table for ancestors of successful files' )
      res = self.TransferDB.updateAncestorChannelStatus( channelID, completedFileIDs )
      if not res['OK']:
        gLogger.error( 'Failed to update the Channel table for ancestors of successful files.', res['Message'] )
        allUpdated = False
      
      #
      #  cibak: temporary switch off of regitstration in FileToCat
      #
      #gLogger.info( 'Updating the FileToCat table for successful files' )
      #res = self.TransferDB.setRegistrationWaiting( channelID, completedFileIDs )
      #if not res['OK']:
      #  gLogger.error( 'Failed to update the FileToCat table for successful files.', res['Message'] )
      #  allUpdated = False

    if fileToFTSUpdates:
      gLogger.info( 'Updating the FileToFTS table for files' )
      res = self.TransferDB.setFileToFTSFileAttributes( ftsReqID, channelID, fileToFTSUpdates )
      if not res['OK']:
        gLogger.error( 'Failed to update the FileToFTS table for files.', res['Message'] )
        allUpdated = False

    if allUpdated:
      res = oFTSRequest.finalize()
      if not res['OK']:
        gLogger.error( "Failed to perform the finalization for the FTS request", res['Message'] )
        return res

      gLogger.info( 'Adding logging event for FTS request' )
      # Now set the FTSReq status to terminal so that it is not monitored again
      res = self.TransferDB.addLoggingEvent( ftsReqID, 'Finished' )
      if not res['OK']:
        gLogger.error( 'Failed to add logging event for FTS Request', res['Message'] )

      gLogger.info( 'Updating FTS request status' )
      res = self.TransferDB.setFTSReqStatus( ftsReqID, 'Finished' )
      if not res['OK']:
        gLogger.error( 'Failed update FTS Request status', res['Message'] )
    return S_OK()

  def missingSource( self, failReason ):
    missingSourceErrors = ['SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] Failed',
                           'SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] No such file or directory',
                           'SOURCE error during PREPARATION phase: \[INVALID_PATH\] Failed',
                           'SOURCE error during PREPARATION phase: \[INVALID_PATH\] The requested file either does not exist',
                           'TRANSFER error during TRANSFER phase: \[INVALID_PATH\] the server sent an error response: 500 500 Command failed. : open error: No such file or directory',
                           'SOURCE error during TRANSFER_PREPARATION phase: \[USER_ERROR\] source file doesnt exist']
    for error in missingSourceErrors:
      if re.search( error, failReason ):
        return 1
    return 0
示例#5
0
class FTSMonitorAgent( AgentModule ):
  """
  .. class:: FTSMonitorAgent

  Monitor submitted FTS jobs.
  """
  # # transfer DB handle
  transferDB = None
  # # thread pool
  threadPool = None
  # # min threads
  minThreads = 1
  # # max threads
  maxThreads = 10

  # # missing source regexp patterns
  missingSourceErrors = [
    re.compile( r"SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] Failed" ),
    re.compile( r"SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] No such file or directory" ),
    re.compile( r"SOURCE error during PREPARATION phase: \[INVALID_PATH\] Failed" ),
    re.compile( r"SOURCE error during PREPARATION phase: \[INVALID_PATH\] The requested file either does not exist" ),
    re.compile( r"TRANSFER error during TRANSFER phase: \[INVALID_PATH\] the server sent an error response: 500 500"\
               " Command failed. : open error: No such file or directory" ),
    re.compile( r"SOURCE error during TRANSFER_PREPARATION phase: \[USER_ERROR\] source file doesnt exist" ) ]

  def initialize( self ):
    """ agent's initialisation """
    self.transferDB = TransferDB()
    self.am_setOption( "shifterProxy", "DataManager" )
    self.minThreads = self.am_getOption( "MinThreads", self.minThreads )
    self.maxThreads = self.am_getOption( "MaxThreads", self.maxThreads )
    minmax = ( abs( self.minThreads ), abs( self.maxThreads ) )
    self.minThreads, self.maxThreads = min( minmax ), max( minmax )
    self.log.info( "ThreadPool min threads = %s" % self.minThreads )
    self.log.info( "ThreadPool max threads = %s" % self.maxThreads )
    self.threadPool = ThreadPool( self.minThreads, self.maxThreads )
    self.threadPool.daemonize()
    return S_OK()

  def execute( self ):
    """ push jobs to the thread pool """
    self.log.info( "Obtaining requests to monitor" )
    res = self.transferDB.getFTSReq()
    if not res["OK"]:
      self.log.error( "Failed to get FTS requests", res['Message'] )
      return res
    if not res["Value"]:
      self.log.info( "No FTS requests found to monitor." )
      return S_OK()
    ftsReqs = res["Value"]
    self.log.info( "Found %s FTS jobs" % len( ftsReqs ) )
    i = 1
    for ftsJob in ftsReqs:
      while True:
        self.log.debug( "submitting FTS Job %s FTSReqID=%s to monitor" % ( i, ftsJob["FTSReqID"] ) )
        ret = self.threadPool.generateJobAndQueueIt( self.monitorTransfer, args = ( ftsJob, ), )
        if ret["OK"]:
          i += 1
          break
        # # sleep 1 second to proceed
        time.sleep( 1 )

    self.threadPool.processAllResults()
    return S_OK()

  def ftsJobExpired( self, ftsReqID, channelID ):
    """ clean up when FTS job had expired on the server side

    :param int ftsReqID: FTSReq.FTSReqID
    :param int channelID: FTSReq.ChannelID
    """
    log = gLogger.getSubLogger( "@%s" % str( ftsReqID ) )
    fileIDs = self.transferDB.getFTSReqFileIDs( ftsReqID )
    if not fileIDs["OK"]:
      log.error( "Unable to retrieve FileIDs associated to %s request" % ftsReqID )
      return fileIDs
    fileIDs = fileIDs["Value"]

    # # update FileToFTS table, this is just a clean up, no worry if somethings goes wrong
    for fileID in fileIDs:
      fileStatus = self.transferDB.setFileToFTSFileAttribute( ftsReqID, fileID,
                                                              "Status", "Failed" )
      if not fileStatus["OK"]:
        log.error( "Unable to set FileToFTS status to 'Failed' for FileID %s: %s" % ( fileID,
                                                                                     fileStatus["Message"] ) )

      failReason = self.transferDB.setFileToFTSFileAttribute( ftsReqID, fileID,
                                                              "Reason", "FTS job expired on server" )
      if not failReason["OK"]:
        log.error( "Unable to set FileToFTS reason for FileID %s: %s" % ( fileID,
                                                                         failReason["Message"] ) )
    # # update Channel table
    resetChannels = self.transferDB.resetFileChannelStatus( channelID, fileIDs )
    if not resetChannels["OK"]:
      log.error( "Failed to reset Channel table for files to retry" )
      return resetChannels

    # # update FTSReq table
    log.info( "Setting FTS request status to 'Finished'" )
    ftsReqStatus = self.transferDB.setFTSReqStatus( ftsReqID, "Finished" )
    if not ftsReqStatus["OK"]:
      log.error( "Failed update FTS Request status", ftsReqStatus["Message"] )
      return ftsReqStatus

    # # if we land here, everything should be OK
    return S_OK()

  def monitorTransfer( self, ftsReqDict ):
    """ monitors transfer obtained from TransferDB

    :param dict ftsReqDict: FTS job dictionary
    """
    ftsReqID = ftsReqDict.get( "FTSReqID" )
    ftsGUID = ftsReqDict.get( "FTSGuid" )
    ftsServer = ftsReqDict.get( "FTSServer" )
    channelID = ftsReqDict.get( "ChannelID" )
    sourceSE = ftsReqDict.get( "SourceSE" )
    targetSE = ftsReqDict.get( "TargetSE" )

    oFTSRequest = FTSRequest()
    oFTSRequest.setFTSServer( ftsServer )
    oFTSRequest.setFTSGUID( ftsGUID )
    oFTSRequest.setSourceSE( sourceSE )
    oFTSRequest.setTargetSE( targetSE )

    log = gLogger.getSubLogger( "@%s" % str( ftsReqID ) )

    #########################################################################
    # Perform summary update of the FTS Request and update FTSReq entries.
    log.info( "Perform summary update of the FTS Request" )
    infoStr = [ "glite-transfer-status -s %s -l %s" % ( ftsServer, ftsGUID ) ]
    infoStr.append( "FTS GUID:   %s" % ftsGUID )
    infoStr.append( "FTS Server: %s" % ftsServer )
    log.info( "\n".join( infoStr ) )
    res = oFTSRequest.summary()
    self.transferDB.setFTSReqLastMonitor( ftsReqID )
    if not res["OK"]:
      log.error( "Failed to update the FTS request summary", res["Message"] )
      if "getTransferJobSummary2: Not authorised to query request" in res["Message"]:
        log.error( "FTS job is not existing at the FTS server anymore, will clean it up on TransferDB side" )
        cleanUp = self.ftsJobExpired( ftsReqID, channelID )
        if not cleanUp["OK"]:
          log.error( cleanUp["Message"] )
        return cleanUp
      return res

    res = oFTSRequest.dumpSummary()
    if not res['OK']:
      log.error( "Failed to get FTS request summary", res["Message"] )
      return res
    log.info( res['Value'] )
    res = oFTSRequest.getPercentageComplete()
    if not res['OK']:
      log.error( "Failed to get FTS percentage complete", res["Message"] )
      return res
    log.info( 'FTS Request found to be %.1f percent complete' % res["Value"] )
    self.transferDB.setFTSReqAttribute( ftsReqID, "PercentageComplete", res["Value"] )
    self.transferDB.addLoggingEvent( ftsReqID, res["Value"] )

    #########################################################################
    # Update the information in the TransferDB if the transfer is terminal.
    res = oFTSRequest.isRequestTerminal()
    if not res["OK"]:
      log.error( "Failed to determine whether FTS request terminal", res["Message"] )
      return res
    if not res["Value"]:
      return S_OK()
    # # request is terminal
    return self.terminalRequest( oFTSRequest, ftsReqID, channelID, sourceSE )

  def terminalRequest( self, oFTSRequest, ftsReqID, channelID, sourceSE ):
    """ process terminal FTS job

    :param FTSRequest oFTSRequest: FTSRequest instance
    :param int ftsReqID: FTSReq.FTSReqID
    :param int channelID: FTSReq.ChannelID
    :param str sourceSE: FTSReq.SourceSE
    """
    log = gLogger.getSubLogger( "@%s" % ftsReqID )

    log.info( "FTS Request found to be terminal, updating file states" )
    #########################################################################
    # Get the LFNS associated to the FTS request
    log.info( "Obtaining the LFNs associated to this request" )
    res = self.transferDB.getFTSReqLFNs( ftsReqID, channelID, sourceSE )
    if not res["OK"]:
      log.error( "Failed to obtain FTS request LFNs", res['Message'] )
      return res
    files = res["Value"]
    if not files:
      log.error( "No files present for transfer" )
      return S_ERROR( "No files were found in the DB" )

    lfns = files.keys()
    log.debug( "Obtained %s files" % len( lfns ) )
    for lfn in lfns:
      oFTSRequest.setLFN( lfn )

    res = oFTSRequest.monitor()
    if not res["OK"]:
      log.error( "Failed to perform detailed monitoring of FTS request", res["Message"] )
      return res
    res = oFTSRequest.getFailed()
    if not res["OK"]:
      log.error( "Failed to obtained failed files for FTS request", res["Message"] )
      return res
    failedFiles = res["Value"]
    res = oFTSRequest.getDone()
    if not res["OK"]:
      log.error( "Failed to obtained successful files for FTS request", res["Message"] )
      return res
    completedFiles = res["Value"]

    # An LFN can be included more than once if it was entered into more than one Request.
    # FTS will only do the transfer once. We need to identify all FileIDs
    res = self.transferDB.getFTSReqFileIDs( ftsReqID )
    if not res["OK"]:
      log.error( "Failed to get FileIDs associated to FTS Request", res["Message"] )
      return res
    fileIDs = res["Value"]
    res = self.transferDB.getAttributesForFilesList( fileIDs, ["LFN"] )
    if not res["OK"]:
      log.error( "Failed to get LFNs associated to FTS Request", res["Message"] )
      return res
    fileIDDict = res["Value"]

    fileToFTSUpdates = []
    completedFileIDs = []
    filesToRetry = []
    filesToFail = []

    for fileID, fileDict in fileIDDict.items():
      lfn = fileDict['LFN']
      if lfn in completedFiles:
        completedFileIDs.append( fileID )
        transferTime = 0
        res = oFTSRequest.getTransferTime( lfn )
        if res["OK"]:
          transferTime = res["Value"]
        fileToFTSUpdates.append( ( fileID, "Completed", "", 0, transferTime ) )

      if lfn in failedFiles:
        failReason = ""
        res = oFTSRequest.getFailReason( lfn )
        if res["OK"]:
          failReason = res["Value"]
        if "Source file/user checksum mismatch" in failReason:
          filesToFail.append( fileID )
          continue
        if self.missingSource( failReason ):
          log.error( "The source SURL does not exist.", "%s %s" % ( lfn, oFTSRequest.getSourceSURL( lfn ) ) )
          filesToFail.append( fileID )
        else:
          filesToRetry.append( fileID )
        log.error( "Failed to replicate file on channel.", "%s %s" % ( channelID, failReason ) )
        fileToFTSUpdates.append( ( fileID, "Failed", failReason, 0, 0 ) )

    # # update TransferDB.FileToFTS table
    updateFileToFTS = self.updateFileToFTS( ftsReqID, channelID,
                                            filesToRetry, filesToFail,
                                            completedFileIDs, fileToFTSUpdates )

    if updateFileToFTS["OK"] and updateFileToFTS["Value"]:
      res = oFTSRequest.finalize()
      if not res["OK"]:
        log.error( "Failed to perform the finalization for the FTS request", res["Message"] )
        return res

      log.info( 'Adding logging event for FTS request' )
      # Now set the FTSReq status to terminal so that it is not monitored again
      res = self.transferDB.addLoggingEvent( ftsReqID, 'Finished' )
      if not res['OK']:
        log.error( 'Failed to add logging event for FTS Request', res['Message'] )

      # update TransferDB.FileToCat table
      updateFileToCat = self.updateFileToCat( oFTSRequest, channelID, fileIDDict, completedFiles, filesToFail )
      if not updateFileToCat["OK"]:
        log.error( updateFileToCat["Message"] )

      log.debug( "Updating FTS request status" )
      res = self.transferDB.setFTSReqStatus( ftsReqID, 'Finished' )
      if not res['OK']:
        log.error( 'Failed update FTS Request status', res['Message'] )
    return S_OK()


  def updateFileToFTS( self, ftsReqID, channelID, filesToRetry, filesToFail, completedFileIDs, fileToFTSUpdates ):
    """ update TransferDB.FileToFTS table for finished request

    :param int ftsReqID: FTSReq.FTSReqID
    :param int channelID: FTSReq.ChannelID
    :param list filesToRetry: FileIDs to retry
    :param list filesToFail: FileIDs for failed files
    :param list completedFileIDs: files completed
    :param list fileToFTSUpdates: ???
    """
    log = gLogger.getSubLogger( "@%s" % ftsReqID )

    allUpdated = True

    res = self.transferDB.resetFileChannelStatus( channelID, filesToRetry ) if filesToRetry else S_OK()
    if not res["OK"]:
      log.error( "Failed to update the Channel table for file to retry.", res["Message"] )
      allUpdated = False

    for fileID in filesToFail:
      log.info( "Updating the Channel table for files to reschedule" )
      res = self.transferDB.setFileToReschedule( fileID )
      if not res["OK"]:
        log.error( "Failed to update Channel table for failed files.", res["Message"] )
        allUpdated = False
      elif res["Value"] == "max reschedule attempt reached":
        log.error( "setting Channel status to 'Failed' : " % res["Value"] )
        res = self.transferDB.setFileChannelStatus( channelID, fileID, 'Failed' )
        if not res["OK"]:
          log.error( "Failed to update Channel table for failed files.", res["Message"] )
          allUpdated = False

    if completedFileIDs:
      res = self.transferDB.updateCompletedChannelStatus( channelID, completedFileIDs )
      if not res["OK"]:
        log.error( "Failed to update the Channel table for successful files.", res["Message"] )
        allUpdated = False
      res = self.transferDB.updateAncestorChannelStatus( channelID, completedFileIDs )
      if not res["OK"]:
        log.error( 'Failed to update the Channel table for ancestors of successful files.', res['Message'] )
        allUpdated = False

    if fileToFTSUpdates:
      res = self.transferDB.setFileToFTSFileAttributes( ftsReqID, channelID, fileToFTSUpdates )
      if not res["OK"]:
        log.error( "Failed to update the FileToFTS table for files.", res["Message"] )
        allUpdated = False

    return S_OK( allUpdated )

  def updateFileToCat( self, oFTSRequest, channelID, fileIDDict, completedFiles, filesToFail ):
    """ update TransferDB.FileToCat table for finished request

    :param FTSRequest oFTSRequest: FTSRequest instance
    :param int ftsReqID: FTSReq.FTSReqID
    :param dict fileIDDict: fileIDs dictionary
    :param int channelID: FTSReq.ChannelID
    """
    res = oFTSRequest.getFailedRegistrations()
    failedRegistrations = res["Value"]
    regFailedFileIDs = []
    regDoneFileIDs = []
    regForgetFileIDs = []
    for fileID, fileDict in fileIDDict.items():
      lfn = fileDict['LFN']

      if lfn in failedRegistrations:
        regFailedFileIDs.append( fileID )
        # if the LFN appears more than once, FileToCat needs to be reset only once
        del failedRegistrations[lfn]
      elif lfn in completedFiles:
        regDoneFileIDs.append( fileID )
      elif fileID in filesToFail:
        regForgetFileIDs.append( fileID )

    res = self.transferDB.setRegistrationWaiting( channelID, regFailedFileIDs ) if regFailedFileIDs else S_OK()
    if not res["OK"]:
      res["Message"] = "Failed to reset entries in FileToCat: %s" % res["Message"]
      return res

    res = self.transferDB.setRegistrationDone( channelID, regDoneFileIDs ) if regDoneFileIDs else S_OK()
    if not res["OK"]:
      res["Message"] = "Failed to set entries Done in FileToCat: %s" % res["Message"]
      return res

    # This entries could also be set to Failed, but currently there is no method to do so.
    res = self.transferDB.setRegistrationDone( channelID, regForgetFileIDs ) if regForgetFileIDs else S_OK()
    if not res["OK"]:
      res["Message"] = "Failed to set entries Done in FileToCat: %s" % res["Message"]
      return res

    return S_OK()

  @classmethod
  def missingSource( cls, failReason ):
    """ check if message sent by FTS server is concering missing source file

    :param str failReason: message sent by FTS server
    """
    for error in cls.missingSourceErrors:
      if error.search( failReason ):
        return 1
    return 0