Example #1
class ReplicateAndRegister( DMSRequestOperationsBase ):
  """
  .. class:: ReplicateAndRegister

  ReplicateAndRegister operation handler
  """

  def __init__( self, operation = None, csPath = None ):
    """c'tor

    :param self: self reference
    :param Operation operation: Operation instance
    :param str csPath: CS path for this handler
    """
    super( ReplicateAndRegister, self ).__init__( operation, csPath )
    # # own gMonitor stuff for files
    gMonitor.registerActivity( "ReplicateAndRegisterAtt", "Replicate and register attempted",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "ReplicateOK", "Replications successful",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "ReplicateFail", "Replications failed",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "RegisterOK", "Registrations successful",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "RegisterFail", "Registrations failed",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    # # for FTS
    gMonitor.registerActivity( "FTSScheduleAtt", "Files schedule attempted",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "FTSScheduleOK", "File schedule successful",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "FTSScheduleFail", "File schedule failed",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    # # SE cache
    self.seCache = {}

    # Clients
    self.fc = FileCatalog()
    self.ftsClient = FTSClient()

  def __call__( self ):
    """ call me maybe """
    # # check replicas first
    checkReplicas = self.__checkReplicas()
    if not checkReplicas["OK"]:
      self.log.error( checkReplicas["Message"] )
    if getattr( self, "FTSMode", False ):
      bannedGroups = getattr( self, "FTSBannedGroups", () )
      if self.request.OwnerGroup in bannedGroups:
        self.log.info( "usage of FTS system is banned for request's owner" )
        return self.rmTransfer()
      return self.ftsTransfer()
    return self.rmTransfer()

  def __checkReplicas( self ):
    """ check done replicas and update file states  """
    waitingFiles = dict( [ ( opFile.LFN, opFile ) for opFile in self.operation
                          if opFile.Status in ( "Waiting", "Scheduled" ) ] )
    targetSESet = set( self.operation.targetSEList )

    replicas = self.fc.getReplicas( waitingFiles.keys() )
    if not replicas["OK"]:
      self.log.error( replicas["Message"] )
      return replicas

    reMissing = re.compile( "no such file or directory" )
    for failedLFN, errStr in replicas["Value"]["Failed"].items():
      waitingFiles[failedLFN].Error = errStr
      if reMissing.search( errStr.lower() ):
        self.log.error( "file %s does not exists" % failedLFN )
        gMonitor.addMark( "ReplicateFail", len( targetSESet ) )
        waitingFiles[failedLFN].Status = "Failed"

    for successfulLFN, reps in replicas["Value"]["Successful"].items():
      if targetSESet.issubset( set( reps ) ):
        self.log.info( "file %s has been replicated to all targets" % successfulLFN )
        waitingFiles[successfulLFN].Status = "Done"

    return S_OK()

  def _addMetadataToFiles( self, toSchedule ):
    """ Add metadata to those files that need to be scheduled through FTS

        toSchedule is a dictionary:
        {'lfn1': [opFile, validReplicas, validTargets], 'lfn2': [opFile, validReplicas, validTargets]}
    """
    if toSchedule:
      self.log.info( "found %s files to schedule, getting metadata from FC" % len( toSchedule ) )
      lfns = toSchedule.keys()
    else:
      self.log.info( "No files to schedule" )
      return S_OK()

    res = self.fc.getFileMetadata( lfns )
    if not res['OK']:
      return res
    else:
      if res['Value']['Failed']:
        self.log.warn( "Can't schedule %d files: problems getting the metadata: %s" % ( len( res['Value']['Failed'] ),
                                                                                ', '.join( res['Value']['Failed'] ) ) )
      metadata = res['Value']['Successful']

    filesToScheduleList = []

    for lfnToSchedule, lfnMetadata in metadata.items():
      opFileToSchedule = toSchedule[lfnToSchedule][0]
      opFileToSchedule.GUID = lfnMetadata['GUID']
      opFileToSchedule.Checksum = lfnMetadata['Checksum']
      opFileToSchedule.ChecksumType = lfnMetadata['CheckSumType']
      opFileToSchedule.Size = lfnMetadata['Size']

      filesToScheduleList.append( ( opFileToSchedule.toJSON()['Value'],
                                    toSchedule[lfnToSchedule][1],
                                    toSchedule[lfnToSchedule][2] ) )

    return S_OK( filesToScheduleList )



  def _filterReplicas( self, opFile ):
    """ filter out banned/invalid source SEs """
    return filterReplicas( opFile, logger = self.log, dataManager = self.dm, seCache = self.seCache )

  def ftsTransfer( self ):
    """ replicate and register using FTS """

    self.log.info( "scheduling files in FTS..." )

    bannedTargets = self.checkSEsRSS()
    if not bannedTargets['OK']:
      gMonitor.addMark( "FTSScheduleAtt" )
      gMonitor.addMark( "FTSScheduleFail" )
      return bannedTargets

    if bannedTargets['Value']:
      return S_OK( "%s targets are banned for writing" % ",".join( bannedTargets['Value'] ) )

    # Can continue now
    self.log.verbose( "No targets banned for writing" )

    toSchedule = {}

    for opFile in self.getWaitingFilesList():
      opFile.Error = ''
      gMonitor.addMark( "FTSScheduleAtt" )
      # # check replicas
      replicas = self._filterReplicas( opFile )
      if not replicas["OK"]:
        continue
      replicas = replicas["Value"]

      validReplicas = replicas["Valid"]
      bannedReplicas = replicas["Banned"]
      noReplicas = replicas['NoReplicas']
      badReplicas = replicas['Bad']
      noPFN = replicas['NoPFN']

      if not validReplicas:
        gMonitor.addMark( "FTSScheduleFail" )
        if bannedReplicas:
          self.log.warn( "unable to schedule '%s', replicas only at banned SEs" % opFile.LFN )
        elif noReplicas:
          self.log.error( "unable to schedule %s, file doesn't exist" % opFile.LFN )
          opFile.Error = 'No replicas found'
          opFile.Status = 'Failed'
        elif badReplicas:
          self.log.error( "unable to schedule %s, all replicas have a bad checksum" % opFile.LFN )
          opFile.Error = 'All replicas have a bad checksum'
          opFile.Status = 'Failed'
        elif noPFN:
          self.log.warn( "unable to schedule %s, could not get a PFN" % opFile.LFN )

      else:
        validTargets = list( set( self.operation.targetSEList ) - set( validReplicas ) )
        if not validTargets:
          self.log.info( "file %s is already present at all targets" % opFile.LFN )
          opFile.Status = "Done"
        else:
          toSchedule[opFile.LFN] = [ opFile, validReplicas, validTargets ]

    res = self._addMetadataToFiles( toSchedule )
    if not res['OK']:
      return res
    filesToScheduleList = res['Value']

    if filesToScheduleList:

      ftsSchedule = self.ftsClient.ftsSchedule( self.request.RequestID,
                                                self.operation.OperationID,
                                                filesToScheduleList )
      if not ftsSchedule["OK"]:
        self.log.error( ftsSchedule["Message"] )
        return ftsSchedule

      # might have nothing to schedule
      ftsSchedule = ftsSchedule["Value"]
      if not ftsSchedule:
        return S_OK()

      for fileID in ftsSchedule["Successful"]:
        gMonitor.addMark( "FTSScheduleOK", 1 )
        for opFile in self.operation:
          if fileID == opFile.FileID:
            opFile.Status = "Scheduled"
            self.log.debug( "%s has been scheduled for FTS" % opFile.LFN )
      self.log.info( "%d files have been scheduled to FTS" % len( ftsSchedule['Successful'] ) )

      for fileID in ftsSchedule["Failed"]:
        gMonitor.addMark( "FTSScheduleFail", 1 )
        for opFile in self.operation:
          if fileID == opFile.FileID:
            opFile.Error = ftsSchedule["Failed"][fileID]
            if 'sourceSURL equals to targetSURL' in opFile.Error:
              # In this case there is no need to continue
              opFile.Status = 'Failed'
            self.log.warn( "unable to schedule %s for FTS: %s" % ( opFile.LFN, opFile.Error ) )
    else:
      self.log.info( "No files to schedule after metadata checks" )

    # Just in case some transfers could not be scheduled, try them with RM
    return self.rmTransfer( fromFTS = True )

  def rmTransfer( self, fromFTS = False ):
    """ replicate and register using dataManager  """
    # # get waiting files. If none just return
    waitingFiles = self.getWaitingFilesList()
    if not waitingFiles:
      return S_OK()
    if fromFTS:
      self.log.info( "Trying transfer using replica manager as FTS failed" )
    else:
      self.log.info( "Transferring files using Data manager..." )
    # # source SE
    sourceSE = self.operation.SourceSE if self.operation.SourceSE else None
    if sourceSE:
      # # check source se for read
      sourceRead = self.rssSEStatus( sourceSE, "ReadAccess" )
      if not sourceRead["OK"]:
        self.log.info( sourceRead["Message"] )
        for opFile in self.operation:
          opFile.Error = sourceRead["Message"]
        self.operation.Error = sourceRead["Message"]
        gMonitor.addMark( "ReplicateAndRegisterAtt", len( self.operation ) )
        gMonitor.addMark( "ReplicateFail", len( self.operation ) )
        return sourceRead

      if not sourceRead["Value"]:
        self.operation.Error = "SourceSE %s is banned for reading" % sourceSE
        self.log.info( self.operation.Error )
        return S_OK( self.operation.Error )

    # # check targetSEs for write
    bannedTargets = self.checkSEsRSS()
    if not bannedTargets['OK']:
      gMonitor.addMark( "ReplicateAndRegisterAtt", len( self.operation ) )
      gMonitor.addMark( "ReplicateFail", len( self.operation ) )
      return bannedTargets

    if bannedTargets['Value']:
      return S_OK( "%s targets are banned for writing" % ",".join( bannedTargets['Value'] ) )

    # Can continue now
    self.log.verbose( "No targets banned for writing" )

    # # loop over files
    for opFile in waitingFiles:

      gMonitor.addMark( "ReplicateAndRegisterAtt", 1 )
      opFile.Error = ''
      lfn = opFile.LFN

      # Check if replica is at the specified source
      replicas = self._filterReplicas( opFile )
      if not replicas["OK"]:
        self.log.error( replicas["Message"] )
        continue
      replicas = replicas["Value"]
      if not replicas["Valid"]:
        self.log.warn( "unable to find valid replicas for %s" % lfn )
        continue
      # # pick the source: fall back to the first valid replica if the file is not
      # # at the requested sourceSE, using a per-file variable so that one file
      # # cannot change the source used for the files that follow
      replicaSE = sourceSE
      if replicaSE not in replicas['Valid']:
        if replicaSE:
          self.log.warn( "%s is not at specified sourceSE %s, changed to %s" % ( lfn, replicaSE, replicas["Valid"][0] ) )
        replicaSE = replicas["Valid"][0]

      # # loop over targetSE
      catalog = self.operation.Catalog
      for targetSE in self.operation.targetSEList:

        # # call DataManager
        if targetSE == replicaSE:
          self.log.warn( "Request to replicate %s to the source SE: %s" % ( lfn, replicaSE ) )
          continue
        res = self.dm.replicateAndRegister( lfn, targetSE, sourceSE = replicaSE, catalog = catalog )
        if res["OK"]:

          if lfn in res["Value"]["Successful"]:

            if "replicate" in res["Value"]["Successful"][lfn]:

              repTime = res["Value"]["Successful"][lfn]["replicate"]
              prString = "file %s replicated at %s in %s s." % ( lfn, targetSE, repTime )

              gMonitor.addMark( "ReplicateOK", 1 )

              if "register" in res["Value"]["Successful"][lfn]:

                gMonitor.addMark( "RegisterOK", 1 )
                regTime = res["Value"]["Successful"][lfn]["register"]
                prString += ' and registered in %s s.' % regTime
                self.log.info( prString )
              else:

                gMonitor.addMark( "RegisterFail", 1 )
                prString += " but failed to register"
                self.log.warn( prString )

                opFile.Error = "Failed to register"
                # # add register replica operation
                registerOperation = self.getRegisterOperation( opFile, targetSE )
                self.request.insertAfter( registerOperation, self.operation )

            else:

              self.log.error( "failed to replicate %s to %s." % ( lfn, targetSE ) )
              gMonitor.addMark( "ReplicateFail", 1 )
              opFile.Error = "Failed to replicate"

          else:

            gMonitor.addMark( "ReplicateFail", 1 )
            reason = res["Value"]["Failed"][lfn]
            self.log.error( "failed to replicate and register file %s at %s:" % ( lfn, targetSE ), reason )
            opFile.Error = reason

        else:

          gMonitor.addMark( "ReplicateFail", 1 )
          opFile.Error = "DataManager error: %s" % res["Message"]
          self.log.error( opFile.Error )

      if not opFile.Error:
        if len( self.operation.targetSEList ) > 1:
          self.log.info( "file %s has been replicated to all targetSEs" % lfn )
        opFile.Status = "Done"


    return S_OK()
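
The handler above relies throughout on DIRAC's S_OK/S_ERROR return convention: every call yields a dict with "OK" set to True (payload under "Value") or False (error text under "Message"). A minimal sketch of the pattern; the divide helper is illustrative only, not part of DIRAC:

from DIRAC import S_OK, S_ERROR

def divide( a, b ):
  """ illustrative helper returning the S_OK/S_ERROR structure """
  if b == 0:
    return S_ERROR( "division by zero" )
  return S_OK( a / b )

res = divide( 4, 2 )
if res["OK"]:
  print( res["Value"] )    # the payload
else:
  print( res["Message"] )  # the error string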
Example #2
class FTSRequest( object ):
  """
  .. class:: FTSRequest

  Helper class for FTS job submission and monitoring.
  """

  # # default checksum type
  __defaultCksmType = "ADLER32"
  # # flag to disable/enable checksum test, default: disabled
  __cksmTest = False

  def __init__( self ):
    """c'tor

    :param self: self reference
    """
    self.log = gLogger.getSubLogger( self.__class__.__name__, True )

    # # final states tuple
    self.finalStates = ( 'Canceled', 'Failed', 'Hold',
                         'Finished', 'FinishedDirty' )
    # # failed states tuple
    self.failedStates = ( 'Canceled', 'Failed',
                          'Hold', 'FinishedDirty' )
    # # successful states tuple
    self.successfulStates = ( 'Finished', 'Done' )
    # # all file states tuple
    self.fileStates = ( 'Done', 'Active', 'Pending', 'Ready', 'Canceled', 'Failed',
                        'Finishing', 'Finished', 'Submitted', 'Hold', 'Waiting' )

    self.statusSummary = {}

    # # request status
    self.requestStatus = 'Unknown'

    # # dict for FTS job files
    self.fileDict = {}
    # # dict for replicas information
    self.catalogReplicas = {}
    # # dict for metadata information
    self.catalogMetadata = {}
    # # dict for files that failed to register
    self.failedRegistrations = {}

    # # placeholder for FileCatalog reference
    self.oCatalog = None

    # # submit timestamp
    self.submitTime = ''

    # # placeholder FTS job GUID
    self.ftsGUID = ''
    # # placeholder for FTS server URL
    self.ftsServer = ''

    # # flag marking FTS job completeness
    self.isTerminal = False
    # # completeness percentage
    self.percentageComplete = 0.0

    # # source SE name
    self.sourceSE = ''
    # # flag marking source SE validity
    self.sourceValid = False
    # # source space token
    self.sourceToken = ''

    # # target SE name
    self.targetSE = ''
    # # flag marking target SE validity
    self.targetValid = False
    # # target space token
    self.targetToken = ''

    # # placeholder for target StorageElement
    self.oTargetSE = None
    # # placeholder for source StorageElement
    self.oSourceSE = None

    # # checksum type, set it to default
    self.__cksmType = self.__defaultCksmType
    # # disable checksum test by default
    self.__cksmTest = False

    # # statuses that prevent submitting to FTS
    self.noSubmitStatus = ( 'Failed', 'Done', 'Staging' )

    # # were sources resolved?
    self.sourceResolved = False

    # # Number of file transfers actually submitted
    self.submittedFiles = 0
    self.transferTime = 0

    self.submitCommand = Operations().getValue( 'DataManagement/FTSPlacement/FTS2/SubmitCommand', 'glite-transfer-submit' )
    self.monitorCommand = Operations().getValue( 'DataManagement/FTSPlacement/FTS2/MonitorCommand', 'glite-transfer-status' )
    self.ftsVersion = Operations().getValue( 'DataManagement/FTSVersion', 'FTS2' )
    self.ftsJob = None
    self.ftsFiles = []

  ####################################################################
  #
  #  Methods for setting/getting/checking the SEs
  #

  def setSourceSE( self, se ):
    """ set SE for source

    :param self: self reference
    :param str se: source SE name
    """
    if se == self.targetSE:
      return S_ERROR( "SourceSE is TargetSE" )
    self.sourceSE = se
    self.oSourceSE = StorageElement( self.sourceSE )
    return self.__checkSourceSE()

  def __checkSourceSE( self ):
    """ check source SE availability

    :param self: self reference
    """
    if not self.sourceSE:
      return S_ERROR( "SourceSE not set" )
    res = self.oSourceSE.isValid( 'Read' )
    if not res['OK']:
      return S_ERROR( "SourceSE not available for reading" )
    res = self.__getSESpaceToken( self.oSourceSE )
    if not res['OK']:
      self.log.error( "FTSRequest failed to get SRM Space Token for SourceSE", res['Message'] )
      return S_ERROR( "SourceSE does not support FTS transfers" )

    if self.__cksmTest:
      cksmType = self.oSourceSE.checksumType()
      if cksmType in ( "NONE", "NULL" ):
        self.log.warn( "Checksum type set to %s at SourceSE %s, disabling checksum test" % ( cksmType,
                                                                                            self.sourceSE ) )
        self.__cksmTest = False
      elif cksmType != self.__cksmType:
        self.log.warn( "Checksum type mismatch, disabling checksum test" )
        self.__cksmTest = False

    self.sourceToken = res['Value']
    self.sourceValid = True
    return S_OK()

  def setTargetSE( self, se ):
    """ set target SE

    :param self: self reference
    :param str se: target SE name
    """
    if se == self.sourceSE:
      return S_ERROR( "TargetSE is SourceSE" )
    self.targetSE = se
    self.oTargetSE = StorageElement( self.targetSE )
    return self.__checkTargetSE()

  def setTargetToken( self, token ):
    """ target space token setter

    :param self: self reference
    :param str token: target space token
    """
    self.targetToken = token
    return S_OK()

  def __checkTargetSE( self ):
    """ check target SE availability

    :param self: self reference
    """
    if not self.targetSE:
      return S_ERROR( "TargetSE not set" )
    res = self.oTargetSE.isValid( 'Write' )
    if not res['OK']:
      return S_ERROR( "TargetSE not available for writing" )
    res = self.__getSESpaceToken( self.oTargetSE )
    if not res['OK']:
      self.log.error( "FTSRequest failed to get SRM Space Token for TargetSE", res['Message'] )
      return S_ERROR( "TargetSE does not support FTS transfers" )

    # # check checksum types
    if self.__cksmTest:
      cksmType = self.oTargetSE.checksumType()
      if cksmType in ( "NONE", "NULL" ):
        self.log.warn( "Checksum type set to %s at TargetSE %s, disabling checksum test" % ( cksmType,
                                                                                            self.targetSE ) )
        self.__cksmTest = False
      elif cksmType != self.__cksmType:
        self.log.warn( "Checksum type mismatch, disabling checksum test" )
        self.__cksmTest = False

    self.targetToken = res['Value']
    self.targetValid = True
    return S_OK()

  @staticmethod
  def __getSESpaceToken( oSE ):
    """ get space token from StorageElement instance

    :param self: self reference
    :param StorageElement oSE: StorageElement instance
    """
    res = oSE.getStorageParameters( protocol = 'srm' )
    if not res['OK']:
      return res
    return S_OK( res['Value'].get( 'SpaceToken' ) )

  ####################################################################
  #
  #  Methods for setting/getting FTS request parameters
  #

  def setFTSGUID( self, guid ):
    """ FTS job GUID setter

    :param self: self reference
    :param str guid: string containing GUID
    """
    if not checkGuid( guid ):
      return S_ERROR( "Incorrect GUID format" )
    self.ftsGUID = guid
    return S_OK()


  def setFTSServer( self, server ):
    """ FTS server setter

    :param self: self reference
    :param str server: FTS server URL
    """
    self.ftsServer = server
    return S_OK()

  def isRequestTerminal( self ):
    """ check if FTS job has terminated

    :param self: self reference
    """
    if self.requestStatus in self.finalStates:
      self.isTerminal = True
    return S_OK( self.isTerminal )

  def setCksmTest( self, cksmTest = False ):
    """ set cksm test

    :param self: self reference
    :param bool cksmTest: flag to enable/disable checksum test
    """
    self.__cksmTest = bool( cksmTest )
    return S_OK( self.__cksmTest )

  ####################################################################
  #
  #  Methods for setting/getting/checking files and their metadata
  #

  def setLFN( self, lfn ):
    """ add LFN :lfn: to :fileDict:

    :param self: self reference
    :param str lfn: LFN to add
    """
    self.fileDict.setdefault( lfn, {'Status':'Waiting'} )
    return S_OK()

  def setSourceSURL( self, lfn, surl ):
    """ source SURL setter

    :param self: self reference
    :param str lfn: LFN
    :param str surl: source SURL
    """
    target = self.fileDict[lfn].get( 'Target' )
    if target == surl:
      return S_ERROR( "Source and target the same" )
    return self.__setFileParameter( lfn, 'Source', surl )

  def getSourceSURL( self, lfn ):
    """ get source SURL for LFN :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Source' )

  def setTargetSURL( self, lfn, surl ):
    """ set target SURL for LFN :lfn:

    :param self: self reference
    :param str lfn: LFN
    :param str surl: target SURL
    """
    source = self.fileDict[lfn].get( 'Source' )
    if source == surl:
      return S_ERROR( "Source and target the same" )
    return self.__setFileParameter( lfn, 'Target', surl )

  def getFailReason( self, lfn ):
    """ get fail reason for file :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Reason' )

  def getRetries( self, lfn ):
    """ get number of attepmts made to transfer file :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Retries' )

  def getTransferTime( self, lfn ):
    """ get duration of transfer for file :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Duration' )

  def getFailed( self ):
    """ get list of wrongly transferred LFNs

    :param self: self reference
    """
    return S_OK( [ lfn for lfn in self.fileDict
                   if self.fileDict[lfn].get( 'Status', '' ) in self.failedStates ] )

  def getStaging( self ):
    """ get files set for prestaging """
    return S_OK( [lfn for lfn in self.fileDict
                  if self.fileDict[lfn].get( 'Status', '' ) == 'Staging'] )

  def getDone( self ):
    """ get list of succesfully transferred LFNs

    :param self: self reference
    """
    return S_OK( [ lfn for lfn in self.fileDict
                   if self.fileDict[lfn].get( 'Status', '' ) in self.successfulStates ] )

  def __setFileParameter( self, lfn, paramName, paramValue ):
    """ set :paramName: to :paramValue: for :lfn: file

    :param self: self reference
    :param str lfn: LFN
    :param str paramName: parameter name
    :param mixed paramValue: a new parameter value
    """
    self.setLFN( lfn )
    self.fileDict[lfn][paramName] = paramValue
    return S_OK()

  def __getFileParameter( self, lfn, paramName ):
    """ get value of :paramName: for file :lfn:

    :param self: self reference
    :param str lfn: LFN
    :param str paramName: parameter name
    """
    if lfn not in self.fileDict:
      return S_ERROR( "Supplied file not set" )
    if paramName not in self.fileDict[lfn]:
      return S_ERROR( "%s not set for file" % paramName )
    return S_OK( self.fileDict[lfn][paramName] )

  ####################################################################
  #
  #  Methods for submission
  #

  def submit( self, monitor = False, printOutput = True ):
    """ submit FTS job

    :param self: self reference
    :param bool monitor: flag to monitor progress of FTS job
    :param bool printOutput: flag to print output of execution to stdout
    """
    res = self.__prepareForSubmission()
    if not res['OK']:
      return res
    res = self.__submitFTSTransfer()
    if not res['OK']:
      return res
    resDict = { 'ftsGUID' : self.ftsGUID, 'ftsServer' : self.ftsServer, 'submittedFiles' : self.submittedFiles }
    if monitor or printOutput:
      gLogger.always( "Submitted %s@%s" % ( self.ftsGUID, self.ftsServer ) )
      if monitor:
        self.monitor( untilTerminal = True, printOutput = printOutput, full = False )
    return S_OK( resDict )

  def __prepareForSubmission( self ):
    """ check validity of job before submission

    :param self: self reference
    """
    if not self.fileDict:
      return S_ERROR( "No files set" )
    if not self.sourceValid:
      return S_ERROR( "SourceSE not valid" )
    if not self.targetValid:
      return S_ERROR( "TargetSE not valid" )
    if not self.ftsServer:
      res = self.__resolveFTSServer()
      if not res['OK']:
        return S_ERROR( "FTSServer not valid" )
    self.resolveSource()
    self.resolveTarget()
    res = self.__filesToSubmit()
    if not res['OK']:
      return S_ERROR( "No files to submit" )
    return S_OK()

  def __getCatalogObject( self ):
    """ CatalogInterface instance facade

    :param self: self reference
    """
    try:
      if not self.oCatalog:
        self.oCatalog = FileCatalog()
      return S_OK()
    except Exception:
      return S_ERROR()

  def __updateReplicaCache( self, lfns = None, overwrite = False ):
    """ update replica cache for list of :lfns:

    :param self: self reference
    :param mixed lfns: list of LFNs
    :param bool overwrite: flag to trigger cache clearing and updating
    """
    if not lfns:
      lfns = self.fileDict.keys()
    toUpdate = [ lfn for lfn in lfns if ( lfn not in self.catalogReplicas ) or overwrite ]
    if not toUpdate:
      return S_OK()
    res = self.__getCatalogObject()
    if not res['OK']:
      return res
    res = self.oCatalog.getReplicas( toUpdate )
    if not res['OK']:
      return S_ERROR( "Failed to update replica cache: %s" % res['Message'] )
    for lfn, error in res['Value']['Failed'].items():
      self.__setFileParameter( lfn, 'Reason', error )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
    for lfn, replicas in res['Value']['Successful'].items():
      self.catalogReplicas[lfn] = replicas
    return S_OK()

  def __updateMetadataCache( self, lfns = None ):
    """ update metadata cache for list of LFNs

    :param self: self reference
    :param list lfns: list of LFNs
    """
    if not lfns:
      lfns = self.fileDict.keys()
    toUpdate = [ lfn for lfn in lfns if lfn not in self.catalogMetadata ]
    if not toUpdate:
      return S_OK()
    res = self.__getCatalogObject()
    if not res['OK']:
      return res
    res = self.oCatalog.getFileMetadata( toUpdate )
    if not res['OK']:
      return S_ERROR( "Failed to get source catalog metadata: %s" % res['Message'] )
    for lfn, error in res['Value']['Failed'].items():
      self.__setFileParameter( lfn, 'Reason', error )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
    for lfn, metadata in res['Value']['Successful'].items():
      self.catalogMetadata[lfn] = metadata
    return S_OK()

  def resolveSource( self ):
    """ resolve source SE eligible for submission

    :param self: self reference
    """

    # Avoid resolving sources twice
    if self.sourceResolved:
      return S_OK()
    # Only resolve files that need a transfer
    toResolve = [ lfn for lfn in self.fileDict if self.fileDict[lfn].get( "Status", "" ) != "Failed" ]
    if not toResolve:
      return S_OK()
    res = self.__updateMetadataCache( toResolve )
    if not res['OK']:
      return res
    res = self.__updateReplicaCache( toResolve )
    if not res['OK']:
      return res

    # Define the source URLs
    for lfn in toResolve:
      replicas = self.catalogReplicas.get( lfn, {} )
      if self.sourceSE not in replicas:
        gLogger.warn( "resolveSource: skipping %s - not replicas at SourceSE %s" % ( lfn, self.sourceSE ) )
        self.__setFileParameter( lfn, 'Reason', "No replica at SourceSE" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue

      res = returnSingleResult( self.oSourceSE.getURL( lfn, protocol = 'srm' ) )
      if not res['OK']:
        gLogger.warn( "resolveSource: skipping %s - %s" % ( lfn, res["Message"] ) )
        self.__setFileParameter( lfn, 'Reason', res['Message'] )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue
      res = self.setSourceSURL( lfn, res['Value'] )
      if not res['OK']:
        gLogger.warn( "resolveSource: skipping %s - %s" % ( lfn, res["Message"] ) )
        self.__setFileParameter( lfn, 'Reason', res['Message'] )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue

    toResolve = []
    for lfn in self.fileDict:
      if "Source" in self.fileDict[lfn]:
        toResolve.append( lfn )
    if not toResolve:
      return S_ERROR( "No eligible Source files" )

    # Get metadata of the sources, to check for existence, availability and caching
    res = self.oSourceSE.getFileMetadata( toResolve )
    if not res['OK']:
      return S_ERROR( "Failed to check source file metadata" )

    for lfn, error in res['Value']['Failed'].items():
      if re.search( 'File does not exist', error ):
        gLogger.warn( "resolveSource: skipping %s - source file does not exists" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source file does not exist" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      else:
        gLogger.warn( "resolveSource: skipping %s - failed to get source metadata" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Failed to get Source metadata" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
    toStage = []

    nbStagedFiles = 0
    for lfn, metadata in res['Value']['Successful'].items():
      lfnStatus = self.fileDict.get( lfn, {} ).get( 'Status' )
      if metadata.get( 'Unavailable', False ):
        gLogger.warn( "resolveSource: skipping %s - source file unavailable" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source file Unavailable" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      elif metadata.get( 'Lost', False ):
        gLogger.warn( "resolveSource: skipping %s - source file lost" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source file Lost" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      elif not metadata.get( 'Cached', metadata['Accessible'] ):
        if lfnStatus != 'Staging':
          toStage.append( lfn )
      elif metadata['Size'] != self.catalogMetadata[lfn]['Size']:
        gLogger.warn( "resolveSource: skipping %s - source file size mismatch" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source size mismatch" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      elif self.catalogMetadata[lfn]['Checksum'] and metadata['Checksum'] and \
            not compareAdler( metadata['Checksum'], self.catalogMetadata[lfn]['Checksum'] ):
        gLogger.warn( "resolveSource: skipping %s - source file checksum mismatch" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source checksum mismatch" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      elif lfnStatus == 'Staging':
        # file that was staging is now cached
        self.__setFileParameter( lfn, 'Status', 'Waiting' )
        nbStagedFiles += 1

    # Some files were being staged
    if nbStagedFiles:
      self.log.info( 'resolveSource: %d files have been staged' % nbStagedFiles )

    # Launching staging of files not in cache
    if toStage:
      gLogger.warn( "resolveSource: %s source files not cached, prestaging..." % len( toStage ) )
      stage = self.oSourceSE.prestageFile( toStage )
      if not stage["OK"]:
        gLogger.error( "resolveSource: error is prestaging", stage["Message"] )
        for lfn in toStage:
          self.__setFileParameter( lfn, 'Reason', stage["Message"] )
          self.__setFileParameter( lfn, 'Status', 'Failed' )
      else:
        for lfn in toStage:
          if lfn in stage['Value']['Successful']:
            self.__setFileParameter( lfn, 'Status', 'Staging' )
          elif lfn in stage['Value']['Failed']:
            self.__setFileParameter( lfn, 'Reason', stage['Value']['Failed'][lfn] )
            self.__setFileParameter( lfn, 'Status', 'Failed' )

    self.sourceResolved = True
    return S_OK()

  def resolveTarget( self ):
    """ find target SE eligible for submission

    :param self: self reference
    """
    toResolve = [ lfn for lfn in self.fileDict
                 if self.fileDict[lfn].get( 'Status' ) not in self.noSubmitStatus ]
    if not toResolve:
      return S_OK()
    res = self.__updateReplicaCache( toResolve )
    if not res['OK']:
      return res
    for lfn in toResolve:
      res = returnSingleResult( self.oTargetSE.getURL( lfn, protocol = 'srm' ) )
      if not res['OK']:
        reason = res['Message']
        gLogger.warn( "resolveTarget: skipping %s - %s" % ( lfn, reason ) )
        self.__setFileParameter( lfn, 'Reason', reason )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue

      res = self.setTargetSURL( lfn, res['Value'] )
      if not res['OK']:
        gLogger.warn( "resolveTarget: skipping %s - %s" % ( lfn, res["Message"] ) )
        self.__setFileParameter( lfn, 'Reason', res['Message'] )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue
    toResolve = []
    for lfn in self.fileDict:
      if "Target" in self.fileDict[lfn]:
        toResolve.append( lfn )
    if not toResolve:
      return S_ERROR( "No eligible Target files" )
    res = self.oTargetSE.exists( toResolve )
    if not res['OK']:
      return S_ERROR( "Failed to check target existence" )
    for lfn, error in res['Value']['Failed'].items():
      self.__setFileParameter( lfn, 'Reason', error )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
    toRemove = []
    for lfn, exists in res['Value']['Successful'].items():
      if exists:
        res = self.getSourceSURL( lfn )
        if not res['OK']:
          gLogger.warn( "resolveTarget: skipping %s - target exists" % lfn )
          self.__setFileParameter( lfn, 'Reason', "Target exists" )
          self.__setFileParameter( lfn, 'Status', 'Failed' )
        elif res['Value'] == self.fileDict[lfn]['Target']:
          gLogger.warn( "resolveTarget: skipping %s - source and target pfns are the same" % lfn )
          self.__setFileParameter( lfn, 'Reason', "Source and Target the same" )
          self.__setFileParameter( lfn, 'Status', 'Failed' )
        else:
          toRemove.append( lfn )
    if toRemove:
      self.oTargetSE.removeFile( toRemove )
    return S_OK()

  def __filesToSubmit( self ):
    """
    check if there is at least one file to submit

    :return: S_OK if at least one file is present, S_ERROR otherwise
    """
    for lfn in self.fileDict:
      lfnStatus = self.fileDict[lfn].get( 'Status' )
      source = self.fileDict[lfn].get( 'Source' )
      target = self.fileDict[lfn].get( 'Target' )
      if lfnStatus not in self.noSubmitStatus and source and target:
        return S_OK()
    return S_ERROR()

  def __createFTSFiles( self ):
    """ create LFNs file for glite-transfer-submit command

    This file contains one line for each file to be transferred:

    sourceSURL targetSURL [CHECKSUMTYPE:CHECKSUM]

    :param self: self reference
    """
    self.__updateMetadataCache()
    for lfn in self.fileDict:
      lfnStatus = self.fileDict[lfn].get( 'Status' )
      if lfnStatus not in self.noSubmitStatus:
        cksmStr = ""
        # # add cksmType:cksm only if cksmType is specified, otherwise let FTS decide by itself
        if self.__cksmTest and self.__cksmType:
          checkSum = self.catalogMetadata.get( lfn, {} ).get( 'Checksum' )
          if checkSum:
            cksmStr = " %s:%s" % ( self.__cksmType, intAdlerToHex( hexAdlerToInt( checkSum ) ) )
        ftsFile = FTSFile()
        ftsFile.LFN = lfn
        ftsFile.SourceSURL = self.fileDict[lfn].get( 'Source' )
        ftsFile.TargetSURL = self.fileDict[lfn].get( 'Target' )
        ftsFile.SourceSE = self.sourceSE
        ftsFile.TargetSE = self.targetSE
        ftsFile.Status = self.fileDict[lfn].get( 'Status' )
        ftsFile.Checksum = cksmStr
        ftsFile.Size = self.catalogMetadata.get( lfn, {} ).get( 'Size' )
        self.ftsFiles.append( ftsFile )
        self.submittedFiles += 1
    return S_OK()

  def __createFTSJob( self, guid = None ):
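    """ create an FTSJob instance and fill it with the files eligible for submission

    :param str guid: reuse an existing FTS job GUID, if provided
    """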
    self.__createFTSFiles()
    ftsJob = FTSJob()
    ftsJob.RequestID = 0
    ftsJob.OperationID = 0
    ftsJob.SourceSE = self.sourceSE
    ftsJob.TargetSE = self.targetSE
    ftsJob.SourceToken = self.sourceToken
    ftsJob.TargetToken = self.targetToken
    ftsJob.FTSServer = self.ftsServer
    if guid:
      ftsJob.FTSGUID = guid

    for ftsFile in self.ftsFiles:
      ftsFile.Attempt += 1
      ftsFile.Error = ""
      ftsJob.addFile( ftsFile )
    self.ftsJob = ftsJob

  def __submitFTSTransfer( self ):
    """ create and execute glite-transfer-submit CLI command

    :param self: self reference
    """
    log = gLogger.getSubLogger( 'Submit' )
    self.__createFTSJob()

    submit = self.ftsJob.submitFTS( self.ftsVersion, command = self.submitCommand )
    if not submit["OK"]:
      log.error( "unable to submit FTSJob: %s" % submit["Message"] )
      return submit

    log.info( "FTSJob '%s'@'%s' has been submitted" % ( self.ftsJob.FTSGUID, self.ftsJob.FTSServer ) )

    # # update statuses for job files
    for ftsFile in self.ftsJob:
      ftsFile.FTSGUID = self.ftsJob.FTSGUID
      ftsFile.Status = "Submitted"
      ftsFile.Attempt += 1

    log.info( "FTSJob '%s'@'%s' has been submitted" % ( self.ftsJob.FTSGUID, self.ftsJob.FTSServer ) )
    self.ftsGUID = self.ftsJob.FTSGUID
    return S_OK()

  def __resolveFTSServer( self ):
    """
    resolve the FTS server to use; it should be the one closest to the target SE

    :param self: self reference
    """
    if self.ftsVersion.upper() == 'FTS2':

      from DIRAC.ConfigurationSystem.Client.Helpers.Resources import getFTS2ServersForSites
      if not self.targetSE:
        return S_ERROR( "Target SE not set" )
      res = getSitesForSE( self.targetSE )
      if not res['OK'] or not res['Value']:
        return S_ERROR( "Could not determine target site" )
      targetSites = res['Value']

      targetSite = ''
      for targetSite in targetSites:
        targetFTS = getFTS2ServersForSites( [targetSite] )
        if targetFTS['OK']:
          ftsTarget = targetFTS['Value'][targetSite]
          if ftsTarget:
            self.ftsServer = ftsTarget
            return S_OK( self.ftsServer )
        else:
          return targetFTS

    elif self.ftsVersion.upper() == 'FTS3':

      from DIRAC.ConfigurationSystem.Client.Helpers.Resources import getFTS3Servers
      res = getFTS3Servers()
      if not res['OK']:
        return res
      ftsServerList = res['Value']
      if ftsServerList:
        # Here we take the first one, regardless of the policy...
        # Unclean, but all this will disappear after refactoring the FTS code
        self.ftsServer = ftsServerList[0]
        return S_OK( self.ftsServer )
      return S_ERROR( 'No FTS3 servers defined' )

    else:
      return S_ERROR( 'Unknown FTS version %s' % self.ftsVersion )


    return S_ERROR( 'No FTS server found for %s' % targetSite )

  ####################################################################
  #
  #  Methods for monitoring
  #

  def summary( self, untilTerminal = False, printOutput = False ):
    """ summary of FTS job

    :param self: self reference
    :param bool untilTerminal: flag to monitor FTS job to its final state
    :param bool printOutput: flag to print out monitoring information to the stdout
    """
    res = self.__isSummaryValid()
    if not res['OK']:
      return res
    while not self.isTerminal:
      res = self.__parseOutput( full = True )
      if not res['OK']:
        return res
      if untilTerminal:
        self.__print()
      self.isRequestTerminal()
      if res['Value'] or ( not untilTerminal ):
        break
      time.sleep( 1 )
    if untilTerminal:
      print ""
    if printOutput and ( not untilTerminal ):
      return self.dumpSummary( printOutput = printOutput )
    return S_OK()

  def monitor( self, untilTerminal = False, printOutput = False, full = True ):
    """ monitor FTS job

    :param self: self reference
    :param bool untilTerminal: flag to monitor FTS job to its final state
    :param bool printOutput: flag to print out monitoring information to the stdout
    """
    if not self.ftsJob:
      self.resolveSource()
      self.__createFTSJob( self.ftsGUID )
    res = self.__isSummaryValid()
    if not res['OK']:
      return res
    if untilTerminal:
      res = self.summary( untilTerminal = untilTerminal, printOutput = printOutput )
      if not res['OK']:
        return res
    res = self.__parseOutput( full = full )
    if not res['OK']:
      return res
    if untilTerminal:
      self.finalize()
    if printOutput:
      self.dump()
    return res

  def dumpSummary( self, printOutput = False ):
    """ get FTS job summary as str

    :param self: self reference
    :param bool printOutput: print summary to stdout
    """

    outStr = ''
    for status in sorted( self.statusSummary ):
      if self.statusSummary[status]:
        outStr = '%s\t%-10s : %-10s\n' % ( outStr, status, str( self.statusSummary[status] ) )
    outStr = outStr.rstrip( '\n' )
    if printOutput:
      print outStr
    return S_OK( outStr )

  def __print( self ):
    """ print progress bar of FTS job completeness to stdout

    :param self: self reference
    """
    width = 100
    bits = int( ( width * self.percentageComplete ) / 100 )
    outStr = "|%s>%s| %.1f%s %s %s" % ( "="*bits, " "*( width - bits ),
                                        self.percentageComplete, "%",
                                        self.requestStatus, " "*10 )
    sys.stdout.write( "%s\r" % ( outStr ) )
    sys.stdout.flush()

  def dump( self ):
    """ print FTS job parameters and files to stdout

    :param self: self reference
    """
    print "%-10s : %-10s" % ( "Status", self.requestStatus )
    print "%-10s : %-10s" % ( "Source", self.sourceSE )
    print "%-10s : %-10s" % ( "Target", self.targetSE )
    print "%-10s : %-128s" % ( "Server", self.ftsServer )
    print "%-10s : %-128s" % ( "GUID", self.ftsGUID )
    for lfn in sorted( self.fileDict ):
      print "\n  %-15s : %-128s" % ( 'LFN', lfn )
      for key in ['Source', 'Target', 'Status', 'Reason', 'Duration']:
        print "  %-15s : %-128s" % ( key, str( self.fileDict[lfn].get( key ) ) )
    return S_OK()

  def __isSummaryValid( self ):
    """ check validity of FTS job summary report

    :param self: self reference
    """
    if not self.ftsServer:
      return S_ERROR( "FTSServer not set" )
    if not self.ftsGUID:
      return S_ERROR( "FTSGUID not set" )
    return S_OK()

  def __parseOutput( self, full = False ):
    """ execute glite-transfer-status command and parse its output

    :param self: self reference
    :param bool full: glite-transfer-status verbosity level, when set, collect information of files as well
    """
    monitor = self.ftsJob.monitorFTS( self.ftsVersion, command = self.monitorCommand, full = full )
    if not monitor['OK']:
      return monitor
    self.percentageComplete = self.ftsJob.Completeness
    self.requestStatus = self.ftsJob.Status
    self.submitTime = self.ftsJob.SubmitTime

    statusSummary = monitor['Value']
    if statusSummary:
      for state in statusSummary:
        self.statusSummary[state] = statusSummary[state]

    self.transferTime = 0
    for ftsFile in self.ftsJob:
      lfn = ftsFile.LFN
      self.__setFileParameter( lfn, 'Status', ftsFile.Status )
      self.__setFileParameter( lfn, 'Reason', ftsFile.Error )
      self.__setFileParameter( lfn, 'Duration', ftsFile._duration )
      targetURL = self.__getFileParameter( lfn, 'Target' )
      if not targetURL['OK']:
        self.__setFileParameter( lfn, 'Target', ftsFile.TargetSURL )
      sourceURL = self.__getFileParameter( lfn, 'Source' )
      if not sourceURL['OK']:
        self.__setFileParameter( lfn, 'Source', ftsFile.SourceSURL )
      self.transferTime += int( ftsFile._duration )
    return S_OK()

  ####################################################################
  #
  #  Methods for finalization
  #

  def finalize( self ):
    """ finalize FTS job

    :param self: self reference
    """
    self.__updateMetadataCache()
    transEndTime = dateTime()
    regStartTime = time.time()
    res = self.getTransferStatistics()
    transDict = res['Value']

    res = self.__registerSuccessful( transDict['transLFNs'] )

    regSuc, regTotal = res['Value']
    regTime = time.time() - regStartTime
    if self.sourceSE and self.targetSE:
      self.__sendAccounting( regSuc, regTotal, regTime, transEndTime, transDict )
    return S_OK()

  def getTransferStatistics( self ):
    """ collect information of Transfers that can be used by Accounting

    :param self: self reference
    """
    transDict = { 'transTotal': len( self.fileDict ),
                  'transLFNs': [],
                  'transOK': 0,
                  'transSize': 0 }

    for lfn in self.fileDict:
      if self.fileDict[lfn].get( 'Status' ) in self.successfulStates:
        if self.fileDict[lfn].get( 'Duration', 0 ):
          transDict['transLFNs'].append( lfn )
          transDict['transOK'] += 1
          if lfn in self.catalogMetadata:
            transDict['transSize'] += self.catalogMetadata[lfn].get( 'Size', 0 )

    return S_OK( transDict )

  def getFailedRegistrations( self ):
    """ get failed registrations dict

    :param self: self reference
    """
    return S_OK( self.failedRegistrations )

  def __registerSuccessful( self, transLFNs ):
    """ register successfully transferred files to the catalogs,
    fill failedRegistrations dict for files that failed to register

    :param self: self reference
    :param list transLFNs: LFNs in FTS job
    """
    self.failedRegistrations = {}
    toRegister = {}
    for lfn in transLFNs:
      res = returnSingleResult( self.oTargetSE.getURL( self.fileDict[lfn].get( 'Target' ), protocol = 'srm' ) )
      if not res['OK']:
        self.__setFileParameter( lfn, 'Reason', res['Message'] )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      else:
        toRegister[lfn] = { 'PFN' : res['Value'], 'SE' : self.targetSE }
    if not toRegister:
      return S_OK( ( 0, 0 ) )
    res = self.__getCatalogObject()
    if not res['OK']:
      self.failedRegistrations = toRegister
      self.log.error( 'Failed to get Catalog Object', res['Message'] )
      return S_OK( ( 0, len( toRegister ) ) )
    res = self.oCatalog.addReplica( toRegister )
    if not res['OK']:
      self.failedRegistrations = toRegister
      self.log.error( 'Failed to add replicas', res['Message'] )
      return S_OK( ( 0, len( toRegister ) ) )
    for lfn, error in res['Value']['Failed'].items():
      self.failedRegistrations[lfn] = toRegister[lfn]
      self.log.error( 'Registration of Replica failed', '%s : %s' % ( lfn, str( error ) ) )
    return S_OK( ( len( res['Value']['Successful'] ), len( toRegister ) ) )

  def __sendAccounting( self, regSuc, regTotal, regTime, transEndTime, transDict ):
    """ send accounting record

    :param self: self reference
    :param regSuc: number of files successfully registered
    :param regTotal: number of files attempted to register
    :param regTime: time spent on registration
    :param transEndTime: time stamp at the end of the FTS job
    :param dict transDict: dict holding counters for transferred files, their sizes and the number of successful transfers
    """

    oAccounting = DataOperation()
    oAccounting.setEndTime( transEndTime )
    oAccounting.setStartTime( self.submitTime )

    accountingDict = {}
    accountingDict['OperationType'] = 'replicateAndRegister'
    result = getProxyInfo()
    if not result['OK']:
      userName = '******'
    else:
      userName = result['Value'].get( 'username', 'unknown' )
    accountingDict['User'] = userName
    accountingDict['Protocol'] = 'FTS' if 'fts3' not in self.ftsServer else 'FTS3'
    accountingDict['RegistrationTime'] = regTime
    accountingDict['RegistrationOK'] = regSuc
    accountingDict['RegistrationTotal'] = regTotal
    accountingDict['TransferOK'] = transDict['transOK']
    accountingDict['TransferTotal'] = transDict['transTotal']
    accountingDict['TransferSize'] = transDict['transSize']
    accountingDict['FinalStatus'] = self.requestStatus
    accountingDict['Source'] = self.sourceSE
    accountingDict['Destination'] = self.targetSE
    accountingDict['TransferTime'] = self.transferTime
    oAccounting.setValuesFromDict( accountingDict )
    self.log.verbose( "Attempting to commit accounting message..." )
    oAccounting.commit()
    self.log.verbose( "...committed." )
    return S_OK()
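
A minimal usage sketch for FTSRequest, chaining the setters and submit() defined above; the SE names and the LFN are placeholders, not real resources:

ftsReq = FTSRequest()
res = ftsReq.setSourceSE( "SOURCE-SE" )
if res["OK"]:
  res = ftsReq.setTargetSE( "TARGET-SE" )
if res["OK"]:
  ftsReq.setLFN( "/vo/data/file.dat" )
  res = ftsReq.submit( monitor = True, printOutput = True )
if not res["OK"]:
  print( res["Message"] )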
Example #3
def main():
    catalog = None
    Script.registerSwitch("C:", "Catalog=", "Catalog to use")
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument(" requestName:  a request name")
    Script.registerArgument(" LFNs:         single LFN or file with LFNs")
    Script.registerArgument(["targetSE:     target SE"])
    Script.parseCommandLine()
    for switch in Script.getUnprocessedSwitches():
        if switch[0] == "C" or switch[0].lower() == "catalog":
            catalog = switch[1]

    args = Script.getPositionalArgs()

    if len(args) < 3:
        Script.showHelp(exitCode=1)

    requestName = args[0]
    lfnList = getLFNList(args[1])
    targetSEs = list(
        set([se for targetSE in args[2:] for se in targetSE.split(",")]))

    gLogger.info("Will create request '%s' with 'ReplicateAndRegister' "
                 "operation using %s lfns and %s target SEs" %
                 (requestName, len(lfnList), len(targetSEs)))

    from DIRAC.RequestManagementSystem.Client.Request import Request
    from DIRAC.RequestManagementSystem.Client.Operation import Operation
    from DIRAC.RequestManagementSystem.Client.File import File
    from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
    from DIRAC.Resources.Catalog.FileCatalog import FileCatalog
    from DIRAC.Core.Utilities.List import breakListIntoChunks

    lfnChunks = breakListIntoChunks(lfnList, 100)
    multiRequests = len(lfnChunks) > 1

    error = 0
    count = 0
    reqClient = ReqClient()
    fc = FileCatalog()
    requestIDs = []
    for lfnChunk in lfnChunks:
        metaDatas = fc.getFileMetadata(lfnChunk)
        if not metaDatas["OK"]:
            gLogger.error("unable to read metadata for lfns: %s" %
                          metaDatas["Message"])
            error = -1
            continue
        metaDatas = metaDatas["Value"]
        for failedLFN, reason in metaDatas["Failed"].items():
            gLogger.error("skipping %s: %s" % (failedLFN, reason))
        lfnChunk = set(metaDatas["Successful"])

        if not lfnChunk:
            gLogger.error("LFN list is empty!!!")
            error = -1
            continue

        if len(lfnChunk) > Operation.MAX_FILES:
            gLogger.error(
                "too many LFNs, max number of files per operation is %s" %
                Operation.MAX_FILES)
            error = -1
            continue

        count += 1
        request = Request()
        request.RequestName = requestName if not multiRequests else "%s_%d" % (
            requestName, count)

        replicateAndRegister = Operation()
        replicateAndRegister.Type = "ReplicateAndRegister"
        replicateAndRegister.TargetSE = ",".join(targetSEs)
        if catalog is not None:
            replicateAndRegister.Catalog = catalog

        for lfn in lfnChunk:
            metaDict = metaDatas["Successful"][lfn]
            opFile = File()
            opFile.LFN = lfn
            opFile.Size = metaDict["Size"]

            if "Checksum" in metaDict:
                # # should check the checksum type; assume Adler32 for now (metaDict["ChecksumType"] == 'AD')
                opFile.Checksum = metaDict["Checksum"]
                opFile.ChecksumType = "ADLER32"
            replicateAndRegister.addFile(opFile)

        request.addOperation(replicateAndRegister)

        putRequest = reqClient.putRequest(request)
        if not putRequest["OK"]:
            gLogger.error("unable to put request '%s': %s" %
                          (request.RequestName, putRequest["Message"]))
            error = -1
            continue
        requestIDs.append(str(putRequest["Value"]))
        if not multiRequests:
            gLogger.always(
                "Request '%s' has been put to ReqDB for execution." %
                request.RequestName)

    if multiRequests:
        gLogger.always(
            "%d requests have been put to ReqDB for execution, with name %s_<num>"
            % (count, requestName))
    if requestIDs:
        gLogger.always("RequestID(s): %s" % " ".join(requestIDs))
    gLogger.always(
        "You can monitor requests' status using command: 'dirac-rms-request <requestName/ID>'"
    )
    DIRAC.exit(error)
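
getLFNList is called here and in the next example but is not shown; a plausible minimal sketch (an assumption, not the actual DIRAC helper) that accepts either a single LFN or a local file listing one LFN per line:

import os

def getLFNList(arg):
    """ hypothetical helper: read LFNs from a file, or return the single LFN given """
    if os.path.isfile(arg):
        with open(arg) as lfnFile:
            return [line.strip() for line in lfnFile if line.strip()]
    return [arg]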
Example #4
def main():
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument(" sourceSE:   source SE")
    Script.registerArgument(" LFN:        LFN or file containing a List of LFNs")
    Script.registerArgument(["targetSE:   target SEs"])
    Script.parseCommandLine()

    import DIRAC
    from DIRAC import gLogger

    # parseCommandLine shows help when mandatory arguments are missing or incorrect
    args = Script.getPositionalArgs()

    sourceSE = args[0]
    lfnList = getLFNList(args[1])
    targetSEs = list(set([se for targetSE in args[2:] for se in targetSE.split(",")]))

    gLogger.info(
        "Will create request with 'MoveReplica' "
        "operation using %s lfns and %s target SEs" % (len(lfnList), len(targetSEs))
    )

    from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
    from DIRAC.RequestManagementSystem.Client.Request import Request
    from DIRAC.RequestManagementSystem.Client.Operation import Operation
    from DIRAC.RequestManagementSystem.Client.File import File
    from DIRAC.Resources.Catalog.FileCatalog import FileCatalog
    from DIRAC.Core.Utilities.List import breakListIntoChunks

    lfnChunks = breakListIntoChunks(lfnList, 100)
    multiRequests = len(lfnChunks) > 1

    error = 0
    count = 0
    reqClient = ReqClient()
    fc = FileCatalog()
    for lfnChunk in lfnChunks:
        metaDatas = fc.getFileMetadata(lfnChunk)
        if not metaDatas["OK"]:
            gLogger.error("unable to read metadata for lfns: %s" % metaDatas["Message"])
            error = -1
            continue
        metaDatas = metaDatas["Value"]
        for failedLFN, reason in metaDatas["Failed"].items():
            gLogger.error("skipping %s: %s" % (failedLFN, reason))
        lfnChunk = set(metaDatas["Successful"])

        if not lfnChunk:
            gLogger.error("LFN list is empty!!!")
            error = -1
            continue

        if len(lfnChunk) > Operation.MAX_FILES:
            gLogger.error("too many LFNs, max number of files per operation is %s" % Operation.MAX_FILES)
            error = -1
            continue

        count += 1

        request = Request()
        request.RequestName = "%s_%s" % (
            md5(repr(time.time()).encode()).hexdigest()[:16],
            md5(repr(time.time()).encode()).hexdigest()[:16],
        )

        moveReplica = Operation()
        moveReplica.Type = "MoveReplica"
        moveReplica.SourceSE = sourceSE
        moveReplica.TargetSE = ",".join(targetSEs)

        for lfn in lfnChunk:
            metaDict = metaDatas["Successful"][lfn]
            opFile = File()
            opFile.LFN = lfn
            opFile.Size = metaDict["Size"]

            if "Checksum" in metaDict:
                # should check the checksum type; for now assume Adler32 (metaDict["ChecksumType"] == 'AD')
                opFile.Checksum = metaDict["Checksum"]
                opFile.ChecksumType = "ADLER32"
            moveReplica.addFile(opFile)

        request.addOperation(moveReplica)

        result = reqClient.putRequest(request)
        if not result["OK"]:
            gLogger.error("Failed to submit Request: %s" % (result["Message"]))
            error = -1
            continue

        if not multiRequests:
            gLogger.always("Request %d submitted successfully" % result["Value"])

    if multiRequests:
        gLogger.always("%d requests have been submitted" % (count))
    DIRAC.exit(error)
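

# Entry-point guard (a minimal sketch, assumed since the listing ends without
# one): DIRAC command-line scripts typically invoke main() this way.
if __name__ == "__main__":
    main()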
Exemplo n.º 5
0
class DataIntegrityClient(Client):
    """
  The following methods are supported in the service but are not mentioned explicitly here:

          getProblematic()
             Obtains a problematic file from the IntegrityDB based on the LastUpdate time

          getPrognosisProblematics(prognosis)
            Obtains all the problematics of a particular prognosis from the integrityDB

          getProblematicsSummary()
            Obtains a count of the number of problematics for each prognosis found

          getDistinctPrognosis()
            Obtains the distinct prognosis found in the integrityDB

          getTransformationProblematics(prodID)
            Obtains the problematics for a given production

          incrementProblematicRetry(fileID)
            Increments the retry count for the supplied file ID

          changeProblematicPrognosis(fileID,newPrognosis)
            Changes the prognosis of the supplied file to the new prognosis

          setProblematicStatus(fileID,status)
            Updates the status of a problematic in the integrityDB

          removeProblematic(fileID)
            This removes the specified file ID from the integrity DB

          insertProblematic(sourceComponent,fileMetadata)
            Inserts file with supplied metadata into the integrity DB

  """
    def __init__(self, **kwargs):

        Client.__init__(self, **kwargs)
        self.setServer('DataManagement/DataIntegrity')
        self.dm = DataManager()
        self.fc = FileCatalog()

    ##########################################################################
    #
    # This section contains the specific methods for LFC->SE checks
    #

    def catalogDirectoryToSE(self, lfnDir):
        """ This obtains the replica and metadata information from the catalog for the supplied directory and checks against the storage elements.
    """
        gLogger.info("-" * 40)
        gLogger.info("Performing the LFC->SE check")
        gLogger.info("-" * 40)
        if isinstance(lfnDir, str):
            lfnDir = [lfnDir]
        res = self.__getCatalogDirectoryContents(lfnDir)
        if not res['OK']:
            return res
        replicas = res['Value']['Replicas']
        catalogMetadata = res['Value']['Metadata']
        res = self.__checkPhysicalFiles(replicas, catalogMetadata)
        if not res['OK']:
            return res
        resDict = {
            'CatalogMetadata': catalogMetadata,
            'CatalogReplicas': replicas
        }
        return S_OK(resDict)

    def catalogFileToSE(self, lfns):
        """ This obtains the replica and metadata information from the catalog and checks against the storage elements.
    """
        gLogger.info("-" * 40)
        gLogger.info("Performing the LFC->SE check")
        gLogger.info("-" * 40)
        if isinstance(lfns, str):
            lfns = [lfns]
        res = self.__getCatalogMetadata(lfns)
        if not res['OK']:
            return res
        catalogMetadata = res['Value']
        res = self.__getCatalogReplicas(catalogMetadata.keys())
        if not res['OK']:
            return res
        replicas = res['Value']
        res = self.__checkPhysicalFiles(replicas, catalogMetadata)
        if not res['OK']:
            return res
        resDict = {
            'CatalogMetadata': catalogMetadata,
            'CatalogReplicas': replicas
        }
        return S_OK(resDict)

    def checkPhysicalFiles(self, replicas, catalogMetadata, ses=[]):
        """ This obtains takes the supplied replica and metadata information obtained from the catalog and checks against the storage elements.
    """
        gLogger.info("-" * 40)
        gLogger.info("Performing the LFC->SE check")
        gLogger.info("-" * 40)
        return self.__checkPhysicalFiles(replicas, catalogMetadata, ses=ses)

    def __checkPhysicalFiles(self, replicas, catalogMetadata, ses=[]):
        """ This obtains the physical file metadata and checks the metadata against the catalog entries
    """
        seLfns = {}
        for lfn, replicaDict in replicas.items():
            for se, _url in replicaDict.items():
                if (ses) and (se not in ses):
                    continue
                seLfns.setdefault(se, []).append(lfn)
        gLogger.info('%s %s' %
                     ('Storage Element'.ljust(20), 'Replicas'.rjust(20)))

        for se in sortList(seLfns):
            files = len(seLfns[se])
            gLogger.info('%s %s' % (se.ljust(20), str(files).rjust(20)))

            lfns = seLfns[se]
            sizeMismatch = []
            res = self.__checkPhysicalFileMetadata(lfns, se)
            if not res['OK']:
                gLogger.error('Failed to get physical file metadata.',
                              res['Message'])
                return res
            for lfn, metadata in res['Value'].items():
                if lfn in catalogMetadata:
                    if (metadata['Size'] != catalogMetadata[lfn]['Size']) and (
                            metadata['Size'] != 0):
                        sizeMismatch.append((lfn, 'deprecatedUrl', se,
                                             'CatalogPFNSizeMismatch'))
            if sizeMismatch:
                self.__reportProblematicReplicas(sizeMismatch, se,
                                                 'CatalogPFNSizeMismatch')
        return S_OK()

    def __checkPhysicalFileMetadata(self, lfns, se):
        """ Check obtain the physical file metadata and check the files are available
    """
        gLogger.info('Checking the integrity of %s physical files at %s' %
                     (len(lfns), se))

        res = StorageElement(se).getFileMetadata(lfns)

        if not res['OK']:
            gLogger.error('Failed to get metadata for lfns.', res['Message'])
            return res
        lfnMetadataDict = res['Value']['Successful']
        # If the replicas are completely missing
        missingReplicas = []
        for lfn, reason in res['Value']['Failed'].items():
            if re.search('File does not exist', reason):
                missingReplicas.append(
                    (lfn, 'deprecatedUrl', se, 'PFNMissing'))
        if missingReplicas:
            self.__reportProblematicReplicas(missingReplicas, se, 'PFNMissing')
        lostReplicas = []
        unavailableReplicas = []
        zeroSizeReplicas = []
        # If the files are not accessible
        for lfn, lfnMetadata in lfnMetadataDict.items():
            if lfnMetadata['Lost']:
                lostReplicas.append((lfn, 'deprecatedUrl', se, 'PFNLost'))
            if lfnMetadata['Unavailable']:
                unavailableReplicas.append(
                    (lfn, 'deprecatedUrl', se, 'PFNUnavailable'))
            if lfnMetadata['Size'] == 0:
                zeroSizeReplicas.append(
                    (lfn, 'deprecatedUrl', se, 'PFNZeroSize'))
        if lostReplicas:
            self.__reportProblematicReplicas(lostReplicas, se, 'PFNLost')
        if unavailableReplicas:
            self.__reportProblematicReplicas(unavailableReplicas, se,
                                             'PFNUnavailable')
        if zeroSizeReplicas:
            self.__reportProblematicReplicas(zeroSizeReplicas, se,
                                             'PFNZeroSize')
        gLogger.info(
            'Checking the integrity of physical files at %s complete' % se)
        return S_OK(lfnMetadataDict)

    ##########################################################################
    #
    # This section contains the specific methods for SE->LFC checks
    #

    def storageDirectoryToCatalog(self, lfnDir, storageElement):
        """ This obtains the file found on the storage element in the supplied directories and determines whether they exist in the catalog and checks their metadata elements
    """
        gLogger.info("-" * 40)
        gLogger.info("Performing the SE->LFC check at %s" % storageElement)
        gLogger.info("-" * 40)
        if isinstance(lfnDir, str):
            lfnDir = [lfnDir]
        res = self.__getStorageDirectoryContents(lfnDir, storageElement)
        if not res['OK']:
            return res
        storageFileMetadata = res['Value']
        if storageFileMetadata:
            return self.__checkCatalogForSEFiles(storageFileMetadata,
                                                 storageElement)
        return S_OK({'CatalogMetadata': {}, 'StorageMetadata': {}})

    def __checkCatalogForSEFiles(self, storageMetadata, storageElement):
        gLogger.info('Checking %s storage files exist in the catalog' %
                     len(storageMetadata))

        res = self.fc.getReplicas(storageMetadata)
        if not res['OK']:
            gLogger.error("Failed to get replicas for LFN", res['Message'])
            return res
        failedLfns = res['Value']['Failed']
        successfulLfns = res['Value']['Successful']
        notRegisteredLfns = []

        for lfn in storageMetadata:
            if lfn in failedLfns:
                if 'No such file or directory' in failedLfns[lfn]:
                    notRegisteredLfns.append(
                        (lfn, 'deprecatedUrl', storageElement,
                         'LFNNotRegistered'))
                    failedLfns.pop(lfn)
            elif storageElement not in successfulLfns[lfn]:
                notRegisteredLfns.append(
                    (lfn, 'deprecatedUrl', storageElement, 'LFNNotRegistered'))

        if notRegisteredLfns:
            self.__reportProblematicReplicas(notRegisteredLfns, storageElement,
                                             'LFNNotRegistered')
        if failedLfns:
            return S_ERROR('Failed to obtain replicas')

        # For the LFNs found to be registered obtain the file metadata from the catalog and verify against the storage metadata
        res = self.__getCatalogMetadata(storageMetadata)
        if not res['OK']:
            return res
        catalogMetadata = res['Value']
        sizeMismatch = []
        for lfn, lfnCatalogMetadata in catalogMetadata.items():
            lfnStorageMetadata = storageMetadata[lfn]
            if (lfnStorageMetadata['Size'] != lfnCatalogMetadata['Size']) and (
                    lfnStorageMetadata['Size'] != 0):
                sizeMismatch.append((lfn, 'deprecatedUrl', storageElement,
                                     'CatalogPFNSizeMismatch'))
        if sizeMismatch:
            self.__reportProblematicReplicas(sizeMismatch, storageElement,
                                             'CatalogPFNSizeMismatch')
        gLogger.info('Checking storage files exist in the catalog complete')
        resDict = {
            'CatalogMetadata': catalogMetadata,
            'StorageMetadata': storageMetadata
        }
        return S_OK(resDict)

    def getStorageDirectoryContents(self, lfnDir, storageElement):
        """ This obtains takes the supplied lfn directories and recursively obtains the files in the supplied storage element
    """
        return self.__getStorageDirectoryContents(lfnDir, storageElement)

    def __getStorageDirectoryContents(self, lfnDir, storageElement):
        """ Obtians the contents of the supplied directory on the storage
    """
        gLogger.info('Obtaining the contents for %s directories at %s' %
                     (len(lfnDir), storageElement))

        se = StorageElement(storageElement)

        res = se.exists(lfnDir)
        if not res['OK']:
            gLogger.error("Failed to obtain existance of directories",
                          res['Message'])
            return res
        for directory, error in res['Value']['Failed'].items():
            gLogger.error('Failed to determine existance of directory',
                          '%s %s' % (directory, error))
        if res['Value']['Failed']:
            return S_ERROR('Failed to determine existance of directory')
        directoryExists = res['Value']['Successful']
        activeDirs = []
        for directory in sorted(directoryExists):
            exists = directoryExists[directory]
            if exists:
                activeDirs.append(directory)
        allFiles = {}
        while len(activeDirs) > 0:
            currentDir = activeDirs[0]
            res = se.listDirectory(currentDir)
            activeDirs.remove(currentDir)
            if not res['OK']:
                gLogger.error('Failed to get directory contents',
                              res['Message'])
                return res
            elif currentDir in res['Value']['Failed']:
                gLogger.error(
                    'Failed to get directory contents',
                    '%s %s' % (currentDir, res['Value']['Failed'][currentDir]))
                return S_ERROR(res['Value']['Failed'][currentDir])
            else:
                dirContents = res['Value']['Successful'][currentDir]
                activeDirs.extend(
                    se.getLFNFromURL(dirContents['SubDirs']).get(
                        'Value', {}).get('Successful', []))
                fileURLMetadata = dirContents['Files']
                fileMetadata = {}
                res = se.getLFNFromURL(fileURLMetadata)
                if not res['OK']:
                    gLogger.error('Failed to get directory content LFNs',
                                  res['Message'])
                    return res

                for url, error in res['Value']['Failed'].items():
                    gLogger.error("Failed to get LFN for URL",
                                  "%s %s" % (url, error))
                if res['Value']['Failed']:
                    return S_ERROR("Failed to get LFNs for PFNs")
                urlLfns = res['Value']['Successful']
                for urlLfn, lfn in urlLfns.items():
                    fileMetadata[lfn] = fileURLMetadata[urlLfn]
                allFiles.update(fileMetadata)

        zeroSizeFiles = []

        for lfn in sorted(allFiles):
            if os.path.basename(lfn) == 'dirac_directory':
                allFiles.pop(lfn)
            else:
                metadata = allFiles[lfn]
                if metadata['Size'] == 0:
                    zeroSizeFiles.append(
                        (lfn, 'deprecatedUrl', storageElement, 'PFNZeroSize'))
        if zeroSizeFiles:
            self.__reportProblematicReplicas(zeroSizeFiles, storageElement,
                                             'PFNZeroSize')

        gLogger.info('Obtained a total of %s files for directories at %s' %
                     (len(allFiles), storageElement))
        return S_OK(allFiles)

    def __getStoragePathExists(self, lfnPaths, storageElement):
        gLogger.info('Determining the existence of %d files at %s' %
                     (len(lfnPaths), storageElement))

        se = StorageElement(storageElement)

        res = se.exists(lfnPaths)
        if not res['OK']:
            gLogger.error("Failed to obtain existance of paths",
                          res['Message'])
            return res
        for lfnPath, error in res['Value']['Failed'].items():
            gLogger.error('Failed to determine existance of path',
                          '%s %s' % (lfnPath, error))
        if res['Value']['Failed']:
            return S_ERROR('Failed to determine existance of paths')
        pathExists = res['Value']['Successful']
        resDict = {}
        for lfn, exists in pathExists.items():
            if exists:
                resDict[lfn] = True
        return S_OK(resDict)

    ##########################################################################
    #
    # This section contains the specific methods for obtaining replica and metadata information from the catalog
    #

    def __getCatalogDirectoryContents(self, lfnDir):
        """ Obtain the contents of the supplied directory
    """
        gLogger.info('Obtaining the catalog contents for %s directories' %
                     len(lfnDir))

        activeDirs = lfnDir
        allFiles = {}
        while len(activeDirs) > 0:
            currentDir = activeDirs[0]
            res = self.fc.listDirectory(currentDir)
            activeDirs.remove(currentDir)
            if not res['OK']:
                gLogger.error('Failed to get directory contents',
                              res['Message'])
                return res
            elif currentDir in res['Value']['Failed']:
                gLogger.error(
                    'Failed to get directory contents',
                    '%s %s' % (currentDir, res['Value']['Failed'][currentDir]))
            else:
                dirContents = res['Value']['Successful'][currentDir]
                activeDirs.extend(dirContents['SubDirs'])
                allFiles.update(dirContents['Files'])

        zeroReplicaFiles = []
        zeroSizeFiles = []
        allReplicaDict = {}
        allMetadataDict = {}
        for lfn, lfnDict in allFiles.items():
            lfnReplicas = {}
            for se, replicaDict in lfnDict['Replicas'].items():
                lfnReplicas[se] = replicaDict['PFN']
            if not lfnReplicas:
                zeroReplicaFiles.append(lfn)
            allReplicaDict[lfn] = lfnReplicas
            allMetadataDict[lfn] = lfnDict['MetaData']
            if lfnDict['MetaData']['Size'] == 0:
                zeroSizeFiles.append(lfn)
        if zeroReplicaFiles:
            self.__reportProblematicFiles(zeroReplicaFiles, 'LFNZeroReplicas')
        if zeroSizeFiles:
            self.__reportProblematicFiles(zeroSizeFiles, 'LFNZeroSize')
        gLogger.info(
            'Obtained a total of %s files for the supplied directories' %
            len(allMetadataDict))
        resDict = {'Metadata': allMetadataDict, 'Replicas': allReplicaDict}
        return S_OK(resDict)

    def __getCatalogReplicas(self, lfns):
        """ Obtain the file replicas from the catalog while checking that there are replicas
    """
        gLogger.info('Obtaining the replicas for %s files' % len(lfns))

        zeroReplicaFiles = []
        res = self.fc.getReplicas(lfns, allStatus=True)
        if not res['OK']:
            gLogger.error('Failed to get catalog replicas', res['Message'])
            return res
        allReplicas = res['Value']['Successful']
        for lfn, error in res['Value']['Failed'].items():
            if re.search('File has zero replicas', error):
                zeroReplicaFiles.append(lfn)
        if zeroReplicaFiles:
            self.__reportProblematicFiles(zeroReplicaFiles, 'LFNZeroReplicas')
        gLogger.info('Obtaining the replicas for files complete')
        return S_OK(allReplicas)

    def __getCatalogMetadata(self, lfns):
        """ Obtain the file metadata from the catalog while checking they exist
    """
        if not lfns:
            return S_OK({})
        gLogger.info('Obtaining the catalog metadata for %s files' % len(lfns))

        missingCatalogFiles = []
        zeroSizeFiles = []
        res = self.fc.getFileMetadata(lfns)
        if not res['OK']:
            gLogger.error('Failed to get catalog metadata', res['Message'])
            return res
        allMetadata = res['Value']['Successful']
        for lfn, error in res['Value']['Failed'].items():
            if re.search('No such file or directory', error):
                missingCatalogFiles.append(lfn)
        if missingCatalogFiles:
            self.__reportProblematicFiles(missingCatalogFiles,
                                          'LFNCatalogMissing')
        for lfn, metadata in allMetadata.items():
            if metadata['Size'] == 0:
                zeroSizeFiles.append(lfn)
        if zeroSizeFiles:
            self.__reportProblematicFiles(zeroSizeFiles, 'LFNZeroSize')
        gLogger.info('Obtaining the catalog metadata complete')
        return S_OK(allMetadata)

    ##########################################################################
    #
    # This section contains the methods for inserting problematic files into the integrity DB
    #

    def __reportProblematicFiles(self, lfns, reason):
        """ Simple wrapper function around setFileProblematic """
        gLogger.info('The following %s files were found with %s' %
                     (len(lfns), reason))
        for lfn in sortList(lfns):
            gLogger.info(lfn)
        res = self.setFileProblematic(lfns,
                                      reason,
                                      sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with files',
                         res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with files')

    def setFileProblematic(self, lfn, reason, sourceComponent=''):
        """ This method updates the status of the file in the FileCatalog and the IntegrityDB

        lfn - the lfn of the file
        reason - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
        if isinstance(lfn, list):
            lfns = lfn
        elif isinstance(lfn, str):
            lfns = [lfn]
        else:
            errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info(
            "DataIntegrityClient.setFileProblematic: Attempting to update %s files."
            % len(lfns))
        fileMetadata = {}
        for lfn in lfns:
            fileMetadata[lfn] = {
                'Prognosis': reason,
                'LFN': lfn,
                'PFN': '',
                'SE': ''
            }
        res = self.insertProblematic(sourceComponent, fileMetadata)
        if not res['OK']:
            gLogger.error(
                "DataIntegrityClient.setReplicaProblematic: Failed to insert problematics to integrity DB"
            )
        return res

    def __reportProblematicReplicas(self, replicaTuple, se, reason):
        """ Simple wrapper function around setReplicaProblematic """
        gLogger.info('The following %s files had %s at %s' %
                     (len(replicaTuple), reason, se))
        for lfn, _pfn, _se, _reason in sortList(replicaTuple):
            if lfn:
                gLogger.info(lfn)
        res = self.setReplicaProblematic(replicaTuple,
                                         sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with replicas',
                         res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with replicas')

    def setReplicaProblematic(self, replicaTuple, sourceComponent=''):
        """ This method updates the status of the replica in the FileCatalog and the IntegrityDB
        The supplied replicaTuple should be of the form (lfn, pfn, se, prognosis) or a list of such tuples

        lfn - the lfn of the file
        pfn - the pfn if available (otherwise '')
        se - the storage element of the problematic replica (otherwise '')
        prognosis - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
        if isinstance(replicaTuple, tuple):
            replicaTuple = [replicaTuple]
        elif not isinstance(replicaTuple, list):
            errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info(
            "DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas."
            % len(replicaTuple))
        replicaDict = {}
        for lfn, pfn, se, reason in replicaTuple:
            replicaDict[lfn] = {
                'Prognosis': reason,
                'LFN': lfn,
                'PFN': pfn,
                'SE': se
            }
        res = self.insertProblematic(sourceComponent, replicaDict)
        if not res['OK']:
            gLogger.error(
                "DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB"
            )
            return res
        for lfn in replicaDict.keys():
            replicaDict[lfn]['Status'] = 'Problematic'

        res = self.fc.setReplicaStatus(replicaDict)
        if not res['OK']:
            errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
            gLogger.error(errStr, res['Message'])
            return res
        failed = res['Value']['Failed']
        successful = res['Value']['Successful']
        resDict = {'Successful': successful, 'Failed': failed}
        return S_OK(resDict)

    ##########################################################################
    #
    # This section contains the resolution methods for various prognoses
    #

    def __updateCompletedFiles(self, prognosis, fileID):
        gLogger.info("%s file (%d) is resolved" % (prognosis, fileID))
        return self.setProblematicStatus(fileID, 'Resolved')

    def __returnProblematicError(self, fileID, res):
        self.incrementProblematicRetry(fileID)
        gLogger.error('DataIntegrityClient failure', res['Message'])
        return res


#   def __getRegisteredPFNLFN( self, pfn, storageElement ):
#
#     res = StorageElement( storageElement ).getURL( pfn )
#     if not res['OK']:
#       gLogger.error( "Failed to get registered PFN for physical files", res['Message'] )
#       return res
#     for pfn, error in res['Value']['Failed'].items():
#       gLogger.error( 'Failed to obtain registered PFN for physical file', '%s %s' % ( pfn, error ) )
#       return S_ERROR( 'Failed to obtain registered PFNs from physical file' )
#     registeredPFN = res['Value']['Successful'][pfn]
#     res = returnSingleResult( self.fc.getLFNForPFN( registeredPFN ) )
#     if ( not res['OK'] ) and re.search( 'No such file or directory', res['Message'] ):
#       return S_OK( False )
#     return S_OK( res['Value'] )

    def __updateReplicaToChecked(self, problematicDict):
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']
        prognosis = problematicDict['Prognosis']
        problematicDict['Status'] = 'Checked'

        res = returnSingleResult(
            self.fc.setReplicaStatus({lfn: problematicDict}))

        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        gLogger.info("%s replica (%d) is updated to Checked status" %
                     (prognosis, fileID))
        return self.__updateCompletedFiles(prognosis, fileID)

    def resolveCatalogPFNSizeMismatch(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the CatalogPFNSizeMismatch prognosis
    """
        lfn = problematicDict['LFN']
        se = problematicDict['SE']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        catalogSize = res['Value']
        res = returnSingleResult(StorageElement(se).getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageSize = res['Value']
        bkKCatalog = FileCatalog(['BookkeepingDB'])
        res = returnSingleResult(bkKCatalog.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        bookkeepingSize = res['Value']
        if bookkeepingSize == catalogSize == storageSize:
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) matched all registered sizes."
                % fileID)
            return self.__updateReplicaToChecked(problematicDict)
        if (catalogSize == bookkeepingSize):
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also"
                % fileID)
            res = returnSingleResult(self.fc.getReplicas(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            if len(res['Value']) <= 1:
                gLogger.info(
                    "CatalogPFNSizeMismatch replica (%d) has no other replicas."
                    % fileID)
                return S_ERROR(
                    "Not removing catalog file mismatch since the only replica"
                )
            else:
                gLogger.info(
                    "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..."
                    % fileID)
                res = self.dm.removeReplica(se, lfn)
                if not res['OK']:
                    return self.__returnProblematicError(fileID, res)
                return self.__updateCompletedFiles('CatalogPFNSizeMismatch',
                                                   fileID)
        if (catalogSize != bookkeepingSize) and (bookkeepingSize
                                                 == storageSize):
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size"
                % fileID)
            res = self.__updateReplicaToChecked(problematicDict)
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            return self.changeProblematicPrognosis(fileID,
                                                   'BKCatalogSizeMismatch')
        gLogger.info(
            "CatalogPFNSizeMismatch replica (%d) all sizes found mismatch. Updating retry count"
            % fileID)
        return self.incrementProblematicRetry(fileID)

    def resolvePFNNotRegistered(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNNotRegistered prognosis
    """
        lfn = problematicDict['LFN']
        seName = problematicDict['SE']
        fileID = problematicDict['FileID']

        se = StorageElement(seName)
        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if not res['Value']:
            # The file does not exist in the catalog
            res = returnSingleResult(se.removeFile(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            return self.__updateCompletedFiles('PFNNotRegistered', fileID)
        res = returnSingleResult(se.getFileMetadata(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            gLogger.info("PFNNotRegistered replica (%d) found to be missing." %
                         fileID)
            return self.__updateCompletedFiles('PFNNotRegistered', fileID)
        elif not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageMetadata = res['Value']
        if storageMetadata['Lost']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found to be Lost. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNLost')
        if storageMetadata['Unavailable']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found to be Unavailable. Updating retry count"
                % fileID)
            return self.incrementProblematicRetry(fileID)

        # HACK until we can obtain the space token descriptions through GFAL
        site = seName.split('_')[0].split('-')[0]
        if not storageMetadata['Cached']:
            if lfn.endswith('.raw'):
                seName = '%s-RAW' % site
            else:
                seName = '%s-RDST' % site
        elif storageMetadata['Migrated']:
            if lfn.startswith('/lhcb/data'):
                seName = '%s_M-DST' % site
            else:
                seName = '%s_MC_M-DST' % site
        else:
            if lfn.startswith('/lhcb/data'):
                seName = '%s-DST' % site
            else:
                seName = '%s_MC-DST' % site

        problematicDict['SE'] = seName
        res = returnSingleResult(se.getURL(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)

        problematicDict['PFN'] = res['Value']

        res = returnSingleResult(self.fc.addReplica({lfn: problematicDict}))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        res = returnSingleResult(self.fc.getFileMetadata(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']['Size'] != storageMetadata['Size']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found with catalog size mismatch. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID,
                                                   'CatalogPFNSizeMismatch')
        return self.__updateCompletedFiles('PFNNotRegistered', fileID)

    def resolveLFNCatalogMissing(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the LFNCatalogMissing prognosis
    """
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']:
            return self.__updateCompletedFiles('LFNCatalogMissing', fileID)
        # Remove the file from all catalogs
        # RF_NOTE : here I can do it because it's a single file, but otherwise I would need to sort the path
        res = returnSingleResult(self.fc.removeFile(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        return self.__updateCompletedFiles('LFNCatalogMissing', fileID)

    def resolvePFNMissing(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNMissing prognosis
    """
        se = problematicDict['SE']
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if not res['Value']:
            gLogger.info("PFNMissing file (%d) no longer exists in catalog" %
                         fileID)
            return self.__updateCompletedFiles('PFNMissing', fileID)

        res = returnSingleResult(StorageElement(se).exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']:
            gLogger.info("PFNMissing replica (%d) is no longer missing" %
                         fileID)
            return self.__updateReplicaToChecked(problematicDict)
        gLogger.info("PFNMissing replica (%d) does not exist" % fileID)
        res = returnSingleResult(self.fc.getReplicas(lfn, allStatus=True))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        replicas = res['Value']
        seSite = se.split('_')[0].split('-')[0]
        found = False
        for replicaSE in replicas.keys():
            if re.search(seSite, replicaSE):
                found = True
                problematicDict['SE'] = replicaSE
                se = replicaSE
        if not found:
            gLogger.info(
                "PFNMissing replica (%d) is no longer registered at SE. Resolved."
                % fileID)
            return self.__updateCompletedFiles('PFNMissing', fileID)
        gLogger.info(
            "PFNMissing replica (%d) does not exist. Removing from catalog..."
            % fileID)
        res = returnSingleResult(self.fc.removeReplica({lfn: problematicDict}))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if len(replicas) == 1:
            gLogger.info(
                "PFNMissing replica (%d) had a single replica. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'LFNZeroReplicas')
        res = self.dm.replicateAndRegister(problematicDict['LFN'], se)
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        # If we get here the problem is solved so we can update the integrityDB
        return self.__updateCompletedFiles('PFNMissing', fileID)

    def resolvePFNUnavailable(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNUnavailable prognosis
    """
        lfn = problematicDict['LFN']
        se = problematicDict['SE']
        fileID = problematicDict['FileID']

        res = returnSingleResult(StorageElement(se).getFileMetadata(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            # The file is no longer Unavailable but has now disappeared completely
            gLogger.info(
                "PFNUnavailable replica (%d) found to be missing. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')
        if (not res['OK']) or res['Value']['Unavailable']:
            gLogger.info(
                "PFNUnavailable replica (%d) found to still be Unavailable" %
                fileID)
            return self.incrementProblematicRetry(fileID)
        if res['Value']['Lost']:
            gLogger.info(
                "PFNUnavailable replica (%d) is now found to be Lost. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNLost')
        gLogger.info("PFNUnavailable replica (%d) is no longer Unavailable" %
                     fileID)
        # Need to make the replica okay in the Catalog
        return self.__updateReplicaToChecked(problematicDict)

    def resolvePFNZeroSize(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolves the PFNZeroSize prognosis
    """
        lfn = problematicDict['LFN']
        seName = problematicDict['SE']
        fileID = problematicDict['FileID']

        se = StorageElement(seName)

        res = returnSingleResult(se.getFileSize(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            gLogger.info(
                "PFNZeroSize replica (%d) found to be missing. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')
        storageSize = res['Value']
        if storageSize == 0:
            res = returnSingleResult(se.removeFile(lfn))

            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            gLogger.info(
                "PFNZeroSize replica (%d) removed. Updating prognosis" %
                problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')

        res = returnSingleResult(self.fc.getReplicas(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if seName not in res['Value']:
            gLogger.info(
                "PFNZeroSize replica (%d) not registered in catalog. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNNotRegistered')
        res = returnSingleResult(self.fc.getFileMetadata(lfn))

        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        catalogSize = res['Value']['Size']
        if catalogSize != storageSize:
            gLogger.info(
                "PFNZeroSize replica (%d) size found to differ from registered metadata. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID,
                                                   'CatalogPFNSizeMismatch')
        return self.__updateCompletedFiles('PFNZeroSize', fileID)

    ############################################################################################

    def resolveLFNZeroReplicas(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolves the LFNZeroReplicas prognosis
    """
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.getReplicas(lfn, allStatus=True))
        if res['OK'] and res['Value']:
            gLogger.info("LFNZeroReplicas file (%d) found to have replicas" %
                         fileID)
        else:
            gLogger.info(
                "LFNZeroReplicas file (%d) does not have replicas. Checking storage..."
                % fileID)
            pfnsFound = False
            for storageElementName in sorted(
                    gConfig.getValue(
                        'Resources/StorageElementGroups/Tier1_MC_M-DST', [])):
                res = self.__getStoragePathExists([lfn], storageElementName)
                if res['OK'] and lfn in res['Value']:
                    gLogger.info(
                        "LFNZeroReplicas file (%d) found storage file at %s" %
                        (fileID, storageElementName))
                    self.__reportProblematicReplicas(
                        [(lfn, 'deprecatedUrl', storageElementName,
                          'PFNNotRegistered')], storageElementName,
                        'PFNNotRegistered')
                    pfnsFound = True
            if not pfnsFound:
                gLogger.info(
                    "LFNZeroReplicas file (%d) did not have storage files. Removing..."
                    % fileID)
                res = returnSingleResult(self.fc.removeFile(lfn))
                if not res['OK']:
                    gLogger.error('DataIntegrityClient: failed to remove file',
                                  res['Message'])
                    # Increment the number of retries for this file
                    self.incrementProblematicRetry(fileID)
                    return res
                gLogger.info("LFNZeroReplicas file (%d) removed from catalog" %
                             fileID)
        # If we get here the problem is solved so we can update the integrityDB
        return self.__updateCompletedFiles('LFNZeroReplicas', fileID)
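

# Minimal usage sketch (an assumption, not part of the original class): every
# method follows the standard DIRAC S_OK/S_ERROR result convention, so callers
# check res["OK"] before using res["Value"]. The LFN directory is illustrative.
if __name__ == "__main__":
    integrityClient = DataIntegrityClient()
    res = integrityClient.catalogDirectoryToSE("/somevo/data/run001")  # hypothetical LFN directory
    if res["OK"]:
        print("Checked %s catalog replicas" % len(res["Value"]["CatalogReplicas"]))
    else:
        print("LFC->SE check failed: %s" % res["Message"])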
from DIRAC.RequestManagementSystem.private.RequestValidator import RequestValidator
from DIRAC.Resources.Catalog.FileCatalog import FileCatalog

reqClient = ReqClient()
fc = FileCatalog()

for lfnList in breakListIntoChunks( lfns, 100 ):

  oRequest = Request()
  oRequest.RequestName = "%s_%s" % ( md5( repr( time.time() ) ).hexdigest()[:16], md5( repr( time.time() ) ).hexdigest()[:16] )

  replicateAndRegister = Operation()
  replicateAndRegister.Type = 'ReplicateAndRegister'
  replicateAndRegister.TargetSE = targetSE

  res = fc.getFileMetadata( lfnList )
  if not res['OK']:
    print "Can't get file metadata: %s" % res['Message']
    DIRAC.exit( 1 )
  if res['Value']['Failed']:
    print "Could not get the file metadata of the following, so skipping them:"
    for fFile in res['Value']['Failed']:
      print fFile

  lfnMetadata = res['Value']['Successful']

  for lfn in lfnMetadata:
    rarFile = File()
    rarFile.LFN = lfn
    rarFile.Size = lfnMetadata[lfn]['Size']
    rarFile.Checksum = lfnMetadata[lfn]['Checksum']
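    # A plausible completion of this truncated loop (an assumption), following
    # the ReplicateAndRegister pattern used elsewhere in this listing:
    rarFile.ChecksumType = 'ADLER32'
    replicateAndRegister.addFile( rarFile )

  oRequest.addOperation( replicateAndRegister )
  res = reqClient.putRequest( oRequest )
  if not res['OK']:
    print "Can't put request: %s" % res['Message']
    DIRAC.exit( 1 )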
Exemplo n.º 7
0
requestOperation = 'RemoveReplica'
if targetSE == 'All':
    requestOperation = 'RemoveFile'

for lfnList in breakListIntoChunks(lfns, 100):

    oRequest = Request()
    requestName = "%s_%s" % (md5(repr(time.time())).hexdigest()[:16],
                             md5(repr(time.time())).hexdigest()[:16])
    oRequest.RequestName = requestName

    oOperation = Operation()
    oOperation.Type = requestOperation
    oOperation.TargetSE = targetSE

    res = fc.getFileMetadata(lfnList)
    if not res['OK']:
        print("Can't get file metadata: %s" % res['Message'])
        DIRAC.exit(1)
    if res['Value']['Failed']:
        print(
            "Could not get the file metadata of the following, so skipping them:"
        )
        for fFile in res['Value']['Failed']:
            print(fFile)

    lfnMetadata = res['Value']['Successful']

    for lfn in lfnMetadata:
        rarFile = File()
        rarFile.LFN = lfn
    from DIRAC.RequestManagementSystem.Client.Operation import Operation
    from DIRAC.RequestManagementSystem.Client.File import File
    from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
    from DIRAC.Resources.Catalog.FileCatalog import FileCatalog
    from DIRAC.Core.Utilities.List import breakListIntoChunks

    lfnChunks = breakListIntoChunks(lfnList, 100)
    multiRequests = len(lfnChunks) > 1

    error = 0
    count = 0
    reqClient = ReqClient()
    fc = FileCatalog()
    requestIDs = []
    for lfnChunk in lfnChunks:
        metaDatas = fc.getFileMetadata(lfnChunk)
        if not metaDatas["OK"]:
            gLogger.error("unable to read metadata for lfns: %s" %
                          metaDatas["Message"])
            error = -1
            continue
        metaDatas = metaDatas["Value"]
        for failedLFN, reason in metaDatas["Failed"].items():
            gLogger.error("skipping %s: %s" % (failedLFN, reason))
        lfnChunk = set(metaDatas["Successful"])

        if not lfnChunk:
            gLogger.error("LFN list is empty!!!")
            error = -1
            continue
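
        # A plausible continuation of the truncated chunk loop (an assumption),
        # mirroring the MoveReplica script earlier in this listing. Request,
        # md5, time and gLogger are assumed to be imported above, and
        # requestOperation / targetSE to have been derived from the script
        # arguments, as in the snippet preceding this one:
        count += 1
        request = Request()
        request.RequestName = "%s_%s" % (
            md5(repr(time.time()).encode()).hexdigest()[:16],
            md5(repr(time.time()).encode()).hexdigest()[:16],
        )
        removal = Operation()
        removal.Type = requestOperation  # assumed: 'RemoveFile' or 'RemoveReplica'
        removal.TargetSE = targetSE  # assumed: parsed from the script arguments
        for lfn in lfnChunk:
            opFile = File()
            opFile.LFN = lfn
            removal.addFile(opFile)
        request.addOperation(removal)
        res = reqClient.putRequest(request)
        if not res["OK"]:
            gLogger.error("unable to put request: %s" % res["Message"])
            error = -1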
Exemplo n.º 9
0
class DataIntegrityClient( Client ):

  """  
  The following methods are supported in the service but are not mentioned explicitly here:

          getProblematic()
             Obtains a problematic file from the IntegrityDB based on the LastUpdate time

          getPrognosisProblematics(prognosis)
            Obtains all the problematics of a particular prognosis from the integrityDB

          getProblematicsSummary()
            Obtains a count of the number of problematics for each prognosis found

          getDistinctPrognosis()
            Obtains the distinct prognosis found in the integrityDB

          getTransformationProblematics(prodID)
            Obtains the problematics for a given production

          incrementProblematicRetry(fileID)
            Increments the retry count for the supplied file ID

          changeProblematicPrognosis(fileID,newPrognosis)
            Changes the prognosis of the supplied file to the new prognosis

          setProblematicStatus(fileID,status)
            Updates the status of a problematic in the integrityDB

          removeProblematic(fileID)
            This removes the specified file ID from the integrity DB

          insertProblematic(sourceComponent,fileMetadata)
            Inserts file with supplied metadata into the integrity DB
 
  """

  def __init__( self, **kwargs ):

    Client.__init__( self, **kwargs )
    self.setServer( 'DataManagement/DataIntegrity' )
    self.dm = DataManager()
    self.fc = FileCatalog()

  ##########################################################################
  #
  # This section contains the specific methods for LFC->SE checks
  #

  def catalogDirectoryToSE( self, lfnDir ):
    """ This obtains the replica and metadata information from the catalog for the supplied directory and checks against the storage elements.
    """
    gLogger.info( "-" * 40 )
    gLogger.info( "Performing the LFC->SE check" )
    gLogger.info( "-" * 40 )
    if type( lfnDir ) in types.StringTypes:
      lfnDir = [lfnDir]
    res = self.__getCatalogDirectoryContents( lfnDir )
    if not res['OK']:
      return res
    replicas = res['Value']['Replicas']
    catalogMetadata = res['Value']['Metadata']
    res = self.__checkPhysicalFiles( replicas, catalogMetadata )
    if not res['OK']:
      return res
    resDict = {'CatalogMetadata':catalogMetadata, 'CatalogReplicas':replicas}
    return S_OK( resDict )

  def catalogFileToSE( self, lfns ):
    """ This obtains the replica and metadata information from the catalog and checks against the storage elements.
    """
    gLogger.info( "-" * 40 )
    gLogger.info( "Performing the LFC->SE check" )
    gLogger.info( "-" * 40 )
    if type( lfns ) in types.StringTypes:
      lfns = [lfns]
    res = self.__getCatalogMetadata( lfns )
    if not res['OK']:
      return res
    catalogMetadata = res['Value']
    res = self.__getCatalogReplicas( catalogMetadata.keys() )
    if not res['OK']:
      return res
    replicas = res['Value']
    res = self.__checkPhysicalFiles( replicas, catalogMetadata )
    if not res['OK']:
      return res
    resDict = {'CatalogMetadata':catalogMetadata, 'CatalogReplicas':replicas}
    return S_OK( resDict )

  def checkPhysicalFiles( self, replicas, catalogMetadata, ses = [] ):
    """ This obtains takes the supplied replica and metadata information obtained from the catalog and checks against the storage elements.
    """
    gLogger.info( "-" * 40 )
    gLogger.info( "Performing the LFC->SE check" )
    gLogger.info( "-" * 40 )
    return self.__checkPhysicalFiles( replicas, catalogMetadata, ses = ses )

  def __checkPhysicalFiles( self, replicas, catalogMetadata, ses = [] ):
    """ This obtains the physical file metadata and checks the metadata against the catalog entries
    """
    sePfns = {}
    pfnLfns = {}
    for lfn, replicaDict in replicas.items():
      for se, pfn in replicaDict.items():
        if ( ses ) and ( se not in ses ):
          continue
        if not sePfns.has_key( se ):
          sePfns[se] = []
        sePfns[se].append( pfn )
        pfnLfns[pfn] = lfn
    gLogger.info( '%s %s' % ( 'Storage Element'.ljust( 20 ), 'Replicas'.rjust( 20 ) ) )
    for site in sortList( sePfns.keys() ):
      files = len( sePfns[site] )
      gLogger.info( '%s %s' % ( site.ljust( 20 ), str( files ).rjust( 20 ) ) )

    for se in sortList( sePfns.keys() ):
      pfns = sePfns[se]
      pfnDict = {}
      for pfn in pfns:
        pfnDict[pfn] = pfnLfns[pfn]
      sizeMismatch = []
      res = self.__checkPhysicalFileMetadata( pfnDict, se )
      if not res['OK']:
        gLogger.error( 'Failed to get physical file metadata.', res['Message'] )
        return res
      for pfn, metadata in res['Value'].items():
        if catalogMetadata.has_key( pfnLfns[pfn] ):
          if ( metadata['Size'] != catalogMetadata[pfnLfns[pfn]]['Size'] ) and ( metadata['Size'] != 0 ):
            sizeMismatch.append( ( pfnLfns[pfn], pfn, se, 'CatalogPFNSizeMismatch' ) )
      if sizeMismatch:
        self.__reportProblematicReplicas( sizeMismatch, se, 'CatalogPFNSizeMismatch' )
    return S_OK()

  def __checkPhysicalFileMetadata( self, pfnLfns, se ):
    """ Check obtain the physical file metadata and check the files are available
    """
    gLogger.info( 'Checking the integrity of %s physical files at %s' % ( len( pfnLfns ), se ) )


    res = StorageElement( se ).getFileMetadata( pfnLfns.keys() )

    if not res['OK']:
      gLogger.error( 'Failed to get metadata for pfns.', res['Message'] )
      return res
    pfnMetadataDict = res['Value']['Successful']
    # If the replicas are completely missing
    missingReplicas = []
    for pfn, reason in res['Value']['Failed'].items():
      if re.search( 'File does not exist', reason ):
        missingReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNMissing' ) )
    if missingReplicas:
      self.__reportProblematicReplicas( missingReplicas, se, 'PFNMissing' )
    lostReplicas = []
    unavailableReplicas = []
    zeroSizeReplicas = []
    # If the files are not accessible
    for pfn, pfnMetadata in pfnMetadataDict.items():
      if pfnMetadata['Lost']:
        lostReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNLost' ) )
      if pfnMetadata['Unavailable']:
        unavailableReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNUnavailable' ) )
      if pfnMetadata['Size'] == 0:
        zeroSizeReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNZeroSize' ) )
    if lostReplicas:
      self.__reportProblematicReplicas( lostReplicas, se, 'PFNLost' )
    if unavailableReplicas:
      self.__reportProblematicReplicas( unavailableReplicas, se, 'PFNUnavailable' )
    if zeroSizeReplicas:
      self.__reportProblematicReplicas( zeroSizeReplicas, se, 'PFNZeroSize' )
    gLogger.info( 'Checking the integrity of physical files at %s complete' % se )
    return S_OK( pfnMetadataDict )

  ##########################################################################
  #
  # This section contains the specific methods for SE->LFC checks
  #

  def storageDirectoryToCatalog( self, lfnDir, storageElement ):
    """ This obtains the file found on the storage element in the supplied directories and determines whether they exist in the catalog and checks their metadata elements
    """
    gLogger.info( "-" * 40 )
    gLogger.info( "Performing the SE->LFC check at %s" % storageElement )
    gLogger.info( "-" * 40 )
    if type( lfnDir ) in types.StringTypes:
      lfnDir = [lfnDir]
    res = self.__getStorageDirectoryContents( lfnDir, storageElement )
    if not res['OK']:
      return res
    storageFileMetadata = res['Value']
    if storageFileMetadata:
      return self.__checkCatalogForSEFiles( storageFileMetadata, storageElement )
    return S_OK( {'CatalogMetadata':{}, 'StorageMetadata':{}} )

  def __checkCatalogForSEFiles( self, storageMetadata, storageElement ):
    gLogger.info( 'Checking %s storage files exist in the catalog' % len( storageMetadata ) )

    # RF_NOTE : this comment is completely wrong
    # First get all the PFNs as they should be registered in the catalog
    res = StorageElement( storageElement ).getPfnForProtocol( storageMetadata.keys(), withPort = False )
    if not res['OK']:
      gLogger.error( "Failed to get registered PFNs for physical files", res['Message'] )
      return res
    for pfn, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain registered PFN for physical file', '%s %s' % ( pfn, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to obtain registered PFNs from physical file' )
    for original, registered in res['Value']['Successful'].items():
      storageMetadata[registered] = storageMetadata.pop( original )
    # Determine whether these PFNs are registered and if so obtain the LFN
    res = self.fc.getLFNForPFN( storageMetadata.keys() )
    if not res['OK']:
      gLogger.error( "Failed to get registered LFNs for PFNs", res['Message'] )
      return res
    failedPfns = res['Value']['Failed']
    notRegisteredPfns = []
    for pfn, error in failedPfns.items():
      if re.search( 'No such file or directory', error ):
        notRegisteredPfns.append( ( storageMetadata[pfn]['LFN'], pfn, storageElement, 'PFNNotRegistered' ) )
        failedPfns.pop( pfn )
    if notRegisteredPfns:
      self.__reportProblematicReplicas( notRegisteredPfns, storageElement, 'PFNNotRegistered' )
    if failedPfns:
      return S_ERROR( 'Failed to obtain LFNs for PFNs' )
    pfnLfns = res['Value']['Successful']
    for pfn in storageMetadata.keys():
      pfnMetadata = storageMetadata.pop( pfn )
      if pfn in pfnLfns:
        lfn = pfnLfns[pfn]
        storageMetadata[lfn] = pfnMetadata
        storageMetadata[lfn]['PFN'] = pfn
    # For the LFNs found to be registered obtain the file metadata from the catalog and verify against the storage metadata
    res = self.__getCatalogMetadata( storageMetadata.keys() )
    if not res['OK']:
      return res
    catalogMetadata = res['Value']
    sizeMismatch = []
    for lfn, lfnCatalogMetadata in catalogMetadata.items():
      lfnStorageMetadata = storageMetadata[lfn]
      if ( lfnStorageMetadata['Size'] != lfnCatalogMetadata['Size'] ) and ( lfnStorageMetadata['Size'] != 0 ):
        sizeMismatch.append( ( lfn, storageMetadata[lfn]['PFN'], storageElement, 'CatalogPFNSizeMismatch' ) )
    if sizeMismatch:
      self.__reportProblematicReplicas( sizeMismatch, storageElement, 'CatalogPFNSizeMismatch' )
    gLogger.info( 'Checking storage files exist in the catalog complete' )
    resDict = {'CatalogMetadata':catalogMetadata, 'StorageMetadata':storageMetadata}
    return S_OK( resDict )

  def getStorageDirectoryContents( self, lfnDir, storageElement ):
    """ This obtains takes the supplied lfn directories and recursively obtains the files in the supplied storage element
    """
    return self.__getStorageDirectoryContents( lfnDir, storageElement )

  def __getStorageDirectoryContents( self, lfnDir, storageElement ):
    """ Obtians the contents of the supplied directory on the storage
    """
    gLogger.info( 'Obtaining the contents for %s directories at %s' % ( len( lfnDir ), storageElement ) )

    se = StorageElement( storageElement )
    res = se.getPfnForLfn( lfnDir )

    if not res['OK']:
      gLogger.error( "Failed to get PFNs for directories", res['Message'] )
      return res
    for directory, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain directory PFN from LFNs', '%s %s' % ( directory, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to obtain directory PFN from LFNs' )
    storageDirectories = res['Value']['Successful'].values()
    res = se.exists( storageDirectories )
    if not res['OK']:
      gLogger.error( "Failed to obtain existance of directories", res['Message'] )
      return res
    for directory, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to determine existance of directory', '%s %s' % ( directory, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to determine existance of directory' )
    directoryExists = res['Value']['Successful']
    activeDirs = []
    for directory in sortList( directoryExists.keys() ):
      exists = directoryExists[directory]
      if exists:
        activeDirs.append( directory )
    allFiles = {}
    while len( activeDirs ) > 0:
      currentDir = activeDirs[0]
      res = se.listDirectory( currentDir )
      activeDirs.remove( currentDir )
      if not res['OK']:
        gLogger.error( 'Failed to get directory contents', res['Message'] )
        return res
      elif currentDir in res['Value']['Failed']:
        gLogger.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Value']['Failed'][currentDir] ) )
        return S_ERROR( res['Value']['Failed'][currentDir] )
      else:
        dirContents = res['Value']['Successful'][currentDir]
        activeDirs.extend( dirContents['SubDirs'] )
        fileMetadata = dirContents['Files']

        # se.getPfnPath does not follow the Successful/Failed convention, so getLfnForPfn is used instead
        res = se.getLfnForPfn( fileMetadata.keys() )
        if not res['OK']:
          gLogger.error( 'Failed to get directory content LFNs', res['Message'] )
          return res

        for pfn, error in res['Value']['Failed'].items():
          gLogger.error( "Failed to get LFN for PFN", "%s %s" % ( pfn, error ) )
        if res['Value']['Failed']:
          return S_ERROR( "Failed to get LFNs for PFNs" )
        pfnLfns = res['Value']['Successful']
        for pfn, lfn in pfnLfns.items():
          fileMetadata[pfn]['LFN'] = lfn
        allFiles.update( fileMetadata )
    zeroSizeFiles = []
    lostFiles = []
    unavailableFiles = []
    for pfn in sortList( allFiles.keys() ):
      if os.path.basename( pfn ) == 'dirac_directory':
        allFiles.pop( pfn )
      else:
        metadata = allFiles[pfn]
        if metadata['Size'] == 0:
          zeroSizeFiles.append( ( metadata['LFN'], pfn, storageElement, 'PFNZeroSize' ) )
        # if metadata['Lost']:
        #  lostFiles.append((metadata['LFN'],pfn,storageElement,'PFNLost'))
        # if metadata['Unavailable']:
        #  unavailableFiles.append((metadata['LFN'],pfn,storageElement,'PFNUnavailable'))
    if zeroSizeFiles:
      self.__reportProblematicReplicas( zeroSizeFiles, storageElement, 'PFNZeroSize' )
    if lostFiles:
      self.__reportProblematicReplicas( lostFiles, storageElement, 'PFNLost' )
    if unavailableFiles:
      self.__reportProblematicReplicas( unavailableFiles, storageElement, 'PFNUnavailable' )
    gLogger.info( 'Obtained a total of %s files for directories at %s' % ( len( allFiles ), storageElement ) )
    return S_OK( allFiles )
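
  # The dictionary returned above maps each storage PFN to its metadata, with
  # the registered LFN added under the 'LFN' key. A sketch of how a caller
  # might consume it (the directory and SE names are illustrative):
  #
  #   res = client.getStorageDirectoryContents( ['/lhcb/data/someDir'], 'CERN-RAW' )
  #   if res['OK']:
  #     for pfn, metadata in res['Value'].items():
  #       gLogger.verbose( '%s -> %s (%s bytes)' % ( pfn, metadata['LFN'], metadata['Size'] ) )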

  def __getStoragePathExists( self, lfnPaths, storageElement ):
    gLogger.info( 'Determining the existence of %d files at %s' % ( len( lfnPaths ), storageElement ) )

    se = StorageElement( storageElement )
    res = se.getPfnForLfn( lfnPaths )
    if not res['OK']:
      gLogger.error( "Failed to get PFNs for LFNs", res['Message'] )
      return res
    for lfnPath, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain PFN from LFN', '%s %s' % ( lfnPath, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to obtain PFNs from LFNs' )
    lfnPfns = res['Value']['Successful']
    pfnLfns = {}
    for lfn, pfn in lfnPfns.items():
      pfnLfns[pfn] = lfn

    res = se.exists( pfnLfns )
    if not res['OK']:
      gLogger.error( "Failed to obtain existance of paths", res['Message'] )
      return res
    for lfnPath, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to determine existance of path', '%s %s' % ( lfnPath, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to determine existance of paths' )
    pathExists = res['Value']['Successful']
    resDict = {}
    for pfn, exists in pathExists.items():
      if exists:
        resDict[pfnLfns[pfn]] = pfn
    return S_OK( resDict )

  ##########################################################################
  #
  # This section contains the specific methods for obtaining replica and metadata information from the catalog
  #

  def __getCatalogDirectoryContents( self, lfnDir ):
    """ Obtain the contents of the supplied directory
    """
    gLogger.info( 'Obtaining the catalog contents for %s directories' % len( lfnDir ) )

    activeDirs = lfnDir
    allFiles = {}
    while len( activeDirs ) > 0:
      currentDir = activeDirs[0]
      res = self.fc.listDirectory( currentDir )
      activeDirs.remove( currentDir )
      if not res['OK']:
        gLogger.error( 'Failed to get directory contents', res['Message'] )
        return res
      elif currentDir in res['Value']['Failed']:
        gLogger.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Value']['Failed'][currentDir] ) )
      else:
        dirContents = res['Value']['Successful'][currentDir]
        activeDirs.extend( dirContents['SubDirs'] )
        allFiles.update( dirContents['Files'] )

    zeroReplicaFiles = []
    zeroSizeFiles = []
    allReplicaDict = {}
    allMetadataDict = {}
    for lfn, lfnDict in allFiles.items():
      lfnReplicas = {}
      for se, replicaDict in lfnDict['Replicas'].items():
        lfnReplicas[se] = replicaDict['PFN']
      if not lfnReplicas:
        zeroReplicaFiles.append( lfn )
      allReplicaDict[lfn] = lfnReplicas
      allMetadataDict[lfn] = lfnDict['MetaData']
      if lfnDict['MetaData']['Size'] == 0:
        zeroSizeFiles.append( lfn )
    if zeroReplicaFiles:
      self.__reportProblematicFiles( zeroReplicaFiles, 'LFNZeroReplicas' )
    if zeroSizeFiles:
      self.__reportProblematicFiles( zeroSizeFiles, 'LFNZeroSize' )
    gLogger.info( 'Obtained a total of %s files for the supplied directories' % len( allMetadataDict ) )
    resDict = {'Metadata':allMetadataDict, 'Replicas':allReplicaDict}
    return S_OK( resDict )
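
  # A sketch of the result structure returned above (LFNs, SEs and sizes are
  # hypothetical):
  #
  #   res['Value'] == { 'Metadata': { '/lhcb/data/file1': { 'Size': 1024, ... } },
  #                     'Replicas': { '/lhcb/data/file1': { 'CERN-DST': '<pfn>' } } }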

  def __getCatalogReplicas( self, lfns ):
    """ Obtain the file replicas from the catalog while checking that there are replicas
    """
    gLogger.info( 'Obtaining the replicas for %s files' % len( lfns ) )

    zeroReplicaFiles = []
    res = self.fc.getReplicas( lfns, allStatus = True )
    if not res['OK']:
      gLogger.error( 'Failed to get catalog replicas', res['Message'] )
      return res
    allReplicas = res['Value']['Successful']
    for lfn, error in res['Value']['Failed'].items():
      if re.search( 'File has zero replicas', error ):
        zeroReplicaFiles.append( lfn )
    if zeroReplicaFiles:
      self.__reportProblematicFiles( zeroReplicaFiles, 'LFNZeroReplicas' )
    gLogger.info( 'Obtaining the replicas for files complete' )
    return S_OK( allReplicas )

  def __getCatalogMetadata( self, lfns ):
    """ Obtain the file metadata from the catalog while checking they exist
    """
    if not lfns:
      return S_OK( {} )
    gLogger.info( 'Obtaining the catalog metadata for %s files' % len( lfns ) )

    missingCatalogFiles = []
    zeroSizeFiles = []
    res = self.fc.getFileMetadata( lfns )
    if not res['OK']:
      gLogger.error( 'Failed to get catalog metadata', res['Message'] )
      return res
    allMetadata = res['Value']['Successful']
    for lfn, error in res['Value']['Failed'].items():
      if re.search( 'No such file or directory', error ):
        missingCatalogFiles.append( lfn )
    if missingCatalogFiles:
      self.__reportProblematicFiles( missingCatalogFiles, 'LFNCatalogMissing' )
    for lfn, metadata in allMetadata.items():
      if metadata['Size'] == 0:
        zeroSizeFiles.append( lfn )
    if zeroSizeFiles:
      self.__reportProblematicFiles( zeroSizeFiles, 'LFNZeroSize' )
    gLogger.info( 'Obtaining the catalog metadata complete' )
    return S_OK( allMetadata )
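
  # Note that the catalog reports missing files through the 'Failed' dictionary
  # rather than through an error return, which is why the method above matches
  # the error string per LFN. A sketch of that convention (the exact message
  # text may vary between catalog implementations):
  #
  #   res = self.fc.getFileMetadata( lfns )
  #   res['Value']['Failed'] == { '/lhcb/data/missing': 'No such file or directory' }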

  ##########################################################################
  #
  # This section contains the methods for inserting problematic files into the integrity DB
  #

  def __reportProblematicFiles( self, lfns, reason ):
    """ Simple wrapper function around setFileProblematic """
    gLogger.info( 'The following %s files were found with %s' % ( len( lfns ), reason ) )
    for lfn in sortList( lfns ):
      gLogger.info( lfn )
    res = self.setFileProblematic( lfns, reason, sourceComponent = 'DataIntegrityClient' )
    if not res['OK']:
      gLogger.info( 'Failed to update integrity DB with files', res['Message'] )
    else:
      gLogger.info( 'Successfully updated integrity DB with files' )

  def setFileProblematic( self, lfn, reason, sourceComponent = '' ):
    """ This method updates the status of the file in the FileCatalog and the IntegrityDB

        lfn - the lfn of the file
        reason - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
    if type( lfn ) == types.ListType:
      lfns = lfn
    elif type( lfn ) == types.StringType:
      lfns = [lfn]
    else:
      errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
      gLogger.error( errStr )
      return S_ERROR( errStr )
    gLogger.info( "DataIntegrityClient.setFileProblematic: Attempting to update %s files." % len( lfns ) )
    fileMetadata = {}
    for lfn in lfns:
      fileMetadata[lfn] = {'Prognosis':reason, 'LFN':lfn, 'PFN':'', 'SE':''}
    res = self.insertProblematic( sourceComponent, fileMetadata )
    if not res['OK']:
      gLogger.error( "DataIntegrityClient.setReplicaProblematic: Failed to insert problematics to integrity DB" )
    return res
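
  # Example call (the LFNs and prognosis are illustrative):
  #
  #   res = client.setFileProblematic( [ '/lhcb/data/file1', '/lhcb/data/file2' ],
  #                                    'LFNZeroSize',
  #                                    sourceComponent = 'DataIntegrityClient' )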

  def __reportProblematicReplicas( self, replicaTuple, se, reason ):
    """ Simple wrapper function around setReplicaProblematic """
    gLogger.info( 'The following %s files had %s at %s' % ( len( replicaTuple ), reason, se ) )
    for lfn, pfn, se, reason in sortList( replicaTuple ):
      if lfn:
        gLogger.info( lfn )
      else:
        gLogger.info( pfn )
    res = self.setReplicaProblematic( replicaTuple, sourceComponent = 'DataIntegrityClient' )
    if not res['OK']:
      gLogger.info( 'Failed to update integrity DB with replicas', res['Message'] )
    else:
      gLogger.info( 'Successfully updated integrity DB with replicas' )

  def setReplicaProblematic( self, replicaTuple, sourceComponent = '' ):
    """ This method updates the status of the replica in the FileCatalog and the IntegrityDB
        The supplied replicaTuple should be a tuple, or list of tuples, of the form ( lfn, pfn, se, prognosis )

        lfn - the lfn of the file
        pfn - the pfn if available (otherwise '')
        se - the storage element of the problematic replica (otherwise '')
        prognosis - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
    if type( replicaTuple ) == types.TupleType:
      replicaTuple = [replicaTuple]
    elif type( replicaTuple ) == types.ListType:
      pass
    else:
      errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
      gLogger.error( errStr )
      return S_ERROR( errStr )
    gLogger.info( "DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas." % len( replicaTuple ) )
    replicaDict = {}
    for lfn, pfn, se, reason in replicaTuple:
      replicaDict[lfn] = {'Prognosis':reason, 'LFN':lfn, 'PFN':pfn, 'SE':se}
    res = self.insertProblematic( sourceComponent, replicaDict )
    if not res['OK']:
      gLogger.error( "DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB" )
      return res
    for lfn in replicaDict.keys():
      replicaDict[lfn]['Status'] = 'Problematic'

    res = self.fc.setReplicaStatus( replicaDict )
    if not res['OK']:
      errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
      gLogger.error( errStr, res['Message'] )
      return res
    failed = res['Value']['Failed']
    successful = res['Value']['Successful']
    resDict = {'Successful':successful, 'Failed':failed}
    return S_OK( resDict )
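
  # Example call with a single replica tuple of the form ( lfn, pfn, se, prognosis );
  # all values are illustrative:
  #
  #   res = client.setReplicaProblematic( ( '/lhcb/data/file1',
  #                                         'srm://srm.example.org/lhcb/data/file1',
  #                                         'CERN-DST', 'PFNZeroSize' ) )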

  ##########################################################################
  #
  # This section contains the resolution methods for various prognoses
  #

  def __updateCompletedFiles( self, prognosis, fileID ):
    gLogger.info( "%s file (%d) is resolved" % ( prognosis, fileID ) )
    return self.setProblematicStatus( fileID, 'Resolved' )

  def __returnProblematicError( self, fileID, res ):
    self.incrementProblematicRetry( fileID )
    gLogger.error( res['Message'] )
    return res

  def __getRegisteredPFNLFN( self, pfn, storageElement ):

    res = StorageElement( storageElement ).getPfnForProtocol( pfn, withPort = False )
    if not res['OK']:
      gLogger.error( "Failed to get registered PFN for physical files", res['Message'] )
      return res
    for pfn, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain registered PFN for physical file', '%s %s' % ( pfn, error ) )
      return S_ERROR( 'Failed to obtain registered PFNs for physical files' )
    registeredPFN = res['Value']['Successful'][pfn]
    res = Utils.executeSingleFileOrDirWrapper( self.fc.getLFNForPFN( registeredPFN ) )
    if ( not res['OK'] ) and re.search( 'No such file or directory', res['Message'] ):
      return S_OK( False )
    return S_OK( res['Value'] )

  def __updateReplicaToChecked( self, problematicDict ):
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']
    prognosis = problematicDict['Prognosis']
    problematicDict['Status'] = 'Checked'

    res = Utils.executeSingleFileOrDirWrapper( self.fc.setReplicaStatus( {lfn:problematicDict} ) )

    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    gLogger.info( "%s replica (%d) is updated to Checked status" % ( prognosis, fileID ) )
    return self.__updateCompletedFiles( prognosis, fileID )

  def resolveCatalogPFNSizeMismatch( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the CatalogPFNSizeMismatch prognosis
    """
    lfn = problematicDict['LFN']
    pfn = problematicDict['PFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']


    res = Utils.executeSingleFileOrDirWrapper( self.fc.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    catalogSize = res['Value']
    res = Utils.executeSingleFileOrDirWrapper( StorageElement( se ).getFileSize( pfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageSize = res['Value']
    bkKCatalog = FileCatalog( ['BookkeepingDB'] )
    res = Utils.executeSingleFileOrDirWrapper( bkKCatalog.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    bookkeepingSize = res['Value']
    if bookkeepingSize == catalogSize == storageSize:
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) matched all registered sizes." % fileID )
      return self.__updateReplicaToChecked( problematicDict )
    if ( catalogSize == bookkeepingSize ):
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also" % fileID )
      res = Utils.executeSingleFileOrDirWrapper( self.fc.getReplicas( lfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      if len( res['Value'] ) <= 1:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has no other replicas." % fileID )
        return S_ERROR( "Not removing catalog file mismatch since the only replica" )
      else:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..." % fileID )
        res = self.dm.removeReplica( se, lfn )
        if not res['OK']:
          return self.__returnProblematicError( fileID, res )
        return self.__updateCompletedFiles( 'CatalogPFNSizeMismatch', fileID )
    if ( catalogSize != bookkeepingSize ) and ( bookkeepingSize == storageSize ):
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size" % fileID )
      res = self.__updateReplicaToChecked( problematicDict )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      return self.changeProblematicPrognosis( fileID, 'BKCatalogSizeMismatch' )
    gLogger.info( "CatalogPFNSizeMismatch replica (%d) all sizes found mismatch. Updating retry count" % fileID )
    return self.incrementProblematicRetry( fileID )
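
  # Decision table implemented above, summarised for reference:
  #   catalog == bookkeeping == storage  -> replica is fine, mark it Checked
  #   catalog == bookkeeping != storage  -> remove the bad replica if others exist
  #   bookkeeping == storage != catalog  -> mark Checked, reprognose as BKCatalogSizeMismatch
  #   all three sizes differ             -> increment the retry count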

  def resolvePFNNotRegistered( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNNotRegistered prognosis
    """
    lfn = problematicDict['LFN']
    pfn = problematicDict['PFN']
    seName = problematicDict['SE']
    fileID = problematicDict['FileID']

    se = StorageElement( seName )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if not res['Value']:
      # The file does not exist in the catalog
      res = Utils.executeSingleFileOrDirWrapper( se.removeFile( pfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )
    res = Utils.executeSingleFileOrDirWrapper( se.getFileMetadata( pfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      gLogger.info( "PFNNotRegistered replica (%d) found to be missing." % fileID )
      return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )
    elif not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageMetadata = res['Value']
    if storageMetadata['Lost']:
      gLogger.info( "PFNNotRegistered replica (%d) found to be Lost. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNLost' )
    if storageMetadata['Unavailable']:
      gLogger.info( "PFNNotRegistered replica (%d) found to be Unavailable. Updating retry count" % fileID )
      return self.incrementProblematicRetry( fileID )

    # HACK until we can obtain the space token descriptions through GFAL
    site = seName.split( '_' )[0].split( '-' )[0]
    if not storageMetadata['Cached']:
      if lfn.endswith( '.raw' ):
        seName = '%s-RAW' % site
      else:
        seName = '%s-RDST' % site
    elif storageMetadata['Migrated']:
      if lfn.startswith( '/lhcb/data' ):
        seName = '%s_M-DST' % site
      else:
        seName = '%s_MC_M-DST' % site
    else:
      if lfn.startswith( '/lhcb/data' ):
        seName = '%s-DST' % site
      else:
        seName = '%s_MC-DST' % site

    problematicDict['SE'] = seName
    res = se.getPfnForProtocol( pfn, withPort = False )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    for pfn, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain registered PFN for physical file', '%s %s' % ( pfn, error ) )
      return S_ERROR( 'Failed to obtain registered PFNs for physical files' )
    problematicDict['PFN'] = res['Value']['Successful'][pfn]

    res = Utils.executeSingleFileOrDirWrapper( self.fc.addReplica( {lfn:problematicDict} ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.getFileMetadata( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']['Size'] != storageMetadata['Size']:
      gLogger.info( "PFNNotRegistered replica (%d) found with catalog size mismatch. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'CatalogPFNSizeMismatch' )
    return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )

  def resolveLFNCatalogMissing( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the LFNCatalogMissing prognosis
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']:
      return self.__updateCompletedFiles( 'LFNCatalogMissing', fileID )
    # Remove the file from all catalogs
    # RF_NOTE : here I can do it because it's a single file, but otherwise I would need to sort the path
    res = Utils.executeSingleFileOrDirWrapper( self.fc.removeFile( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    return self.__updateCompletedFiles( 'LFNCatalogMissing', fileID )

  def resolvePFNMissing( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNMissing prognosis
    """
    pfn = problematicDict['PFN']
    se = problematicDict['SE']
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if not res['Value']:
      gLogger.info( "PFNMissing file (%d) no longer exists in catalog" % fileID )
      return self.__updateCompletedFiles( 'PFNMissing', fileID )

    res = Utils.executeSingleFileOrDirWrapper( StorageElement( se ).exists( pfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']:
      gLogger.info( "PFNMissing replica (%d) is no longer missing" % fileID )
      return self.__updateReplicaToChecked( problematicDict )
    gLogger.info( "PFNMissing replica (%d) does not exist" % fileID )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.getReplicas( lfn, allStatus = True ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    replicas = res['Value']
    seSite = se.split( '_' )[0].split( '-' )[0]
    found = False
    for replicaSE in replicas.keys():
      if re.search( seSite, replicaSE ):
        found = True
        problematicDict['SE'] = replicaSE
        se = replicaSE
    if not found:
      gLogger.info( "PFNMissing replica (%d) is no longer registered at SE. Resolved." % fileID )
      return self.__updateCompletedFiles( 'PFNMissing', fileID )
    gLogger.info( "PFNMissing replica (%d) does not exist. Removing from catalog..." % fileID )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.removeReplica( {lfn:problematicDict} ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if len( replicas ) == 1:
      gLogger.info( "PFNMissing replica (%d) had a single replica. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'LFNZeroReplicas' )
    res = self.dm.replicateAndRegister( problematicDict['LFN'], se )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    # If we get here the problem is solved so we can update the integrityDB
    return self.__updateCompletedFiles( 'PFNMissing', fileID )

  def resolvePFNUnavailable( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNUnavailable prognosis
    """
    pfn = problematicDict['PFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper( StorageElement( se ).getFileMetadata( pfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      # The file is no longer Unavailable but has now disappeared completely
      gLogger.info( "PFNUnavailable replica (%d) found to be missing. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    if ( not res['OK'] ) or res['Value']['Unavailable']:
      gLogger.info( "PFNUnavailable replica (%d) found to still be Unavailable" % fileID )
      return self.incrementProblematicRetry( fileID )
    if res['Value']['Lost']:
      gLogger.info( "PFNUnavailable replica (%d) is now found to be Lost. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNLost' )
    gLogger.info( "PFNUnavailable replica (%d) is no longer Unavailable" % fileID )
    # Need to make the replica okay in the Catalog
    return self.__updateReplicaToChecked( problematicDict )

  def resolvePFNZeroSize( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolves the PFNZeroSize prognosis
    """
    pfn = problematicDict['PFN']
    seName = problematicDict['SE']
    fileID = problematicDict['FileID']

    se = StorageElement( seName )

    res = Utils.executeSingleFileOrDirWrapper( se.getFileSize( pfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      gLogger.info( "PFNZeroSize replica (%d) found to be missing. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    storageSize = res['Value']
    if storageSize == 0:
      res = Utils.executeSingleFileOrDirWrapper( se.removeFile( pfn ) )

      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      gLogger.info( "PFNZeroSize replica (%d) removed. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    res = self.__getRegisteredPFNLFN( pfn, seName )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    lfn = res['Value']
    if not lfn:
      gLogger.info( "PFNZeroSize replica (%d) not registered in catalog. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNNotRegistered' )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.getFileMetadata( lfn ) )

    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    catalogSize = res['Value']['Size']
    if catalogSize != storageSize:
      gLogger.info( "PFNZeroSize replica (%d) size found to differ from registered metadata. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'CatalogPFNSizeMismatch' )
    return self.__updateCompletedFiles( 'PFNZeroSize', fileID )

  ############################################################################################

  def resolveLFNZeroReplicas( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolves the LFNZeroReplicas prognosis
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper( self.fc.getReplicas( lfn, allStatus = True ) )
    if res['OK'] and res['Value']:
      gLogger.info( "LFNZeroReplicas file (%d) found to have replicas" % fileID )
    else:
      gLogger.info( "LFNZeroReplicas file (%d) does not have replicas. Checking storage..." % fileID )
      pfnsFound = False
      for storageElementName in sortList( gConfig.getValue( 'Resources/StorageElementGroups/Tier1_MC_M-DST', [] ) ):
        res = self.__getStoragePathExists( [lfn], storageElementName )
        if res['OK'] and lfn in res['Value']:
          gLogger.info( "LFNZeroReplicas file (%d) found storage file at %s" % ( fileID, storageElementName ) )
          pfn = res['Value'][lfn]
          self.__reportProblematicReplicas( [( lfn, pfn, storageElementName, 'PFNNotRegistered' )], storageElementName, 'PFNNotRegistered' )
          pfnsFound = True
      if not pfnsFound:
        gLogger.info( "LFNZeroReplicas file (%d) did not have storage files. Removing..." % fileID )
        res = Utils.executeSingleFileOrDirWrapper( self.fc.removeFile( lfn ) )
        if not res['OK']:
          gLogger.error( res['Message'] )
          # Increment the number of retries for this file
          self.server.incrementProblematicRetry( fileID )
          return res
        gLogger.info( "LFNZeroReplicas file (%d) removed from catalog" % fileID )
    # If we get here the problem is solved so we can update the integrityDB
    return self.__updateCompletedFiles( 'LFNZeroReplicas', fileID )
Exemplo n.º 10
0
class DataIntegrityClient(Client):
    """
  The following methods are supported in the service but are not mentioned explicitly here:

          getProblematic()
             Obtains a problematic file from the IntegrityDB based on the LastUpdate time

          getPrognosisProblematics(prognosis)
            Obtains all the problematics of a particular prognosis from the integrityDB

          getProblematicsSummary()
            Obtains a count of the number of problematics for each prognosis found

          getDistinctPrognosis()
            Obtains the distinct prognosis found in the integrityDB

          getTransformationProblematics(prodID)
            Obtains the problematics for a given production

          incrementProblematicRetry(fileID)
            Increments the retry count for the supplied file ID

          changeProblematicPrognosis(fileID,newPrognosis)
            Changes the prognosis of the supplied file to the new prognosis

          setProblematicStatus(fileID,status)
            Updates the status of a problematic in the integrityDB

          removeProblematic(self,fileID)
            This removes the specified file ID from the integrity DB

          insertProblematic(sourceComponent,fileMetadata)
            Inserts file with supplied metadata into the integrity DB

  """
    def __init__(self, **kwargs):

        super(DataIntegrityClient, self).__init__(**kwargs)
        self.setServer('DataManagement/DataIntegrity')
        self.dm = DataManager()
        self.fc = FileCatalog()

    def setFileProblematic(self, lfn, reason, sourceComponent=''):
        """ This method updates the status of the file in the FileCatalog and the IntegrityDB

        lfn - the lfn of the file
        reason - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
        if isinstance(lfn, list):
            lfns = lfn
        elif isinstance(lfn, basestring):
            lfns = [lfn]
        else:
            errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info(
            "DataIntegrityClient.setFileProblematic: Attempting to update %s files."
            % len(lfns))
        fileMetadata = {}
        for lfn in lfns:
            fileMetadata[lfn] = {
                'Prognosis': reason,
                'LFN': lfn,
                'PFN': '',
                'SE': ''
            }
        res = self.insertProblematic(sourceComponent, fileMetadata)
        if not res['OK']:
            gLogger.error(
                "DataIntegrityClient.setReplicaProblematic: Failed to insert problematics to integrity DB"
            )
        return res

    def reportProblematicReplicas(self, replicaTuple, se, reason):
        """ Simple wrapper function around setReplicaProblematic """
        gLogger.info('The following %s files had %s at %s' %
                     (len(replicaTuple), reason, se))
        for lfn, _pfn, se, reason in sorted(replicaTuple):
            if lfn:
                gLogger.info(lfn)
        res = self.setReplicaProblematic(replicaTuple,
                                         sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with replicas',
                         res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with replicas')

    def setReplicaProblematic(self, replicaTuple, sourceComponent=''):
        """ This method updates the status of the replica in the FileCatalog and the IntegrityDB
        The supplied replicaTuple should be a tuple, or list of tuples, of the form ( lfn, pfn, se, prognosis )

        lfn - the lfn of the file
        pfn - the pfn if available (otherwise '')
        se - the storage element of the problematic replica (otherwise '')
        prognosis - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
        if isinstance(replicaTuple, tuple):
            replicaTuple = [replicaTuple]
        elif isinstance(replicaTuple, list):
            pass
        else:
            errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info(
            "DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas."
            % len(replicaTuple))
        replicaDict = {}
        for lfn, pfn, se, reason in replicaTuple:
            replicaDict[lfn] = {
                'Prognosis': reason,
                'LFN': lfn,
                'PFN': pfn,
                'SE': se
            }
        res = self.insertProblematic(sourceComponent, replicaDict)
        if not res['OK']:
            gLogger.error(
                "DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB"
            )
            return res
        for lfn in replicaDict.keys():
            replicaDict[lfn]['Status'] = 'Problematic'

        res = self.fc.setReplicaStatus(replicaDict)
        if not res['OK']:
            errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
            gLogger.error(errStr, res['Message'])
            return res
        failed = res['Value']['Failed']
        successful = res['Value']['Successful']
        resDict = {'Successful': successful, 'Failed': failed}
        return S_OK(resDict)

    ##########################################################################
    #
    # This section contains the resolution methods for various prognoses
    #

    def __updateCompletedFiles(self, prognosis, fileID):
        gLogger.info("%s file (%d) is resolved" % (prognosis, fileID))
        return self.setProblematicStatus(fileID, 'Resolved')

    def __returnProblematicError(self, fileID, res):
        self.incrementProblematicRetry(fileID)
        gLogger.error('DataIntegrityClient failure', res['Message'])
        return res

    def __updateReplicaToChecked(self, problematicDict):
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']
        prognosis = problematicDict['Prognosis']
        problematicDict['Status'] = 'Checked'

        res = returnSingleResult(
            self.fc.setReplicaStatus({lfn: problematicDict}))

        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        gLogger.info("%s replica (%d) is updated to Checked status" %
                     (prognosis, fileID))
        return self.__updateCompletedFiles(prognosis, fileID)

    def resolveCatalogPFNSizeMismatch(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the CatalogPFNSizeMismatch prognosis
    """
        lfn = problematicDict['LFN']
        se = problematicDict['SE']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        catalogSize = res['Value']
        res = returnSingleResult(StorageElement(se).getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageSize = res['Value']
        bkKCatalog = FileCatalog(['BookkeepingDB'])
        res = returnSingleResult(bkKCatalog.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        bookkeepingSize = res['Value']
        if bookkeepingSize == catalogSize == storageSize:
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) matched all registered sizes."
                % fileID)
            return self.__updateReplicaToChecked(problematicDict)
        if catalogSize == bookkeepingSize:
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also"
                % fileID)
            res = returnSingleResult(self.fc.getReplicas(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            if len(res['Value']) <= 1:
                gLogger.info(
                    "CatalogPFNSizeMismatch replica (%d) has no other replicas."
                    % fileID)
                return S_ERROR(
                    "Not removing catalog file mismatch since the only replica"
                )
            else:
                gLogger.info(
                    "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..."
                    % fileID)
                res = self.dm.removeReplica(se, lfn)
                if not res['OK']:
                    return self.__returnProblematicError(fileID, res)
                return self.__updateCompletedFiles('CatalogPFNSizeMismatch',
                                                   fileID)
        if (catalogSize != bookkeepingSize) and (bookkeepingSize
                                                 == storageSize):
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size"
                % fileID)
            res = self.__updateReplicaToChecked(problematicDict)
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            return self.changeProblematicPrognosis(fileID,
                                                   'BKCatalogSizeMismatch')
        gLogger.info(
            "CatalogPFNSizeMismatch replica (%d) all sizes found mismatch. Updating retry count"
            % fileID)
        return self.incrementProblematicRetry(fileID)

    #FIXME: Unused?
    def resolvePFNNotRegistered(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNNotRegistered prognosis
    """
        lfn = problematicDict['LFN']
        seName = problematicDict['SE']
        fileID = problematicDict['FileID']

        se = StorageElement(seName)
        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if not res['Value']:
            # The file does not exist in the catalog
            res = returnSingleResult(se.removeFile(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            return self.__updateCompletedFiles('PFNNotRegistered', fileID)
        res = returnSingleResult(se.getFileMetadata(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            gLogger.info("PFNNotRegistered replica (%d) found to be missing." %
                         fileID)
            return self.__updateCompletedFiles('PFNNotRegistered', fileID)
        elif not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageMetadata = res['Value']
        if storageMetadata['Lost']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found to be Lost. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNLost')
        if storageMetadata['Unavailable']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found to be Unavailable. Updating retry count"
                % fileID)
            return self.incrementProblematicRetry(fileID)

        # HACK until we can obtain the space token descriptions through GFAL
        site = seName.split('_')[0].split('-')[0]
        if not storageMetadata['Cached']:
            if lfn.endswith('.raw'):
                seName = '%s-RAW' % site
            else:
                seName = '%s-RDST' % site
        elif storageMetadata['Migrated']:
            if lfn.startswith('/lhcb/data'):
                seName = '%s_M-DST' % site
            else:
                seName = '%s_MC_M-DST' % site
        else:
            if lfn.startswith('/lhcb/data'):
                seName = '%s-DST' % site
            else:
                seName = '%s_MC-DST' % site

        problematicDict['SE'] = seName
        res = returnSingleResult(se.getURL(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)

        problematicDict['PFN'] = res['Value']

        res = returnSingleResult(self.fc.addReplica({lfn: problematicDict}))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        res = returnSingleResult(self.fc.getFileMetadata(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']['Size'] != storageMetadata['Size']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found with catalog size mismatch. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID,
                                                   'CatalogPFNSizeMismatch')
        return self.__updateCompletedFiles('PFNNotRegistered', fileID)

    #FIXME: Unused?
    def resolveLFNCatalogMissing(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the LFNCatalogMissing prognosis
    """
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']:
            return self.__updateCompletedFiles('LFNCatalogMissing', fileID)
        # Remove the file from all catalogs
        # RF_NOTE : here I can do it because it's a single file, but otherwise I would need to sort the path
        res = returnSingleResult(self.fc.removeFile(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        return self.__updateCompletedFiles('LFNCatalogMissing', fileID)

    #FIXME: Unused?
    def resolvePFNMissing(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNMissing prognosis
    """
        se = problematicDict['SE']
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if not res['Value']:
            gLogger.info("PFNMissing file (%d) no longer exists in catalog" %
                         fileID)
            return self.__updateCompletedFiles('PFNMissing', fileID)

        res = returnSingleResult(StorageElement(se).exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']:
            gLogger.info("PFNMissing replica (%d) is no longer missing" %
                         fileID)
            return self.__updateReplicaToChecked(problematicDict)
        gLogger.info("PFNMissing replica (%d) does not exist" % fileID)
        res = returnSingleResult(self.fc.getReplicas(lfn, allStatus=True))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        replicas = res['Value']
        seSite = se.split('_')[0].split('-')[0]
        found = False
        for replicaSE in replicas.keys():
            if re.search(seSite, replicaSE):
                found = True
                problematicDict['SE'] = replicaSE
                se = replicaSE
        if not found:
            gLogger.info(
                "PFNMissing replica (%d) is no longer registered at SE. Resolved."
                % fileID)
            return self.__updateCompletedFiles('PFNMissing', fileID)
        gLogger.info(
            "PFNMissing replica (%d) does not exist. Removing from catalog..."
            % fileID)
        res = returnSingleResult(self.fc.removeReplica({lfn: problematicDict}))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if len(replicas) == 1:
            gLogger.info(
                "PFNMissing replica (%d) had a single replica. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'LFNZeroReplicas')
        res = self.dm.replicateAndRegister(problematicDict['LFN'], se)
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        # If we get here the problem is solved so we can update the integrityDB
        return self.__updateCompletedFiles('PFNMissing', fileID)

    #FIXME: Unused?
    def resolvePFNUnavailable(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNUnavailable prognosis
    """
        lfn = problematicDict['LFN']
        se = problematicDict['SE']
        fileID = problematicDict['FileID']

        res = returnSingleResult(StorageElement(se).getFileMetadata(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            # The file is no longer Unavailable but has now disappeared completely
            gLogger.info(
                "PFNUnavailable replica (%d) found to be missing. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')
        if (not res['OK']) or res['Value']['Unavailable']:
            gLogger.info(
                "PFNUnavailable replica (%d) found to still be Unavailable" %
                fileID)
            return self.incrementProblematicRetry(fileID)
        if res['Value']['Lost']:
            gLogger.info(
                "PFNUnavailable replica (%d) is now found to be Lost. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNLost')
        gLogger.info("PFNUnavailable replica (%d) is no longer Unavailable" %
                     fileID)
        # Need to make the replica okay in the Catalog
        return self.__updateReplicaToChecked(problematicDict)

    #FIXME: Unused?
    def resolvePFNZeroSize(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolves the PFNZeroSize prognosis
    """
        lfn = problematicDict['LFN']
        seName = problematicDict['SE']
        fileID = problematicDict['FileID']

        se = StorageElement(seName)

        res = returnSingleResult(se.getFileSize(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            gLogger.info(
                "PFNZeroSize replica (%d) found to be missing. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')
        storageSize = res['Value']
        if storageSize == 0:
            res = returnSingleResult(se.removeFile(lfn))

            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            gLogger.info(
                "PFNZeroSize replica (%d) removed. Updating prognosis" %
                problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')

        res = returnSingleResult(self.fc.getReplicas(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if seName not in res['Value']:
            gLogger.info(
                "PFNZeroSize replica (%d) not registered in catalog. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNNotRegistered')
        res = returnSingleResult(self.fc.getFileMetadata(lfn))

        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        catalogSize = res['Value']['Size']
        if catalogSize != storageSize:
            gLogger.info(
                "PFNZeroSize replica (%d) size found to differ from registered metadata. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID,
                                                   'CatalogPFNSizeMismatch')
        return self.__updateCompletedFiles('PFNZeroSize', fileID)

    ############################################################################################

    #FIXME: Unused?
    def resolveLFNZeroReplicas(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolves the LFNZeroReplicas prognosis
    """
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.getReplicas(lfn, allStatus=True))
        if res['OK'] and res['Value']:
            gLogger.info("LFNZeroReplicas file (%d) found to have replicas" %
                         fileID)
        else:
            gLogger.info(
                "LFNZeroReplicas file (%d) does not have replicas. Checking storage..."
                % fileID)
            pfnsFound = False
            for storageElementName in sorted(
                    gConfig.getValue(
                        'Resources/StorageElementGroups/Tier1_MC_M-DST', [])):
                res = self.__getStoragePathExists([lfn], storageElementName)
                if res['OK'] and lfn in res['Value']:
                    gLogger.info(
                        "LFNZeroReplicas file (%d) found storage file at %s" %
                        (fileID, storageElementName))
                    self.reportProblematicReplicas(
                        [(lfn, 'deprecatedUrl', storageElementName,
                          'PFNNotRegistered')], storageElementName,
                        'PFNNotRegistered')
                    pfnsFound = True
            if not pfnsFound:
                gLogger.info(
                    "LFNZeroReplicas file (%d) did not have storage files. Removing..."
                    % fileID)
                res = returnSingleResult(self.fc.removeFile(lfn))
                if not res['OK']:
                    gLogger.error('DataIntegrityClient: failed to remove file',
                                  res['Message'])
                    # Increment the number of retries for this file
                    self.server.incrementProblematicRetry(fileID)
                    return res
                gLogger.info("LFNZeroReplicas file (%d) removed from catalog" %
                             fileID)
        # If we get here the problem is solved so we can update the integrityDB
        return self.__updateCompletedFiles('LFNZeroReplicas', fileID)

    def _reportProblematicFiles(self, lfns, reason):
        """ Simple wrapper function around setFileProblematic
    """
        gLogger.info('The following %s files were found with %s' %
                     (len(lfns), reason))
        for lfn in sorted(lfns):
            gLogger.info(lfn)
        res = self.setFileProblematic(lfns,
                                      reason,
                                      sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with files',
                         res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with files')
Exemplo n.º 11
0
class InputDataAgent(OptimizerModule):
    """
      The specific Optimizer must provide the following methods:
      - initializeOptimizer() before each execution cycle
      - checkJob() - the main method called for each job
  """

    #############################################################################
    def initializeOptimizer(self):
        """Initialize specific parameters for JobSanityAgent.
    """
        self.failedMinorStatus = self.am_getOption('/FailedJobStatus',
                                                   'Input Data Not Available')
        #this will ignore failover SE files
        self.checkFileMetadata = self.am_getOption('CheckFileMetadata', True)

        self.dataManager = DataManager()
        self.resourceStatus = ResourceStatus()
        self.fc = FileCatalog()

        self.seToSiteMapping = {}
        self.lastCScheck = 0
        self.cacheLength = 600

        return S_OK()
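
    # A sketch of the agent options read above, as they might appear in the
    # configuration service (section path and values are illustrative):
    #
    #   InputDataAgent
    #   {
    #     FailedJobStatus = Input Data Not Available
    #     CheckFileMetadata = True
    #   }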

    #############################################################################
    def checkJob(self, job, classAdJob):
        """
    This method does the optimization corresponding to this Agent;
    it is called for each job by the Optimizer framework
    """

        result = self.jobDB.getInputData(job)
        if not result['OK']:
            self.log.warn('Failed to get input data from JobDB for %s' % (job))
            self.log.warn(result['Message'])
            return result
        if not result['Value']:
            self.log.verbose('Job %s has no input data requirement' % (job))
            return self.setNextOptimizer(job)

        # Check if we already executed this Optimizer and the input data is resolved
        res = self.getOptimizerJobInfo(job,
                                       self.am_getModuleParam('optimizerName'))
        if not (res['OK'] and res['Value']):
            self.log.verbose(
                'Job %s has an input data requirement and will be processed' %
                (job))
            inputData = result['Value']
            result = self.__resolveInputData(job, inputData)
            if not result['OK']:
                self.log.warn(result['Message'])
                return result

        return self.setNextOptimizer(job)

    #############################################################################
    def __resolveInputData(self, job, inputData):
        """This method checks the file catalog for replica information.
    """
        lfns = [fname.replace('LFN:', '') for fname in inputData]

        start = time.time()
        # In order to place jobs on Hold if a certain SE is banned, we first need
        # to check whether the replicas are really available
        replicas = self.dataManager.getActiveReplicas(lfns)
        timing = time.time() - start
        self.log.verbose('Catalog Replicas Lookup Time: %.2f seconds ' %
                         (timing))
        if not replicas['OK']:
            self.log.warn(replicas['Message'])
            return replicas

        replicaDict = replicas['Value']

        siteCandidates = self.__checkReplicas(job, replicaDict)

        if not siteCandidates['OK']:
            self.log.warn(siteCandidates['Message'])
            return siteCandidates

        if self.checkFileMetadata:
            guids = True
            start = time.time()
            guidDict = self.fc.getFileMetadata(lfns)
            timing = time.time() - start
            self.log.info('Catalog Metadata Lookup Time: %.2f seconds ' %
                          (timing))

            if not guidDict['OK']:
                self.log.warn(guidDict['Message'])
                guids = False
            else:
                failed = guidDict['Value']['Failed']
                if failed:
                    self.log.warn('Failed to establish some GUIDs')
                    self.log.warn(failed)
                    guids = False

            if guids:
                for lfn, reps in replicaDict['Successful'].items():
                    guidDict['Value']['Successful'][lfn].update(reps)
                replicas = guidDict

        resolvedData = {}
        resolvedData['Value'] = replicas
        resolvedData['SiteCandidates'] = siteCandidates['Value']
        result = self.setOptimizerJobInfo(
            job, self.am_getModuleParam('optimizerName'), resolvedData)
        if not result['OK']:
            self.log.warn(result['Message'])
            return result
        return S_OK(resolvedData)

    #############################################################################
    def __checkReplicas(self, job, replicaDict):
        """Check that all input lfns have valid replicas and can all be found at least in one single site.
    """
        badLFNs = []

        if 'Successful' in replicaDict:
            for lfn, reps in replicaDict['Successful'].items():
                if not reps:
                    badLFNs.append('LFN:%s Problem: No replicas available' %
                                   (lfn))
        else:
            return S_ERROR('No replica Info available')

        if 'Failed' in replicaDict:
            for lfn, cause in replicaDict['Failed'].items():
                badLFNs.append('LFN:%s Problem: %s' % (lfn, cause))

        if badLFNs:
            self.log.info('Found %s problematic LFN(s) for job %s' %
                          (len(badLFNs), job))
            param = '\n'.join(badLFNs)
            self.log.info(param)
            result = self.setJobParam(job,
                                      self.am_getModuleParam('optimizerName'),
                                      param)
            if not result['OK']:
                self.log.error(result['Message'])
            return S_ERROR('Input Data Not Available')

        return self.__getSiteCandidates(replicaDict['Successful'])

    #############################################################################
    # FIXME: right now this is unused...
    def __checkActiveSEs(self, job, replicaDict):
        """
    Check active SE and replicas and identify possible Site candidates for 
    the execution of the job
    """
        # Now let's check if some replicas might not be available due to banned SEs
        activeReplicas = self.dataManager.checkActiveReplicas(replicaDict)
        if not activeReplicas['OK']:
            # due to banned SEs, input data might not be available
            msg = "On Hold: Missing replicas due to banned SE"
            self.log.info(msg)
            self.log.warn(activeReplicas['Message'])
            return S_ERROR(msg)

        activeReplicaDict = activeReplicas['Value']

        siteCandidates = self.__checkReplicas(job, activeReplicaDict)

        if not siteCandidates['OK']:
            # due to banned SEs, input data is not available at any single site
            msg = "On Hold: Input data not Available due to banned SE"
            self.log.info(msg)
            self.log.warn(siteCandidates['Message'])
            return S_ERROR(msg)

        resolvedData = {}
        resolvedData['Value'] = activeReplicas
        resolvedData['SiteCandidates'] = siteCandidates['Value']
        result = self.setOptimizerJobInfo(
            job, self.am_getModuleParam('optimizerName'), resolvedData)
        if not result['OK']:
            self.log.warn(result['Message'])
            return result
        return S_OK(resolvedData)

    #############################################################################
    def __getSitesForSE(self, se):
        """ Returns a list of sites having the given SE as a local one.
        Uses the local cache of the site-se information
    """

        # Empty the cache if too old
        if (time.time() - self.lastCScheck) > self.cacheLength:
            self.log.verbose('Resetting the SE to site mapping cache')
            self.seToSiteMapping = {}
            self.lastCScheck = time.time()

        if se not in self.seToSiteMapping:
            sites = getSitesForSE(se)
            if sites['OK']:
                self.seToSiteMapping[se] = list(sites['Value'])
            return sites
        else:
            return S_OK(self.seToSiteMapping[se])

    #############################################################################
    def __getSiteCandidates(self, inputData):
        """This method returns a list of possible site candidates based on the
       job input data requirement.  For each site candidate, the number of files
       on disk and tape is resolved.
    """

        fileSEs = {}
        for lfn, replicas in inputData.items():
            siteList = []
            for se in replicas.keys():
                sites = self.__getSitesForSE(se)
                if sites['OK']:
                    siteList += sites['Value']
            fileSEs[lfn] = uniqueElements(siteList)

        siteCandidates = []
        i = 0
        for _fileName, sites in fileSEs.items():
            if not i:
                siteCandidates = sites
            else:
                tempSite = []
                for site in siteCandidates:
                    if site in sites:
                        tempSite.append(site)
                siteCandidates = tempSite
            i += 1

        if not len(siteCandidates):
            return S_ERROR('No candidate sites available')

        #In addition, check number of files on tape and disk for each site
        #for optimizations during scheduling
        siteResult = {}
        for site in siteCandidates:
            siteResult[site] = {'disk': [], 'tape': []}

        seDict = {}
        for lfn, replicas in inputData.items():
            for se in replicas.keys():
                if se not in seDict:
                    sites = self.__getSitesForSE(se)
                    if not sites['OK']:
                        continue
                    try:
                        #storageElement = StorageElement( se )
                        result = self.resourceStatus.getStorageElementStatus(
                            se, statusType='ReadAccess')
                        if not result['OK']:
                            continue
                        seDict[se] = {
                            'Sites': sites['Value'],
                            'SEParams': result['Value'][se]
                        }
                        result = getStorageElementOptions(se)
                        if not result['OK']:
                            continue
                        seDict[se]['SEParams'].update(result['Value'])
                    except Exception:
                        self.log.exception(
                            'Failed to get status/options for SE %s' % se)
                        continue
                for site in seDict[se]['Sites']:
                    if site in siteCandidates:
                        seParams = seDict[se]['SEParams']
                        if seParams['ReadAccess'] and seParams['DiskSE']:
                            if lfn not in siteResult[site]['disk']:
                                siteResult[site]['disk'].append(lfn)
                                if lfn in siteResult[site]['tape']:
                                    siteResult[site]['tape'].remove(lfn)
                        if seParams['ReadAccess'] and seParams['TapeSE']:
                            if (lfn not in siteResult[site]['tape']
                                    and lfn not in siteResult[site]['disk']):
                                siteResult[site]['tape'].append(lfn)

        for site in siteResult:
            siteResult[site]['disk'] = len(siteResult[site]['disk'])
            siteResult[site]['tape'] = len(siteResult[site]['tape'])
        return S_OK(siteResult)
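
As a sketch of the contract spelled out in the class docstring above: a do-nothing optimizer only needs the two hooks. PassThroughOptimizer is a hypothetical name; S_OK, self.log and setNextOptimizer are used exactly as in InputDataAgent:

class PassThroughOptimizer(OptimizerModule):
    """Minimal optimizer sketch: implements the required hooks and nothing else."""

    def initializeOptimizer(self):
        # Called once before each execution cycle
        return S_OK()

    def checkJob(self, job, classAdJob):
        # Called by the Optimizer framework for each job; just pass it on
        self.log.verbose('Nothing to optimize for job %s' % job)
        return self.setNextOptimizer(job)
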
Example No. 12
class File:

  def __init__(self, lfn='', status='', size=0, guid='', checksum=''):
    # These are the possible attributes for a file
    if not type(lfn) in types.StringTypes:
      raise AttributeError("lfn should be string type")
    self.lfn = str(lfn)
    if not type(status) in types.StringTypes:
      raise AttributeError("status should be string type")
    self.status = str(status)
    try:
      self.size = int(size)
    except (TypeError, ValueError):
      raise AttributeError("size should be integer type")
    if not type(guid) in types.StringTypes:
      raise AttributeError("guid should be string type")
    self.guid = str(guid)
    if not type(checksum) in types.StringTypes:
      raise AttributeError("checksum should be string type")
    self.checksum = str(checksum)
    self.catalogReplicas = []
    self.fc = FileCatalog()

  def setLFN(self,lfn):
    if not type(lfn) in types.StringTypes: 
      return S_ERROR("LFN should be %s and not %s" % (types.StringType,type(lfn)))
    self.lfn = str(lfn)  
    return S_OK()
      
  def setStatus(self,status):
    if not type(status) in types.StringTypes:
      return S_ERROR("Status should be %s and not %s" % (types.StringType,type(status)))
    self.status = str(status)
    return S_OK()

  def setSize(self,size):
    try:
      self.size = int(size)
      return S_OK()
    except (TypeError, ValueError):
      return S_ERROR("Size should be %s and not %s" % (types.IntType, type(size)))
      
  def setGUID(self,guid):
    if not type(guid) in types.StringTypes:
      return S_ERROR("GUID should be %s and not %s" % (types.StringType,type(guid)))
    self.guid = str(guid)
    return S_OK()

  def setChecksum(self,checksum):
    if not type(checksum) in types.StringTypes:
      return S_ERROR("Checksum should be %s and not %s" % (types.StringType,type(checksum)))
    self.checksum = str(checksum)
    return S_OK()

  def addCatalogReplica(self,se,pfn,status='U'):
    for replica in self.catalogReplicas:
      if (replica.pfn == pfn) and (replica.se == se):
        return S_OK()
    oCatalogReplica = CatalogReplica(pfn=pfn,storageElement=se,status=status)
    self.catalogReplicas.append(oCatalogReplica)
    return S_OK()
  
  def getLFN(self):
    return S_OK(self.lfn)

  def getStatus(self):
    if self.status:
      return S_OK(self.status)
    if not self.lfn:
      return S_ERROR('No LFN is known')
    res = self.__populateMetadata()
    if not res['OK']:
      return res
    return S_OK(self.status)

  def getSize(self):
    if self.size:
      return S_OK(self.size)
    if not self.lfn:
      return S_ERROR('No LFN is known')
    res = self.__populateMetadata()
    if not res['OK']:
      return res
    return S_OK(self.size)

  def getGUID(self):
    if self.guid:
      return S_OK(self.guid)
    if not self.lfn:
      return S_ERROR('No LFN is known')
    res = self.__populateMetadata()
    if not res['OK']:
      return res
    return S_OK(self.guid)

  def getChecksum(self):
    if self.checksum:
      return S_OK(self.checksum)
    if not self.lfn:
      return S_ERROR('No LFN is known')
    res = self.__populateMetadata()
    if not res['OK']:
      return res
    return S_OK(self.checksum)

  def __populateMetadata(self):
    res = Utils.executeSingleFileOrDirWrapper( self.fc.getFileMetadata( self.lfn ) )
    if not res['OK']:
      return res
    metadata = res['Value']
    self.setChecksum(metadata['Checksum'])
    self.setGUID(metadata['GUID'])
    self.setSize(metadata['Size']) 
    self.setStatus(metadata['Status'])
    return S_OK()

  def hasCatalogReplicas(self):
    if self.catalogReplicas:
      return S_OK(True)
    return S_OK(False)

  def clearCatalogReplicas(self):
    self.catalogReplicas = []
    return S_OK()

  def getReplicas(self):
    if not self.lfn:
      return S_ERROR('No LFN is known')
    if self.catalogReplicas:
      replicas = {}
      for replica in self.catalogReplicas:
        replicas[replica.se] = replica.pfn
      return S_OK(replicas)
    res = Utils.executeSingleFileOrDirWrapper( self.fc.getCatalogReplicas( self.lfn ) )
    if not res['OK']:
      return res
    replicas = res['Value']
    for se,pfn in replicas.items():
      oCatalogReplica = CatalogReplica(pfn=pfn,storageElement=se,status='U')
      self.catalogReplicas.append(oCatalogReplica)
    return S_OK(replicas)
  
  def digest(self):
    """ Get short description string of file attributes
    """
    return S_OK("%s:%s:%d:%s:%s" % (self.lfn,self.status,self.size,self.guid,self.checksum))

  def toCFG(self):
    """ Get the full description of the file in CFG format
    """
    oCFG = CFG()
    strippedLFN = self.lfn.replace('/','&&')
    oCFG.createNewSection(strippedLFN)
    oCFG.setOption('%s/Status' % (strippedLFN), self.status)    
    oCFG.setOption('%s/Size' % (strippedLFN), self.size)    
    oCFG.setOption('%s/GUID' % (strippedLFN), self.guid)    
    oCFG.setOption('%s/Checksum' % (strippedLFN), self.checksum)
    #TODO: still have to include the CFG from the replica objects 
    if self.catalogReplicas:
      oCFG.createNewSection('%s/CatalogReplicas' % strippedLFN)
      for replica in self.catalogReplicas:
        pass
        #  rCFG.mergeWith(CFG().loadFromBuffer(replica.toCFG()['Value']))
    return S_OK(str(oCFG))
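
A brief usage sketch of the File helper above. All values are hypothetical, and since the constructor instantiates a FileCatalog, a configured DIRAC environment is assumed:

fileObj = File(lfn='/vo/user/test.dat', status='Exists', size=1024,
               guid='11111111-2222-3333-4444-555555555555', checksum='abc123')
res = fileObj.setSize('not-an-int')  # rejected: returns S_ERROR
if not res['OK']:
  print res['Message']               # Python 2 print, matching the class itself
print fileObj.digest()['Value']      # '<lfn>:<status>:<size>:<guid>:<checksum>'
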
Example No. 13
class DataIntegrityClient( Client ):

  """
  The following methods are supported in the service but are not mentioned explicitly here:

          getProblematic()
             Obtains a problematic file from the IntegrityDB based on the LastUpdate time

          getPrognosisProblematics(prognosis)
            Obtains all the problematics of a particular prognosis from the integrityDB

          getProblematicsSummary()
            Obtains a count of the number of problematics for each prognosis found

          getDistinctPrognosis()
            Obtains the distinct prognosis found in the integrityDB

          getTransformationProblematics(prodID)
            Obtains the problematics for a given production

          incrementProblematicRetry(fileID)
            Increments the retry count for the supplied file ID

          changeProblematicPrognosis(fileID,newPrognosis)
            Changes the prognosis of the supplied file to the new prognosis

          setProblematicStatus(fileID,status)
            Updates the status of a problematic in the integrityDB

          removeProblematic(self,fileID)
            This removes the specified file ID from the integrity DB

          insertProblematic(sourceComponent,fileMetadata)
            Inserts file with supplied metadata into the integrity DB

  """

  def __init__( self, **kwargs ):

    super(DataIntegrityClient, self).__init__( **kwargs )
    self.setServer( 'DataManagement/DataIntegrity' )
    self.dm = DataManager()
    self.fc = FileCatalog()

  def setFileProblematic( self, lfn, reason, sourceComponent = '' ):
    """ This method updates the status of the file in the FileCatalog and the IntegrityDB

        lfn - the lfn of the file
        reason - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
    if isinstance( lfn, list ):
      lfns = lfn
    elif isinstance( lfn, basestring ):
      lfns = [lfn]
    else:
      errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
      gLogger.error( errStr )
      return S_ERROR( errStr )
    gLogger.info( "DataIntegrityClient.setFileProblematic: Attempting to update %s files." % len( lfns ) )
    fileMetadata = {}
    for lfn in lfns:
      fileMetadata[lfn] = {'Prognosis':reason, 'LFN':lfn, 'PFN':'', 'SE':''}
    res = self.insertProblematic( sourceComponent, fileMetadata )
    if not res['OK']:
      gLogger.error( "DataIntegrityClient.setReplicaProblematic: Failed to insert problematics to integrity DB" )
    return res

  def reportProblematicReplicas( self, replicaTuple, se, reason ):
    """ Simple wrapper function around setReplicaProblematic """
    gLogger.info( 'The following %s files had %s at %s' % ( len( replicaTuple ), reason, se ) )
    for lfn, _pfn, se, reason in sorted( replicaTuple ):
      if lfn:
        gLogger.info( lfn )
    res = self.setReplicaProblematic( replicaTuple, sourceComponent = 'DataIntegrityClient' )
    if not res['OK']:
      gLogger.error( 'Failed to update integrity DB with replicas', res['Message'] )
    else:
      gLogger.info( 'Successfully updated integrity DB with replicas' )

  def setReplicaProblematic( self, replicaTuple, sourceComponent = '' ):
    """ This method updates the status of the replica in the FileCatalog and the IntegrityDB
        The supplied replicaTuple should be a tuple (or list of tuples) of the form ( lfn, pfn, se, prognosis )

        lfn - the lfn of the file
        pfn - the pfn if available (otherwise '')
        se - the storage element of the problematic replica (otherwise '')
        prognosis - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
    if isinstance( replicaTuple, tuple ):
      replicaTuple = [replicaTuple]
    elif isinstance( replicaTuple, list ):
      pass
    else:
      errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
      gLogger.error( errStr )
      return S_ERROR( errStr )
    gLogger.info( "DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas." % len( replicaTuple ) )
    replicaDict = {}
    for lfn, pfn, se, reason in replicaTuple:
      replicaDict[lfn] = {'Prognosis':reason, 'LFN':lfn, 'PFN':pfn, 'SE':se}
    res = self.insertProblematic( sourceComponent, replicaDict )
    if not res['OK']:
      gLogger.error( "DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB" )
      return res
    for lfn in replicaDict.keys():
      replicaDict[lfn]['Status'] = 'Problematic'

    res = self.fc.setReplicaStatus( replicaDict )
    if not res['OK']:
      errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
      gLogger.error( errStr, res['Message'] )
      return res
    failed = res['Value']['Failed']
    successful = res['Value']['Successful']
    resDict = {'Successful':successful, 'Failed':failed}
    return S_OK( resDict )

  ##########################################################################
  #
  # This section contains the resolution methods for various prognoses
  #

  def __updateCompletedFiles( self, prognosis, fileID ):
    gLogger.info( "%s file (%d) is resolved" % ( prognosis, fileID ) )
    return self.setProblematicStatus( fileID, 'Resolved' )

  def __returnProblematicError( self, fileID, res ):
    self.incrementProblematicRetry( fileID )
    gLogger.error( 'DataIntegrityClient failure', res['Message'] )
    return res

  def __updateReplicaToChecked( self, problematicDict ):
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']
    prognosis = problematicDict['Prognosis']
    problematicDict['Status'] = 'Checked'

    res = returnSingleResult( self.fc.setReplicaStatus( {lfn:problematicDict} ) )

    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    gLogger.info( "%s replica (%d) is updated to Checked status" % ( prognosis, fileID ) )
    return self.__updateCompletedFiles( prognosis, fileID )

  def resolveCatalogPFNSizeMismatch( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the CatalogPFNSizeMismatch prognosis
    """
    lfn = problematicDict['LFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']


    res = returnSingleResult( self.fc.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    catalogSize = res['Value']
    res = returnSingleResult( StorageElement( se ).getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageSize = res['Value']
    bkKCatalog = FileCatalog( ['BookkeepingDB'] )
    res = returnSingleResult( bkKCatalog.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    bookkeepingSize = res['Value']
    if bookkeepingSize == catalogSize == storageSize:
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) matched all registered sizes." % fileID )
      return self.__updateReplicaToChecked( problematicDict )
    if catalogSize == bookkeepingSize:
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also" % fileID )
      res = returnSingleResult( self.fc.getReplicas( lfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      if len( res['Value'] ) <= 1:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has no other replicas." % fileID )
        return S_ERROR( "Not removing catalog file mismatch since the only replica" )
      else:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..." % fileID )
        res = self.dm.removeReplica( se, lfn )
        if not res['OK']:
          return self.__returnProblematicError( fileID, res )
        return self.__updateCompletedFiles( 'CatalogPFNSizeMismatch', fileID )
    if ( catalogSize != bookkeepingSize ) and ( bookkeepingSize == storageSize ):
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size" % fileID )
      res = self.__updateReplicaToChecked( problematicDict )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      return self.changeProblematicPrognosis( fileID, 'BKCatalogSizeMismatch' )
    gLogger.info( "CatalogPFNSizeMismatch replica (%d) all sizes found mismatch. Updating retry count" % fileID )
    return self.incrementProblematicRetry( fileID )

  #FIXME: Unused?
  def resolvePFNNotRegistered( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNNotRegistered prognosis
    """
    lfn = problematicDict['LFN']
    seName = problematicDict['SE']
    fileID = problematicDict['FileID']

    se = StorageElement( seName )
    res = returnSingleResult( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if not res['Value']:
      # The file does not exist in the catalog
      res = returnSingleResult( se.removeFile( lfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )
    res = returnSingleResult( se.getFileMetadata( lfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      gLogger.info( "PFNNotRegistered replica (%d) found to be missing." % fileID )
      return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )
    elif not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageMetadata = res['Value']
    if storageMetadata['Lost']:
      gLogger.info( "PFNNotRegistered replica (%d) found to be Lost. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNLost' )
    if storageMetadata['Unavailable']:
      gLogger.info( "PFNNotRegistered replica (%d) found to be Unavailable. Updating retry count" % fileID )
      return self.incrementProblematicRetry( fileID )

    # HACK until we can obtain the space token descriptions through GFAL
    site = seName.split( '_' )[0].split( '-' )[0]
    if not storageMetadata['Cached']:
      if lfn.endswith( '.raw' ):
        seName = '%s-RAW' % site
      else:
        seName = '%s-RDST' % site
    elif storageMetadata['Migrated']:
      if lfn.startswith( '/lhcb/data' ):
        seName = '%s_M-DST' % site
      else:
        seName = '%s_MC_M-DST' % site
    else:
      if lfn.startswith( '/lhcb/data' ):
        seName = '%s-DST' % site
      else:
        seName = '%s_MC-DST' % site

    problematicDict['SE'] = seName
    res = returnSingleResult( se.getURL( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )

    problematicDict['PFN'] = res['Value']

    res = returnSingleResult( self.fc.addReplica( {lfn:problematicDict} ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    res = returnSingleResult( self.fc.getFileMetadata( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']['Size'] != storageMetadata['Size']:
      gLogger.info( "PFNNotRegistered replica (%d) found with catalog size mismatch. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'CatalogPFNSizeMismatch' )
    return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )

  #FIXME: Unused?
  def resolveLFNCatalogMissing( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the LFNCatalogMissing prognosis
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = returnSingleResult( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']:
      return self.__updateCompletedFiles( 'LFNCatalogMissing', fileID )
    # Remove the file from all catalogs
    # RF_NOTE : here I can do it because it's a single file, but otherwise I would need to sort the path
    res = returnSingleResult( self.fc.removeFile( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    return self.__updateCompletedFiles( 'LFNCatalogMissing', fileID )

  #FIXME: Unused?
  def resolvePFNMissing( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNMissing prognosis
    """
    se = problematicDict['SE']
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = returnSingleResult( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if not res['Value']:
      gLogger.info( "PFNMissing file (%d) no longer exists in catalog" % fileID )
      return self.__updateCompletedFiles( 'PFNMissing', fileID )

    res = returnSingleResult( StorageElement( se ).exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']:
      gLogger.info( "PFNMissing replica (%d) is no longer missing" % fileID )
      return self.__updateReplicaToChecked( problematicDict )
    gLogger.info( "PFNMissing replica (%d) does not exist" % fileID )
    res = returnSingleResult( self.fc.getReplicas( lfn, allStatus = True ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    replicas = res['Value']
    seSite = se.split( '_' )[0].split( '-' )[0]
    found = False
    gLogger.verbose( "Registered replicas: %s" % replicas )
    for replicaSE in replicas.keys():
      if re.search( seSite, replicaSE ):
        found = True
        problematicDict['SE'] = replicaSE
        se = replicaSE
    if not found:
      gLogger.info( "PFNMissing replica (%d) is no longer registered at SE. Resolved." % fileID )
      return self.__updateCompletedFiles( 'PFNMissing', fileID )
    gLogger.info( "PFNMissing replica (%d) does not exist. Removing from catalog..." % fileID )
    res = returnSingleResult( self.fc.removeReplica( {lfn:problematicDict} ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if len( replicas ) == 1:
      gLogger.info( "PFNMissing replica (%d) had a single replica. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'LFNZeroReplicas' )
    res = self.dm.replicateAndRegister( problematicDict['LFN'], se )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    # If we get here the problem is solved so we can update the integrityDB
    return self.__updateCompletedFiles( 'PFNMissing', fileID )

  #FIXME: Unused?
  def resolvePFNUnavailable( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNUnavailable prognosis
    """
    lfn = problematicDict['LFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']

    res = returnSingleResult( StorageElement( se ).getFileMetadata( lfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      # The file is no longer Unavailable but has now disappeared completely
      gLogger.info( "PFNUnavailable replica (%d) found to be missing. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    if ( not res['OK'] ) or res['Value']['Unavailable']:
      gLogger.info( "PFNUnavailable replica (%d) found to still be Unavailable" % fileID )
      return self.incrementProblematicRetry( fileID )
    if res['Value']['Lost']:
      gLogger.info( "PFNUnavailable replica (%d) is now found to be Lost. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNLost' )
    gLogger.info( "PFNUnavailable replica (%d) is no longer Unavailable" % fileID )
    # Need to make the replica okay in the Catalog
    return self.__updateReplicaToChecked( problematicDict )

  #FIXME: Unused?
  def resolvePFNZeroSize( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolves the PFNZeroSize prognosis
    """
    lfn = problematicDict['LFN']
    seName = problematicDict['SE']
    fileID = problematicDict['FileID']

    se = StorageElement( seName )

    res = returnSingleResult( se.getFileSize( lfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      gLogger.info( "PFNZeroSize replica (%d) found to be missing. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageSize = res['Value']
    if storageSize == 0:
      res = returnSingleResult( se.removeFile( lfn ) )

      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      gLogger.info( "PFNZeroSize replica (%d) removed. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )


    res = returnSingleResult( self.fc.getReplicas( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if seName not in res['Value']:
      gLogger.info( "PFNZeroSize replica (%d) not registered in catalog. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNNotRegistered' )
    res = returnSingleResult( self.fc.getFileMetadata( lfn ) )

    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    catalogSize = res['Value']['Size']
    if catalogSize != storageSize:
      gLogger.info( "PFNZeroSize replica (%d) size found to differ from registered metadata. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'CatalogPFNSizeMismatch' )
    return self.__updateCompletedFiles( 'PFNZeroSize', fileID )

  ############################################################################################

  #FIXME: Unused?
  def resolveLFNZeroReplicas( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolves the LFNZeroReplicas prognosis
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = returnSingleResult( self.fc.getReplicas( lfn, allStatus = True ) )
    if res['OK'] and res['Value']:
      gLogger.info( "LFNZeroReplicas file (%d) found to have replicas" % fileID )
    else:
      gLogger.info( "LFNZeroReplicas file (%d) does not have replicas. Checking storage..." % fileID )
      pfnsFound = False
      for storageElementName in sorted( gConfig.getValue( 'Resources/StorageElementGroups/Tier1_MC_M-DST', [] ) ):
        res = self.__getStoragePathExists( [lfn], storageElementName )
        if lfn in res['Value']:
          gLogger.info( "LFNZeroReplicas file (%d) found storage file at %s" % ( fileID, storageElementName ) )
          self.reportProblematicReplicas( [( lfn, 'deprecatedUrl', storageElementName, 'PFNNotRegistered' )], storageElementName, 'PFNNotRegistered' )
          pfnsFound = True
      if not pfnsFound:
        gLogger.info( "LFNZeroReplicas file (%d) did not have storage files. Removing..." % fileID )
        res = returnSingleResult( self.fc.removeFile( lfn ) )
        if not res['OK']:
          gLogger.error( 'DataIntegrityClient: failed to remove file', res['Message'] )
          # Increment the number of retries for this file
          self.server.incrementProblematicRetry( fileID )
          return res
        gLogger.info( "LFNZeroReplicas file (%d) removed from catalog" % fileID )
    # If we get here the problem is solved so we can update the integrityDB
    return self.__updateCompletedFiles( 'LFNZeroReplicas', fileID )


  def _reportProblematicFiles( self, lfns, reason ):
    """ Simple wrapper function around setFileProblematic
    """
    gLogger.info( 'The following %s files were found with %s' % ( len( lfns ), reason ) )
    for lfn in sorted( lfns ):
      gLogger.info( lfn )
    res = self.setFileProblematic( lfns, reason, sourceComponent = 'DataIntegrityClient' )
    if not res['OK']:
      gLogger.error( 'Failed to update integrity DB with files', res['Message'] )
    else:
      gLogger.info( 'Successfully updated integrity DB with files' )
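
And a matching sketch for the replica-level entry point: setReplicaProblematic takes ( lfn, pfn, se, prognosis ) tuples, as described in its docstring above. The LFN, SE name and source component below are hypothetical:

client = DataIntegrityClient()
# Hypothetical replica; 'PFNMissing' is one of the prognoses handled above
replicaTuple = ( '/vo/data/run1/file1.dat', '', 'SOME-SE_M-DST', 'PFNMissing' )
res = client.setReplicaProblematic( [ replicaTuple ], sourceComponent = 'MyAgent' )
if not res['OK']:
  gLogger.error( 'Failed to flag problematic replica', res['Message'] )
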
Example No. 14
class ReplicateAndRegister(DMSRequestOperationsBase):
    """
  .. class:: ReplicateAndRegister

  ReplicateAndRegister operation handler
  """
    def __init__(self, operation=None, csPath=None):
        """c'tor

    :param self: self reference
    :param Operation operation: Operation instance
    :param str csPath: CS path for this handler
    """
        super(ReplicateAndRegister, self).__init__(operation, csPath)
        # # own gMonitor stuff for files
        gMonitor.registerActivity("ReplicateAndRegisterAtt",
                                  "Replicate and register attempted",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("ReplicateOK", "Replications successful",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("ReplicateFail", "Replications failed",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("RegisterOK", "Registrations successful",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("RegisterFail", "Registrations failed",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        # # for FTS
        gMonitor.registerActivity("FTSScheduleAtt", "Files schedule attempted",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("FTSScheduleOK", "File schedule successful",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("FTSScheduleFail", "File schedule failed",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        # # SE cache

        # Clients
        self.fc = FileCatalog()

    def __call__(self):
        """ call me maybe """
        # # check replicas first
        checkReplicas = self.__checkReplicas()
        if not checkReplicas["OK"]:
            self.log.error('Failed to check replicas',
                           checkReplicas["Message"])
        if hasattr(self, "FTSMode") and getattr(self, "FTSMode"):
            bannedGroups = getattr(self, "FTSBannedGroups") if hasattr(
                self, "FTSBannedGroups") else ()
            if self.request.OwnerGroup in bannedGroups:
                self.log.verbose(
                    "usage of FTS system is banned for request's owner")
                return self.dmTransfer()

            if getattr(self, 'UseNewFTS3', False):
                return self.fts3Transfer()
            else:
                return self.ftsTransfer()

        return self.dmTransfer()

    def __checkReplicas(self):
        """ check done replicas and update file states  """
        waitingFiles = dict([(opFile.LFN, opFile) for opFile in self.operation
                             if opFile.Status in ("Waiting", "Scheduled")])
        targetSESet = set(self.operation.targetSEList)

        replicas = self.fc.getReplicas(waitingFiles.keys())
        if not replicas["OK"]:
            self.log.error('Failed to get replicas', replicas["Message"])
            return replicas

        reMissing = re.compile(r".*such file.*")
        for failedLFN, errStr in replicas["Value"]["Failed"].iteritems():
            waitingFiles[failedLFN].Error = errStr
            if reMissing.search(errStr.lower()):
                self.log.error("File does not exists", failedLFN)
                gMonitor.addMark("ReplicateFail", len(targetSESet))
                waitingFiles[failedLFN].Status = "Failed"

        for successfulLFN, reps in replicas["Value"]["Successful"].iteritems():
            if targetSESet.issubset(set(reps)):
                self.log.info("file %s has been replicated to all targets" %
                              successfulLFN)
                waitingFiles[successfulLFN].Status = "Done"

        return S_OK()

    def _addMetadataToFiles(self, toSchedule):
        """ Add metadata to those files that need to be scheduled through FTS

        toSchedule is a dictionary keyed by LFN, whose values are lists with
        the corresponding opFile as first element:
        {'lfn1': [opFile1, ...], 'lfn2': [opFile2, ...]}
    """
        if toSchedule:
            self.log.info(
                "found %s files to schedule, getting metadata from FC" %
                len(toSchedule))
        else:
            self.log.verbose("No files to schedule")
            return S_OK([])

        res = self.fc.getFileMetadata(toSchedule.keys())
        if not res['OK']:
            return res
        else:
            if res['Value']['Failed']:
                self.log.warn(
                    "Can't schedule %d files: problems getting the metadata: %s"
                    % (len(res['Value']['Failed']), ', '.join(
                        res['Value']['Failed'])))
            metadata = res['Value']['Successful']

        filesToSchedule = {}

        for lfn, lfnMetadata in metadata.iteritems():
            opFileToSchedule = toSchedule[lfn][0]
            opFileToSchedule.GUID = lfnMetadata['GUID']
            # In principle this is defined already in filterReplicas()
            if not opFileToSchedule.Checksum:
                opFileToSchedule.Checksum = metadata[lfn]['Checksum']
                opFileToSchedule.ChecksumType = metadata[lfn]['ChecksumType']
            opFileToSchedule.Size = metadata[lfn]['Size']

            filesToSchedule[opFileToSchedule.LFN] = opFileToSchedule

        return S_OK(filesToSchedule)

    def _filterReplicas(self, opFile):
        """ filter out banned/invalid source SEs """
        return filterReplicas(opFile, logger=self.log, dataManager=self.dm)

    def ftsTransfer(self):
        """ replicate and register using FTS """

        self.log.info("scheduling files in FTS...")

        bannedTargets = self.checkSEsRSS()
        if not bannedTargets['OK']:
            gMonitor.addMark("FTSScheduleAtt")
            gMonitor.addMark("FTSScheduleFail")
            return bannedTargets

        if bannedTargets['Value']:
            return S_OK("%s targets are banned for writing" %
                        ",".join(bannedTargets['Value']))

        # Can continue now
        self.log.verbose("No targets banned for writing")

        toSchedule = {}

        delayExecution = 0
        errors = defaultdict(int)
        for opFile in self.getWaitingFilesList():
            opFile.Error = ''
            gMonitor.addMark("FTSScheduleAtt")
            # # check replicas
            replicas = self._filterReplicas(opFile)
            if not replicas["OK"]:
                continue
            replicas = replicas["Value"]

            validReplicas = replicas.get("Valid")
            noMetaReplicas = replicas.get("NoMetadata")
            noReplicas = replicas.get('NoReplicas')
            badReplicas = replicas.get('Bad')
            noActiveReplicas = replicas.get('NoActiveReplicas')

            if validReplicas:
                validTargets = list(
                    set(self.operation.targetSEList) - set(validReplicas))
                if not validTargets:
                    self.log.info("file %s is already present at all targets" %
                                  opFile.LFN)
                    opFile.Status = "Done"
                else:
                    toSchedule[opFile.LFN] = [
                        opFile, validReplicas, validTargets
                    ]
            else:
                gMonitor.addMark("FTSScheduleFail")
                if noMetaReplicas:
                    err = "Couldn't get metadata"
                    errors[err] += 1
                    self.log.verbose(
                        "unable to schedule '%s', %s at %s" %
                        (opFile.LFN, err, ','.join(noMetaReplicas)))
                    opFile.Error = err
                elif noReplicas:
                    err = "File doesn't exist"
                    errors[err] += 1
                    self.log.error(
                        "Unable to schedule transfer", "%s %s at %s" %
                        (opFile.LFN, err, ','.join(noReplicas)))
                    opFile.Error = err
                    opFile.Status = 'Failed'
                elif badReplicas:
                    err = "All replicas have a bad checksum"
                    errors[err] += 1
                    self.log.error(
                        "Unable to schedule transfer", "%s, %s at %s" %
                        (opFile.LFN, err, ','.join(badReplicas)))
                    opFile.Error = err
                    opFile.Status = 'Failed'
                elif noActiveReplicas:
                    err = "No active replica found"
                    errors[err] += 1
                    self.log.verbose(
                        "Unable to schedule transfer", "%s, %s at %s" %
                        (opFile.LFN, err, ','.join(noActiveReplicas)))
                    opFile.Error = err
                    # All source SEs are banned, delay execution by 1 hour
                    delayExecution = 60

        if delayExecution:
            self.log.info("Delay execution of the request by %d minutes" %
                          delayExecution)
            self.request.delayNextExecution(delayExecution)
        # Log error counts
        for error, count in errors.iteritems():
            self.log.error(error, 'for %d files' % count)

        filesToScheduleList = []
        res = self._addMetadataToFiles(toSchedule)
        if not res['OK']:
            return res
        else:
            filesToSchedule = res['Value']

            for lfn in filesToSchedule:
                filesToScheduleList.append(
                    (filesToSchedule[lfn][0].toJSON()['Value'],
                     toSchedule[lfn][1], toSchedule[lfn][2]))

        if filesToScheduleList:

            ftsSchedule = FTSClient().ftsSchedule(self.request.RequestID,
                                                  self.operation.OperationID,
                                                  filesToScheduleList)
            if not ftsSchedule["OK"]:
                self.log.error("Completely failed to schedule to FTS:",
                               ftsSchedule["Message"])
                return ftsSchedule

            # might have nothing to schedule
            ftsSchedule = ftsSchedule["Value"]
            if not ftsSchedule:
                return S_OK()

            self.log.info("%d files have been scheduled to FTS" %
                          len(ftsSchedule['Successful']))
            for opFile in self.operation:
                fileID = opFile.FileID
                if fileID in ftsSchedule["Successful"]:
                    gMonitor.addMark("FTSScheduleOK", 1)
                    opFile.Status = "Scheduled"
                    self.log.debug("%s has been scheduled for FTS" %
                                   opFile.LFN)
                elif fileID in ftsSchedule["Failed"]:
                    gMonitor.addMark("FTSScheduleFail", 1)
                    opFile.Error = ftsSchedule["Failed"][fileID]
                    if 'sourceSURL equals to targetSURL' in opFile.Error:
                        # In this case there is no need to continue
                        opFile.Status = 'Failed'
                    self.log.warn("unable to schedule %s for FTS: %s" %
                                  (opFile.LFN, opFile.Error))
        else:
            self.log.info("No files to schedule after metadata checks")

        # Just in case some transfers could not be scheduled, try them with RM
        return self.dmTransfer(fromFTS=True)

    def _checkExistingFTS3Operations(self):
        """
       Check if there are ongoing FTS3Operation for the current RMS Operation

       Under some conditions, we can be trying to schedule files while
       there is still an FTS transfer going on. This typically happens
       when the REA hangs. To prevent further race condition, we check
       if there are FTS3Operations in a non Final state matching the
       current operation ID. If so, we put the corresponding files in
       scheduled mode. We will then wait till the FTS3 Operation performs
       the callback

       :returns: S_OK with True if we can go on, False if we should stop the processing
    """

        res = FTS3Client().getOperationsFromRMSOpID(self.operation.OperationID)

        if not res['OK']:
            self.log.debug("Could not get FTS3Operations matching OperationID",
                           self.operation.OperationID)
            return res

        existingFTSOperations = res['Value']
        # It is ok to have FTS Operations in a final state, so we
        # care only about the others
        unfinishedFTSOperations = [
            ops for ops in existingFTSOperations
            if ops.status not in FTS3TransferOperation.FINAL_STATES
        ]

        if not unfinishedFTSOperations:
            self.log.debug("No ongoing FTS3Operations, all good")
            return S_OK(True)

        self.log.warn(
            "Some FTS3Operations already exist for the RMS Operation:",
            [op.operationID for op in unfinishedFTSOperations])

        # This would really be a screwed up situation !
        if len(unfinishedFTSOperations) > 1:
            self.log.warn("That's a serious problem !!")

        # We take the rmsFileID of the files in the Operations,
        # find the corresponding File object, and set them scheduled
        rmsFileIDsToSetScheduled = set([
            ftsFile.rmsFileID for ftsOp in unfinishedFTSOperations
            for ftsFile in ftsOp.ftsFiles
        ])

        for opFile in self.operation:
            # If it is in the DB, it has a FileID
            opFileID = opFile.FileID
            if opFileID in rmsFileIDsToSetScheduled:
                self.log.warn("Setting RMSFile as already scheduled", opFileID)
                opFile.Status = "Scheduled"

        # We return here such that the Request is set back to Scheduled in the DB
        # With no further modification
        return S_OK(False)

    def fts3Transfer(self):
        """ replicate and register using FTS3 """

        self.log.info("scheduling files in FTS3...")

        # Check first if we do not have ongoing transfers

        res = self._checkExistingFTS3Operations()
        if not res['OK']:
            return res

        # if res['Value'] is False
        # it means that there are ongoing transfers
        # and we should stop here
        if res['Value'] is False:
            # return S_OK such that the request is put back
            return S_OK()

        fts3Files = []
        toSchedule = {}

        # Dict which maps the FileID to the object
        rmsFilesIds = {}

        for opFile in self.getWaitingFilesList():
            rmsFilesIds[opFile.FileID] = opFile

            opFile.Error = ''
            gMonitor.addMark("FTSScheduleAtt")
            # # check replicas
            replicas = self._filterReplicas(opFile)
            if not replicas["OK"]:
                continue
            replicas = replicas["Value"]

            validReplicas = replicas["Valid"]
            noMetaReplicas = replicas["NoMetadata"]
            noReplicas = replicas['NoReplicas']
            badReplicas = replicas['Bad']
            noPFN = replicas['NoPFN']

            if validReplicas:
                validTargets = list(
                    set(self.operation.targetSEList) - set(validReplicas))
                if not validTargets:
                    self.log.info("file %s is already present at all targets" %
                                  opFile.LFN)
                    opFile.Status = "Done"
                else:
                    toSchedule[opFile.LFN] = [opFile, validTargets]

            else:
                gMonitor.addMark("FTSScheduleFail")
                if noMetaReplicas:
                    self.log.warn(
                        "unable to schedule '%s', couldn't get metadata at %s"
                        % (opFile.LFN, ','.join(noMetaReplicas)))
                    opFile.Error = "Couldn't get metadata"
                elif noReplicas:
                    self.log.error(
                        "Unable to schedule transfer",
                        "File %s doesn't exist at %s" %
                        (opFile.LFN, ','.join(noReplicas)))
                    opFile.Error = 'No replicas found'
                    opFile.Status = 'Failed'
                elif badReplicas:
                    self.log.error(
                        "Unable to schedule transfer",
                        "File %s, all replicas have a bad checksum at %s" %
                        (opFile.LFN, ','.join(badReplicas)))
                    opFile.Error = 'All replicas have a bad checksum'
                    opFile.Status = 'Failed'
                elif noPFN:
                    self.log.warn(
                        "unable to schedule %s, could not get a PFN at %s" %
                        (opFile.LFN, ','.join(noPFN)))

        res = self._addMetadataToFiles(toSchedule)
        if not res['OK']:
            return res
        else:
            filesToSchedule = res['Value']

            for lfn in filesToSchedule:
                opFile = filesToSchedule[lfn]
                validTargets = toSchedule[lfn][1]
                for targetSE in validTargets:
                    ftsFile = FTS3File.fromRMSFile(opFile, targetSE)
                    fts3Files.append(ftsFile)

        if fts3Files:
            res = Registry.getUsernameForDN(self.request.OwnerDN)
            if not res['OK']:
                self.log.error(
                    "Cannot get username for DN",
                    "%s %s" % (self.request.OwnerDN, res['Message']))
                return res

            username = res['Value']
            fts3Operation = FTS3TransferOperation.fromRMSObjects(
                self.request, self.operation, username)
            fts3Operation.ftsFiles = fts3Files

            ftsSchedule = FTS3Client().persistOperation(fts3Operation)
            if not ftsSchedule["OK"]:
                self.log.error("Completely failed to schedule to FTS3:",
                               ftsSchedule["Message"])
                return ftsSchedule

            # might have nothing to schedule
            ftsSchedule = ftsSchedule["Value"]
            self.log.info("Scheduled with FTS3Operation id %s" % ftsSchedule)

            self.log.info("%d files have been scheduled to FTS3" %
                          len(fts3Files))

            for ftsFile in fts3Files:
                opFile = rmsFilesIds[ftsFile.rmsFileID]
                gMonitor.addMark("FTSScheduleOK", 1)
                opFile.Status = "Scheduled"
                self.log.debug("%s has been scheduled for FTS" % opFile.LFN)
        else:
            self.log.info("No files to schedule after metadata checks")

        # Just in case some transfers could not be scheduled, try them with RM
        return self.dmTransfer(fromFTS=True)

    def dmTransfer(self, fromFTS=False):
        """ replicate and register using dataManager  """
        # # get waiting files. If none just return
        # # source SE
        sourceSE = self.operation.SourceSE if self.operation.SourceSE else None
        if sourceSE:
            # # check source se for read
            bannedSource = self.checkSEsRSS(sourceSE, 'ReadAccess')
            if not bannedSource["OK"]:
                gMonitor.addMark("ReplicateAndRegisterAtt",
                                 len(self.operation))
                gMonitor.addMark("ReplicateFail", len(self.operation))
                return bannedSource

            if bannedSource["Value"]:
                self.operation.Error = "SourceSE %s is banned for reading" % sourceSE
                self.log.info(self.operation.Error)
                return S_OK(self.operation.Error)

        # # check targetSEs for write
        bannedTargets = self.checkSEsRSS()
        if not bannedTargets['OK']:
            gMonitor.addMark("ReplicateAndRegisterAtt", len(self.operation))
            gMonitor.addMark("ReplicateFail", len(self.operation))
            return bannedTargets

        if bannedTargets['Value']:
            self.operation.Error = "%s targets are banned for writing" % ",".join(
                bannedTargets['Value'])
            return S_OK(self.operation.Error)

        # Can continue now
        self.log.verbose("No targets banned for writing")

        waitingFiles = self.getWaitingFilesList()
        if not waitingFiles:
            return S_OK()
        # # loop over files
        if fromFTS:
            self.log.info(
                "Trying transfer using replica manager as FTS failed")
        else:
            self.log.info("Transferring files using Data manager...")
        errors = defaultdict(int)
        delayExecution = 0
        for opFile in waitingFiles:
            if opFile.Error in (
                    "Couldn't get metadata",
                    "File doesn't exist",
                    'No active replica found',
                    "All replicas have a bad checksum",
            ):
                err = "File already in error status"
                errors[err] += 1

            gMonitor.addMark("ReplicateAndRegisterAtt", 1)
            opFile.Error = ''
            lfn = opFile.LFN

            # Check if replica is at the specified source
            replicas = self._filterReplicas(opFile)
            if not replicas["OK"]:
                self.log.error('Failed to check replicas', replicas["Message"])
                continue
            replicas = replicas["Value"]
            validReplicas = replicas.get("Valid")
            noMetaReplicas = replicas.get("NoMetadata")
            noReplicas = replicas.get('NoReplicas')
            badReplicas = replicas.get('Bad')
            noActiveReplicas = replicas.get('NoActiveReplicas')

            if not validReplicas:
                gMonitor.addMark("ReplicateFail")
                if noMetaReplicas:
                    err = "Couldn't get metadata"
                    errors[err] += 1
                    self.log.verbose(
                        "unable to replicate '%s', couldn't get metadata at %s"
                        % (opFile.LFN, ','.join(noMetaReplicas)))
                    opFile.Error = err
                elif noReplicas:
                    err = "File doesn't exist"
                    errors[err] += 1
                    self.log.verbose(
                        "Unable to replicate", "File %s doesn't exist at %s" %
                        (opFile.LFN, ','.join(noReplicas)))
                    opFile.Error = err
                    opFile.Status = 'Failed'
                elif badReplicas:
                    err = "All replicas have a bad checksum"
                    errors[err] += 1
                    self.log.error(
                        "Unable to replicate",
                        "%s, all replicas have a bad checksum at %s" %
                        (opFile.LFN, ','.join(badReplicas)))
                    opFile.Error = err
                    opFile.Status = 'Failed'
                elif noActiveReplicas:
                    err = "No active replica found"
                    errors[err] += 1
                    self.log.verbose(
                        "Unable to schedule transfer", "%s, %s at %s" %
                        (opFile.LFN, err, ','.join(noActiveReplicas)))
                    opFile.Error = err
                    # All source SEs are banned, delay execution by 1 hour
                    delayExecution = 60
                continue
            # # get the first one in the list
            if sourceSE not in validReplicas:
                if sourceSE:
                    err = "File not at specified source"
                    errors[err] += 1
                    self.log.warn(
                        "%s is not at specified sourceSE %s, changed to %s" %
                        (lfn, sourceSE, validReplicas[0]))
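                # NB: sourceSE is rebound here, so the fallback source will be
                # reused for the remaining files in this loop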
                sourceSE = validReplicas[0]

            # # loop over targetSE
            catalogs = self.operation.Catalog
            if catalogs:
                catalogs = [cat.strip() for cat in catalogs.split(',')]

            for targetSE in self.operation.targetSEList:

                # # call DataManager
                if targetSE in validReplicas:
                    self.log.warn(
                        "Request to replicate %s to an existing location: %s" %
                        (lfn, targetSE))
                    continue
                res = self.dm.replicateAndRegister(lfn,
                                                   targetSE,
                                                   sourceSE=sourceSE,
                                                   catalog=catalogs)
                if res["OK"]:

                    if lfn in res["Value"]["Successful"]:

                        if "replicate" in res["Value"]["Successful"][lfn]:

                            repTime = res["Value"]["Successful"][lfn][
                                "replicate"]
                            prString = "file %s replicated at %s in %s s." % (
                                lfn, targetSE, repTime)

                            gMonitor.addMark("ReplicateOK", 1)

                            if "register" in res["Value"]["Successful"][lfn]:

                                gMonitor.addMark("RegisterOK", 1)
                                regTime = res["Value"]["Successful"][lfn][
                                    "register"]
                                prString += ' and registered in %s s.' % regTime
                                self.log.info(prString)
                            else:

                                gMonitor.addMark("RegisterFail", 1)
                                prString += " but failed to register"
                                self.log.warn(prString)

                                opFile.Error = "Failed to register"
                                # # add register replica operation
                                registerOperation = self.getRegisterOperation(
                                    opFile, targetSE, type='RegisterReplica')
                                self.request.insertAfter(
                                    registerOperation, self.operation)

                        else:

                            self.log.error("Failed to replicate",
                                           "%s to %s" % (lfn, targetSE))
                            gMonitor.addMark("ReplicateFail", 1)
                            opFile.Error = "Failed to replicate"

                    else:

                        gMonitor.addMark("ReplicateFail", 1)
                        reason = res["Value"]["Failed"][lfn]
                        self.log.error("Failed to replicate and register",
                                       "File %s at %s:" % (lfn, targetSE),
                                       reason)
                        opFile.Error = reason

                else:

                    gMonitor.addMark("ReplicateFail", 1)
                    opFile.Error = "DataManager error: %s" % res["Message"]
                    self.log.error("DataManager error", res["Message"])

            if not opFile.Error:
                if len(self.operation.targetSEList) > 1:
                    self.log.info(
                        "file %s has been replicated to all targetSEs" % lfn)
                opFile.Status = "Done"
        # Log error counts
        if delayExecution:
            self.log.info("Delay execution of the request by %d minutes" %
                          delayExecution)
            self.request.delayNextExecution(delayExecution)
        for error, count in errors.items():
            self.log.error(error, 'for %d files' % count)

        return S_OK()
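
For context before the next example: a minimal sketch of composing a ReplicateAndRegister request for the handler above and submitting it through the Request Management System. The SE and LFN names are illustrative assumptions, and a configured DIRAC client environment is assumed.

# A minimal sketch (assumptions: a configured DIRAC client environment;
# the SE and LFN names below are illustrative, not real).
from DIRAC.RequestManagementSystem.Client.Request import Request
from DIRAC.RequestManagementSystem.Client.Operation import Operation
from DIRAC.RequestManagementSystem.Client.File import File
from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient

request = Request()
request.RequestName = "replicateAndRegister_example"

operation = Operation()
operation.Type = "ReplicateAndRegister"
operation.TargetSE = "EXAMPLE-DST"  # hypothetical storage element

opFile = File()
opFile.LFN = "/vo/user/example/file.dat"  # hypothetical LFN
operation.addFile(opFile)

request.addOperation(operation)
result = ReqClient().putRequest(request)
print(result)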
Exemplo n.º 15
0
def main():
    Script.parseCommandLine(ignoreErrors=False)

    args = Script.getPositionalArgs()
    if len(args) < 2:
        Script.showHelp()

    targetSE = args.pop(0)

    lfns = []
    for inputFileName in args:
        if os.path.exists(inputFileName):
            inputFile = open(inputFileName, 'r')
            string = inputFile.read()
            inputFile.close()
            lfns.extend([lfn.strip() for lfn in string.splitlines()])
        else:
            lfns.append(inputFileName)

    from DIRAC.Resources.Storage.StorageElement import StorageElement
    import DIRAC
    # Check if the provided SE is OK
    if targetSE != 'All':
        se = StorageElement(targetSE)
        if not se.valid:
            print(se.errorReason)
            print()
            Script.showHelp()

    from DIRAC.RequestManagementSystem.Client.Request import Request
    from DIRAC.RequestManagementSystem.Client.Operation import Operation
    from DIRAC.RequestManagementSystem.Client.File import File
    from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
    from DIRAC.RequestManagementSystem.private.RequestValidator import RequestValidator
    from DIRAC.Resources.Catalog.FileCatalog import FileCatalog

    reqClient = ReqClient()
    fc = FileCatalog()

    requestOperation = 'RemoveReplica'
    if targetSE == 'All':
        requestOperation = 'RemoveFile'

    for lfnList in breakListIntoChunks(lfns, 100):

        oRequest = Request()
        requestName = "%s_%s" % (
            md5(repr(time.time()).encode()).hexdigest()[:16],
            md5(repr(time.time()).encode()).hexdigest()[:16],
        )
        oRequest.RequestName = requestName

        oOperation = Operation()
        oOperation.Type = requestOperation
        oOperation.TargetSE = targetSE

        res = fc.getFileMetadata(lfnList)
        if not res['OK']:
            print("Can't get file metadata: %s" % res['Message'])
            DIRAC.exit(1)
        if res['Value']['Failed']:
            print(
                "Could not get the file metadata of the following, so skipping them:"
            )
            for fFile in res['Value']['Failed']:
                print(fFile)

        lfnMetadata = res['Value']['Successful']

        for lfn in lfnMetadata:
            rarFile = File()
            rarFile.LFN = lfn
            rarFile.Size = lfnMetadata[lfn]['Size']
            rarFile.Checksum = lfnMetadata[lfn]['Checksum']
            rarFile.GUID = lfnMetadata[lfn]['GUID']
            rarFile.ChecksumType = 'ADLER32'
            oOperation.addFile(rarFile)

        oRequest.addOperation(oOperation)

        isValid = RequestValidator().validate(oRequest)
        if not isValid['OK']:
            print("Request is not valid: ", isValid['Message'])
            DIRAC.exit(1)

        result = reqClient.putRequest(oRequest)
        if result['OK']:
            print('Request %d Submitted' % result['Value'])
        else:
            print('Failed to submit Request: ', result['Message'])
Exemplo n.º 16
0
def main():
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument(" SE:   StorageElement|All")
    Script.registerArgument(["LFN:  LFN or file containing a List of LFNs"])
    Script.parseCommandLine(ignoreErrors=False)

    # parseCommandLine show help when mandatory arguments are not specified or incorrect argument
    args = Script.getPositionalArgs()

    targetSE = args.pop(0)

    lfns = []
    for inputFileName in args:
        if os.path.exists(inputFileName):
            with open(inputFileName, "r") as inputFile:
                string = inputFile.read()
            lfns.extend([lfn.strip() for lfn in string.splitlines()])
        else:
            lfns.append(inputFileName)

    from DIRAC.Resources.Storage.StorageElement import StorageElement
    import DIRAC

    # Check if the provided SE is OK
    if targetSE != "All":
        se = StorageElement(targetSE)
        if not se.valid:
            print(se.errorReason)
            print()
            Script.showHelp()

    from DIRAC.RequestManagementSystem.Client.Request import Request
    from DIRAC.RequestManagementSystem.Client.Operation import Operation
    from DIRAC.RequestManagementSystem.Client.File import File
    from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
    from DIRAC.RequestManagementSystem.private.RequestValidator import RequestValidator
    from DIRAC.Resources.Catalog.FileCatalog import FileCatalog

    reqClient = ReqClient()
    fc = FileCatalog()

    requestOperation = "RemoveReplica"
    if targetSE == "All":
        requestOperation = "RemoveFile"

    for lfnList in breakListIntoChunks(lfns, 100):

        oRequest = Request()
        requestName = "%s_%s" % (
            md5(repr(time.time()).encode()).hexdigest()[:16],
            md5(repr(time.time()).encode()).hexdigest()[:16],
        )
        oRequest.RequestName = requestName

        oOperation = Operation()
        oOperation.Type = requestOperation
        oOperation.TargetSE = targetSE

        res = fc.getFileMetadata(lfnList)
        if not res["OK"]:
            print("Can't get file metadata: %s" % res["Message"])
            DIRAC.exit(1)
        if res["Value"]["Failed"]:
            print(
                "Could not get the file metadata of the following, so skipping them:"
            )
            for fFile in res["Value"]["Failed"]:
                print(fFile)

        lfnMetadata = res["Value"]["Successful"]

        for lfn in lfnMetadata:
            rarFile = File()
            rarFile.LFN = lfn
            rarFile.Size = lfnMetadata[lfn]["Size"]
            rarFile.Checksum = lfnMetadata[lfn]["Checksum"]
            rarFile.GUID = lfnMetadata[lfn]["GUID"]
            rarFile.ChecksumType = "ADLER32"
            oOperation.addFile(rarFile)

        oRequest.addOperation(oOperation)

        isValid = RequestValidator().validate(oRequest)
        if not isValid["OK"]:
            print("Request is not valid: ", isValid["Message"])
            DIRAC.exit(1)

        result = reqClient.putRequest(oRequest)
        if result["OK"]:
            print("Request %d Submitted" % result["Value"])
        else:
            print("Failed to submit Request: ", result["Message"])
Exemplo n.º 17
0
class FTSRequest( object ):
  """
  .. class:: FTSRequest

  Helper class for FTS job submission and monitoring.
  """

  # # default checksum type
  __defaultCksmType = "ADLER32"
  # # flag to disable/enable checksum test, default: disabled
  __cksmTest = False

  def __init__( self ):
    """c'tor

    :param self: self reference
    """
    self.log = gLogger.getSubLogger( self.__class__.__name__, True )

    # # final states tuple
    self.finalStates = ( 'Canceled', 'Failed', 'Hold',
                         'Finished', 'FinishedDirty' )
    # # failed states tuple
    self.failedStates = ( 'Canceled', 'Failed',
                          'Hold', 'FinishedDirty' )
    # # successful states tuple
    self.successfulStates = ( 'Finished', 'Done' )
    # # all file states tuple
    self.fileStates = ( 'Done', 'Active', 'Pending', 'Ready', 'Canceled', 'Failed',
                        'Finishing', 'Finished', 'Submitted', 'Hold', 'Waiting' )

    self.newlyCompletedFiles = []
    self.newlyFailedFiles = []

    self.statusSummary = {}

    # # request status
    self.requestStatus = 'Unknown'

    # # dict for FTS job files
    self.fileDict = {}
    # # dict for replicas information
    self.catalogReplicas = {}
    # # dict for metadata information
    self.catalogMetadata = {}
    # # dict for files that failed to register
    self.failedRegistrations = {}

    # # placeholder for FileCatalog reference
    self.oCatalog = None

    # # submit timestamp
    self.submitTime = ''

    # # placeholder FTS job GUID
    self.ftsGUID = ''
    # # placeholder for FTS server URL
    self.ftsServer = ''
    # # not used
    self.priority = 3

    # # flag marking FTS job completeness
    self.isTerminal = False
    # # completeness percentage
    self.percentageComplete = 0.0

    # # source SE name
    self.sourceSE = ''
    # # flag marking source SE validity
    self.sourceValid = False
    # # source space token
    self.sourceToken = ''

    # # target SE name
    self.targetSE = ''
    # # flag marking target SE validity
    self.targetValid = False
    # # target space token
    self.targetToken = ''

    # # placeholder for dump string
    self.dumpStr = ''

    # # placeholder for surl file
    self.surlFile = None

    # # placeholder for target StorageElement
    self.oTargetSE = None
    # # placeholder for source StorageElement
    self.oSourceSE = None

    # # checksum type, set it to default
    self.__cksmType = self.__defaultCksmType
    # # disable checksum test by default
    self.__cksmTest = False


    # # statuses that prevent submitting to FTS
    self.noSubmitStatus = ( 'Failed', 'Done', 'Staging' )

    # # were sources resolved?
    self.sourceResolved = False

    # # Number of file transfers actually submitted
    self.submittedFiles = 0

  ####################################################################
  #
  #  Methods for setting/getting/checking the SEs
  #

  def setSourceSE( self, se ):
    """ set SE for source

    :param self: self reference
    :param str se: source SE name
    """
    if se == self.targetSE:
      return S_ERROR( "SourceSE is TargetSE" )
    self.sourceSE = se
    self.oSourceSE = StorageElement( self.sourceSE )
    return self.__checkSourceSE()

  def getSourceSE( self ):
    """ source SE getter

    :param self: self reference
    """
    if not self.sourceSE:
      return S_ERROR( "Source SE not defined" )
    return S_OK( self.sourceSE )

  def setSourceToken( self, token ):
    """ set source space token

    :param self: self reference
    :param str token: source space token
    """
    self.sourceToken = token
    return S_OK()

  def getSourceToken( self ):
    """ source space token getter

    :param self: self reference
    """
    if not self.sourceToken:
      return S_ERROR( "Source token not defined" )
    return S_OK( self.sourceToken )

  def __checkSourceSE( self ):
    """ check source SE availability

    :param self: self reference
    """
    if not self.sourceSE:
      return S_ERROR( "SourceSE not set" )
    res = self.oSourceSE.isValid( 'Read' )
    if not res['OK']:
      return S_ERROR( "SourceSE not available for reading" )
    res = self.__getSESpaceToken( self.oSourceSE )
    if not res['OK']:
      self.log.error( "FTSRequest failed to get SRM Space Token for SourceSE", res['Message'] )
      return S_ERROR( "SourceSE does not support FTS transfers" )
    # # keep the space token before res is reused by the checksum query below
    self.sourceToken = res['Value']

    if self.__cksmTest:
      res = self.oSourceSE.getChecksumType()
      if not res["OK"]:
        self.log.error( "Unable to get checksum type for SourceSE %s: %s" % ( self.sourceSE,
                                                                              res["Message"] ) )
      else:
        cksmType = res["Value"]
        if cksmType in ( "NONE", "NULL" ):
          self.log.warn( "Checksum type set to %s at SourceSE %s, disabling checksum test" % ( cksmType,
                                                                                               self.sourceSE ) )
          self.__cksmTest = False
        elif cksmType != self.__cksmType:
          self.log.warn( "Checksum type mismatch, disabling checksum test" )
          self.__cksmTest = False

    self.sourceValid = True
    return S_OK()

  def setTargetSE( self, se ):
    """ set target SE

    :param self: self reference
    :param str se: target SE name
    """
    if se == self.sourceSE:
      return S_ERROR( "TargetSE is SourceSE" )
    self.targetSE = se
    self.oTargetSE = StorageElement( self.targetSE )
    return self.__checkTargetSE()

  def getTargetSE( self ):
    """ target SE getter

    :param self: self reference
    """
    if not self.targetSE:
      return S_ERROR( "Target SE not defined" )
    return S_OK( self.targetSE )

  def setTargetToken( self, token ):
    """ target space token setter

    :param self: self reference
    :param str token: target space token
    """
    self.targetToken = token
    return S_OK()

  def getTargetToken( self ):
    """ target space token getter

    :param self: self reference
    """
    if not self.targetToken:
      return S_ERROR( "Target token not defined" )
    return S_OK( self.targetToken )

  def __checkTargetSE( self ):
    """ check target SE availability

    :param self: self reference
    """
    if not self.targetSE:
      return S_ERROR( "TargetSE not set" )
    res = self.oTargetSE.isValid( 'Write' )
    if not res['OK']:
      return S_ERROR( "TargetSE not available for writing" )
    res = self.__getSESpaceToken( self.oTargetSE )
    if not res['OK']:
      self.log.error( "FTSRequest failed to get SRM Space Token for TargetSE", res['Message'] )
      return S_ERROR( "TargetSE does not support FTS transfers" )
    # # keep the space token before res is reused by the checksum query below
    self.targetToken = res['Value']

    # # check checksum types
    if self.__cksmTest:
      res = self.oTargetSE.getChecksumType()
      if not res["OK"]:
        self.log.error( "Unable to get checksum type for TargetSE %s: %s" % ( self.targetSE,
                                                                              res["Message"] ) )
      else:
        cksmType = res["Value"]
        if cksmType in ( "NONE", "NULL" ):
          self.log.warn( "Checksum type set to %s at TargetSE %s, disabling checksum test" % ( cksmType,
                                                                                               self.targetSE ) )
          self.__cksmTest = False
        elif cksmType != self.__cksmType:
          self.log.warn( "Checksum type mismatch, disabling checksum test" )
          self.__cksmTest = False

    self.targetValid = True
    return S_OK()

  @staticmethod
  def __getSESpaceToken( oSE ):
    """ get space token from StorageElement instance

    :param StorageElement oSE: StorageElement instance
    """
    res = oSE.getStorageParameters( "SRM2" )
    if not res['OK']:
      return res
    return S_OK( res['Value'].get( 'SpaceToken' ) )

  ####################################################################
  #
  #  Methods for setting/getting FTS request parameters
  #

  def setFTSGUID( self, guid ):
    """ FTS job GUID setter

    :param self: self reference
    :param str guid: string containing GUID
    """
    if not checkGuid( guid ):
      return S_ERROR( "Incorrect GUID format" )
    self.ftsGUID = guid
    return S_OK()

  def getFTSGUID( self ):
    """ FTS job GUID getter

    :param self: self reference
    """
    if not self.ftsGUID:
      return S_ERROR( "FTSGUID not set" )
    return S_OK( self.ftsGUID )

  def setFTSServer( self, server ):
    """ FTS server setter

    :param self: self reference
    :param str server: FTS server URL
    """
    self.ftsServer = server
    return S_OK()

  def getFTSServer( self ):
    """ FTS server getter

    :param self: self reference
    """
    if not self.ftsServer:
      return S_ERROR( "FTSServer not set" )
    return S_OK( self.ftsServer )

  def setPriority( self, priority ):
    """ set priority for FTS job

    :param self: self reference
    :param int priority: a new priority
    """
    if not type( priority ) in ( IntType, LongType ):
      return S_ERROR( "Priority must be integer" )
    if priority < 0:
      priority = 0
    elif priority > 5:
      priority = 5
    self.priority = priority
    return S_OK( self.priority )

  def getPriority( self ):
    """ FTS job priority getter

    :param self: self reference
    """
    return S_OK( self.priority )

  def getPercentageComplete( self ):
    """ get completeness percentage

    :param self: self reference
    """
    completedFiles = 0
    totalFiles = 0
    for state in self.statusSummary:
      if state in self.successfulStates:
        completedFiles += self.statusSummary[state]
      totalFiles += self.statusSummary[state]
    # guard against an empty status summary
    if totalFiles:
      self.percentageComplete = ( float( completedFiles ) * 100.0 ) / float( totalFiles )
    return S_OK( self.percentageComplete )

  def isRequestTerminal( self ):
    """ check if FTS job has terminated

    :param self: self reference
    """
    if self.requestStatus in self.finalStates:
      self.isTerminal = True
    return S_OK( self.isTerminal )

  def getStatus( self ):
    """ get FTS job status

    :param self: self reference
    """
    return S_OK( self.requestStatus )


  def setCksmType( self, cksm = None ):
    """ set checksum type to use

    :param self: self reference
    :param mixed cksm: checksum type, should be one of 'Adler32', 'md5', 'sha1', None
    """
    if str( cksm ).upper() not in ( "ADLER32", "MD5", "SHA1", "NONE" ):
      return S_ERROR( "Not supported checksum type: %s" % str( cksm ) )
    if not cksm:
      self.__cksmType = None
      return S_OK( False )
    self.__cksmType = str( cksm ).upper()
    return S_OK( True )

  def getCksmType( self ):
    """ get checksum type

    :param self: self reference
    """
    return S_OK( self.__cksmType )

  def setCksmTest( self, cksmTest = False ):
    """ set cksm test

    :param self: self reference
    :param bool cksmTest: flag to enable/disable checksum test
    """
    self.__cksmTest = bool( cksmTest )
    return S_OK( self.__cksmTest )

  def getCksmTest( self ):
    """ get cksm test flag

    :param self: self reference
    """
    return S_OK( self.__cksmTest )

  ####################################################################
  #
  #  Methods for setting/getting/checking files and their metadata
  #

  def setLFN( self, lfn ):
    """ add LFN :lfn: to :fileDict:

    :param self: self reference
    :param str lfn: LFN to add to :fileDict:
    """
    self.fileDict.setdefault( lfn, {'Status':'Waiting'} )
    return S_OK()

  def setStatus( self, lfn, status ):
    """ set status of a file """
    return( self.__setFileParameter( lfn, 'Status', status ) )

  def setSourceSURL( self, lfn, surl ):
    """ source SURL setter

    :param self: self reference
    :param str lfn: LFN
    :param str surl: source SURL
    """
    target = self.fileDict[lfn].get( 'Target' )
    if target == surl:
      return S_ERROR( "Source and target the same" )
    return( self.__setFileParameter( lfn, 'Source', surl ) )

  def getSourceSURL( self, lfn ):
    """ get source SURL for LFN :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Source' )

  def setTargetSURL( self, lfn, surl ):
    """ set target SURL for LFN :lfn:

    :param self: self reference
    :param str lfn: LFN
    :param str surl: target SURL
    """
    source = self.fileDict[lfn].get( 'Source' )
    if source == surl:
      return S_ERROR( "Source and target the same" )
    return( self.__setFileParameter( lfn, 'Target', surl ) )

  def getTargetSURL( self, lfn ):
    """ target SURL getter

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Target' )

  def getFailReason( self, lfn ):
    """ get fail reason for file :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Reason' )

  def getRetries( self, lfn ):
    """ get number of attepmts made to transfer file :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Retries' )

  def getTransferTime( self, lfn ):
    """ get duration of transfer for file :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Duration' )

  def getFailed( self ):
    """ get list of wrongly transferred LFNs

    :param self: self reference
    """
    return S_OK( [ lfn for lfn in self.fileDict
                   if self.fileDict[lfn].get( 'Status', '' ) in self.failedStates ] )

  def getStaging( self ):
    """ get files set for prestaging """
    return S_OK( [lfn for lfn in self.fileDict
                  if self.fileDict[lfn].get( 'Status', '' ) == 'Staging'] )

  def getDone( self ):
    """ get list of succesfully transferred LFNs

    :param self: self reference
    """
    return S_OK( [ lfn for lfn in self.fileDict
                   if self.fileDict[lfn].get( 'Status', '' ) in self.successfulStates ] )

  def __setFileParameter( self, lfn, paramName, paramValue ):
    """ set :paramName: to :paramValue: for :lfn: file

    :param self: self reference
    :param str lfn: LFN
    :param str paramName: parameter name
    :param mixed paramValue: a new parameter value
    """
    self.setLFN( lfn )
    self.fileDict[lfn][paramName] = paramValue
    return S_OK()

  def __getFileParameter( self, lfn, paramName ):
    """ get value of :paramName: for file :lfn:

    :param self: self reference
    :param str lfn: LFN
    :param str paramName: parameter name
    """
    if lfn not in self.fileDict:
      return S_ERROR( "Supplied file not set" )
    if paramName not in self.fileDict[lfn]:
      return S_ERROR( "%s not set for file" % paramName )
    return S_OK( self.fileDict[lfn][paramName] )

  ####################################################################
  #
  #  Methods for submission
  #

  def submit( self, monitor = False, printOutput = True ):
    """ submit FTS job

    :param self: self reference
    :param bool monitor: flag to monitor progress of FTS job
    :param bool printOutput: flag to print output of execution to stdout
    """
    res = self.__isSubmissionValid()
    if not res['OK']:
      return res
    res = self.__createSURLPairFile()
    if not res['OK']:
      return res
    res = self.__submitFTSTransfer()
    if not res['OK']:
      return res
    resDict = { 'ftsGUID' : self.ftsGUID, 'ftsServer' : self.ftsServer, 'submittedFiles' : self.submittedFiles }
    if monitor or printOutput:
      gLogger.always( "Submitted %s@%s" % ( self.ftsGUID, self.ftsServer ) )
      if monitor:
        self.monitor( untilTerminal = True, printOutput = printOutput )
    return S_OK( resDict )

  def __isSubmissionValid( self ):
    """ check validity of job before submission

    :param self: self reference
    """
    if not self.fileDict:
      return S_ERROR( "No files set" )
    if not self.sourceValid:
      return S_ERROR( "SourceSE not valid" )
    if not self.targetValid:
      return S_ERROR( "TargetSE not valid" )
    if not self.ftsServer:
      res = self.__resolveFTSServer()
      if not res['OK']:
        return S_ERROR( "FTSServer not valid" )
    self.resolveSource()
    self.resolveTarget()
    res = self.__filesToSubmit()
    if not res['OK']:
      return S_ERROR( "No files to submit" )
    return S_OK()

  def __getCatalogObject( self ):
    """ CatalogInterface instance facade

    :param self: self reference
    """
    try:
      if not self.oCatalog:
        self.oCatalog = FileCatalog()
      return S_OK()
    except Exception as error:
      return S_ERROR( "Failed to instantiate FileCatalog: %s" % error )

  def __updateReplicaCache( self, lfns = None, overwrite = False ):
    """ update replica cache for list of :lfns:

    :param self: self reference
    :param mixed lfns: list of LFNs
    :param bool overwrite: flag to trigger cache clearing and updating
    """
    if not lfns:
      lfns = self.fileDict.keys()
    toUpdate = [ lfn for lfn in lfns if ( lfn not in self.catalogReplicas ) or overwrite ]
    if not toUpdate:
      return S_OK()
    res = self.__getCatalogObject()
    if not res['OK']:
      return res
    res = self.oCatalog.getReplicas( toUpdate )
    if not res['OK']:
      return S_ERROR( "Failed to update replica cache: %s" % res['Message'] )
    for lfn, error in res['Value']['Failed'].items():
      self.__setFileParameter( lfn, 'Reason', error )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
    for lfn, replicas in res['Value']['Successful'].items():
      self.catalogReplicas[lfn] = replicas
    return S_OK()

  def __updateMetadataCache( self, lfns = None, overwrite = False ):
    """ update metadata cache for list of LFNs

    :param self: self reference
    :param list lfns: list of LFNs
    :param bool overwrite: flag to trigger cache clearing and updating
    """
    if not lfns:
      lfns = self.fileDict.keys()
    toUpdate = [ lfn for lfn in lfns if ( lfn not in self.catalogMetadata ) or overwrite ]
    if not toUpdate:
      return S_OK()
    res = self.__getCatalogObject()
    if not res['OK']:
      return res
    res = self.oCatalog.getFileMetadata( toUpdate )
    if not res['OK']:
      return S_ERROR( "Failed to get source catalog metadata: %s" % res['Message'] )
    for lfn, error in res['Value']['Failed'].items():
      self.__setFileParameter( lfn, 'Reason', error )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
    for lfn, metadata in res['Value']['Successful'].items():
      self.catalogMetadata[lfn] = metadata
    return S_OK()

  def resolveSource( self ):
    """ resolve source SE eligible for submission

    :param self: self reference
    """

    # Avoid resolving sources twice
    if self.sourceResolved:
      return S_OK()
    # Only resolve files that need a transfer
    toResolve = [ lfn for lfn in self.fileDict if self.fileDict[lfn].get( "Status", "" ) != "Failed" ]
    if not toResolve:
      return S_OK()
    res = self.__updateMetadataCache( toResolve )
    if not res['OK']:
      return res
    res = self.__updateReplicaCache( toResolve )
    if not res['OK']:
      return res

    # Define the source URLs
    for lfn in toResolve:
      replicas = self.catalogReplicas.get( lfn, {} )
      if self.sourceSE not in replicas:
        gLogger.warn( "resolveSource: skipping %s - not replicas at SourceSE %s" % ( lfn, self.sourceSE ) )
        self.__setFileParameter( lfn, 'Reason', "No replica at SourceSE" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue
      # Fix first the PFN
      pfn = self.oSourceSE.getPfnForLfn( lfn ).get( 'Value', {} ).get( 'Successful', {} ).get( lfn, replicas[self.sourceSE] )
      res = Utils.executeSingleFileOrDirWrapper( self.oSourceSE.getPfnForProtocol( pfn, protocol = 'SRM2', withPort = True ) )
      if not res['OK']:
        gLogger.warn( "resolveSource: skipping %s - %s" % ( lfn, res["Message"] ) )
        self.__setFileParameter( lfn, 'Reason', res['Message'] )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue
      res = self.setSourceSURL( lfn, res['Value'] )
      if not res['OK']:
        gLogger.warn( "resolveSource: skipping %s - %s" % ( lfn, res["Message"] ) )
        self.__setFileParameter( lfn, 'Reason', res['Message'] )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue

    toResolve = {}
    for lfn in self.fileDict:
      if "Source" in self.fileDict[lfn]:
        toResolve[self.fileDict[lfn]['Source']] = lfn
    if not toResolve:
      return S_ERROR( "No eligible Source files" )

    # Get metadata of the sources, to check for existence, availability and caching
    res = self.oSourceSE.getFileMetadata( toResolve.keys() )
    if not res['OK']:
      return S_ERROR( "Failed to check source file metadata" )

    for pfn, error in res['Value']['Failed'].items():
      lfn = toResolve[pfn]
      if re.search( 'File does not exist', error ):
        gLogger.warn( "resolveSource: skipping %s - source file does not exists" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source file does not exist" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      else:
        gLogger.warn( "resolveSource: skipping %s - failed to get source metadata" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Failed to get Source metadata" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
    toStage = []

    nbStagedFiles = 0
    for pfn, metadata in res['Value']['Successful'].items():
      lfn = toResolve[pfn]
      lfnStatus = self.fileDict.get( lfn, {} ).get( 'Status' )
      if metadata['Unavailable']:
        gLogger.warn( "resolveSource: skipping %s - source file unavailable" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source file Unavailable" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      elif metadata['Lost']:
        gLogger.warn( "resolveSource: skipping %s - source file lost" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source file Lost" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      elif not metadata['Cached']:
        if lfnStatus != 'Staging':
          toStage.append( pfn )
      elif metadata['Size'] != self.catalogMetadata[lfn]['Size']:
        gLogger.warn( "resolveSource: skipping %s - source file size mismatch" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source size mismatch" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      elif self.catalogMetadata[lfn]['Checksum'] and metadata['Checksum'] and \
            not ( compareAdler( metadata['Checksum'], self.catalogMetadata[lfn]['Checksum'] ) ):
        gLogger.warn( "resolveSource: skipping %s - source file checksum mismatch" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source checksum mismatch" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      elif lfnStatus == 'Staging':
        # file that was staging is now cached
        self.__setFileParameter( lfn, 'Status', 'Waiting' )
        nbStagedFiles += 1

    # Some files were being staged
    if nbStagedFiles:
      self.log.info( 'resolveSource: %d files have been staged' % nbStagedFiles )

    # Launching staging of files not in cache
    if toStage:
      gLogger.warn( "resolveSource: %s source files not cached, prestaging..." % len( toStage ) )
      stage = self.oSourceSE.prestageFile( toStage )
      if not stage["OK"]:
        gLogger.error( "resolveSource: error is prestaging - %s" % stage["Message"] )
        for pfn in toStage:
          lfn = toResolve[pfn]
          self.__setFileParameter( lfn, 'Reason', stage["Message"] )
          self.__setFileParameter( lfn, 'Status', 'Failed' )
      else:
        for pfn in toStage:
          lfn = toResolve[pfn]
          if pfn in stage['Value']['Successful']:
            self.__setFileParameter( lfn, 'Status', 'Staging' )
          elif pfn in stage['Value']['Failed']:
            self.__setFileParameter( lfn, 'Reason', stage['Value']['Failed'][pfn] )
            self.__setFileParameter( lfn, 'Status', 'Failed' )

    self.sourceResolved = True
    return S_OK()

  def resolveTarget( self ):
    """ find target SE eligible for submission

    :param self: self reference
    """
    toResolve = [ lfn for lfn in self.fileDict
                 if self.fileDict[lfn].get( 'Status' ) not in self.noSubmitStatus ]
    if not toResolve:
      return S_OK()
    res = self.__updateReplicaCache( toResolve )
    if not res['OK']:
      return res
    for lfn in toResolve:
      res = self.oTargetSE.getPfnForLfn( lfn )
      if not res['OK'] or lfn not in res['Value']['Successful']:
        gLogger.warn( "resolveTarget: skipping %s - failed to create target pfn" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Failed to create Target" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue
      pfn = res['Value']['Successful'][lfn]
      res = self.oTargetSE.getPfnForProtocol( pfn, protocol = 'SRM2', withPort = True )
      if not res['OK'] or pfn not in res['Value']['Successful']:
        reason = res.get( 'Message', res.get( 'Value', {} ).get( 'Failed', {} ).get( pfn ) )
        gLogger.warn( "resolveTarget: skipping %s - %s" % ( lfn, reason ) )
        self.__setFileParameter( lfn, 'Reason', reason )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue
      pfn = res['Value']['Successful'][pfn]
      res = self.setTargetSURL( lfn, pfn )
      if not res['OK']:
        gLogger.warn( "resolveTarget: skipping %s - %s" % ( lfn, res["Message"] ) )
        self.__setFileParameter( lfn, 'Reason', res['Message'] )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue
    toResolve = {}
    for lfn in self.fileDict:
      if "Target" in self.fileDict[lfn]:
        toResolve[self.fileDict[lfn]['Target']] = lfn
    if not toResolve:
      return S_ERROR( "No eligible Target files" )
    res = self.oTargetSE.exists( toResolve.keys() )
    if not res['OK']:
      return S_ERROR( "Failed to check target existence" )
    for pfn, error in res['Value']['Failed'].items():
      lfn = toResolve[pfn]
      self.__setFileParameter( lfn, 'Reason', error )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
    toRemove = []
    for pfn, exists in res['Value']['Successful'].items():
      if exists:
        lfn = toResolve[pfn]
        res = self.getSourceSURL( lfn )
        if not res['OK']:
          gLogger.warn( "resolveTarget: skipping %s - target exists" % lfn )
          self.__setFileParameter( lfn, 'Reason', "Target exists" )
          self.__setFileParameter( lfn, 'Status', 'Failed' )
        elif res['Value'] == pfn:
          gLogger.warn( "resolveTarget: skipping %s - source and target pfns are the same" % lfn )
          self.__setFileParameter( lfn, 'Reason', "Source and Target the same" )
          self.__setFileParameter( lfn, 'Status', 'Failed' )
        else:
          toRemove.append( pfn )
    if toRemove:
      self.oTargetSE.removeFile( toRemove )
    return S_OK()

  def __filesToSubmit( self ):
    """
    check if there is at least one file to submit

    :return: S_OK if at least one file is present, S_ERROR otherwise
    """
    for lfn in self.fileDict:
      lfnStatus = self.fileDict[lfn].get( 'Status' )
      source = self.fileDict[lfn].get( 'Source' )
      target = self.fileDict[lfn].get( 'Target' )
      if lfnStatus not in self.noSubmitStatus and source and target:
        return S_OK()
    return S_ERROR()

  def __createSURLPairFile( self ):
    """ create LFNs file for glite-transfer-submit command

    This file consists of one line for each file to be transferred:

    sourceSURL targetSURL [CHECKSUMTYPE:CHECKSUM]

    :param self: self reference
    """
    fd, fileName = tempfile.mkstemp()
    surlFile = os.fdopen( fd, 'w' )
    for lfn in self.fileDict:
      lfnStatus = self.fileDict[lfn].get( 'Status' )
      source = self.fileDict[lfn].get( 'Source' )
      target = self.fileDict[lfn].get( 'Target' )
      if lfnStatus not in self.noSubmitStatus and source and target:
        cksmStr = ""
        # # add cksmType:cksm only if cksmType is specified, otherwise let FTS decide by itself
        if self.__cksmTest and self.__cksmType:
          checkSum = self.catalogMetadata.get( lfn, {} ).get( 'Checksum' )
          if checkSum:
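            # # round-trip the stored checksum through int to normalise its hex representation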
            cksmStr = " %s:%s" % ( self.__cksmType, intAdlerToHex( hexAdlerToInt( checkSum ) ) )
        surlFile.write( "%s %s%s\n" % ( source, target, cksmStr ) )
        self.submittedFiles += 1
    surlFile.close()
    self.surlFile = fileName
    return S_OK()

  def __submitFTSTransfer( self ):
    """ create and execute glite-transfer-submit CLI command

    :param self: self reference
    """
    comm = [ 'glite-transfer-submit', '-s', self.ftsServer, '-f', self.surlFile, '-o' ]
    if self.targetToken:
      comm += [ '-t', self.targetToken ]
    if self.sourceToken:
      comm += [ '-S', self.sourceToken ]
    if self.__cksmTest:
      comm.append( "--compare-checksums" )
    gLogger.verbose( 'Executing %s' % ' '.join( comm ) )
    res = executeGridCommand( '', comm )
    os.remove( self.surlFile )
    if not res['OK']:
      return res
    returnCode, output, errStr = res['Value']
    if returnCode != 0:
      return S_ERROR( errStr )
    guid = output.replace( '\n', '' )
    if not checkGuid( guid ):
      return S_ERROR( 'Wrong GUID format returned' )
    self.ftsGUID = guid
    # if self.priority != 3:
    #  comm = ['glite-transfer-setpriority','-s', self.ftsServer,self.ftsGUID,str(self.priority)]
    #  executeGridCommand('',comm)
    return res

  def __getFTSServer( self, site ):
    try:
      configPath = '/Resources/FTSEndpoints/%s' % site
      endpointURL = gConfig.getValue( configPath )
      if not endpointURL:
        errStr = "FTSRequest.__getFTSServer: Failed to find FTS endpoint, check CS entry for '%s'." % site
        return S_ERROR( errStr )
      return S_OK( endpointURL )
    except Exception:
      return S_ERROR( 'FTSRequest.__getFTSServer: Failed to obtain endpoint details from CS' )
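
For illustration, the SURL pair file written by __createSURLPairFile above has one line per transfer: source SURL, target SURL, and an optional CHECKSUMTYPE:CHECKSUM column. A self-contained sketch producing such a file, with made-up SURLs and checksum values:

import os
import tempfile

# Illustrative transfers only; real SURLs come from the storage elements.
transfers = [
    ("srm://src.example/a", "srm://dst.example/a", "ADLER32:0a1b2c3d"),
    ("srm://src.example/b", "srm://dst.example/b", None),
]

fd, surlFileName = tempfile.mkstemp()
with os.fdopen(fd, "w") as surlFile:
    for source, target, cksm in transfers:
        cksmStr = " %s" % cksm if cksm else ""
        surlFile.write("%s %s%s\n" % (source, target, cksmStr))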
Exemplo n.º 18
0
from DIRAC.RequestManagementSystem.Client.Operation import Operation
from DIRAC.RequestManagementSystem.Client.File import File
from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
from DIRAC.Resources.Catalog.FileCatalog import FileCatalog
from DIRAC.Core.Utilities.List import breakListIntoChunks

lfnChunks = breakListIntoChunks( lfnList, 100 )
multiRequests = len( lfnChunks ) > 1

error = 0
count = 0
reqClient = ReqClient()
fc = FileCatalog()
requestIDs = []
for lfnChunk in lfnChunks:
  metaDatas = fc.getFileMetadata( lfnChunk )
  if not metaDatas["OK"]:
    gLogger.error( "unable to read metadata for lfns: %s" % metaDatas["Message"] )
    error = -1
    continue
  metaDatas = metaDatas["Value"]
  for failedLFN, reason in metaDatas["Failed"].items():
    gLogger.error( "skipping %s: %s" % ( failedLFN, reason ) )
  lfnChunk = set( metaDatas["Successful"] )

  if not lfnChunk:
    gLogger.error( "LFN list is empty!!!" )
    error = -1
    continue

  if len( lfnChunk ) > Operation.MAX_FILES:
Exemplo n.º 19
0
class InputDataAgent( OptimizerModule ):
  """
      The specific Optimizer must provide the following methods:
      - initializeOptimizer() before each execution cycle
      - checkJob() - the main method called for each job
  """

  #############################################################################
  def initializeOptimizer( self ):
    """Initialize specific parameters for JobSanityAgent.
    """
    self.failedMinorStatus = self.am_getOption( '/FailedJobStatus', 'Input Data Not Available' )
    #this will ignore failover SE files
    self.checkFileMetadata = self.am_getOption( 'CheckFileMetadata', True )

    self.dataManager = DataManager()
    self.resourceStatus = ResourceStatus()
    self.fc = FileCatalog()

    self.seToSiteMapping = {}
    self.lastCScheck = 0
    self.cacheLength = 600

    return S_OK()

  #############################################################################
  def checkJob( self, job, classAdJob ):
    """
    This method does the optimization corresponding to this Agent, 
    it is call for each job by the Optimizer framework
    """

    result = self.jobDB.getInputData( job )
    if not result['OK']:
      self.log.warn( 'Failed to get input data from JobDB for %s' % ( job ) )
      self.log.warn( result['Message'] )
      return result
    if not result['Value']:
      self.log.verbose( 'Job %s has no input data requirement' % ( job ) )
      return self.setNextOptimizer( job )

    # Check if we already executed this Optimizer and the input data is resolved
    res = self.getOptimizerJobInfo( job, self.am_getModuleParam( 'optimizerName' ) )
    if not ( res['OK'] and res['Value'] ):
      self.log.verbose( 'Job %s has an input data requirement and will be processed' % ( job ) )
      inputData = result['Value']
      result = self.__resolveInputData( job, inputData )
      if not result['OK']:
        self.log.warn( result['Message'] )
        return result

    return self.setNextOptimizer( job )

  #############################################################################
  def __resolveInputData( self, job, inputData ):
    """This method checks the file catalog for replica information.
    """
    lfns = [ fname.replace( 'LFN:', '' ) for fname in inputData ]

    start = time.time()
    # In order to place jobs on Hold if a certain SE is banned, we first need to
    # check whether the replicas are really available
    replicas = self.dataManager.getActiveReplicas( lfns )
    timing = time.time() - start
    self.log.verbose( 'Catalog Replicas Lookup Time: %.2f seconds ' % ( timing ) )
    if not replicas['OK']:
      self.log.warn( replicas['Message'] )
      return replicas

    replicaDict = replicas['Value']

    siteCandidates = self.__checkReplicas( job, replicaDict )

    if not siteCandidates['OK']:
      self.log.warn( siteCandidates['Message'] )
      return siteCandidates

    if self.checkFileMetadata:
      guids = True
      start = time.time()
      guidDict = self.fc.getFileMetadata( lfns )
      timing = time.time() - start
      self.log.info( 'Catalog Metadata Lookup Time: %.2f seconds ' % ( timing ) )

      if not guidDict['OK']:
        self.log.warn( guidDict['Message'] )
        guids = False

      failed = guidDict['Value']['Failed']
      if failed:
        self.log.warn( 'Failed to establish some GUIDs' )
        self.log.warn( failed )
        guids = False

      if guids:
        for lfn, reps in replicaDict['Successful'].items():
          guidDict['Value']['Successful'][lfn].update( reps )
        replicas = guidDict

    resolvedData = {}
    resolvedData['Value'] = replicas
    resolvedData['SiteCandidates'] = siteCandidates['Value']
    result = self.setOptimizerJobInfo( job, self.am_getModuleParam( 'optimizerName' ), resolvedData )
    if not result['OK']:
      self.log.warn( result['Message'] )
      return result
    return S_OK( resolvedData )

  #############################################################################
  def __checkReplicas( self, job, replicaDict ):
    """Check that all input lfns have valid replicas and can all be found at least in one single site.
    """
    badLFNs = []

    if 'Successful' in replicaDict:
      for lfn, reps in replicaDict['Successful'].items():
        if not reps:
          badLFNs.append( 'LFN:%s Problem: No replicas available' % ( lfn ) )
    else:
      return S_ERROR( 'No replica Info available' )

    if 'Failed' in replicaDict:
      for lfn, cause in replicaDict['Failed'].items():
        badLFNs.append( 'LFN:%s Problem: %s' % ( lfn, cause ) )

    if badLFNs:
      self.log.info( 'Found %s problematic LFN(s) for job %s' % ( len( badLFNs ), job ) )
      param = '\n'.join( badLFNs )
      self.log.info( param )
      result = self.setJobParam( job, self.am_getModuleParam( 'optimizerName' ), param )
      if not result['OK']:
        self.log.error( result['Message'] )
      return S_ERROR( 'Input Data Not Available' )

    return self.__getSiteCandidates( replicaDict['Successful'] )

  #############################################################################
  # FIXME: right now this is unused...
  def __checkActiveSEs( self, job, replicaDict ):
    """
    Check active SE and replicas and identify possible Site candidates for 
    the execution of the job
    """
    # Now let's check if some replicas might not be available due to banned SE's
    activeReplicas = self.dataManager.checkActiveReplicas( replicaDict )
    if not activeReplicas['OK']:
      # due to banned SEs the input data might not be available
      msg = "On Hold: Missing replicas due to banned SE"
      self.log.info( msg )
      self.log.warn( activeReplicas['Message'] )
      return S_ERROR( msg )

    activeReplicaDict = activeReplicas['Value']

    siteCandidates = self.__checkReplicas( job, activeReplicaDict )

    if not siteCandidates['OK']:
      # due to banned SEs the input data is not available at any single site
      msg = "On Hold: Input data not Available due to banned SE"
      self.log.info( msg )
      self.log.warn( siteCandidates['Message'] )
      return S_ERROR( msg )

    resolvedData = {}
    resolvedData['Value'] = activeReplicas
    resolvedData['SiteCandidates'] = siteCandidates['Value']
    result = self.setOptimizerJobInfo( job, self.am_getModuleParam( 'optimizerName' ), resolvedData )
    if not result['OK']:
      self.log.warn( result['Message'] )
      return result
    return S_OK( resolvedData )


  #############################################################################
  def __getSitesForSE( self, se ):
    """ Returns a list of sites having the given SE as a local one.
        Uses the local cache of the site-se information
    """

    # Empty the cache if too old
    if ( time.time() - self.lastCScheck ) > self.cacheLength:
      self.log.verbose( 'Resetting the SE to site mapping cache' )
      self.seToSiteMapping = {}
      self.lastCScheck = time.time()

    if se not in self.seToSiteMapping:
      sites = getSitesForSE( se )
      if sites['OK']:
        self.seToSiteMapping[se] = list( sites['Value'] )
      return sites
    else:
      return S_OK( self.seToSiteMapping[se] )

  #############################################################################
  def __getSiteCandidates( self, inputData ):
    """This method returns a list of possible site candidates based on the
       job input data requirement.  For each site candidate, the number of files
       on disk and tape is resolved.
    """

    fileSEs = {}
    for lfn, replicas in inputData.items():
      siteList = []
      for se in replicas.keys():
        sites = self.__getSitesForSE( se )
        if sites['OK']:
          siteList += sites['Value']
      fileSEs[lfn] = uniqueElements( siteList )

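    # intersect the per-file site lists: a site remains a candidate only if it can serve every input file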
    siteCandidates = []
    i = 0
    for _fileName, sites in fileSEs.items():
      if not i:
        siteCandidates = sites
      else:
        tempSite = []
        for site in siteCandidates:
          if site in sites:
            tempSite.append( site )
        siteCandidates = tempSite
      i += 1

    if not len( siteCandidates ):
      return S_ERROR( 'No candidate sites available' )

    #In addition, check number of files on tape and disk for each site
    #for optimizations during scheduling
    siteResult = {}
    for site in siteCandidates:
      siteResult[site] = { 'disk': [], 'tape': [] }

    seDict = {}
    for lfn, replicas in inputData.items():
      for se in replicas.keys():
        if se not in seDict:
          sites = self.__getSitesForSE( se )
          if not sites['OK']:
            continue
          try:
            #storageElement = StorageElement( se )
            result = self.resourceStatus.getStorageElementStatus( se, statusType = 'ReadAccess' )
            if not result['OK']:
              continue
            seDict[se] = { 'Sites': sites['Value'], 'SEParams': result['Value'][se] }
            result = getStorageElementOptions( se )
            if not result['OK']:
              continue
            seDict[se]['SEParams'].update(result['Value'])
          except Exception:
            self.log.exception( 'Failed to instantiate StorageElement( %s )' % se )
            continue
        for site in seDict[se]['Sites']:
          if site in siteCandidates:
            if seDict[se]['SEParams']['ReadAccess'] and seDict[se]['SEParams']['DiskSE']:
              if lfn not in siteResult[site]['disk']:
                siteResult[site]['disk'].append( lfn )
                if lfn in siteResult[site]['tape']:
                  siteResult[site]['tape'].remove( lfn )
            if seDict[se]['SEParams']['ReadAccess'] and seDict[se]['SEParams']['TapeSE']:
              if lfn not in siteResult[site]['tape'] and lfn not in siteResult[site]['disk']:
                siteResult[site]['tape'].append( lfn )

    for site in siteResult:
      siteResult[site]['disk'] = len( siteResult[site]['disk'] )
      siteResult[site]['tape'] = len( siteResult[site]['tape'] )
    return S_OK( siteResult )
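
__getSiteCandidates above computes the intersection, over all input files, of the sites able to serve each file. A simplified self-contained sketch of that intersection step, with illustrative names only:

# Simplified sketch; the function and data names are illustrative, not DIRAC API.
def common_sites(file_sites):
    """file_sites: dict mapping each LFN to the sites that can serve it."""
    site_sets = [set(sites) for sites in file_sites.values()]
    return set.intersection(*site_sets) if site_sets else set()

print(common_sites({
    "/lfn/a": ["LCG.CERN.ch", "LCG.CNAF.it"],
    "/lfn/b": ["LCG.CERN.ch"],
}))  # {'LCG.CERN.ch'}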
Exemplo n.º 20
0
class FTSRequest( object ):
  """
  .. class:: FTSRequest

  Helper class for FTS job submission and monitoring.
  """

  # # default checksum type
  __defaultCksmType = "ADLER32"
  # # flag to disable/enable checksum test, default: disabled
  __cksmTest = False

  def __init__( self ):
    """c'tor

    :param self: self reference
    """
    self.log = gLogger.getSubLogger( self.__class__.__name__, True )

    # # final states tuple
    self.finalStates = ( 'Canceled', 'Failed', 'Hold',
                         'Finished', 'FinishedDirty' )
    # # failed states tuple
    self.failedStates = ( 'Canceled', 'Failed',
                          'Hold', 'FinishedDirty' )
    # # successful states tuple
    self.successfulStates = ( 'Finished', 'Done' )
    # # all file states tuple
    self.fileStates = ( 'Done', 'Active', 'Pending', 'Ready', 'Canceled', 'Failed',
                        'Finishing', 'Finished', 'Submitted', 'Hold', 'Waiting' )

    self.statusSummary = {}

    # # request status
    self.requestStatus = 'Unknown'

    # # dict for FTS job files
    self.fileDict = {}
    # # dict for replicas information
    self.catalogReplicas = {}
    # # dict for metadata information
    self.catalogMetadata = {}
    # # dict for files that failed to register
    self.failedRegistrations = {}

    # # placeholder for FileCatalog reference
    self.oCatalog = None

    # # submit timestamp
    self.submitTime = ''

    # # placeholder FTS job GUID
    self.ftsGUID = ''
    # # placeholder for FTS server URL
    self.ftsServer = ''

    # # flag marking FTS job completeness
    self.isTerminal = False
    # # completeness percentage
    self.percentageComplete = 0.0

    # # source SE name
    self.sourceSE = ''
    # # flag marking source SE validity
    self.sourceValid = False
    # # source space token
    self.sourceToken = ''

    # # target SE name
    self.targetSE = ''
    # # flag marking target SE validity
    self.targetValid = False
    # # target space token
    self.targetToken = ''

    # # placeholder for target StorageElement
    self.oTargetSE = None
    # # placeholder for source StorageElement
    self.oSourceSE = None

    # # checksum type, set it to default
    self.__cksmType = self.__defaultCksmType
    # # disable checksum test by default
    self.__cksmTest = False

    # # statuses that prevent submitting to FTS
    self.noSubmitStatus = ( 'Failed', 'Done', 'Staging' )

    # # were sources resolved?
    self.sourceResolved = False

    # # Number of file transfers actually submitted
    self.submittedFiles = 0
    self.transferTime = 0

    self.submitCommand = Operations().getValue( 'DataManagement/FTSPlacement/FTS2/SubmitCommand', 'glite-transfer-submit' )
    self.monitorCommand = Operations().getValue( 'DataManagement/FTSPlacement/FTS2/MonitorCommand', 'glite-transfer-status' )
    self.ftsJob = None
    self.ftsFiles = []

  ####################################################################
  #
  #  Methods for setting/getting/checking the SEs
  #

  def setSourceSE( self, se ):
    """ set SE for source

    :param self: self reference
    :param str se: source SE name
    """
    if se == self.targetSE:
      return S_ERROR( "SourceSE is TargetSE" )
    self.sourceSE = se
    self.oSourceSE = StorageElement( self.sourceSE )
    return self.__checkSourceSE()

  def __checkSourceSE( self ):
    """ check source SE availability

    :param self: self reference
    """
    if not self.sourceSE:
      return S_ERROR( "SourceSE not set" )
    res = self.oSourceSE.isValid( 'Read' )
    if not res['OK']:
      return S_ERROR( "SourceSE not available for reading" )
    res = self.__getSESpaceToken( self.oSourceSE )
    if not res['OK']:
      self.log.error( "FTSRequest failed to get SRM Space Token for SourceSE", res['Message'] )
      return S_ERROR( "SourceSE does not support FTS transfers" )
    self.sourceToken = res['Value']

    if self.__cksmTest:
      res = self.oSourceSE.getChecksumType()
      if not res["OK"]:
        self.log.error( "Unable to get checksum type for SourceSE %s: %s" % ( self.sourceSE,
                                                                              res["Message"] ) )
        self.__cksmTest = False
      else:
        cksmType = res["Value"]
        if cksmType in ( "NONE", "NULL" ):
          self.log.warn( "Checksum type set to %s at SourceSE %s, disabling checksum test" % ( cksmType,
                                                                                               self.sourceSE ) )
          self.__cksmTest = False
        elif cksmType != self.__cksmType:
          self.log.warn( "Checksum type mismatch, disabling checksum test" )
          self.__cksmTest = False

    self.sourceValid = True
    return S_OK()

  def setTargetSE( self, se ):
    """ set target SE

    :param self: self reference
    :param str se: target SE name
    """
    if se == self.sourceSE:
      return S_ERROR( "TargetSE is SourceSE" )
    self.targetSE = se
    self.oTargetSE = StorageElement( self.targetSE )
    return self.__checkTargetSE()

  def setTargetToken( self, token ):
    """ target space token setter

    :param self: self reference
    :param str token: target space token
    """
    self.targetToken = token
    return S_OK()

  def __checkTargetSE( self ):
    """ check target SE availability

    :param self: self reference
    """
    if not self.targetSE:
      return S_ERROR( "TargetSE not set" )
    res = self.oTargetSE.isValid( 'Write' )
    if not res['OK']:
      return S_ERROR( "TargetSE not available for writing" )
    res = self.__getSESpaceToken( self.oTargetSE )
    if not res['OK']:
      self.log.error( "FTSRequest failed to get SRM Space Token for TargetSE", res['Message'] )
      return S_ERROR( "TargetSE does not support FTS transfers" )
    self.targetToken = res['Value']

    # # check checksum types
    if self.__cksmTest:
      res = self.oTargetSE.getChecksumType()
      if not res["OK"]:
        self.log.error( "Unable to get checksum type for TargetSE %s: %s" % ( self.targetSE,
                                                                              res["Message"] ) )
        self.__cksmTest = False
      else:
        cksmType = res["Value"]
        if cksmType in ( "NONE", "NULL" ):
          self.log.warn( "Checksum type set to %s at TargetSE %s, disabling checksum test" % ( cksmType,
                                                                                               self.targetSE ) )
          self.__cksmTest = False
        elif cksmType != self.__cksmType:
          self.log.warn( "Checksum type mismatch, disabling checksum test" )
          self.__cksmTest = False

    self.targetValid = True
    return S_OK()

  @staticmethod
  def __getSESpaceToken( oSE ):
    """ get space token from StorageElement instance

    :param self: self reference
    :param StorageElement oSE: StorageElement instance
    """
    res = oSE.getStorageParameters( "SRM2" )
    if not res['OK']:
      return res
    return S_OK( res['Value'].get( 'SpaceToken' ) )

  ####################################################################
  #
  #  Methods for setting/getting FTS request parameters
  #

  def setFTSGUID( self, guid ):
    """ FTS job GUID setter

    :param self: self reference
    :param str guid: string containing GUID
    """
    if not checkGuid( guid ):
      return S_ERROR( "Incorrect GUID format" )
    self.ftsGUID = guid
    return S_OK()


  def setFTSServer( self, server ):
    """ FTS server setter

    :param self: self reference
    :param str server: FTS server URL
    """
    self.ftsServer = server
    return S_OK()

  def isRequestTerminal( self ):
    """ check if FTS job has terminated

    :param self: self reference
    """
    if self.requestStatus in self.finalStates:
      self.isTerminal = True
    return S_OK( self.isTerminal )

  def setCksmTest( self, cksmTest = False ):
    """ set cksm test

    :param self: self reference
    :param bool cksmTest: flag to enable/disable checksum test
    """
    self.__cksmTest = bool( cksmTest )
    return S_OK( self.__cksmTest )

  ####################################################################
  #
  #  Methods for setting/getting/checking files and their metadata
  #

  def setLFN( self, lfn ):
    """ add LFN :lfn: to :fileDict:

    :param self: self reference
    :param str lfn: LFN to add to :fileDict:
    """
    self.fileDict.setdefault( lfn, {'Status':'Waiting'} )
    return S_OK()

  def setSourceSURL( self, lfn, surl ):
    """ source SURL setter

    :param self: self reference
    :param str lfn: LFN
    :param str surl: source SURL
    """
    target = self.fileDict[lfn].get( 'Target' )
    if target == surl:
      return S_ERROR( "Source and target the same" )
    return self.__setFileParameter( lfn, 'Source', surl )

  def getSourceSURL( self, lfn ):
    """ get source SURL for LFN :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Source' )

  def setTargetSURL( self, lfn, surl ):
    """ set target SURL for LFN :lfn:

    :param self: self reference
    :param str lfn: LFN
    :param str surl: target SURL
    """
    source = self.fileDict[lfn].get( 'Source' )
    if source == surl:
      return S_ERROR( "Source and target the same" )
    return self.__setFileParameter( lfn, 'Target', surl )

  def getFailReason( self, lfn ):
    """ get fail reason for file :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Reason' )

  def getRetries( self, lfn ):
    """ get number of attepmts made to transfer file :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Retries' )

  def getTransferTime( self, lfn ):
    """ get duration of transfer for file :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Duration' )

  def getFailed( self ):
    """ get list of wrongly transferred LFNs

    :param self: self reference
    """
    return S_OK( [ lfn for lfn in self.fileDict
                   if self.fileDict[lfn].get( 'Status', '' ) in self.failedStates ] )

  def getStaging( self ):
    """ get files set for prestaging """
    return S_OK( [lfn for lfn in self.fileDict
                  if self.fileDict[lfn].get( 'Status', '' ) == 'Staging'] )

  def getDone( self ):
    """ get list of succesfully transferred LFNs

    :param self: self reference
    """
    return S_OK( [ lfn for lfn in self.fileDict
                   if self.fileDict[lfn].get( 'Status', '' ) in self.successfulStates ] )

  def __setFileParameter( self, lfn, paramName, paramValue ):
    """ set :paramName: to :paramValue: for :lfn: file

    :param self: self reference
    :param str lfn: LFN
    :param str paramName: parameter name
    :param mixed paramValue: a new parameter value
    """
    self.setLFN( lfn )
    self.fileDict[lfn][paramName] = paramValue
    return S_OK()

  def __getFileParameter( self, lfn, paramName ):
    """ get value of :paramName: for file :lfn:

    :param self: self reference
    :param str lfn: LFN
    :param str paramName: parameter name
    """
    if lfn not in self.fileDict:
      return S_ERROR( "Supplied file not set" )
    if paramName not in self.fileDict[lfn]:
      return S_ERROR( "%s not set for file" % paramName )
    return S_OK( self.fileDict[lfn][paramName] )
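
  # A hedged illustration of the fileDict bookkeeping above (LFN and
  # SURLs are invented): after setLFN() and the two SURL setters an
  # entry looks like
  #   fileDict['/vo/user/f.dat'] = { 'Status': 'Waiting',
  #                                  'Source': 'srm://se1/vo/user/f.dat',
  #                                  'Target': 'srm://se2/vo/user/f.dat' }
  # __getFileParameter( '/vo/user/f.dat', 'Source' ) then returns
  # S_OK( 'srm://se1/vo/user/f.dat' ); an unknown LFN or an unset
  # parameter yields S_ERROR instead.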

  ####################################################################
  #
  #  Methods for submission
  #

  def submit( self, monitor = False, printOutput = True ):
    """ submit FTS job

    :param self: self reference
    :param bool monitor: flag to monitor progress of FTS job
    :param bool printOutput: flag to print output of execution to stdout
    """
    res = self.__prepareForSubmission()
    if not res['OK']:
      return res
    res = self.__submitFTSTransfer()
    if not res['OK']:
      return res
    resDict = { 'ftsGUID' : self.ftsGUID, 'ftsServer' : self.ftsServer, 'submittedFiles' : self.submittedFiles }
    if monitor or printOutput:
      gLogger.always( "Submitted %s@%s" % ( self.ftsGUID, self.ftsServer ) )
      if monitor:
        self.monitor( untilTerminal = True, printOutput = printOutput, full = False )
    return S_OK( resDict )

  def __prepareForSubmission( self ):
    """ check validity of job before submission

    :param self: self reference
    """
    if not self.fileDict:
      return S_ERROR( "No files set" )
    if not self.sourceValid:
      return S_ERROR( "SourceSE not valid" )
    if not self.targetValid:
      return S_ERROR( "TargetSE not valid" )
    if not self.ftsServer:
      res = self.__resolveFTSServer()
      if not res['OK']:
        return S_ERROR( "FTSServer not valid" )
    self.resolveSource()
    self.resolveTarget()
    res = self.__filesToSubmit()
    if not res['OK']:
      return S_ERROR( "No files to submit" )
    return S_OK()

  def __getCatalogObject( self ):
    """ CatalogInterface instance facade

    :param self: self reference
    """
    try:
      if not self.oCatalog:
        self.oCatalog = FileCatalog()
      return S_OK()
    except Exception as error:
      return S_ERROR( "Failed to create FileCatalog instance: %s" % error )

  def __updateReplicaCache( self, lfns = None, overwrite = False ):
    """ update replica cache for list of :lfns:

    :param self: self reference
    :param mixed lfns: list of LFNs
    :param bool overwrite: flag to trigger cache clearing and updating
    """
    if not lfns:
      lfns = self.fileDict.keys()
    toUpdate = [ lfn for lfn in lfns if ( lfn not in self.catalogReplicas ) or overwrite ]
    if not toUpdate:
      return S_OK()
    res = self.__getCatalogObject()
    if not res['OK']:
      return res
    res = self.oCatalog.getReplicas( toUpdate )
    if not res['OK']:
      return S_ERROR( "Failed to update replica cache: %s" % res['Message'] )
    for lfn, error in res['Value']['Failed'].items():
      self.__setFileParameter( lfn, 'Reason', error )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
    for lfn, replicas in res['Value']['Successful'].items():
      self.catalogReplicas[lfn] = replicas
    return S_OK()

  def __updateMetadataCache( self, lfns = None ):
    """ update metadata cache for list of LFNs

    :param self: self reference
    :param list lfns: list of LFNs
    """
    if not lfns:
      lfns = self.fileDict.keys()
    toUpdate = [ lfn for lfn in lfns if lfn not in self.catalogMetadata ]
    if not toUpdate:
      return S_OK()
    res = self.__getCatalogObject()
    if not res['OK']:
      return res
    res = self.oCatalog.getFileMetadata( toUpdate )
    if not res['OK']:
      return S_ERROR( "Failed to get source catalog metadata: %s" % res['Message'] )
    for lfn, error in res['Value']['Failed'].items():
      self.__setFileParameter( lfn, 'Reason', error )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
    for lfn, metadata in res['Value']['Successful'].items():
      self.catalogMetadata[lfn] = metadata
    return S_OK()

  def resolveSource( self ):
    """ resolve source SE eligible for submission

    :param self: self reference
    """

    # Avoid resolving sources twice
    if self.sourceResolved:
      return S_OK()
    # Only resolve files that need a transfer
    toResolve = [ lfn for lfn in self.fileDict if self.fileDict[lfn].get( "Status", "" ) != "Failed" ]
    if not toResolve:
      return S_OK()
    res = self.__updateMetadataCache( toResolve )
    if not res['OK']:
      return res
    res = self.__updateReplicaCache( toResolve )
    if not res['OK']:
      return res

    # Define the source URLs
    for lfn in toResolve:
      replicas = self.catalogReplicas.get( lfn, {} )
      if self.sourceSE not in replicas:
        gLogger.warn( "resolveSource: skipping %s - not replicas at SourceSE %s" % ( lfn, self.sourceSE ) )
        self.__setFileParameter( lfn, 'Reason', "No replica at SourceSE" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue
      # Fix first the PFN
      pfn = self.oSourceSE.getPfnForLfn( lfn ).get( 'Value', {} ).get( 'Successful', {} ).get( lfn, replicas[self.sourceSE] )
      res = returnSingleResult( self.oSourceSE.getPfnForProtocol( pfn, protocol = 'SRM2', withPort = True ) )
      if not res['OK']:
        gLogger.warn( "resolveSource: skipping %s - %s" % ( lfn, res["Message"] ) )
        self.__setFileParameter( lfn, 'Reason', res['Message'] )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue
      res = self.setSourceSURL( lfn, res['Value'] )
      if not res['OK']:
        gLogger.warn( "resolveSource: skipping %s - %s" % ( lfn, res["Message"] ) )
        self.__setFileParameter( lfn, 'Reason', res['Message'] )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue

    toResolve = {}
    for lfn in self.fileDict:
      if "Source" in self.fileDict[lfn]:
        toResolve[self.fileDict[lfn]['Source']] = lfn
    if not toResolve:
      return S_ERROR( "No eligible Source files" )

    # Get metadata of the sources, to check for existence, availability and caching
    res = self.oSourceSE.getFileMetadata( toResolve.keys() )
    if not res['OK']:
      return S_ERROR( "Failed to check source file metadata" )

    for pfn, error in res['Value']['Failed'].items():
      lfn = toResolve[pfn]
      if re.search( 'File does not exist', error ):
        gLogger.warn( "resolveSource: skipping %s - source file does not exists" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source file does not exist" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      else:
        gLogger.warn( "resolveSource: skipping %s - failed to get source metadata" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Failed to get Source metadata" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
    toStage = []

    nbStagedFiles = 0
    for pfn, metadata in res['Value']['Successful'].items():
      lfn = toResolve[pfn]
      lfnStatus = self.fileDict.get( lfn, {} ).get( 'Status' )
      if metadata['Unavailable']:
        gLogger.warn( "resolveSource: skipping %s - source file unavailable" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source file Unavailable" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      elif metadata['Lost']:
        gLogger.warn( "resolveSource: skipping %s - source file lost" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source file Lost" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      elif not metadata['Cached']:
        if lfnStatus != 'Staging':
          toStage.append( pfn )
      elif metadata['Size'] != self.catalogMetadata[lfn]['Size']:
        gLogger.warn( "resolveSource: skipping %s - source file size mismatch" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source size mismatch" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      elif self.catalogMetadata[lfn]['Checksum'] and metadata['Checksum'] and \
            not compareAdler( metadata['Checksum'], self.catalogMetadata[lfn]['Checksum'] ):
        gLogger.warn( "resolveSource: skipping %s - source file checksum mismatch" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source checksum mismatch" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      elif lfnStatus == 'Staging':
        # file that was staging is now cached
        self.__setFileParameter( lfn, 'Status', 'Waiting' )
        nbStagedFiles += 1

    # Some files were being staged
    if nbStagedFiles:
      self.log.info( 'resolveSource: %d files have been staged' % nbStagedFiles )

    # Launching staging of files not in cache
    if toStage:
      gLogger.warn( "resolveSource: %s source files not cached, prestaging..." % len( toStage ) )
      stage = self.oSourceSE.prestageFile( toStage )
      if not stage["OK"]:
        gLogger.error( "resolveSource: error is prestaging - %s" % stage["Message"] )
        for pfn in toStage:
          lfn = toResolve[pfn]
          self.__setFileParameter( lfn, 'Reason', stage["Message"] )
          self.__setFileParameter( lfn, 'Status', 'Failed' )
      else:
        for pfn in toStage:
          lfn = toResolve[pfn]
          if pfn in stage['Value']['Successful']:
            self.__setFileParameter( lfn, 'Status', 'Staging' )
          elif pfn in stage['Value']['Failed']:
            self.__setFileParameter( lfn, 'Reason', stage['Value']['Failed'][pfn] )
            self.__setFileParameter( lfn, 'Status', 'Failed' )

    self.sourceResolved = True
    return S_OK()

  def resolveTarget( self ):
    """ find target SE eligible for submission

    :param self: self reference
    """
    toResolve = [ lfn for lfn in self.fileDict
                 if self.fileDict[lfn].get( 'Status' ) not in self.noSubmitStatus ]
    if not toResolve:
      return S_OK()
    res = self.__updateReplicaCache( toResolve )
    if not res['OK']:
      return res
    for lfn in toResolve:
      res = self.oTargetSE.getPfnForLfn( lfn )
      if not res['OK'] or lfn not in res['Value']['Successful']:
        gLogger.warn( "resolveTarget: skipping %s - failed to create target pfn" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Failed to create Target" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue
      pfn = res['Value']['Successful'][lfn]
      res = self.oTargetSE.getPfnForProtocol( pfn, protocol = 'SRM2', withPort = True )
      if not res['OK'] or pfn not in res['Value']['Successful']:
        reason = res.get( 'Message', res.get( 'Value', {} ).get( 'Failed', {} ).get( pfn ) )
        gLogger.warn( "resolveTarget: skipping %s - %s" % ( lfn, reason ) )
        self.__setFileParameter( lfn, 'Reason', reason )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue
      pfn = res['Value']['Successful'][pfn]
      res = self.setTargetSURL( lfn, pfn )
      if not res['OK']:
        gLogger.warn( "resolveTarget: skipping %s - %s" % ( lfn, res["Message"] ) )
        self.__setFileParameter( lfn, 'Reason', res['Message'] )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue
    toResolve = {}
    for lfn in self.fileDict:
      if "Target" in self.fileDict[lfn]:
        toResolve[self.fileDict[lfn]['Target']] = lfn
    if not toResolve:
      return S_ERROR( "No eligible Target files" )
    res = self.oTargetSE.exists( toResolve.keys() )
    if not res['OK']:
      return S_ERROR( "Failed to check target existence" )
    for pfn, error in res['Value']['Failed'].items():
      lfn = toResolve[pfn]
      self.__setFileParameter( lfn, 'Reason', error )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
    toRemove = []
    for pfn, exists in res['Value']['Successful'].items():
      if exists:
        lfn = toResolve[pfn]
        res = self.getSourceSURL( lfn )
        if not res['OK']:
          gLogger.warn( "resolveTarget: skipping %s - target exists" % lfn )
          self.__setFileParameter( lfn, 'Reason', "Target exists" )
          self.__setFileParameter( lfn, 'Status', 'Failed' )
        elif res['Value'] == pfn:
          gLogger.warn( "resolveTarget: skipping %s - source and target pfns are the same" % lfn )
          self.__setFileParameter( lfn, 'Reason', "Source and Target the same" )
          self.__setFileParameter( lfn, 'Status', 'Failed' )
        else:
          toRemove.append( pfn )
    if toRemove:
      self.oTargetSE.removeFile( toRemove )
    return S_OK()

  def __filesToSubmit( self ):
    """
    check if there is at least one file to submit

    :return: S_OK if at least one file is present, S_ERROR otherwise
    """
    for lfn in self.fileDict:
      lfnStatus = self.fileDict[lfn].get( 'Status' )
      source = self.fileDict[lfn].get( 'Source' )
      target = self.fileDict[lfn].get( 'Target' )
      if lfnStatus not in self.noSubmitStatus and source and target:
        return S_OK()
    return S_ERROR()

  def __createFTSFiles( self ):
    """ create LFNs file for glite-transfer-submit command

    This file consists one line for each fiel to be transferred:

    sourceSURL targetSURL [CHECKSUMTYPE:CHECKSUM]

    :param self: self reference
    """
    self.__updateMetadataCache()
    for lfn in self.fileDict:
      lfnStatus = self.fileDict[lfn].get( 'Status' )
      if lfnStatus not in self.noSubmitStatus:
        cksmStr = ""
        # # add cksmType:cksm only if cksmType is specified, else let FTS decide by itself
        if self.__cksmTest and self.__cksmType:
          checkSum = self.catalogMetadata.get( lfn, {} ).get( 'Checksum' )
          if checkSum:
            cksmStr = " %s:%s" % ( self.__cksmType, intAdlerToHex( hexAdlerToInt( checkSum ) ) )
        ftsFile = FTSFile()
        ftsFile.LFN = lfn
        ftsFile.SourceSURL = self.fileDict[lfn].get( 'Source' )
        ftsFile.TargetSURL = self.fileDict[lfn].get( 'Target' )
        ftsFile.SourceSE = self.sourceSE
        ftsFile.TargetSE = self.targetSE
        ftsFile.Status = self.fileDict[lfn].get( 'Status' )
        ftsFile.Checksum = cksmStr
        ftsFile.Size = self.catalogMetadata.get( lfn, {} ).get( 'Size' )
        self.ftsFiles.append( ftsFile )
        self.submittedFiles += 1
    return S_OK()
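
  # Sketch of the submit-file lines described in the docstring above
  # (SURLs and checksum value are invented): one line per file, with
  # CHECKSUMTYPE:CHECKSUM appended only when the checksum test is enabled:
  #   srm://src.example.org/vo/f.dat srm://dst.example.org/vo/f.dat ADLER32:0a1b2c3d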

  def __createFTSJob( self, guid = None ):
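    """ create an FTSJob for the FTSFiles prepared by __createFTSFiles,
    copying SEs, space tokens and the FTS server onto the job

    :param str guid: existing FTS job GUID to reuse, if any
    """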
    self.__createFTSFiles()
    ftsJob = FTSJob()
    ftsJob.RequestID = 0
    ftsJob.OperationID = 0
    ftsJob.SourceSE = self.sourceSE
    ftsJob.TargetSE = self.targetSE
    ftsJob.SourceToken = self.sourceToken
    ftsJob.TargetToken = self.targetToken
    ftsJob.FTSServer = self.ftsServer
    if guid:
      ftsJob.FTSGUID = guid

    for ftsFile in self.ftsFiles:
      ftsFile.Attempt += 1
      ftsFile.Error = ""
      ftsJob.addFile( ftsFile )
    self.ftsJob = ftsJob

  def __submitFTSTransfer( self ):
    """ create and execute glite-transfer-submit CLI command

    :param self: self reference
    """
    log = gLogger.getSubLogger( 'Submit' )
    self.__createFTSJob()

    submit = self.ftsJob.submitFTS2( command = self.submitCommand )
    if not submit["OK"]:
      log.error( "unable to submit FTSJob: %s" % submit["Message"] )
      return submit

    log.info( "FTSJob '%s'@'%s' has been submitted" % ( self.ftsJob.FTSGUID, self.ftsJob.FTSServer ) )

    # # update statuses for job files
    for ftsFile in self.ftsJob:
      ftsFile.FTSGUID = self.ftsJob.FTSGUID
      ftsFile.Status = "Submitted"
      ftsFile.Attempt += 1

    log.info( "FTSJob '%s'@'%s' has been submitted" % ( self.ftsJob.FTSGUID, self.ftsJob.FTSServer ) )
    self.ftsGUID = self.ftsJob.FTSGUID
    return S_OK()

  def __resolveFTSServer( self ):
    """
    resolve FTS server to use, it should be the closest one from target SE

    :param self: self reference
    """
    from DIRAC.ConfigurationSystem.Client.Helpers.Resources import getFTSServersForSites
    if not self.targetSE:
      return S_ERROR( "Target SE not set" )
    res = getSitesForSE( self.targetSE )
    if not res['OK'] or not res['Value']:
      return S_ERROR( "Could not determine target site" )
    targetSites = res['Value']

    targetSite = ''
    for targetSite in targetSites:
      targetFTS = getFTSServersForSites( [targetSite] )
      if targetFTS['OK']:
        ftsTarget = targetFTS['Value'][targetSite]
        if ftsTarget:
          self.ftsServer = ftsTarget
          return S_OK( self.ftsServer )
      else:
        return targetFTS
    return S_ERROR( 'No FTS server found for %s' % targetSite )

  ####################################################################
  #
  #  Methods for monitoring
  #

  def summary( self, untilTerminal = False, printOutput = False ):
    """ summary of FTS job

    :param self: self reference
    :param bool untilTerminal: flag to monitor FTS job to its final state
    :param bool printOutput: flag to print out monitoring information to the stdout
    """
    res = self.__isSummaryValid()
    if not res['OK']:
      return res
    while not self.isTerminal:
      res = self.__parseOutput( full = True )
      if not res['OK']:
        return res
      if untilTerminal:
        self.__print()
      self.isRequestTerminal()
      if res['Value'] or ( not untilTerminal ):
        break
      time.sleep( 1 )
    if untilTerminal:
      print ""
    if printOutput and ( not untilTerminal ):
      return self.dumpSummary( printOutput = printOutput )
    return S_OK()

  def monitor( self, untilTerminal = False, printOutput = False, full = True ):
    """ monitor FTS job

    :param self: self reference
    :param bool untilTerminal: flag to monitor FTS job to its final state
    :param bool printOutput: flag to print out monitoring information to the stdout
    """
    if not self.ftsJob:
      self.resolveSource()
      self.__createFTSJob( self.ftsGUID )
    res = self.__isSummaryValid()
    if not res['OK']:
      return res
    if untilTerminal:
      res = self.summary( untilTerminal = untilTerminal, printOutput = printOutput )
      if not res['OK']:
        return res
    res = self.__parseOutput( full = full )
    if not res['OK']:
      return res
    if untilTerminal:
      self.finalize()
    if printOutput:
      self.dump()
    return res

  def dumpSummary( self, printOutput = False ):
    """ get FTS job summary as str

    :param self: self reference
    :param bool printOutput: print summary to stdout
    """

    outStr = ''
    for status in sorted( self.statusSummary ):
      if self.statusSummary[status]:
        outStr = '%s\t%-10s : %-10s\n' % ( outStr, status, str( self.statusSummary[status] ) )
    outStr = outStr.rstrip( '\n' )
    if printOutput:
      print outStr
    return S_OK( outStr )
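
  # Illustrative dumpSummary() output (states and counts invented); one
  # "<status> : <count>" line is emitted per non-empty state:
  #   Active     : 3
  #   Finished   : 97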

  def __print( self ):
    """ print progress bar of FTS job completeness to stdout

    :param self: self reference
    """
    width = 100
    bits = int( ( width * self.percentageComplete ) / 100 )
    outStr = "|%s>%s| %.1f%s %s %s" % ( "="*bits, " "*( width - bits ),
                                        self.percentageComplete, "%",
                                        self.requestStatus, " "*10 )
    sys.stdout.write( "%s\r" % ( outStr ) )
    sys.stdout.flush()

  def dump( self ):
    """ print FTS job parameters and files to stdout

    :param self: self reference
    """
    print "%-10s : %-10s" % ( "Status", self.requestStatus )
    print "%-10s : %-10s" % ( "Source", self.sourceSE )
    print "%-10s : %-10s" % ( "Target", self.targetSE )
    print "%-10s : %-128s" % ( "Server", self.ftsServer )
    print "%-10s : %-128s" % ( "GUID", self.ftsGUID )
    for lfn in sorted( self.fileDict ):
      print "\n  %-15s : %-128s" % ( 'LFN', lfn )
      for key in ['Source', 'Target', 'Status', 'Reason', 'Duration']:
        print "  %-15s : %-128s" % ( key, str( self.fileDict[lfn].get( key ) ) )
    return S_OK()

  def __isSummaryValid( self ):
    """ check validity of FTS job summary report

    :param self: self reference
    """
    if not self.ftsServer:
      return S_ERROR( "FTSServer not set" )
    if not self.ftsGUID:
      return S_ERROR( "FTSGUID not set" )
    return S_OK()

  def __parseOutput( self, full = False ):
    """ execute glite-transfer-status command and parse its output

    :param self: self reference
    :param bool full: glite-transfer-status verbosity level, when set, collect information of files as well
    """
    monitor = self.ftsJob.monitorFTS2( command = self.monitorCommand, full = full )
    if not monitor['OK']:
      return monitor
    self.percentageComplete = self.ftsJob.Completeness
    self.requestStatus = self.ftsJob.Status
    self.submitTime = self.ftsJob.SubmitTime

    statusSummary = monitor['Value']
    if statusSummary:
      for state in statusSummary:
        self.statusSummary[state] = statusSummary[state]

    self.transferTime = 0
    for ftsFile in self.ftsJob:
      lfn = ftsFile.LFN
      self.__setFileParameter( lfn, 'Status', ftsFile.Status )
      self.__setFileParameter( lfn, 'Reason', ftsFile.Error )
      self.__setFileParameter( lfn, 'Duration', ftsFile._duration )
      targetURL = self.__getFileParameter( lfn, 'Target' )
      if not targetURL['OK']:
        self.__setFileParameter( lfn, 'Target', ftsFile.TargetSURL )
      self.transferTime += int( ftsFile._duration )
    return S_OK()

  ####################################################################
  #
  #  Methods for finalization
  #

  def finalize( self ):
    """ finalize FTS job

    :param self: self reference
    """
    self.__updateMetadataCache()
    transEndTime = dateTime()
    regStartTime = time.time()
    res = self.getTransferStatistics()
    transDict = res['Value']

    res = self.__registerSuccessful( transDict['transLFNs'] )

    regSuc, regTotal = res['Value']
    regTime = time.time() - regStartTime
    if self.sourceSE and self.targetSE:
      self.__sendAccounting( regSuc, regTotal, regTime, transEndTime, transDict )
    return S_OK()

  def getTransferStatistics( self ):
    """ collect information of Transfers that can be used by Accounting

    :param self: self reference
    """
    transDict = { 'transTotal': len( self.fileDict ),
                  'transLFNs': [],
                  'transOK': 0,
                  'transSize': 0 }

    for lfn in self.fileDict:
      if self.fileDict[lfn].get( 'Status' ) in self.successfulStates:
        if self.fileDict[lfn].get( 'Duration', 0 ):
          transDict['transLFNs'].append( lfn )
          transDict['transOK'] += 1
          if lfn in self.catalogMetadata:
            transDict['transSize'] += self.catalogMetadata[lfn].get( 'Size', 0 )

    return S_OK( transDict )
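
  # Hedged example of the statistics dict built above (numbers invented):
  #   { 'transTotal': 10, 'transLFNs': [ '/vo/f1.dat', '/vo/f2.dat' ],
  #     'transOK': 2, 'transSize': 123456789 }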

  def getFailedRegistrations( self ):
    """ get failed registrations dict

    :param self: self reference
    """
    return S_OK( self.failedRegistrations )

  def __registerSuccessful( self, transLFNs ):
    """ register successfully transferred files to the catalogs,
    fill failedRegistrations dict for files that failed to register

    :param self: self reference
    :param list transLFNs: LFNs in FTS job
    """
    self.failedRegistrations = {}
    toRegister = {}
    for lfn in transLFNs:
      res = returnSingleResult( self.oTargetSE.getPfnForProtocol( self.fileDict[lfn].get( 'Target' ), protocol = 'SRM2', withPort = False ) )
      if not res['OK']:
        self.__setFileParameter( lfn, 'Reason', res['Message'] )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      else:
        toRegister[lfn] = { 'PFN' : res['Value'], 'SE' : self.targetSE }
    if not toRegister:
      return S_OK( ( 0, 0 ) )
    res = self.__getCatalogObject()
    if not res['OK']:
      self.failedRegistrations = toRegister
      self.log.error( 'Failed to get Catalog Object', res['Message'] )
      return S_OK( ( 0, len( toRegister ) ) )
    res = self.oCatalog.addReplica( toRegister )
    if not res['OK']:
      self.failedRegistrations = toRegister
      self.log.error( 'Failed to register replicas', res['Message'] )
      return S_OK( ( 0, len( toRegister ) ) )
    for lfn, error in res['Value']['Failed'].items():
      self.failedRegistrations[lfn] = toRegister[lfn]
      self.log.error( 'Registration of Replica failed', '%s : %s' % ( lfn, str( error ) ) )
    return S_OK( ( len( res['Value']['Successful'] ), len( toRegister ) ) )

  def __sendAccounting( self, regSuc, regTotal, regTime, transEndTime, transDict ):
    """ send accounting record

    :param self: self reference
    :param regSuc: number of files successfully registered
    :param regTotal: number of files attempted to register
    :param regTime: duration of the registration step
    :param transEndTime: time stamp at the end of FTS job
    :param dict transDict: dict holding counters for files being transferred, their sizes and successful transfers
    """

    oAccounting = DataOperation()
    oAccounting.setEndTime( transEndTime )
    oAccounting.setStartTime( self.submitTime )

    accountingDict = {}
    accountingDict['OperationType'] = 'replicateAndRegister'
    result = getProxyInfo()
    if not result['OK']:
      userName = '******'
    else:
      userName = result['Value'].get( 'username', 'unknown' )
    accountingDict['User'] = userName
    accountingDict['Protocol'] = 'FTS' if 'fts3' not in self.ftsServer else 'FTS3'
    accountingDict['RegistrationTime'] = regTime
    accountingDict['RegistrationOK'] = regSuc
    accountingDict['RegistrationTotal'] = regTotal
    accountingDict['TransferOK'] = transDict['transOK']
    accountingDict['TransferTotal'] = transDict['transTotal']
    accountingDict['TransferSize'] = transDict['transSize']
    accountingDict['FinalStatus'] = self.requestStatus
    accountingDict['Source'] = self.sourceSE
    accountingDict['Destination'] = self.targetSE
    accountingDict['TransferTime'] = self.transferTime
    oAccounting.setValuesFromDict( accountingDict )
    self.log.verbose( "Attempting to commit accounting message..." )
    oAccounting.commit()
    self.log.verbose( "...committed." )
    return S_OK()
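
A minimal driving sequence for the helper class above might look like the
following sketch. It is illustrative only: the SE names and the LFN are
invented, a valid proxy and CS configuration are assumed, and most return
values go unchecked.

def sketchSubmitAndMonitor():
  """ hedged usage sketch for FTSRequest, not part of the class above """
  request = FTSRequest()
  request.setTargetSE( "DST-SE-B" )
  request.setSourceSE( "DST-SE-A" )
  request.setLFN( "/vo/data/run1/file.dat" )
  res = request.submit()
  if res["OK"]:
    print "submitted %s file(s) as FTS job %s" % ( res["Value"]["submittedFiles"],
                                                   res["Value"]["ftsGUID"] )
    request.monitor( untilTerminal = True, printOutput = True )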
Example No. 21
class ReplicateAndRegister(DMSRequestOperationsBase):
  """
  .. class:: ReplicateAndRegister

  ReplicateAndRegister operation handler
  """

  def __init__(self, operation=None, csPath=None):
    """c'tor

    :param self: self reference
    :param Operation operation: Operation instance
    :param str csPath: CS path for this handler
    """
    super(ReplicateAndRegister, self).__init__(operation, csPath)
    # # own gMonitor stuff for files
    gMonitor.registerActivity("ReplicateAndRegisterAtt", "Replicate and register attempted",
                              "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM)
    gMonitor.registerActivity("ReplicateOK", "Replications successful",
                              "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM)
    gMonitor.registerActivity("ReplicateFail", "Replications failed",
                              "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM)
    gMonitor.registerActivity("RegisterOK", "Registrations successful",
                              "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM)
    gMonitor.registerActivity("RegisterFail", "Registrations failed",
                              "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM)
    # # for FTS
    gMonitor.registerActivity("FTSScheduleAtt", "Files schedule attempted",
                              "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM)
    gMonitor.registerActivity("FTSScheduleOK", "File schedule successful",
                              "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM)
    gMonitor.registerActivity("FTSScheduleFail", "File schedule failed",
                              "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM)
    # # SE cache

    # Clients
    self.fc = FileCatalog()

  def __call__(self):
    """ call me maybe """
    # # check replicas first
    checkReplicas = self.__checkReplicas()
    if not checkReplicas["OK"]:
      self.log.error('Failed to check replicas', checkReplicas["Message"])
    if hasattr(self, "FTSMode") and getattr(self, "FTSMode"):
      bannedGroups = getattr(self, "FTSBannedGroups") if hasattr(self, "FTSBannedGroups") else ()
      if self.request.OwnerGroup in bannedGroups:
        self.log.verbose("usage of FTS system is banned for request's owner")
        return self.dmTransfer()

      if getattr(self, 'UseNewFTS3', False):
        return self.fts3Transfer()
      else:
        return self.ftsTransfer()

    return self.dmTransfer()

  def __checkReplicas(self):
    """ check done replicas and update file states  """
    waitingFiles = dict([(opFile.LFN, opFile) for opFile in self.operation
                         if opFile.Status in ("Waiting", "Scheduled")])
    targetSESet = set(self.operation.targetSEList)

    replicas = self.fc.getReplicas(waitingFiles.keys())
    if not replicas["OK"]:
      self.log.error('Failed to get replicas', replicas["Message"])
      return replicas

    reMissing = re.compile(r".*such file.*")
    for failedLFN, errStr in replicas["Value"]["Failed"].iteritems():
      waitingFiles[failedLFN].Error = errStr
      if reMissing.search(errStr.lower()):
        self.log.error("File does not exists", failedLFN)
        gMonitor.addMark("ReplicateFail", len(targetSESet))
        waitingFiles[failedLFN].Status = "Failed"

    for successfulLFN, reps in replicas["Value"]["Successful"].iteritems():
      if targetSESet.issubset(set(reps)):
        self.log.info("file %s has been replicated to all targets" % successfulLFN)
        waitingFiles[successfulLFN].Status = "Done"

    return S_OK()

  def _addMetadataToFiles(self, toSchedule):
    """ Add metadata to those files that need to be scheduled through FTS

        toSchedule is a dictionary mapping each LFN to a list whose
        first element is the corresponding opFile:
        {'lfn1': [opFile1, ...], 'lfn2': [opFile2, ...]}
    """
    if toSchedule:
      self.log.info("found %s files to schedule, getting metadata from FC" % len(toSchedule))
    else:
      self.log.verbose("No files to schedule")
      return S_OK([])

    res = self.fc.getFileMetadata(toSchedule.keys())
    if not res['OK']:
      return res
    else:
      if res['Value']['Failed']:
        self.log.warn("Can't schedule %d files: problems getting the metadata: %s" %
                      (len(res['Value']['Failed']), ', '.join(res['Value']['Failed'])))
      metadata = res['Value']['Successful']

    filesToSchedule = {}

    for lfn, lfnMetadata in metadata.iteritems():
      opFileToSchedule = toSchedule[lfn][0]
      opFileToSchedule.GUID = lfnMetadata['GUID']
      # In principle this is defined already in filterReplicas()
      if not opFileToSchedule.Checksum:
        opFileToSchedule.Checksum = metadata[lfn]['Checksum']
        opFileToSchedule.ChecksumType = metadata[lfn]['ChecksumType']
      opFileToSchedule.Size = metadata[lfn]['Size']

      filesToSchedule[opFileToSchedule.LFN] = opFileToSchedule

    return S_OK(filesToSchedule)
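
  # Hedged sketch of the structures handled above (LFN invented):
  #   input : toSchedule = {'/vo/f.dat': [opFile, ...]}  # opFile first
  #   output: S_OK({'/vo/f.dat': opFile}) with GUID, Checksum,
  #           ChecksumType and Size filled in from the FileCatalog.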

  def _filterReplicas(self, opFile):
    """ filter out banned/invalid source SEs """
    return filterReplicas(opFile, logger=self.log, dataManager=self.dm)

  def ftsTransfer(self):
    """ replicate and register using FTS """

    self.log.info("scheduling files in FTS...")

    bannedTargets = self.checkSEsRSS()
    if not bannedTargets['OK']:
      gMonitor.addMark("FTSScheduleAtt")
      gMonitor.addMark("FTSScheduleFail")
      return bannedTargets

    if bannedTargets['Value']:
      return S_OK("%s targets are banned for writing" % ",".join(bannedTargets['Value']))

    # Can continue now
    self.log.verbose("No targets banned for writing")

    toSchedule = {}

    delayExecution = 0
    errors = defaultdict(int)
    for opFile in self.getWaitingFilesList():
      opFile.Error = ''
      gMonitor.addMark("FTSScheduleAtt")
      # # check replicas
      replicas = self._filterReplicas(opFile)
      if not replicas["OK"]:
        continue
      replicas = replicas["Value"]

      validReplicas = replicas.get("Valid")
      noMetaReplicas = replicas.get("NoMetadata")
      noReplicas = replicas.get('NoReplicas')
      badReplicas = replicas.get('Bad')
      noActiveReplicas = replicas.get('NoActiveReplicas')

      if validReplicas:
        validTargets = list(set(self.operation.targetSEList) - set(validReplicas))
        if not validTargets:
          self.log.info("file %s is already present at all targets" % opFile.LFN)
          opFile.Status = "Done"
        else:
          toSchedule[opFile.LFN] = [opFile, validReplicas, validTargets]
      else:
        gMonitor.addMark("FTSScheduleFail")
        if noMetaReplicas:
          err = "Couldn't get metadata"
          errors[err] += 1
          self.log.verbose(
              "unable to schedule '%s', %s at %s" %
              (opFile.LFN, err, ','.join(noMetaReplicas)))
          opFile.Error = err
        elif noReplicas:
          err = "File doesn't exist"
          errors[err] += 1
          self.log.error("Unable to schedule transfer",
                         "%s %s at %s" % (opFile.LFN, err, ','.join(noReplicas)))
          opFile.Error = err
          opFile.Status = 'Failed'
        elif badReplicas:
          err = "All replicas have a bad checksum"
          errors[err] += 1
          self.log.error("Unable to schedule transfer",
                         "%s, %s at %s" % (opFile.LFN, err, ','.join(badReplicas)))
          opFile.Error = err
          opFile.Status = 'Failed'
        elif noActiveReplicas:
          err = "No active replica found"
          errors[err] += 1
          self.log.verbose("Unable to schedule transfer",
                           "%s, %s at %s" % (opFile.LFN, err, ','.join(noActiveReplicas)))
          opFile.Error = err
          # All source SEs are banned, delay execution by 1 hour
          delayExecution = 60

    if delayExecution:
      self.log.info("Delay execution of the request by %d minutes" % delayExecution)
      self.request.delayNextExecution(delayExecution)
    # Log error counts
    for error, count in errors.iteritems():
      self.log.error(error, 'for %d files' % count)

    filesToScheduleList = []
    res = self._addMetadataToFiles(toSchedule)
    if not res['OK']:
      return res
    else:
      filesToSchedule = res['Value']

      for lfn in filesToSchedule:
        filesToScheduleList.append((filesToSchedule[lfn].toJSON()['Value'],
                                    toSchedule[lfn][1],
                                    toSchedule[lfn][2]))

    if filesToScheduleList:

      ftsSchedule = FTSClient().ftsSchedule(self.request.RequestID,
                                            self.operation.OperationID,
                                            filesToScheduleList)
      if not ftsSchedule["OK"]:
        self.log.error("Completely failed to schedule to FTS:", ftsSchedule["Message"])
        return ftsSchedule

      # might have nothing to schedule
      ftsSchedule = ftsSchedule["Value"]
      if not ftsSchedule:
        return S_OK()

      self.log.info("%d files have been scheduled to FTS" % len(ftsSchedule['Successful']))
      for opFile in self.operation:
        fileID = opFile.FileID
        if fileID in ftsSchedule["Successful"]:
          gMonitor.addMark("FTSScheduleOK", 1)
          opFile.Status = "Scheduled"
          self.log.debug("%s has been scheduled for FTS" % opFile.LFN)
        elif fileID in ftsSchedule["Failed"]:
          gMonitor.addMark("FTSScheduleFail", 1)
          opFile.Error = ftsSchedule["Failed"][fileID]
          if 'sourceSURL equals to targetSURL' in opFile.Error:
            # In this case there is no need to continue
            opFile.Status = 'Failed'
          self.log.warn("unable to schedule %s for FTS: %s" % (opFile.LFN, opFile.Error))
    else:
      self.log.info("No files to schedule after metadata checks")

    # Just in case some transfers could not be scheduled, try them with RM
    return self.dmTransfer(fromFTS=True)

  def _checkExistingFTS3Operations(self):
    """
       Check if there are ongoing FTS3Operation for the current RMS Operation

       Under some conditions, we can be trying to schedule files while
       there is still an FTS transfer going on. This typically happens
       when the REA hangs. To prevent further race condition, we check
       if there are FTS3Operations in a non Final state matching the
       current operation ID. If so, we put the corresponding files in
       scheduled mode. We will then wait till the FTS3 Operation performs
       the callback

       :returns: S_OK with True if we can go on, False if we should stop the processing
    """

    res = FTS3Client().getOperationsFromRMSOpID(self.operation.OperationID)

    if not res['OK']:
      self.log.debug(
          "Could not get FTS3Operations matching OperationID",
          self.operation.OperationID)
      return res

    existingFTSOperations = res['Value']
    # It is ok to have FTS Operations in a final state, so we
    # care only about the others
    unfinishedFTSOperations = [
        ops for ops in existingFTSOperations if ops.status not in FTS3TransferOperation.FINAL_STATES]

    if not unfinishedFTSOperations:
      self.log.debug("No ongoing FTS3Operations, all good")
      return S_OK(True)

    self.log.warn("Some FTS3Operations already exist for the RMS Operation:",
                  [op.operationID for op in unfinishedFTSOperations])

    # This would really be a screwed up situation !
    if len(unfinishedFTSOperations) > 1:
      self.log.warn("That's a serious problem !!")

    # We take the rmsFileID of the files in the Operations,
    # find the corresponding File object, and set them scheduled
    rmsFileIDsToSetScheduled = set(
        [ftsFile.rmsFileID for ftsOp in unfinishedFTSOperations for ftsFile in ftsOp.ftsFiles])

    for opFile in self.operation:
      # If it is in the DB, it has a FileID
      opFileID = opFile.FileID
      if opFileID in rmsFileIDsToSetScheduled:
        self.log.warn("Setting RMSFile as already scheduled", opFileID)
        opFile.Status = "Scheduled"

    # We return here such that the Request is set back to Scheduled in the DB
    # With no further modification
    return S_OK(False)

  def fts3Transfer(self):
    """ replicate and register using FTS3 """

    self.log.info("scheduling files in FTS3...")

    # Check first if we do not have ongoing transfers

    res = self._checkExistingFTS3Operations()
    if not res['OK']:
      return res

    # if res['Value'] is False
    # it means that there are ongoing transfers
    # and we should stop here
    if res['Value'] is False:
      # return S_OK such that the request is put back
      return S_OK()

    fts3Files = []
    toSchedule = {}

    # Dict which maps the FileID to the object
    rmsFilesIds = {}

    for opFile in self.getWaitingFilesList():
      rmsFilesIds[opFile.FileID] = opFile

      opFile.Error = ''
      gMonitor.addMark("FTSScheduleAtt")
      # # check replicas
      replicas = self._filterReplicas(opFile)
      if not replicas["OK"]:
        continue
      replicas = replicas["Value"]

      validReplicas = replicas["Valid"]
      noMetaReplicas = replicas["NoMetadata"]
      noReplicas = replicas['NoReplicas']
      badReplicas = replicas['Bad']
      noPFN = replicas['NoPFN']

      if validReplicas:
        validTargets = list(set(self.operation.targetSEList) - set(validReplicas))
        if not validTargets:
          self.log.info("file %s is already present at all targets" % opFile.LFN)
          opFile.Status = "Done"
        else:
          toSchedule[opFile.LFN] = [opFile, validTargets]

      else:
        gMonitor.addMark("FTSScheduleFail")
        if noMetaReplicas:
          self.log.warn("unable to schedule '%s', couldn't get metadata at %s" % (opFile.LFN, ','.join(noMetaReplicas)))
          opFile.Error = "Couldn't get metadata"
        elif noReplicas:
          self.log.error(
              "Unable to schedule transfer", "File %s doesn't exist at %s" %
              (opFile.LFN, ','.join(noReplicas)))
          opFile.Error = 'No replicas found'
          opFile.Status = 'Failed'
        elif badReplicas:
          self.log.error(
              "Unable to schedule transfer",
              "File %s, all replicas have a bad checksum at %s" %
              (opFile.LFN,
               ','.join(badReplicas)))
          opFile.Error = 'All replicas have a bad checksum'
          opFile.Status = 'Failed'
        elif noPFN:
          self.log.warn(
              "unable to schedule %s, could not get a PFN at %s" %
              (opFile.LFN, ','.join(noPFN)))

    res = self._addMetadataToFiles(toSchedule)
    if not res['OK']:
      return res
    else:
      filesToSchedule = res['Value']

      for lfn in filesToSchedule:
        opFile = filesToSchedule[lfn]
        validTargets = toSchedule[lfn][1]
        for targetSE in validTargets:
          ftsFile = FTS3File.fromRMSFile(opFile, targetSE)
          fts3Files.append(ftsFile)

    if fts3Files:
      res = Registry.getUsernameForDN(self.request.OwnerDN)
      if not res['OK']:
        self.log.error(
            "Cannot get username for DN", "%s %s" %
            (self.request.OwnerDN, res['Message']))
        return res

      username = res['Value']
      fts3Operation = FTS3TransferOperation.fromRMSObjects(self.request, self.operation, username)
      fts3Operation.ftsFiles = fts3Files

      ftsSchedule = FTS3Client().persistOperation(fts3Operation)
      if not ftsSchedule["OK"]:
        self.log.error("Completely failed to schedule to FTS3:", ftsSchedule["Message"])
        return ftsSchedule

      # might have nothing to schedule
      ftsSchedule = ftsSchedule["Value"]
      self.log.info("Scheduled with FTS3Operation id %s" % ftsSchedule)

      self.log.info("%d files have been scheduled to FTS3" % len(fts3Files))

      for ftsFile in fts3Files:
        opFile = rmsFilesIds[ftsFile.rmsFileID]
        gMonitor.addMark("FTSScheduleOK", 1)
        opFile.Status = "Scheduled"
        self.log.debug("%s has been scheduled for FTS" % opFile.LFN)
    else:
      self.log.info("No files to schedule after metadata checks")

    # Just in case some transfers could not be scheduled, try them with RM
    return self.dmTransfer(fromFTS=True)

  def dmTransfer(self, fromFTS=False):
    """ replicate and register using dataManager  """
    # # get waiting files. If none just return
    # # source SE
    sourceSE = self.operation.SourceSE if self.operation.SourceSE else None
    if sourceSE:
      # # check source se for read
      bannedSource = self.checkSEsRSS(sourceSE, 'ReadAccess')
      if not bannedSource["OK"]:
        gMonitor.addMark("ReplicateAndRegisterAtt", len(self.operation))
        gMonitor.addMark("ReplicateFail", len(self.operation))
        return bannedSource

      if bannedSource["Value"]:
        self.operation.Error = "SourceSE %s is banned for reading" % sourceSE
        self.log.info(self.operation.Error)
        return S_OK(self.operation.Error)

    # # check targetSEs for write
    bannedTargets = self.checkSEsRSS()
    if not bannedTargets['OK']:
      gMonitor.addMark("ReplicateAndRegisterAtt", len(self.operation))
      gMonitor.addMark("ReplicateFail", len(self.operation))
      return bannedTargets

    if bannedTargets['Value']:
      self.operation.Error = "%s targets are banned for writing" % ",".join(bannedTargets['Value'])
      return S_OK(self.operation.Error)

    # Can continue now
    self.log.verbose("No targets banned for writing")

    waitingFiles = self.getWaitingFilesList()
    if not waitingFiles:
      return S_OK()
    # # loop over files
    if fromFTS:
      self.log.info("Trying transfer using replica manager as FTS failed")
    else:
      self.log.info("Transferring files using Data manager...")
    errors = defaultdict(int)
    delayExecution = 0
    for opFile in waitingFiles:
      if opFile.Error in ("Couldn't get metadata",
                          "File doesn't exist",
                          'No active replica found',
                          "All replicas have a bad checksum",):
        err = "File already in error status"
        errors[err] += 1

      gMonitor.addMark("ReplicateAndRegisterAtt", 1)
      opFile.Error = ''
      lfn = opFile.LFN

      # Check if replica is at the specified source
      replicas = self._filterReplicas(opFile)
      if not replicas["OK"]:
        self.log.error('Failed to check replicas', replicas["Message"])
        continue
      replicas = replicas["Value"]
      validReplicas = replicas.get("Valid")
      noMetaReplicas = replicas.get("NoMetadata")
      noReplicas = replicas.get('NoReplicas')
      badReplicas = replicas.get('Bad')
      noActiveReplicas = replicas.get('NoActiveReplicas')

      if not validReplicas:
        gMonitor.addMark("ReplicateFail")
        if noMetaReplicas:
          err = "Couldn't get metadata"
          errors[err] += 1
          self.log.verbose(
              "unable to replicate '%s', couldn't get metadata at %s" %
              (opFile.LFN, ','.join(noMetaReplicas)))
          opFile.Error = err
        elif noReplicas:
          err = "File doesn't exist"
          errors[err] += 1
          self.log.verbose(
              "Unable to replicate", "File %s doesn't exist at %s" %
              (opFile.LFN, ','.join(noReplicas)))
          opFile.Error = err
          opFile.Status = 'Failed'
        elif badReplicas:
          err = "All replicas have a bad checksum"
          errors[err] += 1
          self.log.error(
              "Unable to replicate", "%s, all replicas have a bad checksum at %s" %
              (opFile.LFN, ','.join(badReplicas)))
          opFile.Error = err
          opFile.Status = 'Failed'
        elif noActiveReplicas:
          err = "No active replica found"
          errors[err] += 1
          self.log.verbose("Unable to schedule transfer",
                           "%s, %s at %s" % (opFile.LFN, err, ','.join(noActiveReplicas)))
          opFile.Error = err
          # All source SEs are banned, delay execution by 1 hour
          delayExecution = 60
        continue
      # # get the first one in the list
      if sourceSE not in validReplicas:
        if sourceSE:
          err = "File not at specified source"
          errors[err] += 1
          self.log.warn(
              "%s is not at specified sourceSE %s, changed to %s" %
              (lfn, sourceSE, validReplicas[0]))
        sourceSE = validReplicas[0]

      # # loop over targetSE
      catalogs = self.operation.Catalog
      if catalogs:
        catalogs = [cat.strip() for cat in catalogs.split(',')]

      for targetSE in self.operation.targetSEList:

        # # call DataManager
        if targetSE in validReplicas:
          self.log.warn("Request to replicate %s to an existing location: %s" % (lfn, targetSE))
          opFile.Status = 'Done'
          continue
        res = self.dm.replicateAndRegister(lfn, targetSE, sourceSE=sourceSE, catalog=catalogs)
        if res["OK"]:

          if lfn in res["Value"]["Successful"]:

            if "replicate" in res["Value"]["Successful"][lfn]:

              repTime = res["Value"]["Successful"][lfn]["replicate"]
              prString = "file %s replicated at %s in %s s." % (lfn, targetSE, repTime)

              gMonitor.addMark("ReplicateOK", 1)

              if "register" in res["Value"]["Successful"][lfn]:

                gMonitor.addMark("RegisterOK", 1)
                regTime = res["Value"]["Successful"][lfn]["register"]
                prString += ' and registered in %s s.' % regTime
                self.log.info(prString)
              else:

                gMonitor.addMark("RegisterFail", 1)
                prString += " but failed to register"
                self.log.warn(prString)

                opFile.Error = "Failed to register"
                # # add register replica operation
                registerOperation = self.getRegisterOperation(
                    opFile, targetSE, type='RegisterReplica')
                self.request.insertAfter(registerOperation, self.operation)

            else:

              self.log.error("Failed to replicate", "%s to %s" % (lfn, targetSE))
              gMonitor.addMark("ReplicateFail", 1)
              opFile.Error = "Failed to replicate"

          else:

            gMonitor.addMark("ReplicateFail", 1)
            reason = res["Value"]["Failed"][lfn]
            self.log.error(
                "Failed to replicate and register", "File %s at %s:" %
                (lfn, targetSE), reason)
            opFile.Error = reason

        else:

          gMonitor.addMark("ReplicateFail", 1)
          opFile.Error = "DataManager error: %s" % res["Message"]
          self.log.error("DataManager error", res["Message"])

      if not opFile.Error:
        if len(self.operation.targetSEList) > 1:
          self.log.info("file %s has been replicated to all targetSEs" % lfn)
        opFile.Status = "Done"
    # Log error counts
    if delayExecution:
      self.log.info("Delay execution of the request by %d minutes" % delayExecution)
      self.request.delayNextExecution(delayExecution)
    for error, count in errors.iteritems():
      self.log.error(error, 'for %d files' % count)

    return S_OK()
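
  # For reference: the transfer loop above tallies failures per error message
  # in the `errors` counter (defined earlier in the method) and logs each
  # count once at the end; when every source SE is banned it pushes the whole
  # request back with delayNextExecution( 60 ) instead of failing the files
  # outright.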
Example No. 22
class ReplicateAndRegister( DMSRequestOperationsBase ):
  """
  .. class:: ReplicateAndRegister

  ReplicateAndRegister operation handler
  """

  def __init__( self, operation = None, csPath = None ):
    """c'tor

    :param self: self reference
    :param Operation operation: Operation instance
    :param str csPath: CS path for this handler
    """
    super( ReplicateAndRegister, self ).__init__( operation, csPath )
    # # own gMonitor stuff for files
    gMonitor.registerActivity( "ReplicateAndRegisterAtt", "Replicate and register attempted",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "ReplicateOK", "Replications successful",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "ReplicateFail", "Replications failed",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "RegisterOK", "Registrations successful",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "RegisterFail", "Registrations failed",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    # # for FTS
    gMonitor.registerActivity( "FTSScheduleAtt", "Files schedule attempted",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "FTSScheduleOK", "File schedule successful",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "FTSScheduleFail", "File schedule failed",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    # # SE cache

    # Clients
    self.fc = FileCatalog()
    if hasattr( self, "FTSMode" ) and getattr( self, "FTSMode" ):
      from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient
      self.ftsClient = FTSClient()

  def __call__( self ):
    """ call me maybe """
    # # check replicas first
    checkReplicas = self.__checkReplicas()
    if not checkReplicas["OK"]:
      self.log.error( 'Failed to check replicas', checkReplicas["Message"] )
    if hasattr( self, "FTSMode" ) and getattr( self, "FTSMode" ):
      bannedGroups = getattr( self, "FTSBannedGroups" ) if hasattr( self, "FTSBannedGroups" ) else ()
      if self.request.OwnerGroup in bannedGroups:
        self.log.verbose( "usage of FTS system is banned for request's owner" )
        return self.dmTransfer()
      return self.ftsTransfer()
    return self.dmTransfer()

  def __checkReplicas( self ):
    """ check done replicas and update file states  """
    waitingFiles = dict( [ ( opFile.LFN, opFile ) for opFile in self.operation
                          if opFile.Status in ( "Waiting", "Scheduled" ) ] )
    targetSESet = set( self.operation.targetSEList )

    replicas = self.fc.getReplicas( waitingFiles.keys() )
    if not replicas["OK"]:
      self.log.error( 'Failed to get replicas', replicas["Message"] )
      return replicas

    reMissing = re.compile( r".*such file.*" )
    for failedLFN, errStr in replicas["Value"]["Failed"].items():
      waitingFiles[failedLFN].Error = errStr
      if reMissing.search( errStr.lower() ):
        self.log.error( "File does not exists", failedLFN )
        gMonitor.addMark( "ReplicateFail", len( targetSESet ) )
        waitingFiles[failedLFN].Status = "Failed"

    for successfulLFN, reps in replicas["Value"]["Successful"].items():
      if targetSESet.issubset( set( reps ) ):
        self.log.info( "file %s has been replicated to all targets" % successfulLFN )
        waitingFiles[successfulLFN].Status = "Done"

    return S_OK()
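
  # For reference: every client call in these handlers follows DIRAC's return
  # convention -- a dict with an "OK" boolean, the payload under "Value"
  # (often split into per-LFN "Successful"/"Failed" maps) on success, and the
  # reason under "Message" on failure. A minimal sketch, assuming only the
  # standard S_OK/S_ERROR helpers (sketchCall is hypothetical):
  #
  #   from DIRAC import S_OK, S_ERROR
  #
  #   def sketchCall( lfns ):
  #     if not lfns:
  #       return S_ERROR( "No LFNs given" )            # {'OK': False, 'Message': ...}
  #     return S_OK( { "Successful": {}, "Failed": {} } )  # {'OK': True, 'Value': ...}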

  def _addMetadataToFiles( self, toSchedule ):
    """ Add metadata to those files that need to be scheduled through FTS

        toSchedule is a dictionary:
        {'lfn1': [opFile, validReplicas, validTargets], 'lfn2': [opFile, validReplicas, validTargets]}
    """
    if toSchedule:
      self.log.info( "found %s files to schedule, getting metadata from FC" % len( toSchedule ) )
      lfns = toSchedule.keys()
    else:
      self.log.info( "No files to schedule" )
      return S_OK()

    res = self.fc.getFileMetadata( lfns )
    if not res['OK']:
      return res
    else:
      if res['Value']['Failed']:
        self.log.warn( "Can't schedule %d files: problems getting the metadata: %s" % ( len( res['Value']['Failed'] ),
                                                                                        ', '.join( res['Value']['Failed'] ) ) )
      metadata = res['Value']['Successful']

    filesToScheduleList = []

    for lfnToSchedule, lfnMetadata in metadata.items():
      opFileToSchedule = toSchedule[lfnToSchedule][0]
      opFileToSchedule.GUID = lfnMetadata['GUID']
      opFileToSchedule.Checksum = lfnMetadata['Checksum']
      opFileToSchedule.ChecksumType = lfnMetadata['ChecksumType']
      opFileToSchedule.Size = lfnMetadata['Size']

      filesToScheduleList.append( ( opFileToSchedule.toJSON()['Value'],
                                    toSchedule[lfnToSchedule][1],
                                    toSchedule[lfnToSchedule][2] ) )

    return S_OK( filesToScheduleList )
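
  # For reference, each element appended above is a 3-tuple, so the list
  # handed to ftsSchedule() looks like this (a sketch; shapes taken from the
  # code above, SE names hypothetical):
  #
  #   [ ( opFile.toJSON()['Value'],  # JSON-serialisable description of the file
  #       [ 'SOURCE-SE' ],           # validReplicas
  #       [ 'TARGET-SE1' ] ),        # validTargets
  #     ... ]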



  def _filterReplicas( self, opFile ):
    """ filter out banned/invalid source SEs """
    return filterReplicas( opFile, logger = self.log, dataManager = self.dm )
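
  # For reference: filterReplicas() sorts each file's replicas into the
  # buckets consumed by ftsTransfer() and dmTransfer() below. A sketch of the
  # expected shape, assuming the keys used elsewhere in this handler (SE
  # names hypothetical):
  #
  #   { 'Valid': [ 'GOOD-SE' ],  # replicas usable as transfer sources
  #     'NoMetadata': [],        # SEs where file metadata could not be read
  #     'NoReplicas': [],        # SEs that should hold the file but don't
  #     'Bad': [],               # replicas with a mismatched checksum
  #     'NoPFN': [] }            # SEs for which no PFN could be constructed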

  def ftsTransfer( self ):
    """ replicate and register using FTS """

    self.log.info( "scheduling files in FTS..." )

    bannedTargets = self.checkSEsRSS()
    if not bannedTargets['OK']:
      gMonitor.addMark( "FTSScheduleAtt" )
      gMonitor.addMark( "FTSScheduleFail" )
      return bannedTargets

    if bannedTargets['Value']:
      return S_OK( "%s targets are banned for writing" % ",".join( bannedTargets['Value'] ) )

    # Can continue now
    self.log.verbose( "No targets banned for writing" )

    toSchedule = {}

    for opFile in self.getWaitingFilesList():
      opFile.Error = ''
      gMonitor.addMark( "FTSScheduleAtt" )
      # # check replicas
      replicas = self._filterReplicas( opFile )
      if not replicas["OK"]:
        continue
      replicas = replicas["Value"]

      validReplicas = replicas["Valid"]
      noMetaReplicas = replicas["NoMetadata"]
      noReplicas = replicas['NoReplicas']
      badReplicas = replicas['Bad']
      noPFN = replicas['NoPFN']

      if validReplicas:
        validTargets = list( set( self.operation.targetSEList ) - set( validReplicas ) )
        if not validTargets:
          self.log.info( "file %s is already present at all targets" % opFile.LFN )
          opFile.Status = "Done"
        else:
          toSchedule[opFile.LFN] = [ opFile, validReplicas, validTargets ]
      else:
        gMonitor.addMark( "FTSScheduleFail" )
        if noMetaReplicas:
          self.log.warn( "unable to schedule '%s', couldn't get metadata at %s" % ( opFile.LFN, ','.join( noMetaReplicas ) ) )
          opFile.Error = "Couldn't get metadata"
        elif noReplicas:
          self.log.error( "Unable to schedule transfer",
                          "File %s doesn't exist at %s" % ( opFile.LFN, ','.join( noReplicas ) ) )
          opFile.Error = 'No replicas found'
          opFile.Status = 'Failed'
        elif badReplicas:
          self.log.error( "Unable to schedule transfer",
                          "File %s, all replicas have a bad checksum at %s" % ( opFile.LFN, ','.join( badReplicas ) ) )
          opFile.Error = 'All replicas have a bad checksum'
          opFile.Status = 'Failed'
        elif noPFN:
          self.log.warn( "unable to schedule %s, could not get a PFN at %s" % ( opFile.LFN, ','.join( noPFN ) ) )

    res = self._addMetadataToFiles( toSchedule )
    if not res['OK']:
      return res
    else:
      filesToScheduleList = res['Value']


    if filesToScheduleList:

      ftsSchedule = self.ftsClient.ftsSchedule( self.request.RequestID,
                                                self.operation.OperationID,
                                                filesToScheduleList )
      if not ftsSchedule["OK"]:
        self.log.error( "Completely failed to schedule to FTS:", ftsSchedule["Message"] )
        return ftsSchedule

      # might have nothing to schedule
      ftsSchedule = ftsSchedule["Value"]
      if not ftsSchedule:
        return S_OK()

      self.log.info( "%d files have been scheduled to FTS" % len( ftsSchedule['Successful'] ) )
      for opFile in self.operation:
        fileID = opFile.FileID
        if fileID in ftsSchedule["Successful"]:
          gMonitor.addMark( "FTSScheduleOK", 1 )
          opFile.Status = "Scheduled"
          self.log.debug( "%s has been scheduled for FTS" % opFile.LFN )
        elif fileID in ftsSchedule["Failed"]:
          gMonitor.addMark( "FTSScheduleFail", 1 )
          opFile.Error = ftsSchedule["Failed"][fileID]
          if 'sourceSURL equals to targetSURL' in opFile.Error:
            # In this case there is no need to continue
            opFile.Status = 'Failed'
          self.log.warn( "unable to schedule %s for FTS: %s" % ( opFile.LFN, opFile.Error ) )
    else:
      self.log.info( "No files to schedule after metadata checks" )

    # Just in case some transfers could not be scheduled, try them with RM
    return self.dmTransfer( fromFTS = True )

  def dmTransfer( self, fromFTS = False ):
    """ replicate and register using dataManager  """
    # # get waiting files. If none just return
    # # source SE
    sourceSE = self.operation.SourceSE if self.operation.SourceSE else None
    if sourceSE:
      # # check source se for read
      bannedSource = self.checkSEsRSS( sourceSE, 'ReadAccess' )
      if not bannedSource["OK"]:
        gMonitor.addMark( "ReplicateAndRegisterAtt", len( self.operation ) )
        gMonitor.addMark( "ReplicateFail", len( self.operation ) )
        return bannedSource

      if bannedSource["Value"]:
        self.operation.Error = "SourceSE %s is banned for reading" % sourceSE
        self.log.info( self.operation.Error )
        return S_OK( self.operation.Error )

    # # check targetSEs for write
    bannedTargets = self.checkSEsRSS()
    if not bannedTargets['OK']:
      gMonitor.addMark( "ReplicateAndRegisterAtt", len( self.operation ) )
      gMonitor.addMark( "ReplicateFail", len( self.operation ) )
      return bannedTargets

    if bannedTargets['Value']:
      self.operation.Error = "%s targets are banned for writing" % ",".join( bannedTargets['Value'] )
      return S_OK( self.operation.Error )

    # Can continue now
    self.log.verbose( "No targets banned for writing" )

    waitingFiles = self.getWaitingFilesList()
    if not waitingFiles:
      return S_OK()
    # # loop over files
    if fromFTS:
      self.log.info( "Trying transfer using replica manager as FTS failed" )
    else:
      self.log.info( "Transferring files using Data manager..." )
    for opFile in waitingFiles:

      gMonitor.addMark( "ReplicateAndRegisterAtt", 1 )
      opFile.Error = ''
      lfn = opFile.LFN

      # Check if replica is at the specified source
      replicas = self._filterReplicas( opFile )
      if not replicas["OK"]:
        self.log.error( 'Failed to check replicas', replicas["Message"] )
        continue
      replicas = replicas["Value"]
      validReplicas = replicas["Valid"]
      noMetaReplicas = replicas["NoMetadata"]
      noReplicas = replicas['NoReplicas']
      badReplicas = replicas['Bad']
      noPFN = replicas['NoPFN']

      if not validReplicas:
        gMonitor.addMark( "ReplicateFail" )
        if noMetaReplicas:
          self.log.warn( "unable to replicate '%s', couldn't get metadata at %s" % ( opFile.LFN, ','.join( noMetaReplicas ) ) )
          opFile.Error = "Couldn't get metadata"
        elif noReplicas:
          self.log.error( "Unable to replicate", "File %s doesn't exist at %s" % ( opFile.LFN, ','.join( noReplicas ) ) )
          opFile.Error = 'No replicas found'
          opFile.Status = 'Failed'
        elif badReplicas:
          self.log.error( "Unable to replicate", "%s, all replicas have a bad checksum at %s" % ( opFile.LFN, ','.join( badReplicas ) ) )
          opFile.Error = 'All replicas have a bad checksum'
          opFile.Status = 'Failed'
        elif noPFN:
          self.log.warn( "unable to replicate %s, could not get a PFN" % opFile.LFN )
        continue
      # # get the first one in the list
      if sourceSE not in validReplicas:
        if sourceSE:
          self.log.warn( "%s is not at specified sourceSE %s, changed to %s" % ( lfn, sourceSE, validReplicas[0] ) )
        sourceSE = validReplicas[0]

      # # loop over targetSE
      catalogs = self.operation.Catalog
      if catalogs:
        catalogs = [ cat.strip() for cat in catalogs.split( ',' ) ]

      for targetSE in self.operation.targetSEList:

        # # call DataManager
        if targetSE in validReplicas:
          self.log.warn( "Request to replicate %s to an existing location: %s" % ( lfn, targetSE ) )
          opFile.Status = 'Done'
          continue
        res = self.dm.replicateAndRegister( lfn, targetSE, sourceSE = sourceSE, catalog = catalogs )
        if res["OK"]:

          if lfn in res["Value"]["Successful"]:

            if "replicate" in res["Value"]["Successful"][lfn]:

              repTime = res["Value"]["Successful"][lfn]["replicate"]
              prString = "file %s replicated at %s in %s s." % ( lfn, targetSE, repTime )

              gMonitor.addMark( "ReplicateOK", 1 )

              if "register" in res["Value"]["Successful"][lfn]:

                gMonitor.addMark( "RegisterOK", 1 )
                regTime = res["Value"]["Successful"][lfn]["register"]
                prString += ' and registered in %s s.' % regTime
                self.log.info( prString )
              else:

                gMonitor.addMark( "RegisterFail", 1 )
                prString += " but failed to register"
                self.log.warn( prString )

                opFile.Error = "Failed to register"
                # # add register replica operation
                registerOperation = self.getRegisterOperation( opFile, targetSE, type = 'RegisterReplica' )
                self.request.insertAfter( registerOperation, self.operation )

            else:

              self.log.error( "Failed to replicate", "%s to %s" % ( lfn, targetSE ) )
              gMonitor.addMark( "ReplicateFail", 1 )
              opFile.Error = "Failed to replicate"

          else:

            gMonitor.addMark( "ReplicateFail", 1 )
            reason = res["Value"]["Failed"][lfn]
            self.log.error( "Failed to replicate and register", "File %s at %s:" % ( lfn, targetSE ), reason )
            opFile.Error = reason

        else:

          gMonitor.addMark( "ReplicateFail", 1 )
          opFile.Error = "DataManager error: %s" % res["Message"]
          self.log.error( "DataManager error", res["Message"] )

      if not opFile.Error:
        if len( self.operation.targetSEList ) > 1:
          self.log.info( "file %s has been replicated to all targetSEs" % lfn )
        opFile.Status = "Done"


    return S_OK()
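
  # For reference: when a copy succeeds but the catalogue registration fails,
  # dmTransfer() above does not fail the file -- it inserts a follow-up
  # 'RegisterReplica' operation right after the current one, so registration
  # is retried independently of the transfer.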
Example No. 23
class DIRACBackend(GridBackend):
    """Grid backend using the GFAL command line tools `gfal-*`."""

    def __init__(self, **kwargs):
        GridBackend.__init__(self, catalogue_prefix='', **kwargs)

        from DIRAC.Core.Base import Script
        Script.initialize()
        from DIRAC.FrameworkSystem.Client.ProxyManagerClient import ProxyManagerClient
        self.pm = ProxyManagerClient()

        proxy = self.pm.getUserProxiesInfo()
        if not proxy['OK']:
            raise BackendException("Proxy error: %s" % proxy['Message'])

        from DIRAC.Interfaces.API.Dirac import Dirac
        self.dirac = Dirac()

        from DIRAC.Resources.Catalog.FileCatalog import FileCatalog
        self.fc = FileCatalog()
        from DIRAC.DataManagementSystem.Client.DataManager import DataManager
        self.dm = DataManager()

        self._xattr_cmd = sh.Command('gfal-xattr').bake(_tty_out=False)
        self._replica_checksum_cmd = sh.Command('gfal-sum').bake(_tty_out=False)
        self._bringonline_cmd = sh.Command('gfal-legacy-bringonline').bake(_tty_out=False)
        self._cp_cmd = sh.Command('gfal-copy').bake(_tty_out=False)
        self._ls_se_cmd = sh.Command('gfal-ls').bake(color='never', _tty_out=False)
        self._move_cmd = sh.Command('gfal-rename').bake(_tty_out=False)
        self._mkdir_cmd = sh.Command('gfal-mkdir').bake(_tty_out=False)

        self._replicate_cmd = sh.Command('dirac-dms-replicate-lfn').bake(_tty_out=False)
        self._add_cmd = sh.Command('dirac-dms-add-file').bake(_tty_out=False)
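
        # For reference: `sh.Command(...).bake(...)` (from the `sh` library)
        # pre-binds default arguments, so the wrappers above run the gfal and
        # DIRAC CLIs without a TTY. A minimal sketch, assuming gfal-ls is
        # installed:
        #
        #   import sh
        #   ls = sh.Command('gfal-ls').bake(color='never', _tty_out=False)
        #   ls('-l', '/some/path')  # runs: gfal-ls --color=never -l /some/path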

    @staticmethod
    def _check_return_value(ret):
        if not ret['OK']:
            raise BackendException("Failed: %s", ret['Message'])
        for path, error in ret['Value']['Failed'].items():
            if ('No such' in error) or ('Directory does not' in error):
                raise DoesNotExistException("No such file or directory.")
            else:
                raise BackendException(error)

    def _is_dir(self, lurl):
        isdir = self.fc.isDirectory(lurl)
        self._check_return_value(isdir)
        return isdir['Value']['Successful'][lurl]

    def _is_file(self, lurl):
        isfile = self.fc.isFile(lurl)
        self._check_return_value(isfile)
        return isfile['Value']['Successful'][lurl]

    def _get_dir_entry(self, lurl, infodict=None):
        """Take a lurl and return a DirEntry."""
        # If no dictionary with the information is specified, get it from the catalogue
        try:
            md = infodict['MetaData']
        except TypeError:
            md = self.fc.getFileMetadata(lurl)
            if not md['OK']:
                raise BackendException("Failed to list path '%s': %s" % (lurl, md['Message']))
            for path, error in md['Value']['Failed'].items():
                if 'No such file' in error:
                    # File does not exist, maybe a directory?
                    md = self.fc.getDirectoryMetadata(lurl)
                    if not md['OK']:
                        raise BackendException("Failed to list path '%s': %s" % (lurl, md['Message']))
                    for path, error in md['Value']['Failed'].items():
                        raise DoesNotExistException("No such file or directory.")
                else:
                    raise BackendException(error)
            md = md['Value']['Successful'][lurl]
        return DirEntry(posixpath.basename(lurl),
                        mode=oct(md.get('Mode', -1)),
                        links=md.get('links', -1),
                        gid=md['OwnerGroup'],
                        uid=md['Owner'],
                        size=md.get('Size', -1),
                        modified=str(md.get('ModificationDate', '?')))

    def _iter_directory(self, lurl):
        """Iterate over entries in a directory."""

        ret = self.fc.listDirectory(lurl)
        if not ret['OK']:
            raise BackendException("Failed to list path '%s': %s", lurl, ret['Message'])
        for path, error in ret['Value']['Failed'].items():
            if 'Directory does not' in error:
                # Dir does not exist, maybe a File?
                if self._is_file(lurl):
                    lst = [(lurl, None)]
                    break
                else:
                    raise DoesNotExistException("No such file or Directory.")
            else:
                raise BackendException(ret['Value']['Failed'][lurl])
        else:
            # Sort items by keys, i.e. paths
            lst = sorted(ret['Value']['Successful'][lurl]['Files'].items() +
                         ret['Value']['Successful'][lurl]['SubDirs'].items())

        for item in lst:
            yield item # = path, dict

    def _ls(self, lurl, **kwargs):
        # Translate keyword arguments
        d = kwargs.pop('directory', False)

        if d:
            # Just the requested entry itself
            yield self._get_dir_entry(lurl)
            return

        for path, info in self._iter_directory(lurl):
            yield self._get_dir_entry(path, info)

    def _ls_se(self, surl, **kwargs):
        # Translate keyword arguments
        d = kwargs.pop('directory', False)
        args = []
        if d:
            args.append('-d')
        args.append('-l')
        args.append(surl)
        try:
            output = self._ls_se_cmd(*args, **kwargs)
        except sh.ErrorReturnCode as e:
            if 'No such file' in e.stderr:
                raise DoesNotExistException("No such file or Directory.")
            else:
                raise BackendException(e.stderr)
        for line in output:
            fields = line.split()
            mode, links, gid, uid, size = fields[:5]
            name = fields[-1]
            modified = ' '.join(fields[5:-1])
            yield DirEntry(name, mode=mode, links=int(links), gid=gid, uid=uid, size=int(size), modified=modified)

    def _replicas(self, lurl, **kwargs):
        # Check the lurl actually exists; _ls is a generator, so consume one
        # entry to make the check actually run
        next(self._ls(lurl, directory=True))

        rep = self.dirac.getReplicas(lurl)
        self._check_return_value(rep)
        rep = rep['Value']['Successful'][lurl]

        return rep.values()
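
    # For reference: getReplicas() returns, per LFN, a mapping of storage
    # element name to SURL (assuming the usual DIRAC layout), so the
    # `.values()` call above yields just the SURLs. A sketch with
    # hypothetical SEs:
    #
    #   rep = {'SOME-DISK': 'srm://a/file', 'SOME-TAPE': 'srm://b/file'}
    #   rep.values()  # -> ['srm://a/file', 'srm://b/file']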

    def _exists(self, surl, **kwargs):
        try:
            ret = self._ls_se_cmd(surl, '-d', '-l', **kwargs).strip()
        except sh.ErrorReturnCode as e:
            if 'No such file' in e.stderr:
                return False
            else:
                if len(e.stderr) == 0:
                    raise BackendException(e.stdout)
                else:
                    raise BackendException(e.stderr)
        else:
            return ret[0] != 'd' # Return `False` for directories

    def _register(self, surl, lurl, verbose=False, **kwargs):
        # Register an existing physical copy in the file catalogue
        se = storage.get_SE(surl).name
        # See if file already exists in DFC
        ret = self.fc.getFileMetadata(lurl)
        try:
            self._check_return_value(ret)
        except DoesNotExistException:
            # Add new file
            size = next(self._ls_se(surl, directory=True)).size
            checksum = self.checksum(surl)
            guid = str(uuid.uuid4()) # The guid does not seem to be important. Make it unique if possible.
            ret = self.dm.registerFile((lurl, surl, size, se, guid, checksum))
        else:
            # Add new replica
            ret = self.dm.registerReplica((lurl, surl, se))

        self._check_return_value(ret)
        if verbose:
            print_("Successfully registered replica %s of %s from %s."%(surl, lurl, se))
        return True

    def _deregister(self, surl, lurl, verbose=False, **kwargs):
        # DIRAC only needs to know the SE name to deregister a replica
        se = storage.get_SE(surl).name
        ret = self.dm.removeReplicaFromCatalog(se, [lurl])
        self._check_return_value(ret)
        if verbose:
            print_("Successfully deregistered replica of %s from %s."%(lurl, se))
        return True

    def _state(self, surl, **kwargs):
        try:
            state = self._xattr_cmd(surl, 'user.status', **kwargs).strip()
        except sh.ErrorReturnCode as e:
            if "No such file" in e.stderr:
                raise DoesNotExistException("No such file or Directory.")
            state = '?'
        except sh.SignalException_SIGSEGV:
            state = '?'
        return state

    def _checksum(self, surl, **kwargs):
        try:
            checksum = self._replica_checksum_cmd(surl, 'ADLER32', **kwargs).split()[1]
        except sh.ErrorReturnCode:
            checksum = '?'
        except sh.SignalException_SIGSEGV:
            checksum = '?'
        except IndexError:
            checksum = '?'
        return checksum

    def _bringonline(self, surl, timeout, verbose=False, **kwargs):
        if verbose:
            out = sys.stdout
        else:
            out = None
        # gfal does not notice when files come online, it seems
        # Just send a single short request, then check regularly

        end = time.time() + timeout

        try:
            self._bringonline_cmd('-t', 10, surl, _out=out, **kwargs)
        except sh.ErrorReturnCode as e:
            # The command fails if the file is not online
            # To be expected after 10 seconds
            if "No such file" in e.stderr:
                # Except when the file does not actually exist on the tape storage
                raise DoesNotExistException("No such file or Directory.")

        wait = 5
        while True:
            if verbose:
                print_("Checking replica state...")
            if self.is_online(surl):
                if verbose:
                    print_("Replica brought online.")
                return True

            time_left = end - time.time()
            if time_left <= 0:
                if verbose:
                    print_("Could not bring replica online.")
                return False

            wait *= 2
            if time_left < wait:
                wait = time_left

            if verbose:
                print_("Timeout remaining: %d s"%(time_left))
                print_("Checking again in: %d s"%(wait))
            time.sleep(wait)
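
    # For reference: the polling loop above doubles `wait` before each sleep
    # (so the first sleep is already 10 s) and caps it at the time remaining.
    # Ignoring the duration of the initial bring-online request, with
    # timeout=60 the state checks land roughly at t = 0, 10, 30 and 60 s
    # before giving up.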

    def _replicate(self, source_surl, destination_surl, lurl, verbose=False, **kwargs):
        if verbose:
            out = sys.stdout
        else:
            out = None

        source = storage.get_SE(source_surl).name
        destination = storage.get_SE(destination_surl).name
        try:
            self._replicate_cmd(lurl, destination, source, _out=out, **kwargs)
        except sh.ErrorReturnCode as e:
            if 'No such file' in e.stderr:
                raise DoesNotExistException("No such file or directory.")
            else:
                if len(e.stderr) == 0:
                    raise BackendException(e.stdout)
                else:
                    raise BackendException(e.stderr)

        return True

    def _get(self, surl, localpath, verbose=False, **kwargs):
        if verbose:
            out = sys.stdout
        else:
            out = None
        try:
            self._cp_cmd('-f', '--checksum', 'ADLER32', surl, localpath, _out=out, **kwargs)
        except sh.ErrorReturnCode as e:
            if 'No such file' in e.stderr:
                raise DoesNotExistException("No such file or directory.")
            else:
                if len(e.stderr) == 0:
                    raise BackendException(e.stdout)
                else:
                    raise BackendException(e.stderr)
        return os.path.isfile(localpath)

    def _put(self, localpath, surl, lurl, verbose=False, **kwargs):
        if verbose:
            out = sys.stdout
        else:
            out = None
        se = storage.get_SE(surl).name

        try:
            self._add_cmd(lurl, localpath, se, _out=out, **kwargs)
        except sh.ErrorReturnCode as e:
            if 'No such file' in e.stderr:
                raise DoesNotExistException("No such file or directory.")
            else:
                if len(e.stderr) == 0:
                    raise BackendException(e.stdout)
                else:
                    raise BackendException(e.stderr)
        return True

    def _remove(self, surl, lurl, last=False, verbose=False, **kwargs):
        se = storage.get_SE(surl).name

        if last:
            # Delete lfn
            if verbose:
                print_("Removing all replicas of %s."%(lurl,))
            ret = self.dm.removeFile([lurl])
        else:
            if verbose:
                print_("Removing replica of %s from %s."%(lurl, se))
            ret = self.dm.removeReplica(se, [lurl])

        if not ret['OK']:
            raise BackendException('Failed: %s'%(ret['Message']))

        for lurl, error in ret['Value']['Failed'].items():
            if 'No such file' in error:
                raise DoesNotExistException("No such file or directory.")
            else:
                raise BackendException(error)

        return True

    def _rmdir(self, lurl, verbose=False):
        """Remove the an empty directory from the catalogue."""
        rep = self.fc.removeDirectory(lurl)
        self._check_return_value(rep)
        return True

    def _move_replica(self, surl, new_surl, verbose=False, **kwargs):
        if verbose:
            out = sys.stdout
        else:
            out = None

        try:
            folder = posixpath.dirname(new_surl)
            self._mkdir_cmd(folder, '-p', _out=out, **kwargs)
            self._move_cmd(surl, new_surl, _out=out, **kwargs)
        except sh.ErrorReturnCode as e:
            if 'No such file' in e.stderr:
                raise DoesNotExistException("No such file or directory.")
            else:
                if len(e.stderr) == 0:
                    raise BackendException(e.stdout)
                else:
                    raise BackendException(e.stderr)
        return True