Exemplo n.º 1
0
    def initialize(self):

        self.RequestDBClient = RequestClient()

        gMonitor.registerActivity("Iteration", "Agent Loops", "ZuziaAgent",
                                  "Loops/min", gMonitor.OP_SUM)
        gMonitor.registerActivity("Attempted", "Request Processed",
                                  "ZuziaRAgent", "Requests/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("Successful", "Request Forward Successful",
                                  "ZuziaAgent", "Requests/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("Failed", "Request Forward Failed",
                                  "ZuziaAgent", "Requests/min",
                                  gMonitor.OP_SUM)

        self.local = PathFinder.getServiceURL("RequestManagement/localURL")
        if not self.local:
            self.local = AgentModule.am_getOption(self, 'localURL', '')
        if not self.local:
            errStr = 'The RequestManagement/localURL option must be defined.'
            gLogger.fatal(errStr)
            return S_ERROR(errStr)

        self.central = PathFinder.getServiceURL("RequestManagement/centralURL")
        if not self.central:
            errStr = 'The RequestManagement/centralURL option must be defined.'
            gLogger.fatal(errStr)
            return S_ERROR(errStr)
        return S_OK()
Exemplo n.º 2
0
    def __setRemovalRequest(self, lfn, ownerDN, ownerGroup):
        """ Set removal request with the given credentials
    """
        request = RequestContainer()
        request.setRequestAttributes({
            'OwnerDN': ownerDN,
            'OwnerGroup': ownerGroup
        })
        requestName = os.path.basename(lfn).strip() + '_removal_request.xml'
        request.setRequestName(requestName)
        request.setSourceComponent('JobCleaningAgent')

        removalDict = {
            'Attributes': {
                'Operation': 'removeFile',
                'TargetSE': '',
                'ExecutionOrder': 0
            }
        }
        result = request.addSubRequest(removalDict, 'removal')
        if not result['OK']:
            return result

        index = result['Value']
        fileDict = {'LFN': lfn, 'PFN': '', 'Status': 'Waiting'}
        request.setSubRequestFiles(index, 'removal', [fileDict])

        client = RequestClient()
        result = request.toXML()
        if not result['OK']:
            return result
        xmlRequest = result['Value']
        result = client.setRequest(requestName, xmlRequest)
        return result
Exemplo n.º 3
0
  def __init__( self, *args, **kwargs ):
    ''' c'tor
    '''
    AgentModule.__init__( self, *args, **kwargs )
    # # replica manager
    self.replicaManager = ReplicaManager()
    # # transformation client
    self.transClient = TransformationClient()
    # # wms client
    self.wmsClient = WMSClient()
    # # request client
    self.requestClient = RequestClient()
    # # file catalog clinet
    self.metadataClient = FileCatalogClient()

    # # placeholders for CS options

    # # transformations types
    self.transformationTypes = None
    # # directory locations
    self.directoryLocations = None
    # # transformation metadata
    self.transfidmeta = None
    # # archive periof in days
    self.archiveAfter = None
    # # active SEs
    self.activeStorages = None
    # # transformation log SEs
    self.logSE = None
    # # enable/disable execution
    self.enableFlag = None
Exemplo n.º 4
0
    def initialize(self):

        self.RequestDBClient = RequestClient()
        backend = self.am_getOption('Backend', '')
        self.RequestDB = False
        if backend == 'mysql':
            from DIRAC.RequestManagementSystem.DB.RequestDBMySQL import RequestDBMySQL
            requestDB = RequestDBMySQL()
            if requestDB._connected:
                self.RequestDB = requestDB

        gMonitor.registerActivity("Iteration", "Agent Loops",
                                  "DISETForwardingAgent", "Loops/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("Attempted", "Request Processed",
                                  "DISETForwardingAgent", "Requests/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("Successful", "Request Forward Successful",
                                  "DISETForwardingAgent", "Requests/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("Failed", "Request Forward Failed",
                                  "DISETForwardingAgent", "Requests/min",
                                  gMonitor.OP_SUM)

        self.local = PathFinder.getServiceURL("RequestManagement/localURL")
        if not self.local:
            self.local = AgentModule.am_getOption(self, 'localURL', '')
        if not self.local:
            errStr = 'The RequestManagement/localURL option must be defined.'
            gLogger.fatal(errStr)
            return S_ERROR(errStr)
        return S_OK()
Exemplo n.º 5
0
  def __setRemovalRequest( self, lfn, ownerDN, ownerGroup ):
    """ Set removal request with the given credentials
    """
    request = RequestContainer()
    request.setRequestAttributes( { 'OwnerDN':ownerDN, 'OwnerGroup':ownerGroup } )
    requestName = os.path.basename( lfn ).strip()+'_removal_request.xml'
    request.setRequestName( requestName )
    request.setSourceComponent( 'JobCleaningAgent' )

    removalDict = {'Attributes':{ 'Operation':'removeFile',
                                  'TargetSE':'',
                                  'ExecutionOrder':0
                                }
                   }
    result = request.addSubRequest( removalDict, 'removal' )
    if not result['OK']:
      return result

    index = result['Value']
    fileDict = { 'LFN':lfn, 'PFN':'', 'Status':'Waiting' }
    request.setSubRequestFiles( index, 'removal', [fileDict] )

    client = RequestClient()
    result = request.toXML()
    if not result['OK']:
      return result
    xmlRequest = result['Value']
    result = client.setRequest( requestName, xmlRequest )
    return result
Exemplo n.º 6
0
def _sendToFailover( rpcStub ):
  requestClient = RequestClient()
  request = RequestContainer()
  request.setDISETRequest( rpcStub )

  requestStub = request.toXML()['Value']
  return requestClient.setRequest( "Accounting.DataStore.%s.%s" % ( time.time(), random.random() ),
                                   requestStub )
Exemplo n.º 7
0
def _sendToFailover( rpcStub ):
  requestClient = RequestClient()
  request = RequestContainer()
  request.setDISETRequest( rpcStub )

  requestStub = request.toXML()['Value']
  return requestClient.setRequest( "Accounting.DataStore.%s.%s" % ( time.time(), random.random() ),
                                   requestStub )
Exemplo n.º 8
0
  def initialize(self):

    self.graceRemovalPeriod = self.am_getOption('GraceRemovalPeriod',7)
    self.checkAssigned = self.am_getOption('CheckAssigned',True)
    self.ftsCleaning = self.am_getOption('FTSCleaning',True)
    self.requestClient = RequestClient()
    
    return S_OK()
Exemplo n.º 9
0
    def initialize(self):

        self.RequestDBClient = RequestClient()
        self.ReplicaManager = ReplicaManager()
        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/DataManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')

        return S_OK()
Exemplo n.º 10
0
    def __init__(self, transClient=None, logger=None, requestClient=None):

        if not logger:
            logger = gLogger.getSubLogger('RequestTasks')

        super(RequestTasks, self).__init__(transClient, logger)

        if not requestClient:
            self.requestClient = RequestClient()
        else:
            self.requestClient = requestClient
Exemplo n.º 11
0
    def __init__(self, transClient=None, logger=None, requestClient=None):

        if not logger:
            logger = gLogger.getSubLogger('RequestTasks')

        super(RequestTasks, self).__init__(transClient, logger)

        if not requestClient:
            from DIRAC.RequestManagementSystem.Client.RequestClient import RequestClient
            self.requestClient = RequestClient()
        else:
            self.requestClient = requestClient
Exemplo n.º 12
0
class RequestManagerHandlerTests( unittest.TestCase ):
  """
  .. class:: RequestManagerHandlerTests

  """

  def setUp( self ):
    """ test setup

    :param self: self reference
    """
    self.request = Request()
    self.request.RequestName = "RequestManagerHandlerTests"
    self.request.OwnerDN = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=cibak/CN=605919/CN=Krzysztof Ciba"
    self.request.OwnerGroup = "dirac_user"
    self.operation = Operation()
    self.operation.Type = "ReplicateAndRegister"
    self.operation.TargetSE = "CERN-USER"
    self.file = File()
    self.file.LFN = "/lhcb/user/c/cibak/testFile"
    self.file.Checksum = "123456"
    self.file.ChecksumType = "ADLER32"
    self.request.addOperation( self.operation )
    self.operation.addFile( self.file )
    # # xml representation of a whole request
    self.xmlStr = self.request.toXML( True )["Value"]
    # # request client
    self.requestClient = RequestClient()


  def tearDown( self ):
    """ test case tear down """
    del self.request
    del self.operation
    del self.file
    del self.xmlStr

  def test01PutRequest( self ):
    """ test set request """
    put = self.requestClient.putRequest( self.request )
    self.assertEqual( put["OK"], True, "put failed" )

  def test02GetRequest( self ):
    """ test get request """
    get = self.requestClient.getRequest( self.request.RequestName )
    self.assertEqual( get["OK"], True, "get failed" )

  def test03DeleteRequest( self ):
    """ test delete request """
    delete = self.requestClient.deleteRequest( "test" )
    self.assertEqual( delete["OK"], True, "delete failed" )
Exemplo n.º 13
0
class RequestCleaningAgent(AgentModule):

  def initialize(self):

    self.graceRemovalPeriod = self.am_getOption('GraceRemovalPeriod',7)
    self.checkAssigned = self.am_getOption('CheckAssigned',True)
    self.assignedResetDelay = self.am_getOption('AssignedResetDelay',7200)
    self.ftsCleaning = self.am_getOption('FTSCleaning',True)
    self.requestClient = RequestClient()
    
    return S_OK()

  def execute(self):
    """ Main execution method
    """
    
    toDate = dateTime() - day*self.graceRemovalPeriod
    result = self.requestClient.selectRequests({'Status':'Done','ToDate':str(toDate)})
    if not result['OK']:
      return result
    requestDict = result['Value']
    for rID,rName in requestDict.items():

      gLogger.verbose("Removing request %s" % rName)
      result = self.requestClient.deleteRequest(rName)
      if not result['OK']:
        gLogger.error('Failed to delete request %s' % rName, result['Message'])
      else:
        gLogger.info('Successfully removed request %d/%s' % (rID,rName) )
      
    if self.checkAssigned:
      toDate = dateTime() - second*self.assignedResetDelay
      result = self.requestClient.selectRequests({'Status':'Assigned','ToDate':str(toDate)})
      if not result['OK']:
        return result
      requestDict = result['Value']
      for rID,rName in requestDict.items():
        gLogger.verbose("Resetting request %s to Waiting" % rName)
        result = self.requestClient.setRequestStatus(rName,'Waiting')
        if not result['OK']:
          gLogger.error('Failed to reset request %s to Waiting' % rName, result['Message'])
        else:
          gLogger.info('Successfully reset request %d/%s to Waiting' % (rID,rName) )

    if self.ftsCleaning:
      pass
    
    return S_OK()  
Exemplo n.º 14
0
  def __init__( self, *args, **kwargs ):
    ''' c'tor
    '''
    AgentModule.__init__( self, *args, **kwargs )
    # # replica manager
    self.replicaManager = ReplicaManager()
    # # transformation client
    self.transClient = TransformationClient()
    # # wms client
    self.wmsClient = WMSClient()
    # # request client
    self.requestClient = RequestClient()
    # # file catalog clinet
    self.metadataClient = FileCatalogClient()

    # # placeholders for CS options

    # # transformations types
    self.transformationTypes = None
    # # directory locations
    self.directoryLocations = None
    # # transformation metadata
    self.transfidmeta = None
    # # archive periof in days
    self.archiveAfter = None
    # # active SEs
    self.activeStorages = None
    # # transformation log SEs
    self.logSE = None
    # # enable/disable execution
    self.enableFlag = None
Exemplo n.º 15
0
    def initialize(self):

        self.RequestDBClient = RequestClient()
        backend = self.am_getOption('Backend', '')
        self.RequestDB = False
        if backend == 'mysql':
            from DIRAC.RequestManagementSystem.DB.RequestDBMySQL import RequestDBMySQL
            requestDB = RequestDBMySQL()
            if requestDB._connected:
                self.RequestDB = requestDB

        gMonitor.registerActivity("Iteration", "Agent Loops",
                                  "DISETForwardingAgent", "Loops/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("Attempted", "Request Processed",
                                  "DISETForwardingAgent", "Requests/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("Successful", "Request Forward Successful",
                                  "DISETForwardingAgent", "Requests/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("Failed", "Request Forward Failed",
                                  "DISETForwardingAgent", "Requests/min",
                                  gMonitor.OP_SUM)

        #self.local = PathFinder.getServiceURL( "RequestManagement/localURL" )
        #if not self.local:
        #  self.local = AgentModule.am_getOption( self, 'localURL', '' )
        #if not self.local:
        #  errStr = 'The RequestManagement/localURL option must be defined.'
        #  gLogger.fatal( errStr )
        #  return S_ERROR( errStr )
        return S_OK()
Exemplo n.º 16
0
 def requestClient(cls):
     """ RequestClient getter
 :param cls: class reference
 """
     if not cls.__requestClient:
         cls.__requestClient = RequestClient()
     return cls.__requestClient
Exemplo n.º 17
0
    def initialize(self):

        self.RequestDBClient = RequestClient()
        self.ReplicaManager = ReplicaManager()
        self.DataLog = DataLoggingClient()

        self.maxNumberOfThreads = self.am_getOption('NumberOfThreads', 1)
        self.threadPoolDepth = self.am_getOption('ThreadPoolDepth', 1)
        self.threadPool = ThreadPool(1, self.maxNumberOfThreads)

        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/DataManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')

        return S_OK()
  def initialize( self ):
    """Sets defaults """
    self.replicaManager = ReplicaManager()
    self.transClient = TransformationClient()
    self.wmsClient = WMSClient()
    self.requestClient = RequestClient()
    self.metadataClient = FileCatalogClient()
    self.storageUsageClient = StorageUsageClient()

    # This sets the Default Proxy to used as that defined under 
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    self.transformationTypes = sortList( self.am_getOption( 'TransformationTypes', ['MCSimulation', 'DataReconstruction', 'DataStripping', 'MCStripping', 'Merge', 'Replication'] ) )
    gLogger.info( "Will consider the following transformation types: %s" % str( self.transformationTypes ) )
    self.directoryLocations = sortList( self.am_getOption( 'DirectoryLocations', ['TransformationDB', 'StorageUsage', 'MetadataCatalog'] ) )
    gLogger.info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) )
    self.transfidmeta = self.am_getOption( 'TransfIDMeta', "TransformationID" )
    gLogger.info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta )
    self.archiveAfter = self.am_getOption( 'ArchiveAfter', 7 ) # days
    gLogger.info( "Will archive Completed transformations after %d days" % self.archiveAfter )
    self.activeStorages = sortList( self.am_getOption( 'ActiveSEs', [] ) )
    gLogger.info( "Will check the following storage elements: %s" % str( self.activeStorages ) )
    self.logSE = self.am_getOption( 'TransformationLogSE', 'LogSE' )
    gLogger.info( "Will remove logs found on storage element: %s" % self.logSE )
    return S_OK()
Exemplo n.º 19
0
 def __getRequestFileStatusOLDSystem(self, requestName, lfns):
     """ for the OLD RMS
 """
     # FIXME: this should disappear
     try:
         return RequestClient().getRequestFileStatus(requestName, lfns)
     except RuntimeError:
         return S_ERROR('RuntimeError')
Exemplo n.º 20
0
  def requestClient( self ):
    """ RequestClient getter

    :param self: self reference
    """
    if not self.__requestClient:
      self.__requestClient = RequestClient()
    return self.__requestClient
Exemplo n.º 21
0
 def requestClient(cls):
     """ RequestClient getter
 :param cls: class reference
 """
     if not cls.__requestClient:
         from DIRAC.RequestManagementSystem.Client.RequestClient import RequestClient
         cls.__requestClient = RequestClient()
     return cls.__requestClient
Exemplo n.º 22
0
  def initialize( self ):

    self.RequestDBClient = RequestClient()
    self.ReplicaManager = ReplicaManager()
    # This sets the Default Proxy to used as that defined under 
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    return S_OK()
Exemplo n.º 23
0
    def initialize(self):
        """Sets defaults """
        self.replicaManager = ReplicaManager()
        self.transClient = TransformationClient()
        self.wmsClient = WMSClient()
        self.requestClient = RequestClient()
        self.metadataClient = FileCatalogClient()
        self.storageUsageClient = StorageUsageClient()

        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/DataManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')

        self.transformationTypes = sortList(
            self.am_getOption('TransformationTypes', [
                'MCSimulation', 'DataReconstruction', 'DataStripping',
                'MCStripping', 'Merge', 'Replication'
            ]))
        gLogger.info("Will consider the following transformation types: %s" %
                     str(self.transformationTypes))
        self.directoryLocations = sortList(
            self.am_getOption(
                'DirectoryLocations',
                ['TransformationDB', 'StorageUsage', 'MetadataCatalog']))
        gLogger.info(
            "Will search for directories in the following locations: %s" %
            str(self.directoryLocations))
        self.transfidmeta = self.am_getOption('TransfIDMeta',
                                              "TransformationID")
        gLogger.info("Will use %s as metadata tag name for TransformationID" %
                     self.transfidmeta)
        self.archiveAfter = self.am_getOption('ArchiveAfter', 7)  # days
        gLogger.info("Will archive Completed transformations after %d days" %
                     self.archiveAfter)
        self.activeStorages = sortList(self.am_getOption('ActiveSEs', []))
        gLogger.info("Will check the following storage elements: %s" %
                     str(self.activeStorages))
        self.logSE = self.am_getOption('TransformationLogSE', 'LogSE')
        gLogger.info("Will remove logs found on storage element: %s" %
                     self.logSE)
        return S_OK()
Exemplo n.º 24
0
  def __init__( self, transClient = None, logger = None, requestClient = None ):

    if not logger:
      logger = gLogger.getSubLogger( 'RequestTasks' )

    super( RequestTasks, self ).__init__( transClient, logger )

    if not requestClient:
      self.requestClient = RequestClient()
    else:
      self.requestClient = requestClient
Exemplo n.º 25
0
 def __getRequestFileStatusOLDSystem(self, requestName, lfns):
     """ for the OLD RMS
 """
     # FIXME: this should disappear
     try:
         res = RequestClient().getRequestFileStatus(requestName, lfns)
         if res['OK']:
             return res['Value']
         else:
             return {}
     except RuntimeError:
         return {}
Exemplo n.º 26
0
  def __init__( self, transClient = None, logger = None, requestClient = None ):

    if not logger:
      logger = gLogger.getSubLogger( 'RequestTasks' )

    super( RequestTasks, self ).__init__( transClient, logger )

    if not requestClient:
      from DIRAC.RequestManagementSystem.Client.RequestClient import RequestClient
      self.requestClient = RequestClient()
    else:
      self.requestClient = requestClient
Exemplo n.º 27
0
 def __getRequestIDOLDSystem(self, requestName):
     """ for the OLD RMS
 """
     # FIXME: this should disappear
     try:
         res = RequestClient().getRequestInfo(requestName)
         if res['OK']:
             return res['Value'][0]
         else:
             return 0
     except RuntimeError:
         return 0
Exemplo n.º 28
0
  def setUp( self ):
    """ test setup

    :param self: self reference
    """
    self.request = Request()
    self.request.RequestName = "RequestManagerHandlerTests"
    self.request.OwnerDN = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=cibak/CN=605919/CN=Krzysztof Ciba"
    self.request.OwnerGroup = "dirac_user"
    self.operation = Operation()
    self.operation.Type = "ReplicateAndRegister"
    self.operation.TargetSE = "CERN-USER"
    self.file = File()
    self.file.LFN = "/lhcb/user/c/cibak/testFile"
    self.file.Checksum = "123456"
    self.file.ChecksumType = "ADLER32"
    self.request.addOperation( self.operation )
    self.operation.addFile( self.file )
    # # xml representation of a whole request
    self.xmlStr = self.request.toXML( True )["Value"]
    # # request client
    self.requestClient = RequestClient()
Exemplo n.º 29
0
  def initialize( self ):
    """
      Called by the framework upon startup, before any cycle (execute method bellow)
    """
    self.requestDBClient = RequestClient()
    # the RequestAgentMixIn needs the capitalized version, until is is fixed keep this.
    self.RequestDBClient = self.requestDBClient
    self.replicaManager = ReplicaManager()

    gMonitor.registerActivity( "Iteration", "Agent Loops", "RemovalAgent", "Loops/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "Execute", "Request Processed", "RemovalAgent", "Requests/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "Done", "Request Completed", "RemovalAgent", "Requests/min", gMonitor.OP_SUM )

    gMonitor.registerActivity( "PhysicalRemovalAtt", "Physical removals attempted",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "PhysicalRemovalDone", "Successful physical removals",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "PhysicalRemovalFail", "Failed physical removals",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "PhysicalRemovalSize", "Physically removed size",
                               "RemovalAgent", "Bytes", gMonitor.OP_ACUM )

    gMonitor.registerActivity( "ReplicaRemovalAtt", "Replica removal attempted",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "ReplicaRemovalDone", "Successful replica removals",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "ReplicaRemovalFail", "Failed replica removals",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )

    gMonitor.registerActivity( "RemoveFileAtt", "File removal attempted",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "RemoveFileDone", "File removal done",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "RemoveFileFail", "File removal failed",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )

    self.maxNumberOfThreads = self.am_getOption( 'NumberOfThreads', self.maxNumberOfThreads )
    self.maxRequestsInQueue = self.am_getOption( 'RequestsInQueue', self.maxRequestsInQueue )
    self.threadPool = ThreadPool( 1, self.maxNumberOfThreads, self.maxRequestsInQueue )

    # Set the ThreadPool in daemon mode to process new ThreadedJobs as they are inserted
    self.threadPool.daemonize()

    self.maxRequests = self.am_getOption( 'MaxRequestsPerCycle', 1200. )

    # This sets the Default Proxy to used as that defined under
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    return S_OK()
Exemplo n.º 30
0
    def __init__(self, agentName, baseAgentName=False, properties=dict()):
        """ c'tor

    :param self: self reference
    :param str agentName: name of agent
    :param bool baseAgentName: whatever  
    :param dict properties: whatever else
    """
        AgentModule.__init__(self, agentName, baseAgentName, properties)
        ## replica manager
        self.replicaManager = ReplicaManager()
        ## transformation client
        self.transClient = TransformationClient()
        ## wms client
        self.wmsClient = WMSClient()
        ## request client
        self.requestClient = RequestClient()
        ## file catalog clinet
        self.metadataClient = FileCatalogClient()
        ## storage usage agent
        self.storageUsageClient = StorageUsageClient()

        ## placeholders for CS options

        ## transformations types
        self.transformationTypes = None
        ## directory locations
        self.directoryLocations = None
        ## transformation metadata
        self.transfidmeta = None
        ## archive periof in days
        self.archiveAfter = None
        ## active SEs
        self.activeStorages = None
        ## transformation log SEs
        self.logSE = None
        ## enable/disable execution
        self.enableFlag = None
Exemplo n.º 31
0
 def initialize(self):
   """Sets defaults
   """
   self.enableFlag = '' #defined below
   self.replicaManager = ReplicaManager()
   self.prodDB = TransformationClient()
   self.requestClient = RequestClient()
   self.taskIDName = 'TaskID' 
   self.externalStatus = 'ExternalStatus'
   self.externalID = 'ExternalID'
   self.am_setOption('PollingTime',2*60*60) #no stalled jobs are considered so can be frequent
   self.enableFlag = self.am_getOption('EnableFlag', False)
   self.am_setModuleParam("shifterProxy", "ProductionManager")
   self.ops = Operations()
   return S_OK()
Exemplo n.º 32
0
  def initialize( self ):

    self.RequestDBClient = RequestClient()
    self.ReplicaManager = ReplicaManager()
    self.DataLog = DataLoggingClient()

    self.maxNumberOfThreads = self.am_getOption( 'NumberOfThreads', 1 )
    self.threadPoolDepth = self.am_getOption( 'ThreadPoolDepth', 1 )
    self.threadPool = ThreadPool( 1, self.maxNumberOfThreads )

    # This sets the Default Proxy to used as that defined under
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    return S_OK()
Exemplo n.º 33
0
 def __deleteSandboxFromExternalBackend(self, SEName, SEPFN):
     if self.getCSOption("DelayedExternalDeletion", True):
         gLogger.info("Setting deletion request")
         try:
             request = RequestContainer()
             result = request.addSubRequest(
                 {
                     'Attributes': {
                         'Operation': 'removePhysicalFile',
                         'TargetSE': SEName,
                         'ExecutionOrder': 1
                     }
                 }, 'removal')
             index = result['Value']
             fileDict = {'PFN': SEPFN, 'Status': 'Waiting'}
             request.setSubRequestFiles(index, 'removal', [fileDict])
             return RequestClient().setRequest(
                 "RemoteSBDeletion:%s|%s:%s" % (SEName, SEPFN, time.time()),
                 request.toXML()['Value'])
         except Exception, e:
             gLogger.exception("Exception while setting deletion request")
             return S_ERROR("Cannot set deletion request: %s" % str(e))
Exemplo n.º 34
0
  def __init__( self, agentName, baseAgentName = False,	properties = dict() ):
    """ c'tor

    :param self: self reference
    :param str agentName: name of agent
    :param bool baseAgentName: whatever  
    :param dict properties: whatever else
    """
    AgentModule.__init__( self, agentName, baseAgentName, properties )
    ## replica manager
    self.replicaManager = ReplicaManager()
    ## transformation client
    self.transClient = TransformationClient()
    ## wms client
    self.wmsClient = WMSClient()
    ## request client
    self.requestClient = RequestClient()
    ## file catalog clinet
    self.metadataClient = FileCatalogClient()
    ## storage usage agent
    self.storageUsageClient = StorageUsageClient()

    ## placeholders for CS options

    ## transformations types
    self.transformationTypes = None 
    ## directory locations
    self.directoryLocations = None 
    ## transformation metadata
    self.transfidmeta = None 
    ## archive periof in days
    self.archiveAfter = None 
    ## active SEs
    self.activeStorages = None 
    ## transformation log SEs
    self.logSE = None 
    ## enable/disable execution
    self.enableFlag = None 
Exemplo n.º 35
0
  def initialize(self):

    self.RequestDBClient = RequestClient()

    gMonitor.registerActivity("Iteration",          "Agent Loops",                  "ZuziaAgent",      "Loops/min",      gMonitor.OP_SUM)
    gMonitor.registerActivity("Attempted",          "Request Processed",            "ZuziaRAgent",     "Requests/min",   gMonitor.OP_SUM)
    gMonitor.registerActivity("Successful",         "Request Forward Successful",   "ZuziaAgent",      "Requests/min",   gMonitor.OP_SUM)
    gMonitor.registerActivity("Failed",             "Request Forward Failed",       "ZuziaAgent",      "Requests/min",   gMonitor.OP_SUM)

    self.local = PathFinder.getServiceURL("RequestManagement/localURL")
    if not self.local:
      self.local = AgentModule.am_getOption(self,'localURL','')
    if not self.local:
      errStr = 'The RequestManagement/localURL option must be defined.'
      gLogger.fatal(errStr)
      return S_ERROR(errStr)

    self.central = PathFinder.getServiceURL("RequestManagement/centralURL")
    if not self.central:
      errStr = 'The RequestManagement/centralURL option must be defined.'
      gLogger.fatal(errStr)
      return S_ERROR(errStr)
    return S_OK()
Exemplo n.º 36
0
  def initialize( self ):

    self.RequestDBClient = RequestClient()
    self.ReplicaManager = ReplicaManager()
    self.DataLog = DataLoggingClient()

    gMonitor.registerActivity( "Iteration", "Agent Loops", "TransferAgent", "Loops/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "Execute", "Request Processed", "TransferAgent", "Requests/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "Done", "Request Completed", "TransferAgent", "Requests/min", gMonitor.OP_SUM )

    gMonitor.registerActivity( "Replicate and register", "Replicate and register operations", "TransferAgent", "Attempts/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "Replicate", "Replicate operations", "TransferAgent", "Attempts/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "Put and register", "Put and register operations", "TransferAgent", "Attempts/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "Put", "Put operations", "TransferAgent", "Attempts/min", gMonitor.OP_SUM )

    gMonitor.registerActivity( "Replication successful", "Successful replications", "TransferAgent", "Successful/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "Put successful", "Successful puts", "TransferAgent", "Successful/min", gMonitor.OP_SUM )

    gMonitor.registerActivity( "Replication failed", "Failed replications", "TransferAgent", "Failed/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "Put failed", "Failed puts", "TransferAgent", "Failed/min", gMonitor.OP_SUM )

    gMonitor.registerActivity( "Replica registration successful", "Successful replica registrations", "TransferAgent", "Successful/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "File registration successful", "Successful file registrations", "TransferAgent", "Successful/min", gMonitor.OP_SUM )

    gMonitor.registerActivity( "Replica registration failed", "Failed replica registrations", "TransferAgent", "Failed/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "File registration failed", "Failed file registrations", "TransferAgent", "Failed/min", gMonitor.OP_SUM )

    self.maxNumberOfThreads = self.am_getOption( 'NumberOfThreads', 1 )
    self.threadPoolDepth = self.am_getOption( 'ThreadPoolDepth', 1 )
    self.threadPool = ThreadPool( 1, self.maxNumberOfThreads )

    # This sets the Default Proxy to used as that defined under
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    return S_OK()
Exemplo n.º 37
0
class RegistrationAgent( AgentModule, RequestAgentMixIn ):

  def initialize( self ):

    self.RequestDBClient = RequestClient()
    self.ReplicaManager = ReplicaManager()
    self.DataLog = DataLoggingClient()

    self.maxNumberOfThreads = self.am_getOption( 'NumberOfThreads', 1 )
    self.threadPoolDepth = self.am_getOption( 'ThreadPoolDepth', 1 )
    self.threadPool = ThreadPool( 1, self.maxNumberOfThreads )

    # This sets the Default Proxy to used as that defined under
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    return S_OK()

  def execute( self ):

    for i in range( self.threadPoolDepth ):
      requestExecutor = ThreadedJob( self.executeRequest )
      self.threadPool.queueJob( requestExecutor )
    self.threadPool.processResults()
    return self.executeRequest()

  def executeRequest( self ):
    ################################################
    # Get a request from request DB
    res = self.RequestDBClient.getRequest( 'register' )
    if not res['OK']:
      gLogger.info( "RegistrationAgent.execute: Failed to get request from database." )
      return S_OK()
    elif not res['Value']:
      gLogger.info( "RegistrationAgent.execute: No requests to be executed found." )
      return S_OK()
    requestString = res['Value']['RequestString']
    requestName = res['Value']['RequestName']
    sourceServer = res['Value']['Server']
    try:
      jobID = int( res['Value']['JobID'] )
    except:
      jobID = 0
    gLogger.info( "RegistrationAgent.execute: Obtained request %s" % requestName )

    result = self.RequestDBClient.getCurrentExecutionOrder( requestName, sourceServer )
    if result['OK']:
      currentOrder = result['Value']
    else:
      return S_OK( 'Can not get the request execution order' )

    oRequest = RequestContainer( request = requestString )

    ################################################
    # Find the number of sub-requests from the request
    res = oRequest.getNumSubRequests( 'register' )
    if not res['OK']:
      errStr = "RegistrationAgent.execute: Failed to obtain number of transfer subrequests."
      gLogger.error( errStr, res['Message'] )
      return S_OK()
    gLogger.info( "RegistrationAgent.execute: Found %s sub requests." % res['Value'] )

    ################################################
    # For all the sub-requests in the request
    modified = False
    for ind in range( res['Value'] ):
      gLogger.info( "RegistrationAgent.execute: Processing sub-request %s." % ind )
      subRequestAttributes = oRequest.getSubRequestAttributes( ind, 'register' )['Value']
      subExecutionOrder = int( subRequestAttributes['ExecutionOrder'] )
      subStatus = subRequestAttributes['Status']
      if subStatus == 'Waiting' and subExecutionOrder <= currentOrder:
        subRequestFiles = oRequest.getSubRequestFiles( ind, 'register' )['Value']
        operation = subRequestAttributes['Operation']

        ################################################
        #  If the sub-request is a register file operation
        if operation == 'registerFile':
          gLogger.info( "RegistrationAgent.execute: Attempting to execute %s sub-request." % operation )
          diracSE = str( subRequestAttributes['TargetSE'] )
          if diracSE == 'SE':
            # We do not care about SE, put any there
            diracSE = "CERN-FAILOVER"
          catalog = subRequestAttributes['Catalogue']
          if catalog == "None":
            catalog = ''
          subrequest_done = True
          for subRequestFile in subRequestFiles:
            if subRequestFile['Status'] == 'Waiting':
              lfn = subRequestFile.get( 'LFN', '' )
              if lfn: lfn = str( lfn )
              physicalFile = subRequestFile.get( 'PFN', '' )
              if physicalFile: physicalFile = str( physicalFile )
              fileSize = subRequestFile.get( 'Size', 0 )
              if fileSize: fileSize = int( fileSize )
              fileGuid = subRequestFile.get( 'GUID', '' )
              if fileGuid: fileGuid = str( fileGuid )
              checksum = subRequestFile.get( 'Addler', '' )
              if checksum: checksum = str( checksum )
              if catalog == 'BookkeepingDB':
                diracSE = 'CERN-HIST'
              fileTuple = ( lfn, physicalFile, fileSize, diracSE, fileGuid, checksum )
              res = self.ReplicaManager.registerFile( fileTuple, catalog )
              print res
              if not res['OK']:
                self.DataLog.addFileRecord( lfn, 'RegisterFail', diracSE, '', 'RegistrationAgent' )
                errStr = "RegistrationAgent.execute: Completely failed to register file."
                gLogger.error( errStr, res['Message'] )
                subrequest_done = False
              elif lfn in res['Value']['Failed'].keys():
                self.DataLog.addFileRecord( lfn, 'RegisterFail', diracSE, '', 'RegistrationAgent' )
                errStr = "RegistrationAgent.execute: Completely failed to register file."
                gLogger.error( errStr, res['Value']['Failed'][lfn] )
                subrequest_done = False
              else:
                self.DataLog.addFileRecord( lfn, 'Register', diracSE, '', 'TransferAgent' )
                oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Status', 'Done' )
                modified = True
            else:
              gLogger.info( "RegistrationAgent.execute: File already completed." )
          if subrequest_done:
            oRequest.setSubRequestStatus( ind, 'register', 'Done' )

        ################################################
        #  If the sub-request is none of the above types
        else:
          gLogger.error( "RegistrationAgent.execute: Operation not supported.", operation )

        ################################################
        #  Determine whether there are any active files
        if oRequest.isSubRequestEmpty( ind, 'register' )['Value']:
          oRequest.setSubRequestStatus( ind, 'register', 'Done' )

      ################################################
      #  If the sub-request is already in terminal state
      else:
        gLogger.info( "RegistrationAgent.execute: Sub-request %s is status '%s' and  not to be executed." % ( ind, subRequestAttributes['Status'] ) )

    ################################################
    #  Generate the new request string after operation
    requestString = oRequest.toXML()['Value']
    res = self.RequestDBClient.updateRequest( requestName, requestString, sourceServer )

    if modified and jobID:
      result = self.finalizeRequest( requestName, jobID, sourceServer )

    return S_OK()
class TransformationCleaningAgent( AgentModule ):

  #############################################################################
  def initialize( self ):
    """Sets defaults """
    self.replicaManager = ReplicaManager()
    self.transClient = TransformationClient()
    self.wmsClient = WMSClient()
    self.requestClient = RequestClient()
    self.metadataClient = FileCatalogClient()
    self.storageUsageClient = StorageUsageClient()

    # This sets the Default Proxy to used as that defined under 
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    self.transformationTypes = sortList( self.am_getOption( 'TransformationTypes', ['MCSimulation', 'DataReconstruction', 'DataStripping', 'MCStripping', 'Merge', 'Replication'] ) )
    gLogger.info( "Will consider the following transformation types: %s" % str( self.transformationTypes ) )
    self.directoryLocations = sortList( self.am_getOption( 'DirectoryLocations', ['TransformationDB', 'StorageUsage', 'MetadataCatalog'] ) )
    gLogger.info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) )
    self.transfidmeta = self.am_getOption( 'TransfIDMeta', "TransformationID" )
    gLogger.info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta )
    self.archiveAfter = self.am_getOption( 'ArchiveAfter', 7 ) # days
    gLogger.info( "Will archive Completed transformations after %d days" % self.archiveAfter )
    self.activeStorages = sortList( self.am_getOption( 'ActiveSEs', [] ) )
    gLogger.info( "Will check the following storage elements: %s" % str( self.activeStorages ) )
    self.logSE = self.am_getOption( 'TransformationLogSE', 'LogSE' )
    gLogger.info( "Will remove logs found on storage element: %s" % self.logSE )
    return S_OK()

  #############################################################################
  def execute( self ):
    """ The TransformationCleaningAgent execution method.
    """
    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
    if not self.enableFlag == 'True':
      self.log.info( 'TransformationCleaningAgent is disabled by configuration option %s/EnableFlag' % ( self.section ) )
      return S_OK( 'Disabled via CS flag' )

    # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
    res = self.transClient.getTransformations( {'Status':'Cleaning', 'Type':self.transformationTypes} )
    if res['OK']:
      for transDict in res['Value']:
        self.cleanTransformation( transDict['TransformationID'] )

    # Obtain the transformations in RemovingFiles status and (wait for it) removes the output files
    res = self.transClient.getTransformations( {'Status':'RemovingFiles', 'Type':self.transformationTypes} )
    if res['OK']:
      for transDict in res['Value']:
        self.removeTransformationOutput( transDict['TransformationID'] )

    # Obtain the transformations in Completed status and archive if inactive for X days
    olderThanTime = datetime.utcnow() - timedelta( days = self.archiveAfter )
    res = self.transClient.getTransformations( {'Status':'Completed', 'Type':self.transformationTypes}, older = olderThanTime )
    if res['OK']:
      for transDict in res['Value']:
        self.archiveTransformation( transDict['TransformationID'] )

    return S_OK()

  #############################################################################
  #
  # Get the transformation directories for checking
  #

  def getTransformationDirectories( self, transID ):
    """ Get the directories for the supplied transformation from the transformation system """
    directories = []
    if 'TransformationDB' in self.directoryLocations:
      res = self.transClient.getTransformationParameters( transID, ['OutputDirectories'] )
      if not res['OK']:
        gLogger.error( "Failed to obtain transformation directories", res['Message'] )
        return res
      transDirectories = res['Value'].splitlines()
      directories = self.__addDirs( transID, transDirectories, directories )

    if 'StorageUsage' in self.directoryLocations:
      res = self.storageUsageClient.getStorageDirectories( '', '', transID, [] )
      if not res['OK']:
        gLogger.error( "Failed to obtain storage usage directories", res['Message'] )
        return res
      transDirectories = res['Value']
      directories = self.__addDirs( transID, transDirectories, directories )

    if 'MetadataCatalog' in self.directoryLocations:
      res = self.metadataClient.findDirectoriesByMetadata( {self.transfidmeta:transID} )
      if not res['OK']:
        gLogger.error( "Failed to obtain metadata catalog directories", res['Message'] )
        return res
      transDirectories = res['Value']
      directories = self.__addDirs( transID, transDirectories, directories )
    if not directories:
      gLogger.info( "No output directories found" )
    directories = sortList( directories )
    return S_OK( directories )

  def __addDirs( self, transID, newDirs, existingDirs ):
    for dir in newDirs:
      transStr = str( transID ).zfill( 8 )
      if re.search( transStr, dir ):
        if not dir in existingDirs:
          existingDirs.append( dir )
    return existingDirs

  #############################################################################
  #
  # These are the methods for performing the cleaning of catalogs and storage
  #

  def cleanStorageContents( self, directory ):
    for storageElement in self.activeStorages:
      res = self.__removeStorageDirectory( directory, storageElement )
      if not res['OK']:
        return res
    return S_OK()

  def __removeStorageDirectory( self, directory, storageElement ):
    gLogger.info( 'Removing the contents of %s at %s' % ( directory, storageElement ) )
    res = self.replicaManager.getPfnForLfn( [directory], storageElement )
    if not res['OK']:
      gLogger.error( "Failed to get PFN for directory", res['Message'] )
      return res
    for directory, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain directory PFN from LFN', '%s %s' % ( directory, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to obtain directory PFN from LFNs' )
    storageDirectory = res['Value']['Successful'].values()[0]
    res = self.replicaManager.getStorageFileExists( storageDirectory, storageElement, singleFile = True )
    if not res['OK']:
      gLogger.error( "Failed to obtain existance of directory", res['Message'] )
      return res
    exists = res['Value']
    if not exists:
      gLogger.info( "The directory %s does not exist at %s " % ( directory, storageElement ) )
      return S_OK()
    res = self.replicaManager.removeStorageDirectory( storageDirectory, storageElement, recursive = True, singleDirectory = True )
    if not res['OK']:
      gLogger.error( "Failed to remove storage directory", res['Message'] )
      return res
    gLogger.info( "Successfully removed %d files from %s at %s" % ( res['Value']['FilesRemoved'], directory, storageElement ) )
    return S_OK()

  def cleanCatalogContents( self, directory ):
    res = self.__getCatalogDirectoryContents( [directory] )
    if not res['OK']:
      return res
    filesFound = res['Value']
    if not filesFound:
      return S_OK()
    gLogger.info( "Attempting to remove %d possible remnants from the catalog and storage" % len( filesFound ) )
    res = self.replicaManager.removeFile( filesFound )
    if not res['OK']:
      return res
    for lfn, reason in res['Value']['Failed'].items():
      gLogger.error( "Failed to remove file found in the catalog", "%s %s" % ( lfn, reason ) )
    if res['Value']['Failed']:
      return S_ERROR( "Failed to remove all files found in the catalog" )
    return S_OK()

  def __getCatalogDirectoryContents( self, directories ):
    gLogger.info( 'Obtaining the catalog contents for %d directories:' % len( directories ) )
    for directory in directories:
      gLogger.info( directory )
    activeDirs = directories
    allFiles = {}
    while len( activeDirs ) > 0:
      currentDir = activeDirs[0]
      res = self.replicaManager.getCatalogListDirectory( currentDir, singleFile = True )
      activeDirs.remove( currentDir )
      if not res['OK'] and res['Message'].endswith( 'The supplied path does not exist' ):
        gLogger.info( "The supplied directory %s does not exist" % currentDir )
      elif not res['OK']:
        gLogger.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Message'] ) )
      else:
        dirContents = res['Value']
        activeDirs.extend( dirContents['SubDirs'] )
        allFiles.update( dirContents['Files'] )
    gLogger.info( "Found %d files" % len( allFiles ) )
    return S_OK( allFiles.keys() )

  def cleanTransformationLogFiles( self, directory ):
    gLogger.info( "Removing log files found in the directory %s" % directory )
    res = self.replicaManager.removeStorageDirectory( directory, self.logSE, singleDirectory = True )
    if not res['OK']:
      gLogger.error( "Failed to remove log files", res['Message'] )
      return res
    gLogger.info( "Successfully removed transformation log directory" )
    return S_OK()

  #############################################################################
  #
  # These are the functional methods for archiving and cleaning transformations
  #

  def removeTransformationOutput( self, transID ):
    """ This just removes any mention of the output data from the catalog and storage """
    gLogger.info( "Removing output data for transformation %s" % transID )
    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      gLogger.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) )
      return S_OK()
    directories = res['Value']
    for directory in directories:
      if not re.search( '/LOG/', directory ):
        res = self.cleanCatalogContents( directory )
        if not res['OK']:
          return res
        res = self.cleanStorageContents( directory )
        if not res['OK']:
          return res
    gLogger.info( "Removed directories in the catalog and storage for transformation" )
    # Clean ALL the possible remnants found in the metadata catalog
    res = self.cleanMetadataCatalogFiles( transID, directories )
    if not res['OK']:
      return res
    gLogger.info( "Successfully removed output of transformation %d" % transID )
    # Change the status of the transformation to RemovedFiles
    res = self.transClient.setTransformationParameter( transID, 'Status', 'RemovedFiles' )
    if not res['OK']:
      gLogger.error( "Failed to update status of transformation %s to RemovedFiles" % ( transID ), res['Message'] )
      return res
    gLogger.info( "Updated status of transformation %s to RemovedFiles" % ( transID ) )
    return S_OK()

  def archiveTransformation( self, transID ):
    """ This just removes job from the jobDB and the transformation DB """
    gLogger.info( "Archiving transformation %s" % transID )
    # Clean the jobs in the WMS and any failover requests found
    res = self.cleanTransformationTasks( transID )
    if not res['OK']:
      return res
    # Clean the transformation DB of the files and job information
    res = self.transClient.cleanTransformation( transID )
    if not res['OK']:
      return res
    gLogger.info( "Successfully archived transformation %d" % transID )
    # Change the status of the transformation to archived
    res = self.transClient.setTransformationParameter( transID, 'Status', 'Archived' )
    if not res['OK']:
      gLogger.error( "Failed to update status of transformation %s to Archived" % ( transID ), res['Message'] )
      return res
    gLogger.info( "Updated status of transformation %s to Archived" % ( transID ) )
    return S_OK()

  def cleanTransformation( self, transID ):
    """ This removes any mention of the supplied transformation 
    """
    gLogger.info( "Cleaning transformation %s" % transID )
    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      gLogger.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) )
      return S_OK()
    directories = res['Value']
    # Clean the jobs in the WMS and any failover requests found
    res = self.cleanTransformationTasks( transID )
    if not res['OK']:
      return res
    # Clean the log files for the jobs
    for directory in directories:
      if re.search( '/LOG/', directory ):
        res = self.cleanTransformationLogFiles( directory )
        if not res['OK']:
          return res
      res = self.cleanCatalogContents( directory )
      if not res['OK']:
        return res
      res = self.cleanStorageContents( directory )
      if not res['OK']:
        return res
    # Clean ALL the possible remnants found in the BK
    res = self.cleanMetadataCatalogFiles( transID, directories )
    if not res['OK']:
      return res
    # Clean the transformation DB of the files and job information
    res = self.transClient.cleanTransformation( transID )
    if not res['OK']:
      return res
    gLogger.info( "Successfully cleaned transformation %d" % transID )
    # Change the status of the transformation to deleted
    res = self.transClient.setTransformationParameter( transID, 'Status', 'Deleted' )
    if not res['OK']:
      gLogger.error( "Failed to update status of transformation %s to Deleted" % ( transID ), res['Message'] )
      return res
    gLogger.info( "Updated status of transformation %s to Deleted" % ( transID ) )
    return S_OK()

  def cleanMetadataCatalogFiles( self, transID, directories ):
    res = self.metadataClient.findFilesByMetadata( {self.transfidmeta:transID} )
    if not res['OK']:
      return res
    fileToRemove = res['Value']
    if not len(fileToRemove):
      gLogger.info('No files found for transID %s'%transID)
      return S_OK()
    res = self.replicaManager.removeFile( fileToRemove )
    if not res['OK']:
      return res
    for lfn, reason in res['Value']['Failed'].items():
      gLogger.error( "Failed to remove file found in metadata catalog", "%s %s" % ( lfn, reason ) )
    if res['Value']['Failed']:
      return S_ERROR( "Failed to remove all files found in the metadata catalog" )
    gLogger.info( "Successfully removed all files found in the BK" )
    return S_OK()

  #############################################################################
  #
  # These are the methods for removing the jobs from the WMS and transformation DB
  #

  def cleanTransformationTasks( self, transID ):
    res = self.__getTransformationExternalIDs( transID )
    if not res['OK']:
      return res
    externalIDs = res['Value']
    if externalIDs:
      res = self.transClient.getTransformationParameters( transID, ['Type'] )
      if not res['OK']:
        gLogger.error( "Failed to determine transformation type" )
        return res
      transType = res['Value']
      if transType == 'Replication':
        res = self.__removeRequests( externalIDs )
      else:
        res = self.__removeWMSTasks( externalIDs )
      if not res['OK']:
        return res
    return S_OK()

  def __getTransformationExternalIDs( self, transID ):
    res = self.transClient.getTransformationTasks( condDict = {'TransformationID':transID} )
    if not res['OK']:
      gLogger.error( "Failed to get externalIDs for transformation %d" % transID, res['Message'] )
      return res
    externalIDs = []
    for taskDict in res['Value']:
      externalIDs.append( taskDict['ExternalID'] )
    gLogger.info( "Found %d tasks for transformation" % len( externalIDs ) )
    return S_OK( externalIDs )

  def __removeRequests( self, requestIDs ):
    gLogger.error( "Not removing requests but should do" )
    return S_OK()

  def __removeWMSTasks( self, jobIDs ):
    allRemove = True
    for jobList in breakListIntoChunks( jobIDs, 500 ):
      res = self.wmsClient.deleteJob( jobList )
      if res['OK']:
        gLogger.info( "Successfully removed %d jobs from WMS" % len( jobList ) )
      elif ( res.has_key( 'InvalidJobIDs' ) ) and ( not res.has_key( 'NonauthorizedJobIDs' ) ) and ( not res.has_key( 'FailedJobIDs' ) ):
        gLogger.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) )
      elif res.has_key( 'NonauthorizedJobIDs' ):
        gLogger.error( "Failed to remove %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) )
        allRemove = False
      elif res.has_key( 'FailedJobIDs' ):
        gLogger.error( "Failed to remove %s jobs" % len( res['FailedJobIDs'] ) )
        allRemove = False
    if not allRemove:
      return S_ERROR( "Failed to remove all remnants from WMS" )
    gLogger.info( "Successfully removed all tasks from the WMS" )
    res = self.requestClient.getRequestForJobs( jobIDs )
    if not res['OK']:
      gLogger.error( "Failed to get requestID for jobs.", res['Message'] )
      return res
    failoverRequests = res['Value']
    gLogger.info( "Found %d jobs with associated failover requests" % len( failoverRequests ) )
    if not failoverRequests:
      return S_OK()
    failed = 0
    for jobID, requestName in failoverRequests.items():
      res = self.requestClient.deleteRequest( requestName )
      if not res['OK']:
        gLogger.error( "Failed to remove request from RequestDB", res['Message'] )
        failed += 1
      else:
        gLogger.verbose( "Removed request %s associated to job %d." % ( requestName, jobID ) )
    if failed:
      gLogger.info( "Successfully removed %s requests" % ( len( failoverRequests ) - failed ) )
      gLogger.info( "Failed to remove %s requests" % failed )
      return S_ERROR( "Failed to remove all the request from RequestDB" )
    gLogger.info( "Successfully removed all the associated failover requests" )
    return S_OK()
Exemplo n.º 39
0
 def requestClient( cls ):
   """ request client getter """
   if not cls.__requestClient:
     cls.__requestClient = RequestClient()
   return cls.__requestClient
Exemplo n.º 40
0
class DataRecoveryAgent( AgentModule ):
  def __init__(self, *args, **kwargs):
    AgentModule.__init__( self, *args, **kwargs )
    self.name = 'DataRecoveryAgent'
    self.log = gLogger
  #############################################################################
  def initialize(self):
    """Sets defaults
    """
    self.enableFlag = '' #defined below
    self.replicaManager = ReplicaManager()
    self.prodDB = TransformationClient()
    self.requestClient = RequestClient()
    self.taskIDName = 'TaskID' 
    self.externalStatus = 'ExternalStatus'
    self.externalID = 'ExternalID'
    self.am_setOption('PollingTime',2*60*60) #no stalled jobs are considered so can be frequent
    self.enableFlag = self.am_getOption('EnableFlag', False)
    self.am_setModuleParam("shifterProxy", "ProductionManager")
    self.ops = Operations()
    return S_OK()
  #############################################################################
  def execute(self):
    """ The main execution method.
    """  
    self.log.info('Enable flag is %s' % self.enableFlag)  
    self.removalOKFlag = True
    
    transformationTypes = ['MCReconstruction', 'MCSimulation', 'MCReconstruction_Overlay', 'Merge']
    transformationStatus = ['Active', 'Completing']
    fileSelectionStatus = ['Assigned', 'MaxReset']
    updateStatus = 'Unused'
    wmsStatusList = ['Failed']
    #only worry about files > 12hrs since last update    
    selectDelay = self.am_getOption("Delay", 2) #hours 

    transformationDict = {}
    for transStatus in transformationStatus:
      result = self.getEligibleTransformations(transStatus, transformationTypes)
      if not result['OK']:
        self.log.error(result)
        return S_ERROR('Could not obtain eligible transformations for status "%s"' % (transStatus))
      
      if not result['Value']:
        self.log.info('No "%s" transformations of types %s to process.' % (transStatus, string.join(transformationTypes, ', ')))
        continue

      transformationDict.update(result['Value'])

    self.log.info('Selected %s transformations of types %s' % (len(transformationDict.keys()), string.join(transformationTypes, ', ')))
    self.log.verbose('The following transformations were selected out of %s:\n%s' % (string.join(transformationTypes, ', '), string.join(transformationDict.keys(), ', ')))

    trans = []
    #initially this was useful for restricting the considered list
    #now we use the DataRecoveryAgent in setups where IDs are low
    ignoreLessThan = self.ops.getValue("Transformations/IgnoreLessThan", '724') 
    
    if trans:
      self.log.info('Skipping all transformations except %s' % (string.join(trans, ', ')))
          
    for transformation, typeName in transformationDict.items():
      if trans:
        if not transformation in trans:
          continue
      if ignoreLessThan:
        if int(ignoreLessThan) > int(transformation):
          self.log.verbose('Ignoring transformation %s ( is less than specified limit %s )' % (transformation, ignoreLessThan))
          continue

      self.log.info('='*len('Looking at transformation %s type %s:' % (transformation, typeName)))
      self.log.info('Looking at transformation %s:' % (transformation))

      result = self.selectTransformationFiles(transformation, fileSelectionStatus)
      if not result['OK']:
        self.log.error(result)
        self.log.error('Could not select files for transformation %s' % transformation)
        continue
  
      if not result['Value']:
        self.log.info('No files in status %s selected for transformation %s' % (string.join(fileSelectionStatus, ', '), transformation))
        continue
    
      fileDict = result['Value']      
      result = self.obtainWMSJobIDs(transformation, fileDict, selectDelay, wmsStatusList)
      if not result['OK']:
        self.log.error(result)
        self.log.error('Could not obtain WMS jobIDs for files of transformation %s' % (transformation))
        continue
      if not result['Value']:
        self.log.info('No eligible WMS jobIDs found for %s files in list:\n%s ...' % (len(fileDict.keys()), fileDict.keys()[0]))
        continue
    
      jobFileDict = result['Value']
      fileCount = 0
      for lfnList in jobFileDict.values():
        fileCount += len(lfnList)
      
      if not fileCount:
        self.log.info('No files were selected for transformation %s after examining WMS jobs.' % transformation)
        continue
      
      self.log.info('%s files are selected after examining related WMS jobs' % (fileCount))   
      result = self.checkOutstandingRequests(jobFileDict)
      if not result['OK']:
        self.log.error(result)
        continue

      if not result['Value']:
        self.log.info('No WMS jobs without pending requests to process.')
        continue
      
      jobFileNoRequestsDict = result['Value']
      fileCount = 0
      for lfnList in jobFileNoRequestsDict.values():
        fileCount += len(lfnList)
      
      self.log.info('%s files are selected after removing any relating to jobs with pending requests' % (fileCount))
      result = self.checkDescendents(transformation, fileDict, jobFileNoRequestsDict)
      if not result['OK']:
        self.log.error(result)
        continue

      jobsWithFilesOKToUpdate = result['Value']['filesToMarkUnused']
      jobsWithFilesProcessed = result['Value']['filesprocessed']
      self.log.info('====> Transformation %s total files that can be updated now: %s' % (transformation, len(jobsWithFilesOKToUpdate)))

      filesToUpdateUnused = []
      for fileList in jobsWithFilesOKToUpdate:
        filesToUpdateUnused.append(fileList)
      
      if len(filesToUpdateUnused):
        result = self.updateFileStatus(transformation, filesToUpdateUnused, updateStatus)
        if not result['OK']:
          self.log.error('Recoverable files were not updated with result:\n%s' % (result['Message']))
          continue
      else:
        self.log.info('There are no files with failed jobs to update for production %s in this cycle' % transformation)             

      filesToUpdateProcessed = []  
      for fileList in jobsWithFilesProcessed:
        filesToUpdateProcessed.append(fileList)
      
      if len(filesToUpdateProcessed):
        result = self.updateFileStatus(transformation, filesToUpdateProcessed, 'Processed')
        if not result['OK']:
          self.log.error('Recoverable files were not updated with result:\n%s' % (result['Message']))
          continue          
      else:
        self.log.info('There are no files processed to update for production %s in this cycle' % transformation)              
      
    return S_OK()

  #############################################################################
  def getEligibleTransformations(self, status, typeList):
    """ Select transformations of given status and type.
    """
    res = self.prodDB.getTransformations(condDict = {'Status' : status, 'Type' : typeList})
    self.log.debug(res)
    if not res['OK']:
      return res
    transformations = {}
    for prod in res['Value']:
      prodID = prod['TransformationID']
      transformations[str(prodID)] = prod['Type']
    return S_OK(transformations)
  
  #############################################################################
  def selectTransformationFiles(self, transformation, statusList):
    """ Select files, production jobIDs in specified file status for a given transformation.
    """
    #Until a query for files with timestamp can be obtained must rely on the
    #WMS job last update
    res = self.prodDB.getTransformationFiles(condDict = {'TransformationID' : transformation, 'Status' : statusList})
    self.log.debug(res)
    if not res['OK']:
      return res
    resDict = {}
    for fileDict in res['Value']:
      if not fileDict.has_key('LFN') or not fileDict.has_key(self.taskIDName) or not fileDict.has_key('LastUpdate'):
        self.log.info('LFN, %s and LastUpdate are mandatory, >=1 are missing for:\n%s' % (self.taskIDName, fileDict))
        continue
      lfn = fileDict['LFN']
      jobID = fileDict[self.taskIDName]
      resDict[lfn] = jobID
    if resDict:
      self.log.info('Selected %s files overall for transformation %s' % (len(resDict.keys()), transformation))
    return S_OK(resDict)
  
  #############################################################################
  def obtainWMSJobIDs(self, transformation, fileDict, selectDelay, wmsStatusList):
    """ Group files by the corresponding WMS jobIDs, check the corresponding
        jobs have not been updated for the delay time.  Can't get into any 
        mess because we start from files only in MaxReset / Assigned and check
        corresponding jobs.  Mixtures of files for jobs in MaxReset and Assigned 
        statuses only possibly include some files in Unused status (not Processed 
        for example) that will not be touched.
    """
    prodJobIDs = uniqueElements(fileDict.values())
    self.log.info('The following %s production jobIDs apply to the selected files:\n%s' % (len(prodJobIDs), prodJobIDs))

    jobFileDict = {}
    condDict = {'TransformationID' : transformation, self.taskIDName : prodJobIDs}
    delta = datetime.timedelta( hours = selectDelay )
    now = dateTime()
    olderThan = now-delta

    res = self.prodDB.getTransformationTasks(condDict = condDict, older = olderThan,
                                             timeStamp = 'LastUpdateTime', inputVector = True)
    self.log.debug(res)
    if not res['OK']:
      self.log.error('getTransformationTasks returned an error:\n%s')
      return res
    
    for jobDict in res['Value']:
      missingKey = False
      for key in [self.taskIDName, self.externalID, 'LastUpdateTime', self.externalStatus, 'InputVector']:
        if not jobDict.has_key(key):
          self.log.info('Missing key %s for job dictionary, the following is available:\n%s' % (key, jobDict))
          missingKey = True
          continue
      
      if missingKey:
        continue
        
      job = jobDict[self.taskIDName]
      wmsID = jobDict[self.externalID]
      lastUpdate = jobDict['LastUpdateTime']
      wmsStatus = jobDict[self.externalStatus]
      jobInputData = jobDict['InputVector']
      jobInputData = [lfn.replace('LFN:','') for lfn in jobInputData.split(';')]
      
      if not int(wmsID):
        self.log.info('Prod job %s status is %s (ID = %s) so will not recheck with WMS' %(job, wmsStatus, wmsID))
        continue
      
      self.log.info('Job %s, prod job %s last update %s, production management system status %s' % (wmsID, job, lastUpdate, wmsStatus))
      #Exclude jobs not having appropriate WMS status - have to trust that production management status is correct        
      if not wmsStatus in wmsStatusList:
        self.log.info('Job %s is in status %s, not %s so will be ignored' % (wmsID, wmsStatus, string.join(wmsStatusList, ', ')))
        continue
        
      finalJobData = []
      #Must map unique files -> jobs in expected state
      for lfn,prodID in fileDict.items():
        if int(prodID) == int(job):
          finalJobData.append(lfn)
      
      self.log.info('Found %s files for job %s' % (len(finalJobData), job))    
      jobFileDict[wmsID] = finalJobData
 
    return S_OK(jobFileDict)
  
  #############################################################################
  def checkOutstandingRequests(self, jobFileDict):
    """ Before doing anything check that no outstanding requests are pending
        for the set of WMS jobIDs.
    """
    jobs = jobFileDict.keys()
    result = self.requestClient.getRequestForJobs(jobs)
    if not result['OK']:
      return result
    
    if not result['Value']:
      self.log.info('None of the jobs have pending requests')
      return S_OK(jobFileDict)
    
    for jobID in result['Value'].keys():
      del jobFileDict[str(jobID)]  
      self.log.info('Removing jobID %s from consideration until requests are completed' % (jobID))
    
    return S_OK(jobFileDict)
  
  ############################################################################
  def checkDescendents(self, transformation, filedict, jobFileDict):
    """ look that all jobs produced, or not output
    """
    res = self.prodDB.getTransformationParameters(transformation, ['Body'])
    if not res['OK']:
      self.log.error('Could not get Body from TransformationDB')
      return res
    body = res['Value']
    workflow = fromXMLString(body)
    workflow.resolveGlobalVars()

    olist = []
    jtype = workflow.findParameter('JobType')
    if not jtype:
      self.log.error('Type for transformation %d was not defined' % transformation)
      return S_ERROR('Type for transformation %d was not defined' % transformation)
    for step in workflow.step_instances:
      param = step.findParameter('listoutput')
      if not param:
        continue
      olist.extend(param.value)
    expectedlfns = []
    contactfailed = []
    fileprocessed = []
    files = []
    tasks_to_be_checked = {}
    for files in jobFileDict.values():
      for f in files:
        if f in filedict:
          tasks_to_be_checked[f] = filedict[f] #get the tasks that need to be checked
    for filep, task in tasks_to_be_checked.items():
      commons = {}
      commons['outputList'] = olist
      commons['PRODUCTION_ID'] = transformation
      commons['JOB_ID'] = task
      commons['JobType'] = jtype
      out = constructProductionLFNs(commons)
      expectedlfns = out['Value']['ProductionOutputData']
      res = self.replicaManager.getCatalogFileMetadata(expectedlfns)
      if not res['OK']:
        self.log.error('Getting metadata failed')
        contactfailed.append(filep)
        continue
      if not filep in files:
        files.append(filep)      
      success = res['Value']['Successful'].keys()
      failed = res['Value']['Failed'].keys()
      if len(success) and not len(failed):
        fileprocessed.append(filep)
        
    final_list_unused = files
    for file_all in files:
      if file_all in fileprocessed:
        try:
          final_list_unused.remove(filep)
        except:
          self.log.warn("Item not in list anymore")

        
    result = {'filesprocessed' : fileprocessed, 'filesToMarkUnused' : final_list_unused}    
    return S_OK(result)

  #############################################################################
  def updateFileStatus(self, transformation, fileList, fileStatus):
    """ Update file list to specified status.
    """
    if not self.enableFlag:
      self.log.info('Enable flag is False, would update  %s files to "%s" status for %s' % (len(fileList), fileStatus, transformation))
      return S_OK()

    self.log.info('Updating %s files to "%s" status for %s' % (len(fileList), fileStatus, transformation))
    result = self.prodDB.setFileStatusForTransformation(int(transformation), fileStatus, fileList, force = True)
    self.log.debug(result)
    if not result['OK']:
      self.log.error(result)
      return result
    if result['Value']['Failed']:
      self.log.error(result['Value']['Failed'])
      return result
    
    msg = result['Value']['Successful']
    for lfn, message in msg.items():
      self.log.info('%s => %s' % (lfn, message))
    
    return S_OK()
Exemplo n.º 41
0
class LFCvsSEAgent(AgentModule):
    def initialize(self):

        self.RequestDBClient = RequestClient()
        self.ReplicaManager = ReplicaManager()
        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/DataManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')

        return S_OK()

    def execute(self):

        res = self.RequestDBClient.getRequest('integrity')
        if not res['OK']:
            gLogger.info(
                "LFCvsSEAgent.execute: Failed to get request from database.")
            return S_OK()
        elif not res['Value']:
            gLogger.info(
                "LFCvsSEAgent.execute: No requests to be executed found.")
            return S_OK()
        requestString = res['Value']['RequestString']
        requestName = res['Value']['RequestName']
        sourceServer = res['Value']['Server']
        gLogger.info("LFCvsSEAgent.execute: Obtained request %s" % requestName)
        oRequest = RequestContainer(request=requestString)

        ################################################
        # Find the number of sub-requests from the request
        res = oRequest.getNumSubRequests('integrity')
        if not res['OK']:
            errStr = "LFCvsSEAgent.execute: Failed to obtain number of integrity subrequests."
            gLogger.error(errStr, res['Message'])
            return S_OK()
        gLogger.info("LFCvsSEAgent.execute: Found %s sub requests." %
                     res['Value'])

        ################################################
        # For all the sub-requests in the request
        for ind in range(res['Value']):
            gLogger.info("LFCvsSEAgent.execute: Processing sub-request %s." %
                         ind)
            subRequestAttributes = oRequest.getSubRequestAttributes(
                ind, 'integrity')['Value']
            if subRequestAttributes['Status'] == 'Waiting':
                subRequestFiles = oRequest.getSubRequestFiles(
                    ind, 'integrity')['Value']
                operation = subRequestAttributes['Operation']

                ################################################
                #  If the sub-request is a lfcvsse operation
                if operation == 'LFCvsSE':
                    gLogger.info(
                        "LFCvsSEAgent.execute: Attempting to execute %s sub-request."
                        % operation)
                    for subRequestFile in subRequestFiles:
                        if subRequestFile['Status'] == 'Waiting':
                            lfn = subRequestFile['LFN']
                            oNamespaceBrowser = NamespaceBrowser(lfn)

                            # Loop over all the directories and sub-directories
                            while (oNamespaceBrowser.isActive()):
                                currentDir = oNamespaceBrowser.getActiveDir()
                                gLogger.info(
                                    "LFCvsSEAgent.execute: Attempting to get contents of %s."
                                    % currentDir)
                                res = self.ReplicaManager.getCatalogDirectoryContents(
                                    currentDir)
                                if not res['OK']:
                                    subDirs = [currentDir]
                                elif res['Value']['Failed'].has_key(
                                        currentDir):
                                    subDirs = [currentDir]
                                else:
                                    subDirs = res['Value']['Successful'][
                                        currentDir]['SubDirs']
                                    files = res['Value']['Successful'][
                                        currentDir]['Files']

                                    lfnSizeDict = {}
                                    pfnLfnDict = {}
                                    pfnStatusDict = {}
                                    sePfnDict = {}
                                    for lfn, lfnDict in files.items():
                                        lfnSizeDict[lfn] = lfnDict['MetaData'][
                                            'Size']
                                        for se in lfnDict['Replicas'].keys():
                                            pfn = lfnDict['Replicas'][se][
                                                'PFN']
                                            status = lfnDict['Replicas'][se][
                                                'Status']
                                            pfnStatusDict[pfn] = status
                                            pfnLfnDict[pfn] = lfn
                                            if not sePfnDict.has_key(se):
                                                sePfnDict[se] = []
                                            sePfnDict[se].append(pfn)

                                    for storageElementName, physicalFiles in sePfnDict.items(
                                    ):
                                        gLogger.info(
                                            "LFCvsSEAgent.execute: Attempting to get metadata for files on %s."
                                            % storageElementName)
                                        res = self.ReplicaManager.getStorageFileMetadata(
                                            physicalFiles, storageElementName)
                                        if not res['OK']:
                                            gLogger.error(
                                                "LFCvsSEAgent.execute: Completely failed to get physical file metadata.",
                                                res['Message'])
                                        else:
                                            for pfn in res['Value'][
                                                    'Failed'].keys():
                                                gLogger.error(
                                                    "LFCvsSEAgent.execute: Failed to get metadata.",
                                                    "%s %s" %
                                                    (pfn, res['Value']
                                                     ['Failed'][pfn]))
                                                lfn = pfnLfnDict[pfn]
                                                fileMetadata = {
                                                    'Prognosis':
                                                    'MissingSEPfn',
                                                    'LFN': lfn,
                                                    'PFN': pfn,
                                                    'StorageElement':
                                                    storageElementName,
                                                    'Size': lfnSizeDict[lfn]
                                                }
                                                IntegrityDB = RPCClient(
                                                    'DataManagement/DataIntegrity'
                                                )
                                                resInsert = IntegrityDB.insertProblematic(
                                                    AGENT_NAME, fileMetadata)
                                                if resInsert['OK']:
                                                    gLogger.info(
                                                        "LFCvsSEAgent.execute: Successfully added to IntegrityDB."
                                                    )
                                                    gLogger.error(
                                                        "Change the status in the LFC,ProcDB...."
                                                    )
                                                else:
                                                    gLogger.error(
                                                        "Shit, f**k, bugger. Add the failover."
                                                    )
                                            for pfn, pfnDict in res['Value'][
                                                    'Successful'].items():
                                                lfn = pfnLfnDict[pfn]
                                                catalogSize = int(
                                                    lfnSizeDict[lfn])
                                                storageSize = int(
                                                    pfnDict['Size'])
                                                if int(catalogSize) == int(
                                                        storageSize):
                                                    gLogger.info(
                                                        "LFCvsSEAgent.execute: Catalog and storage sizes match.",
                                                        "%s %s" %
                                                        (pfn,
                                                         storageElementName))
                                                    gLogger.info(
                                                        "Change the status in the LFC"
                                                    )
                                                else:
                                                    gLogger.error(
                                                        "LFCvsSEAgent.execute: Catalog and storage size mis-match.",
                                                        "%s %s" %
                                                        (pfn,
                                                         storageElementName))
                                                    fileMetadata = {
                                                        'Prognosis':
                                                        'PfnSizeMismatch',
                                                        'LFN':
                                                        lfn,
                                                        'PFN':
                                                        pfn,
                                                        'StorageElement':
                                                        storageElementName
                                                    }
                                                    IntegrityDB = RPCClient(
                                                        'DataManagement/DataIntegrity'
                                                    )
                                                    resInsert = IntegrityDB.insertProblematic(
                                                        AGENT_NAME,
                                                        fileMetadata)
                                                    if resInsert['OK']:
                                                        gLogger.info(
                                                            "LFCvsSEAgent.execute: Successfully added to IntegrityDB."
                                                        )
                                                        gLogger.error(
                                                            "Change the status in the LFC,ProcDB...."
                                                        )
                                                    else:
                                                        gLogger.error(
                                                            "Shit, f**k, bugger. Add the failover."
                                                        )
                                oNamespaceBrowser.updateDirs(subDirs)
                            oRequest.setSubRequestFileAttributeValue(
                                ind, 'integrity', lfn, 'Status', 'Done')

                ################################################
                #  If the sub-request is none of the above types
                else:
                    gLogger.info(
                        "LFCvsSEAgent.execute: Operation not supported.",
                        operation)

                ################################################
                #  Determine whether there are any active files
                if oRequest.isSubRequestEmpty(ind, 'integrity')['Value']:
                    oRequest.setSubRequestStatus(ind, 'integrity', 'Done')

            ################################################
            #  If the sub-request is already in terminal state
            else:
                gLogger.info(
                    "LFCvsSEAgent.execute: Sub-request %s is status '%s' and  not to be executed."
                    % (ind, subRequestAttributes['Status']))

        ################################################
        #  Generate the new request string after operation
        requestString = oRequest.toXML()['Value']
        res = self.RequestDBClient.updateRequest(requestName, requestString,
                                                 sourceServer)

        return S_OK()
Exemplo n.º 42
0
class LFCvsSEAgent( AgentModule ):

  def initialize( self ):

    self.RequestDBClient = RequestClient()
    self.ReplicaManager = ReplicaManager()
    # This sets the Default Proxy to used as that defined under 
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    return S_OK()

  def execute( self ):

    res = self.RequestDBClient.getRequest( 'integrity' )
    if not res['OK']:
      gLogger.info( "LFCvsSEAgent.execute: Failed to get request from database." )
      return S_OK()
    elif not res['Value']:
      gLogger.info( "LFCvsSEAgent.execute: No requests to be executed found." )
      return S_OK()
    requestString = res['Value']['RequestString']
    requestName = res['Value']['RequestName']
    sourceServer = res['Value']['Server']
    gLogger.info( "LFCvsSEAgent.execute: Obtained request %s" % requestName )
    oRequest = RequestContainer( request = requestString )

    ################################################
    # Find the number of sub-requests from the request
    res = oRequest.getNumSubRequests( 'integrity' )
    if not res['OK']:
      errStr = "LFCvsSEAgent.execute: Failed to obtain number of integrity subrequests."
      gLogger.error( errStr, res['Message'] )
      return S_OK()
    gLogger.info( "LFCvsSEAgent.execute: Found %s sub requests." % res['Value'] )

    ################################################
    # For all the sub-requests in the request
    for ind in range( res['Value'] ):
      gLogger.info( "LFCvsSEAgent.execute: Processing sub-request %s." % ind )
      subRequestAttributes = oRequest.getSubRequestAttributes( ind, 'integrity' )['Value']
      if subRequestAttributes['Status'] == 'Waiting':
        subRequestFiles = oRequest.getSubRequestFiles( ind, 'integrity' )['Value']
        operation = subRequestAttributes['Operation']

        ################################################
        #  If the sub-request is a lfcvsse operation
        if operation == 'LFCvsSE':
          gLogger.info( "LFCvsSEAgent.execute: Attempting to execute %s sub-request." % operation )
          for subRequestFile in subRequestFiles:
            if subRequestFile['Status'] == 'Waiting':
              lfn = subRequestFile['LFN']
              oNamespaceBrowser = NamespaceBrowser( lfn )

              # Loop over all the directories and sub-directories
              while ( oNamespaceBrowser.isActive() ):
                currentDir = oNamespaceBrowser.getActiveDir()
                gLogger.info( "LFCvsSEAgent.execute: Attempting to get contents of %s." % currentDir )
                res = self.ReplicaManager.getCatalogDirectoryContents( currentDir )
                if not res['OK']:
                  subDirs = [currentDir]
                elif res['Value']['Failed'].has_key( currentDir ):
                  subDirs = [currentDir]
                else:
                  subDirs = res['Value']['Successful'][currentDir]['SubDirs']
                  files = res['Value']['Successful'][currentDir]['Files']

                  lfnSizeDict = {}
                  pfnLfnDict = {}
                  pfnStatusDict = {}
                  sePfnDict = {}
                  for lfn, lfnDict in files.items():
                    lfnSizeDict[lfn] = lfnDict['MetaData']['Size']
                    for se in lfnDict['Replicas'].keys():
                      pfn = lfnDict['Replicas'][se]['PFN']
                      status = lfnDict['Replicas'][se]['Status']
                      pfnStatusDict[pfn] = status
                      pfnLfnDict[pfn] = lfn
                      if not sePfnDict.has_key( se ):
                        sePfnDict[se] = []
                      sePfnDict[se].append( pfn )

                  for storageElementName, physicalFiles in sePfnDict.items():
                    gLogger.info( "LFCvsSEAgent.execute: Attempting to get metadata for files on %s." % storageElementName )
                    res = self.ReplicaManager.getStorageFileMetadata( physicalFiles, storageElementName )
                    if not res['OK']:
                      gLogger.error( "LFCvsSEAgent.execute: Completely failed to get physical file metadata.", res['Message'] )
                    else:
                      for pfn in res['Value']['Failed'].keys():
                        gLogger.error( "LFCvsSEAgent.execute: Failed to get metadata.", "%s %s" % ( pfn, res['Value']['Failed'][pfn] ) )
                        lfn = pfnLfnDict[pfn]
                        fileMetadata = {'Prognosis':'MissingSEPfn', 'LFN':lfn, 'PFN':pfn, 'StorageElement':storageElementName, 'Size':lfnSizeDict[lfn]}
                        IntegrityDB = RPCClient( 'DataManagement/DataIntegrity' )
                        resInsert = IntegrityDB.insertProblematic( AGENT_NAME, fileMetadata )
                        if resInsert['OK']:
                          gLogger.info( "LFCvsSEAgent.execute: Successfully added to IntegrityDB." )
                          gLogger.error( "Change the status in the LFC,ProcDB...." )
                        else:
                          gLogger.error( "Shit, f**k, bugger. Add the failover." )
                      for pfn, pfnDict in res['Value']['Successful'].items():
                        lfn = pfnLfnDict[pfn]
                        catalogSize = int( lfnSizeDict[lfn] )
                        storageSize = int( pfnDict['Size'] )
                        if int( catalogSize ) == int( storageSize ):
                          gLogger.info( "LFCvsSEAgent.execute: Catalog and storage sizes match.", "%s %s" % ( pfn, storageElementName ) )
                          gLogger.info( "Change the status in the LFC" )
                        else:
                          gLogger.error( "LFCvsSEAgent.execute: Catalog and storage size mis-match.", "%s %s" % ( pfn, storageElementName ) )
                          fileMetadata = {'Prognosis':'PfnSizeMismatch', 'LFN':lfn, 'PFN':pfn, 'StorageElement':storageElementName}
                          IntegrityDB = RPCClient( 'DataManagement/DataIntegrity' )
                          resInsert = IntegrityDB.insertProblematic( AGENT_NAME, fileMetadata )
                          if resInsert['OK']:
                            gLogger.info( "LFCvsSEAgent.execute: Successfully added to IntegrityDB." )
                            gLogger.error( "Change the status in the LFC,ProcDB...." )
                          else:
                            gLogger.error( "Shit, f**k, bugger. Add the failover." )
                oNamespaceBrowser.updateDirs( subDirs )
              oRequest.setSubRequestFileAttributeValue( ind, 'integrity', lfn, 'Status', 'Done' )

        ################################################
        #  If the sub-request is none of the above types
        else:
          gLogger.info( "LFCvsSEAgent.execute: Operation not supported.", operation )

        ################################################
        #  Determine whether there are any active files
        if oRequest.isSubRequestEmpty( ind, 'integrity' )['Value']:
          oRequest.setSubRequestStatus( ind, 'integrity', 'Done' )

      ################################################
      #  If the sub-request is already in terminal state
      else:
        gLogger.info( "LFCvsSEAgent.execute: Sub-request %s is status '%s' and  not to be executed." % ( ind, subRequestAttributes['Status'] ) )

    ################################################
    #  Generate the new request string after operation
    requestString = oRequest.toXML()['Value']
    res = self.RequestDBClient.updateRequest( requestName, requestString, sourceServer )

    return S_OK()
Exemplo n.º 43
0
class RequestTasks( TaskBase ):

  def __init__( self, transClient = None, logger = None, requestClient = None ):

    if not logger:
      logger = gLogger.getSubLogger( 'RequestTasks' )

    super( RequestTasks, self ).__init__( transClient, logger )

    if not requestClient:
      self.requestClient = RequestClient()
    else:
      self.requestClient = requestClient

  def prepareTransformationTasks( self, transBody, taskDict, owner = '', ownerGroup = '' ):
    requestType = 'transfer'
    requestOperation = 'replicateAndRegister'
    if transBody:
      try:
        requestType, requestOperation = transBody.split( ';' )
      except AttributeError:
        pass
    for taskID in sorted( taskDict ):
      paramDict = taskDict[taskID]
      transID = paramDict['TransformationID']
      oRequest = RequestContainer( init = False )
      subRequestIndex = oRequest.initiateSubRequest( requestType )['Value']
      attributeDict = {'Operation':requestOperation, 'TargetSE':paramDict['TargetSE']}
      oRequest.setSubRequestAttributes( subRequestIndex, requestType, attributeDict )
      files = []
      for lfn in paramDict['InputData'].split( ';' ):
        files.append( {'LFN':lfn} )
      oRequest.setSubRequestFiles( subRequestIndex, requestType, files )
      requestName = str( transID ).zfill( 8 ) + '_' + str( taskID ).zfill( 8 )
      oRequest.setRequestAttributes( {'RequestName':requestName} )
      taskDict[taskID]['TaskObject'] = oRequest.toXML()['Value']
    return S_OK( taskDict )

  def submitTransformationTasks( self, taskDict ):
    submitted = 0
    failed = 0
    startTime = time.time()
    for taskID in sorted( taskDict ):
      if not taskDict[taskID]['TaskObject']:
        taskDict[taskID]['Success'] = False
        failed += 1
        continue
      res = self.submitTaskToExternal( taskDict[taskID]['TaskObject'] )
      if res['OK']:
        taskDict[taskID]['ExternalID'] = res['Value']
        taskDict[taskID]['Success'] = True
        submitted += 1
      else:
        self.log.error( "Failed to submit task to RMS", res['Message'] )
        taskDict[taskID]['Success'] = False
        failed += 1
    self.log.info( 'submitTasks: Submitted %d tasks to RMS in %.1f seconds' % ( submitted, time.time() - startTime ) )
    if failed:
      self.log.info( 'submitTasks: Failed to submit %d tasks to RMS.' % ( failed ) )
    return S_OK( taskDict )

  def submitTaskToExternal( self, request ):
    """ Submits a request using RequestClient
    """
    if type( request ) in types.StringTypes:
      oRequest = RequestContainer( request )
      name = oRequest.getRequestName()['Value']
    elif type( request ) == types.InstanceType:
      name = request.getRequestName()['Value']
      request = request.toXML()['Value']
    else:
      return S_ERROR( "Request should be string or request object" )
    return self.requestClient.setRequest( name, request )

  def updateTransformationReservedTasks( self, taskDicts ):
    taskNameIDs = {}
    noTasks = []
    for taskDict in taskDicts:
      transID = taskDict['TransformationID']
      taskID = taskDict['TaskID']
      taskName = str( transID ).zfill( 8 ) + '_' + str( taskID ).zfill( 8 )
      res = self.requestClient.getRequestInfo( taskName )
      if res['OK']:
        taskNameIDs[taskName] = res['Value'][0]
      elif re.search( "Failed to retrieve RequestID for Request", res['Message'] ):
        noTasks.append( taskName )
      else:
        self.log.warn( "Failed to get requestID for request", res['Message'] )
    return S_OK( {'NoTasks':noTasks, 'TaskNameIDs':taskNameIDs} )

  def getSubmittedTaskStatus( self, taskDicts ):
    updateDict = {}
    for taskDict in taskDicts:
      transID = taskDict['TransformationID']
      taskID = taskDict['TaskID']
      oldStatus = taskDict['ExternalStatus']
      taskName = str( transID ).zfill( 8 ) + '_' + str( taskID ).zfill( 8 )
      res = self.requestClient.getRequestStatus( taskName )
      newStatus = ''
      if res['OK']:
        newStatus = res['Value']['RequestStatus']
      elif re.search( "Failed to retrieve RequestID for Request", res['Message'] ):
        newStatus = 'Failed'
      else:
        self.log.info( "getSubmittedTaskStatus: Failed to get requestID for request", res['Message'] )
      if newStatus and ( newStatus != oldStatus ):
        if newStatus not in updateDict:
          updateDict[newStatus] = []
        updateDict[newStatus].append( taskID )
    return S_OK( updateDict )

  def getSubmittedFileStatus( self, fileDicts ):
    taskFiles = {}
    for fileDict in fileDicts:
      transID = fileDict['TransformationID']
      taskID = fileDict['TaskID']
      taskName = str( transID ).zfill( 8 ) + '_' + str( taskID ).zfill( 8 )
      if taskName not in taskFiles:
        taskFiles[taskName] = {}
      taskFiles[taskName][fileDict['LFN']] = fileDict['Status']

    updateDict = {}
    for taskName in sorted( taskFiles ):
      lfnDict = taskFiles[taskName]
      res = self.requestClient.getRequestFileStatus( taskName, lfnDict.keys() )
      if not res['OK']:
        self.log.warn( "getSubmittedFileStatus: Failed to get files status for request", res['Message'] )
        continue
      for lfn, newStatus in res['Value'].items():
        if newStatus == lfnDict[lfn]:
          pass
        elif newStatus == 'Done':
          updateDict[lfn] = 'Processed'
        elif newStatus == 'Failed':
          updateDict[lfn] = 'Problematic'
    return S_OK( updateDict )
Exemplo n.º 44
0
class RemovalAgent( AgentModule, RequestAgentMixIn ):
  """
    This Agent takes care of executing "removal" request from the RequestManagement system
  """

  def __init__( self, *args ):
    """
    Initialize the base class and define some extra data members
    """
    AgentModule.__init__( self, *args )
    self.requestDBClient = None
    self.replicaManager = None
    self.maxNumberOfThreads = 4
    self.maxRequestsInQueue = 100
    self.threadPool = None
    self.timeOutCounter = 0
    self.pendingRequests = True

  def initialize( self ):
    """
      Called by the framework upon startup, before any cycle (execute method bellow)
    """
    self.requestDBClient = RequestClient()
    # the RequestAgentMixIn needs the capitalized version, until is is fixed keep this.
    self.RequestDBClient = self.requestDBClient
    self.replicaManager = ReplicaManager()

    gMonitor.registerActivity( "Iteration", "Agent Loops", "RemovalAgent", "Loops/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "Execute", "Request Processed", "RemovalAgent", "Requests/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "Done", "Request Completed", "RemovalAgent", "Requests/min", gMonitor.OP_SUM )

    gMonitor.registerActivity( "PhysicalRemovalAtt", "Physical removals attempted",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "PhysicalRemovalDone", "Successful physical removals",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "PhysicalRemovalFail", "Failed physical removals",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "PhysicalRemovalSize", "Physically removed size",
                               "RemovalAgent", "Bytes", gMonitor.OP_ACUM )

    gMonitor.registerActivity( "ReplicaRemovalAtt", "Replica removal attempted",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "ReplicaRemovalDone", "Successful replica removals",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "ReplicaRemovalFail", "Failed replica removals",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )

    gMonitor.registerActivity( "RemoveFileAtt", "File removal attempted",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "RemoveFileDone", "File removal done",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "RemoveFileFail", "File removal failed",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )

    self.maxNumberOfThreads = self.am_getOption( 'NumberOfThreads', self.maxNumberOfThreads )
    self.maxRequestsInQueue = self.am_getOption( 'RequestsInQueue', self.maxRequestsInQueue )
    self.threadPool = ThreadPool( 1, self.maxNumberOfThreads, self.maxRequestsInQueue )

    # Set the ThreadPool in daemon mode to process new ThreadedJobs as they are inserted
    self.threadPool.daemonize()

    # This sets the Default Proxy to used as that defined under
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    return S_OK()

  def execute( self ):
    """
    Fill the TreadPool with ThreadJobs
    """
    self.pendingRequests = True
    while self.pendingRequests:
      requestExecutor = ThreadedJob( self.executeRequest )
      ret = self.threadPool.queueJob( requestExecutor )
      if not ret['OK']:
        break
      time.sleep( 0.1 )

    if self.timeOutCounter:
      gLogger.error( 'Timeouts during removal execution:', self.timeOutCounter )

    return S_OK()

  def executeRequest( self ):
    """
    Do the actual work in the Thread
    """
    ################################################
    # Get a request from request DB
    gMonitor.addMark( "Iteration", 1 )
    res = self.requestDBClient.getRequest( 'removal' )
    if not res['OK']:
      gLogger.info( "RemovalAgent.execute: Failed to get request from database." )
      return S_OK()
    elif not res['Value']:
      gLogger.info( "RemovalAgent.execute: No requests to be executed found." )
      self.pendingRequests = False
      return S_OK()
    requestString = res['Value']['RequestString']
    requestName = res['Value']['RequestName']
    sourceServer = res['Value']['Server']
    try:
      jobID = int( res['Value']['JobID'] )
    except ValueError:
      jobID = 0
    gLogger.info( "RemovalAgent.execute: Obtained request %s" % requestName )

    try:

      result = self.requestDBClient.getCurrentExecutionOrder( requestName, sourceServer )
      if result['OK']:
        currentOrder = result['Value']
      else:
        gLogger.error( 'Can not get the request execution order' )
        self.requestDBClient.updateRequest( requestName, requestString, sourceServer )
        return S_OK( 'Can not get the request execution order' )

      oRequest = RequestContainer( request = requestString )

      ################################################
      # Find the number of sub-requests from the request
      res = oRequest.getNumSubRequests( 'removal' )
      if not res['OK']:
        errStr = "RemovalAgent.execute: Failed to obtain number of removal subrequests."
        gLogger.error( errStr, res['Message'] )
        return S_OK()
      gLogger.info( "RemovalAgent.execute: Found %s sub requests." % res['Value'] )

      ################################################
      # For all the sub-requests in the request
      modified = False
      for ind in range( res['Value'] ):
        gMonitor.addMark( "Execute", 1 )
        gLogger.info( "RemovalAgent.execute: Processing sub-request %s." % ind )
        subRequestAttributes = oRequest.getSubRequestAttributes( ind, 'removal' )['Value']
        subExecutionOrder = int( subRequestAttributes['ExecutionOrder'] )
        subStatus = subRequestAttributes['Status']
        if subStatus == 'Waiting' and subExecutionOrder <= currentOrder:
          subRequestFiles = oRequest.getSubRequestFiles( ind, 'removal' )['Value']
          operation = subRequestAttributes['Operation']

          ################################################
          #  If the sub-request is a physical removal operation
          if operation == 'physicalRemoval':
            gLogger.info( "RemovalAgent.execute: Attempting to execute %s sub-request." % operation )
            diracSEs = subRequestAttributes['TargetSE'].split( ',' )
            physicalFiles = []
            pfnToLfn = {}
            for subRequestFile in subRequestFiles:
              if subRequestFile['Status'] == 'Waiting':
                pfn = str( subRequestFile['PFN'] )
                lfn = str( subRequestFile['LFN'] )
                pfnToLfn[pfn] = lfn
                physicalFiles.append( pfn )
            gMonitor.addMark( 'PhysicalRemovalAtt', len( physicalFiles ) )
            failed = {}
            errMsg = {}
            for diracSE in diracSEs:
              res = self.replicaManager.removeStorageFile( physicalFiles, diracSE )
              if res['OK']:
                for pfn in res['Value']['Failed'].keys():
                  if not failed.has_key( pfn ):
                    failed[pfn] = {}
                  failed[pfn][diracSE] = res['Value']['Failed'][pfn]
              else:
                errMsg[diracSE] = res['Message']
                for pfn in physicalFiles:
                  if not failed.has_key( pfn ):
                    failed[pfn] = {}
                  failed[pfn][diracSE] = 'Completely'
            # Now analyse the results
            failedPFNs = failed.keys()
            pfnsOK = [pfn for pfn in physicalFiles if not pfn in failedPFNs]
            gMonitor.addMark( 'PhysicalRemovalDone', len( pfnsOK ) )
            for pfn in pfnsOK:
              gLogger.info( "RemovalAgent.execute: Successfully removed %s at %s" % ( pfn, str( diracSEs ) ) )
              res = oRequest.setSubRequestFileAttributeValue( ind, 'removal', pfnToLfn[pfn], 'Status', 'Done' )
              if not res['OK']:
                gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', pfnToLfn[pfn] ) )
              modified = True
            if failed:
              gMonitor.addMark( 'PhysicalRemovalFail', len( failedPFNs ) )
              for pfn in failedPFNs:
                for diracSE in failed[pfn].keys():
                  if type( failed[pfn][diracSE] ) in StringTypes:
                    if re.search( 'no such file or directory', failed[pfn][diracSE].lower() ):
                      gLogger.info( "RemovalAgent.execute: File did not exist.", pfn )
                      res = oRequest.setSubRequestFileAttributeValue( ind, 'removal', pfnToLfn[pfn], 'Status', 'Done' )
                      if not res['OK']:
                        gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', pfnToLfn[pfn] ) )
                      modified = True
                    else:
                      gLogger.info( "RemovalAgent.execute: Failed to remove file.", "%s at %s - %s" % ( pfn, diracSE, failed[pfn][diracSE] ) )
            if errMsg:
              for diracSE in errMsg.keys():
                errStr = "RemovalAgent.execute: Completely failed to remove replicas. At %s", diracSE
                gLogger.error( errStr, errMsg[diracSE] )


          ################################################
          #  If the sub-request is a physical removal operation
          elif operation == 'removeFile':
            gLogger.info( "RemovalAgent.execute: Attempting to execute %s sub-request." % operation )
            lfns = []
            for subRequestFile in subRequestFiles:
              if subRequestFile['Status'] == 'Waiting':
                lfn = str( subRequestFile['LFN'] )
                lfns.append( lfn )
            gMonitor.addMark( 'RemoveFileAtt', len( lfns ) )
            res = self.replicaManager.removeFile( lfns )
            if res['OK']:
              gMonitor.addMark( 'RemoveFileDone', len( res['Value']['Successful'].keys() ) )
              for lfn in res['Value']['Successful'].keys():
                gLogger.info( "RemovalAgent.execute: Successfully removed %s." % lfn )
                result = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' )
                if not result['OK']:
                  gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) )
                modified = True
              gMonitor.addMark( 'RemoveFileFail', len( res['Value']['Failed'].keys() ) )
              for lfn in res['Value']['Failed'].keys():
                if type( res['Value']['Failed'][lfn] ) in StringTypes:
                  if re.search( 'no such file or directory', res['Value']['Failed'][lfn].lower() ):
                    gLogger.info( "RemovalAgent.execute: File did not exist.", lfn )
                    result = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' )
                    if not result['OK']:
                      gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) )
                    modified = True
                  else:
                    gLogger.info( "RemovalAgent.execute: Failed to remove file:",
                                  "%s %s" % ( lfn, res['Value']['Failed'][lfn] ) )
            else:
              gMonitor.addMark( 'RemoveFileFail', len( lfns ) )
              errStr = "RemovalAgent.execute: Completely failed to remove files files."
              gLogger.error( errStr, res['Message'] )

          ################################################
          #  If the sub-request is a physical removal operation
          elif operation == 'replicaRemoval':
            gLogger.info( "RemovalAgent.execute: Attempting to execute %s sub-request." % operation )
            diracSEs = subRequestAttributes['TargetSE'].split( ',' )
            lfns = []
            for subRequestFile in subRequestFiles:
              if subRequestFile['Status'] == 'Waiting':
                lfn = str( subRequestFile['LFN'] )
                lfns.append( lfn )
            gMonitor.addMark( 'ReplicaRemovalAtt', len( lfns ) )

            failed = {}
            errMsg = {}
            for diracSE in diracSEs:
              res = self.replicaManager.removeReplica( diracSE, lfns )
              if res['OK']:
                for lfn in res['Value']['Failed'].keys():
                  errorMessage = str( res['Value']['Failed'][lfn] )
                  if errorMessage.find( 'Write access not permitted for this credential.' ) != -1:
                    if self.__getProxyAndRemoveReplica( diracSE, lfn ):
                      continue
                  if errorMessage.find( 'seconds timeout for "__gfal_wrapper" call' ) != -1:
                    self.timeOutCounter += 1
                  if not failed.has_key( lfn ):
                    failed[lfn] = {}
                  failed[lfn][diracSE] = res['Value']['Failed'][lfn]
              else:
                errMsg[diracSE] = res['Message']
                for lfn in lfns:
                  if not failed.has_key( lfn ):
                    failed[lfn] = {}
                  failed[lfn][diracSE] = 'Completely'
            # Now analyse the results
            failedLFNs = failed.keys()
            lfnsOK = [lfn for lfn in lfns if not lfn in failedLFNs]
            gMonitor.addMark( 'ReplicaRemovalDone', len( lfnsOK ) )
            for lfn in lfnsOK:
              gLogger.info( "RemovalAgent.execute: Successfully removed %s at %s" % ( lfn, str( diracSEs ) ) )
              res = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' )
              if not res['OK']:
                gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) )
              modified = True
            if failed:
              gMonitor.addMark( 'PhysicalRemovalFail', len( failedLFNs ) )
              for lfn in failedLFNs:
                for diracSE in failed[lfn].keys():
                  if type( failed[lfn][diracSE] ) in StringTypes:
                    if re.search( 'no such file or directory', failed[lfn][diracSE].lower() ):
                      gLogger.info( "RemovalAgent.execute: File did not exist.", lfn )
                      res = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' )
                      if not res['OK']:
                        gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) )
                      modified = True
                    else:
                      gLogger.info( "RemovalAgent.execute: Failed to remove file.", "%s at %s - %s" % ( lfn, diracSE, failed[lfn][diracSE] ) )
            if errMsg:
              for diracSE in errMsg.keys():
                errStr = "RemovalAgent.execute: Completely failed to remove replicas. At %s", diracSE
                gLogger.error( errStr, errMsg[diracSE] )

          ################################################
          #  If the sub-request is a request to the online system to retransfer
          elif operation == 'reTransfer':
            gLogger.info( "RemovalAgent.execute: Attempting to execute %s sub-request." % operation )
            diracSE = subRequestAttributes['TargetSE']
            for subRequestFile in subRequestFiles:
              if subRequestFile['Status'] == 'Waiting':
                pfn = str( subRequestFile['PFN'] )
                lfn = str( subRequestFile['LFN'] )
                res = self.replicaManager.onlineRetransfer( diracSE, pfn )
                if res['OK']:
                  if res['Value']['Successful'].has_key( pfn ):
                    gLogger.info( "RemovalAgent.execute: Successfully requested retransfer of %s." % pfn )
                    result = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' )
                    if not result['OK']:
                      gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) )
                    modified = True
                  else:
                    errStr = "RemovalAgent.execute: Failed to request retransfer."
                    gLogger.error( errStr, "%s %s %s" % ( pfn, diracSE, res['Value']['Failed'][pfn] ) )
                else:
                  errStr = "RemovalAgent.execute: Completely failed to request retransfer."
                  gLogger.error( errStr, res['Message'] )
              else:
                gLogger.info( "RemovalAgent.execute: File already completed." )

          ################################################
          #  If the sub-request is none of the above types
          else:
            gLogger.error( "RemovalAgent.execute: Operation not supported.", operation )

          ################################################
          #  Determine whether there are any active files
          if oRequest.isSubRequestEmpty( ind, 'removal' )['Value']:
            oRequest.setSubRequestStatus( ind, 'removal', 'Done' )
            gMonitor.addMark( "Done", 1 )

        ################################################
        #  If the sub-request is already in terminal state
        else:
          gLogger.info( "RemovalAgent.execute:",
                        "Sub-request %s is status '%s' and not to be executed." %
                        ( ind, subRequestAttributes['Status'] ) )

      ################################################
      #  Generate the new request string after operation
      newrequestString = oRequest.toXML()['Value']
    except:
      # if something fails return the original request back to the server 
      res = self.requestDBClient.updateRequest( requestName, requestString, sourceServer )
      return S_OK()

    res = self.requestDBClient.updateRequest( requestName, newrequestString, sourceServer )

    if modified and jobID:
      result = self.finalizeRequest( requestName, jobID, sourceServer )

    return S_OK()

  def __getProxyAndRemoveReplica( self, diracSE, lfn ):
    """
    get a proxy from the owner of the file and try to remove it
    returns True if it succeeds, False otherwise
    """

    result = self.replicaManager.getCatalogDirectoryMetadata( lfn, singleFile = True )
    if not result[ 'OK' ]:
      gLogger.error( "Could not get metadata info", result[ 'Message' ] )
      return False
    ownerRole = result[ 'Value' ][ 'OwnerRole' ]
    ownerDN = result[ 'Value' ][ 'OwnerDN' ]
    if ownerRole[0] != "/":
      ownerRole = "/%s" % ownerRole

    userProxy = ''
    for ownerGroup in Registry.getGroupsWithVOMSAttribute( ownerRole ):
      result = gProxyManager.downloadVOMSProxy( ownerDN, ownerGroup, limited = True,
                                                requiredVOMSAttribute = ownerRole )
      if not result[ 'OK' ]:
        gLogger.verbose ( 'Failed to retrieve voms proxy for %s : %s:' % ( ownerDN, ownerRole ),
                          result[ 'Message' ] )
        continue
      userProxy = result[ 'Value' ]
      gLogger.verbose( "Got proxy for %s@%s [%s]" % ( ownerDN, ownerGroup, ownerRole ) )
      break
    if not userProxy:
      return False

    result = userProxy.dumpAllToFile()
    if not result[ 'OK' ]:
      gLogger.verbose( result[ 'Message' ] )
      return False

    upFile = result[ 'Value' ]
    prevProxyEnv = os.environ[ 'X509_USER_PROXY' ]
    os.environ[ 'X509_USER_PROXY' ] = upFile

    try:
      res = self.replicaManager.removeReplica( diracSE, lfn )
      if res['OK'] and lfn in res[ 'Value' ]['Successful']:
        gLogger.verbose( 'Removed %s from %s' % ( lfn, diracSE ) )
        return True
    finally:
      os.environ[ 'X509_USER_PROXY' ] = prevProxyEnv
      os.unlink( upFile )

    return False

  def finalize( self ):
    """
    Called by the Agent framework to cleanly end execution.
    In this case this module will wait until all pending ThreadedJbos in the
    ThreadPool get executed
    """

    self.threadPool.processAllResults()
    return S_OK()
Exemplo n.º 45
0
class RequestTasks(TaskBase):
    def __init__(self, transClient=None, logger=None, requestClient=None):

        if not logger:
            logger = gLogger.getSubLogger('RequestTasks')

        super(RequestTasks, self).__init__(transClient, logger)

        if not requestClient:
            from DIRAC.RequestManagementSystem.Client.RequestClient import RequestClient
            self.requestClient = RequestClient()
        else:
            self.requestClient = requestClient

    def prepareTransformationTasks(self,
                                   transBody,
                                   taskDict,
                                   owner='',
                                   ownerGroup=''):
        requestType = 'transfer'
        requestOperation = 'replicateAndRegister'
        try:
            requestType, requestOperation = transBody.split(';')
        except:
            pass
        for taskID in sortList(taskDict.keys()):
            paramDict = taskDict[taskID]
            transID = paramDict['TransformationID']
            oRequest = RequestContainer(init=False)
            subRequestIndex = oRequest.initiateSubRequest(requestType)['Value']
            attributeDict = {
                'Operation': requestOperation,
                'TargetSE': paramDict['TargetSE']
            }
            oRequest.setSubRequestAttributes(subRequestIndex, requestType,
                                             attributeDict)
            files = []
            for lfn in paramDict['InputData'].split(';'):
                files.append({'LFN': lfn})
            oRequest.setSubRequestFiles(subRequestIndex, requestType, files)
            requestName = str(transID).zfill(8) + '_' + str(taskID).zfill(8)
            oRequest.setRequestAttributes({'RequestName': requestName})
            taskDict[taskID]['TaskObject'] = oRequest.toXML()['Value']
        return S_OK(taskDict)

    def submitTransformationTasks(self, taskDict):
        submitted = 0
        failed = 0
        startTime = time.time()
        for taskID in sortList(taskDict.keys()):
            if not taskDict[taskID]['TaskObject']:
                taskDict[taskID]['Success'] = False
                failed += 1
                continue
            res = self.submitTaskToExternal(taskDict[taskID]['TaskObject'])
            if res['OK']:
                taskDict[taskID]['ExternalID'] = res['Value']
                taskDict[taskID]['Success'] = True
                submitted += 1
            else:
                self.log.error("Failed to submit task to RMS", res['Message'])
                taskDict[taskID]['Success'] = False
                failed += 1
        self.log.info(
            'submitTasks: Submitted %d tasks to RMS in %.1f seconds' %
            (submitted, time.time() - startTime))
        if failed:
            self.log.info('submitTasks: Failed to submit %d tasks to RMS.' %
                          (failed))
        return S_OK(taskDict)

    def submitTaskToExternal(self, request):
        """ Submits a request using RequestClient
    """
        if type(request) in types.StringTypes:
            oRequest = RequestContainer(request)
            name = oRequest.getRequestName()['Value']
        elif type(request) == types.InstanceType:
            name = request.getRequestName()['Value']
            request = request.toXML()['Value']
        else:
            return S_ERROR("Request should be string or request object")
        return self.requestClient.setRequest(name, request)

    def updateTransformationReservedTasks(self, taskDicts):
        taskNameIDs = {}
        noTasks = []
        for taskDict in taskDicts:
            transID = taskDict['TransformationID']
            taskID = taskDict['TaskID']
            taskName = str(transID).zfill(8) + '_' + str(taskID).zfill(8)
            res = self.requestClient.getRequestInfo(
                taskName, 'RequestManagement/centralURL')
            if res['OK']:
                taskNameIDs[taskName] = res['Value'][0]
            elif re.search("Failed to retrieve RequestID for Request",
                           res['Message']):
                noTasks.append(taskName)
            else:
                self.log.warn("Failed to get requestID for request",
                              res['Message'])
        return S_OK({'NoTasks': noTasks, 'TaskNameIDs': taskNameIDs})

    def getSubmittedTaskStatus(self, taskDicts):
        updateDict = {}
        for taskDict in taskDicts:
            transID = taskDict['TransformationID']
            taskID = taskDict['TaskID']
            oldStatus = taskDict['ExternalStatus']
            taskName = str(transID).zfill(8) + '_' + str(taskID).zfill(8)
            res = self.requestClient.getRequestStatus(
                taskName, 'RequestManagement/centralURL')
            newStatus = ''
            if res['OK']:
                newStatus = res['Value']['RequestStatus']
            elif re.search("Failed to retrieve RequestID for Request",
                           res['Message']):
                newStatus = 'Failed'
            else:
                self.log.info(
                    "getSubmittedTaskStatus: Failed to get requestID for request",
                    res['Message'])
            if newStatus and (newStatus != oldStatus):
                if not updateDict.has_key(newStatus):
                    updateDict[newStatus] = []
                updateDict[newStatus].append(taskID)
        return S_OK(updateDict)

    def getSubmittedFileStatus(self, fileDicts):
        taskFiles = {}
        for fileDict in fileDicts:
            transID = fileDict['TransformationID']
            taskID = fileDict['TaskID']
            taskName = str(transID).zfill(8) + '_' + str(taskID).zfill(8)
            if not taskFiles.has_key(taskName):
                taskFiles[taskName] = {}
            taskFiles[taskName][fileDict['LFN']] = fileDict['Status']

        updateDict = {}
        for taskName in sortList(taskFiles.keys()):
            lfnDict = taskFiles[taskName]
            res = self.requestClient.getRequestFileStatus(
                taskName, lfnDict.keys(), 'RequestManagement/centralURL')
            if not res['OK']:
                self.log.warn(
                    "getSubmittedFileStatus: Failed to get files status for request",
                    res['Message'])
                continue
            for lfn, newStatus in res['Value'].items():
                if newStatus == lfnDict[lfn]:
                    pass
                elif newStatus == 'Done':
                    updateDict[lfn] = 'Processed'
                elif newStatus == 'Failed':
                    updateDict[lfn] = 'Problematic'
        return S_OK(updateDict)
Exemplo n.º 46
0
class ZuziaAgent(AgentModule):

  def initialize(self):

    self.RequestDBClient = RequestClient()

    gMonitor.registerActivity("Iteration",          "Agent Loops",                  "ZuziaAgent",      "Loops/min",      gMonitor.OP_SUM)
    gMonitor.registerActivity("Attempted",          "Request Processed",            "ZuziaRAgent",     "Requests/min",   gMonitor.OP_SUM)
    gMonitor.registerActivity("Successful",         "Request Forward Successful",   "ZuziaAgent",      "Requests/min",   gMonitor.OP_SUM)
    gMonitor.registerActivity("Failed",             "Request Forward Failed",       "ZuziaAgent",      "Requests/min",   gMonitor.OP_SUM)

    self.local = PathFinder.getServiceURL("RequestManagement/localURL")
    if not self.local:
      self.local = AgentModule.am_getOption(self,'localURL','')
    if not self.local:
      errStr = 'The RequestManagement/localURL option must be defined.'
      gLogger.fatal(errStr)
      return S_ERROR(errStr)

    self.central = PathFinder.getServiceURL("RequestManagement/centralURL")
    if not self.central:
      errStr = 'The RequestManagement/centralURL option must be defined.'
      gLogger.fatal(errStr)
      return S_ERROR(errStr)
    return S_OK()

  def execute(self):
    """ This agent is the smallest and (cutest) of all the DIRAC agents in existence.
    """
    gMonitor.addMark("Iteration",1)

    central = PathFinder.getServiceURL("RequestManagement/centralURL")
    if central:
      self.central = central
    local = PathFinder.getServiceURL("RequestManagement/localURL")
    if local:
      self.local = local

    res = self.RequestDBClient.serveRequest(url=self.local)
    if not res['OK']:
      gLogger.error("ZuziaAgent.execute: Failed to get request from database.",self.local)
      return S_OK()
    elif not res['Value']:
      gLogger.info("ZuziaAgent.execute: No requests to be executed found.")
      return S_OK()

    gMonitor.addMark("Attempted",1)

    requestString = res['Value']['RequestString']
    requestName = res['Value']['RequestName']
    gLogger.info("ZuziaAgent.execute: Obtained request %s" % requestName)

    gLogger.info("ZuziaAgent.execute: Attempting to set request to %s." % self.central)
    res = self.RequestDBClient.setRequest(requestName,requestString,self.central)
    if res['OK']:
      gMonitor.addMark("Successful",1)
      gLogger.info("ZuziaAgent.execute: Successfully put request.")
    else:
      gMonitor.addMark("Failed",1)
      gLogger.error("ZuziaAgent.execute: Failed to set request to", self.central)
      gLogger.info("ZuziaAgent.execute: Attempting to set request to %s." % self.local)
      res = self.RequestDBClient.setRequest(requestName,requestString,self.local)
      if res['OK']:
        gLogger.info("ZuziaAgent.execute: Successfully put request.")
      else:
        gLogger.error("ZuziaAgent.execute: Failed to set request to", self.local)
        while not res['OK']:
          gLogger.info("ZuziaAgent.execute: Attempting to set request to anywhere.")
          res = self.RequestDBClient.setRequest(requestName,requestString)
        gLogger.info("ZuziaAgent.execute: Successfully put request to %s." % res["Server"])
    return S_OK()
Exemplo n.º 47
0
class ZuziaAgent(AgentModule):
    def initialize(self):

        self.RequestDBClient = RequestClient()

        gMonitor.registerActivity("Iteration", "Agent Loops", "ZuziaAgent",
                                  "Loops/min", gMonitor.OP_SUM)
        gMonitor.registerActivity("Attempted", "Request Processed",
                                  "ZuziaRAgent", "Requests/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("Successful", "Request Forward Successful",
                                  "ZuziaAgent", "Requests/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("Failed", "Request Forward Failed",
                                  "ZuziaAgent", "Requests/min",
                                  gMonitor.OP_SUM)

        self.local = PathFinder.getServiceURL("RequestManagement/localURL")
        if not self.local:
            self.local = AgentModule.am_getOption(self, 'localURL', '')
        if not self.local:
            errStr = 'The RequestManagement/localURL option must be defined.'
            gLogger.fatal(errStr)
            return S_ERROR(errStr)

        self.central = PathFinder.getServiceURL("RequestManagement/centralURL")
        if not self.central:
            errStr = 'The RequestManagement/centralURL option must be defined.'
            gLogger.fatal(errStr)
            return S_ERROR(errStr)
        return S_OK()

    def execute(self):
        """ This agent is the smallest and (cutest) of all the DIRAC agents in existence.
    """
        gMonitor.addMark("Iteration", 1)

        central = PathFinder.getServiceURL("RequestManagement/centralURL")
        if central:
            self.central = central
        local = PathFinder.getServiceURL("RequestManagement/localURL")
        if local:
            self.local = local

        res = self.RequestDBClient.serveRequest(url=self.local)
        if not res['OK']:
            gLogger.error(
                "ZuziaAgent.execute: Failed to get request from database.",
                self.local)
            return S_OK()
        elif not res['Value']:
            gLogger.info(
                "ZuziaAgent.execute: No requests to be executed found.")
            return S_OK()

        gMonitor.addMark("Attempted", 1)

        requestString = res['Value']['RequestString']
        requestName = res['Value']['RequestName']
        gLogger.info("ZuziaAgent.execute: Obtained request %s" % requestName)

        gLogger.info("ZuziaAgent.execute: Attempting to set request to %s." %
                     self.central)
        res = self.RequestDBClient.setRequest(requestName, requestString,
                                              self.central)
        if res['OK']:
            gMonitor.addMark("Successful", 1)
            gLogger.info("ZuziaAgent.execute: Successfully put request.")
        else:
            gMonitor.addMark("Failed", 1)
            gLogger.error("ZuziaAgent.execute: Failed to set request to",
                          self.central)
            gLogger.info(
                "ZuziaAgent.execute: Attempting to set request to %s." %
                self.local)
            res = self.RequestDBClient.setRequest(requestName, requestString,
                                                  self.local)
            if res['OK']:
                gLogger.info("ZuziaAgent.execute: Successfully put request.")
            else:
                gLogger.error("ZuziaAgent.execute: Failed to set request to",
                              self.local)
                while not res['OK']:
                    gLogger.info(
                        "ZuziaAgent.execute: Attempting to set request to anywhere."
                    )
                    res = self.RequestDBClient.setRequest(
                        requestName, requestString)
                gLogger.info(
                    "ZuziaAgent.execute: Successfully put request to %s." %
                    res["Server"])
        return S_OK()
Exemplo n.º 48
0
    def initialize(self):
        """
      Called by the framework upon startup, before any cycle (execute method bellow)
    """
        self.requestDBClient = RequestClient()
        # the RequestAgentMixIn needs the capitalized version, until is is fixed keep this.
        self.RequestDBClient = self.requestDBClient
        self.replicaManager = ReplicaManager()

        gMonitor.registerActivity("Iteration", "Agent Loops", "RemovalAgent",
                                  "Loops/min", gMonitor.OP_SUM)
        gMonitor.registerActivity("Execute", "Request Processed",
                                  "RemovalAgent", "Requests/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("Done", "Request Completed", "RemovalAgent",
                                  "Requests/min", gMonitor.OP_SUM)

        gMonitor.registerActivity("PhysicalRemovalAtt",
                                  "Physical removals attempted",
                                  "RemovalAgent", "Removal/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("PhysicalRemovalDone",
                                  "Successful physical removals",
                                  "RemovalAgent", "Removal/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("PhysicalRemovalFail",
                                  "Failed physical removals", "RemovalAgent",
                                  "Removal/min", gMonitor.OP_SUM)
        gMonitor.registerActivity("PhysicalRemovalSize",
                                  "Physically removed size", "RemovalAgent",
                                  "Bytes", gMonitor.OP_ACUM)

        gMonitor.registerActivity("ReplicaRemovalAtt",
                                  "Replica removal attempted", "RemovalAgent",
                                  "Removal/min", gMonitor.OP_SUM)
        gMonitor.registerActivity("ReplicaRemovalDone",
                                  "Successful replica removals",
                                  "RemovalAgent", "Removal/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("ReplicaRemovalFail",
                                  "Failed replica removals", "RemovalAgent",
                                  "Removal/min", gMonitor.OP_SUM)

        gMonitor.registerActivity("RemoveFileAtt", "File removal attempted",
                                  "RemovalAgent", "Removal/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("RemoveFileDone", "File removal done",
                                  "RemovalAgent", "Removal/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("RemoveFileFail", "File removal failed",
                                  "RemovalAgent", "Removal/min",
                                  gMonitor.OP_SUM)

        self.maxNumberOfThreads = self.am_getOption('NumberOfThreads',
                                                    self.maxNumberOfThreads)
        self.maxRequestsInQueue = self.am_getOption('RequestsInQueue',
                                                    self.maxRequestsInQueue)
        self.threadPool = ThreadPool(1, self.maxNumberOfThreads,
                                     self.maxRequestsInQueue)

        # Set the ThreadPool in daemon mode to process new ThreadedJobs as they are inserted
        self.threadPool.daemonize()

        self.maxRequests = self.am_getOption('MaxRequestsPerCycle', 1200.)

        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/DataManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')

        return S_OK()
Exemplo n.º 49
0
 def __init__(self):
   TaskBase.__init__(self)
   self.requestClient = RequestClient()
Exemplo n.º 50
0
class TransformationCleaningAgent( AgentModule ):
  '''
  .. class:: TransformationCleaningAgent

  :param ReplicaManger replicaManager: ReplicaManager instance
  :param TransfromationClient transClient: TransfromationClient instance
  :param RequestClient requestClient: RequestClient instance
  :param FileCatalogClient metadataClient: FileCatalogClient instance

  '''

  def __init__( self, *args, **kwargs ):
    ''' c'tor
    '''
    AgentModule.__init__( self, *args, **kwargs )
    # # replica manager
    self.replicaManager = ReplicaManager()
    # # transformation client
    self.transClient = TransformationClient()
    # # wms client
    self.wmsClient = WMSClient()
    # # request client
    self.requestClient = RequestClient()
    # # file catalog clinet
    self.metadataClient = FileCatalogClient()

    # # placeholders for CS options

    # # transformations types
    self.transformationTypes = None
    # # directory locations
    self.directoryLocations = None
    # # transformation metadata
    self.transfidmeta = None
    # # archive periof in days
    self.archiveAfter = None
    # # active SEs
    self.activeStorages = None
    # # transformation log SEs
    self.logSE = None
    # # enable/disable execution
    self.enableFlag = None

  def initialize( self ):
    ''' agent initialisation

    reading and setting confing opts

    :param self: self reference
    '''
    # # shifter proxy
    self.am_setOption( 'shifterProxy', 'DataManager' )
    # # transformations types
    agentTSTypes = self.am_getOption( 'TransformationTypes', [] )
    if agentTSTypes:
      self.transformationTypes = sortList( agentTSTypes )
    else:
      dataProc = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] )
      dataManip = Operations().getValue( 'Transformations/DataManipulation', ['Replication', 'Removal'] )
      self.transformationTypes = sortList( dataProc + dataManip )
    self.log.info( "Will consider the following transformation types: %s" % str( self.transformationTypes ) )
    # # directory locations
    self.directoryLocations = sortList( self.am_getOption( 'DirectoryLocations', [ 'TransformationDB',
                                                                                   'MetadataCatalog' ] ) )
    self.log.info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) )
    # # transformation metadata
    self.transfidmeta = self.am_getOption( 'TransfIDMeta', "TransformationID" )
    self.log.info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta )
    # # archive periof in days
    self.archiveAfter = self.am_getOption( 'ArchiveAfter', 7 )  # days
    self.log.info( "Will archive Completed transformations after %d days" % self.archiveAfter )
    # # active SEs
    self.activeStorages = sortList( self.am_getOption( 'ActiveSEs', [] ) )
    self.log.info( "Will check the following storage elements: %s" % str( self.activeStorages ) )
    # # transformation log SEs
    self.logSE = self.am_getOption( 'TransformationLogSE', 'LogSE' )
    self.log.info( "Will remove logs found on storage element: %s" % self.logSE )
    # # enable/disable execution, should be using CS option Status?? with default value as 'Active'??
    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
    return S_OK()

  #############################################################################
  def execute( self ):
    ''' execution in one agent's cycle

    :param self: self reference
    '''

    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
    if not self.enableFlag == 'True':
      self.log.info( 'TransformationCleaningAgent is disabled by configuration option EnableFlag' )
      return S_OK( 'Disabled via CS flag' )

    # # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
    res = self.transClient.getTransformations( { 'Status' : 'Cleaning',
                                                 'Type' : self.transformationTypes } )
    if res['OK']:
      for transDict in res['Value']:
        # # if transformation is of type `Replication` or `Removal`, there is nothing to clean.
        # # We just archive
        if transDict[ 'Type' ] in [ 'Replication', 'Removal' ]:
          res = self.archiveTransformation( transDict['TransformationID'] )
          if not res['OK']:
            self.log.error( "Problems archiving transformation %s: %s" % ( transDict['TransformationID'],
                                                                         res['Message'] ) )
        else:
          res = self.cleanTransformation( transDict['TransformationID'] )
          if not res['OK']:
            self.log.error( "Problems cleaning transformation %s: %s" % ( transDict['TransformationID'],
                                                                        res['Message'] ) )


    # # Obtain the transformations in RemovingFiles status and (wait for it) removes the output files
    res = self.transClient.getTransformations( { 'Status' : 'RemovingFiles',
                                                 'Type' : self.transformationTypes} )
    if res['OK']:
      for transDict in res['Value']:
        res = self.removeTransformationOutput( transDict['TransformationID'] )
        if not res['OK']:
          self.log.error( "Problems removing transformation %s: %s" % ( transDict['TransformationID'],
                                                                       res['Message'] ) )

    # # Obtain the transformations in Completed status and archive if inactive for X days
    olderThanTime = datetime.utcnow() - timedelta( days = self.archiveAfter )
    res = self.transClient.getTransformations( { 'Status' : 'Completed',
                                                 'Type' : self.transformationTypes },
                                                 older = olderThanTime,
                                                 timeStamp = 'LastUpdate' )
    if res['OK']:
      for transDict in res['Value']:
        res = self.archiveTransformation( transDict['TransformationID'] )
        if not res['OK']:
          self.log.error( "Problems archiving transformation %s: %s" % ( transDict['TransformationID'],
                                                                       res['Message'] ) )
    else:
      self.log.error( "Could not get the transformations" )

    return S_OK()

  #############################################################################
  #
  # Get the transformation directories for checking
  #

  def getTransformationDirectories( self, transID ):
    ''' get the directories for the supplied transformation from the transformation system

    :param self: self reference
    :param int transID: transformation ID
    '''
    directories = []
    if 'TransformationDB' in self.directoryLocations:
      res = self.transClient.getTransformationParameters( transID, ['OutputDirectories'] )
      if not res['OK']:
        self.log.error( "Failed to obtain transformation directories", res['Message'] )
        return res
      transDirectories = res['Value'].splitlines()
      directories = self._addDirs( transID, transDirectories, directories )

    if 'MetadataCatalog' in self.directoryLocations:
      res = self.metadataClient.findDirectoriesByMetadata( {self.transfidmeta:transID} )
      if not res['OK']:
        self.log.error( "Failed to obtain metadata catalog directories", res['Message'] )
        return res
      transDirectories = res['Value']
      directories = self._addDirs( transID, transDirectories, directories )

    if not directories:
      self.log.info( "No output directories found" )
    directories = sortList( directories )
    return S_OK( directories )

  @classmethod
  def _addDirs( self, transID, newDirs, existingDirs ):
    ''' append uniqe :newDirs: list to :existingDirs: list

    :param self: self reference
    :param int transID: transformationID
    :param list newDirs: src list of paths
    :param list existingDirs: dest list of paths
    '''
    for folder in newDirs:
      transStr = str( transID ).zfill( 8 )
      if re.search( transStr, str( folder ) ):
        if not folder in existingDirs:
          existingDirs.append( folder )
    return existingDirs

  #############################################################################
  #
  # These are the methods for performing the cleaning of catalogs and storage
  #

  def cleanStorageContents( self, directory ):
    ''' delete lfn dir from all active SE

    :param self: self reference
    :param sre directory: folder name
    '''
    for storageElement in self.activeStorages:
      res = self.__removeStorageDirectory( directory, storageElement )
      if not res['OK']:
        return res
    return S_OK()

  def __removeStorageDirectory( self, directory, storageElement ):
    ''' wipe out all contents from :directory: at :storageElement:

    :param self: self reference
    :param str directory: path
    :param str storageElement: SE name
    '''
    self.log.info( 'Removing the contents of %s at %s' % ( directory, storageElement ) )
    res = self.replicaManager.getPfnForLfn( [directory], storageElement )
    if not res['OK']:
      self.log.error( "Failed to get PFN for directory", res['Message'] )
      return res
    for directory, error in res['Value']['Failed'].items():
      self.log.error( 'Failed to obtain directory PFN from LFN', '%s %s' % ( directory, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to obtain directory PFN from LFNs' )
    storageDirectory = res['Value']['Successful'].values()[0]
    res = self.replicaManager.getStorageFileExists( storageDirectory, storageElement, singleFile = True )
    if not res['OK']:
      self.log.error( "Failed to obtain existance of directory", res['Message'] )
      return res
    exists = res['Value']
    if not exists:
      self.log.info( "The directory %s does not exist at %s " % ( directory, storageElement ) )
      return S_OK()
    res = self.replicaManager.removeStorageDirectory( storageDirectory,
                                                      storageElement,
                                                      recursive = True,
                                                      singleDirectory = True )
    if not res['OK']:
      self.log.error( "Failed to remove storage directory", res['Message'] )
      return res
    self.log.info( "Successfully removed %d files from %s at %s" % ( res['Value']['FilesRemoved'],
                                                                     directory,
                                                                     storageElement ) )
    return S_OK()

  def cleanCatalogContents( self, directory ):
    ''' wipe out everything from catalog under folder :directory:

    :param self: self reference
    :params str directory: folder name
    '''
    res = self.__getCatalogDirectoryContents( [directory] )
    if not res['OK']:
      return res
    filesFound = res['Value']
    if not filesFound:
      return S_OK()
    self.log.info( "Attempting to remove %d possible remnants from the catalog and storage" % len( filesFound ) )
    res = self.replicaManager.removeFile( filesFound )
    if not res['OK']:
      return res
    for lfn, reason in res['Value']['Failed'].items():
      self.log.error( "Failed to remove file found in the catalog", "%s %s" % ( lfn, reason ) )
    if res['Value']['Failed']:
      return S_ERROR( "Failed to remove all files found in the catalog" )
    return S_OK()

  def __getCatalogDirectoryContents( self, directories ):
    ''' get catalog contents under paths :directories:

    :param self: self reference
    :param list directories: list of paths in catalog
    '''
    self.log.info( 'Obtaining the catalog contents for %d directories:' % len( directories ) )
    for directory in directories:
      self.log.info( directory )
    activeDirs = directories
    allFiles = {}
    while len( activeDirs ) > 0:
      currentDir = activeDirs[0]
      res = self.replicaManager.getCatalogListDirectory( currentDir, singleFile = True )
      activeDirs.remove( currentDir )
      if not res['OK'] and res['Message'].endswith( 'The supplied path does not exist' ):
        self.log.info( "The supplied directory %s does not exist" % currentDir )
      elif not res['OK']:
        self.log.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Message'] ) )
      else:
        dirContents = res['Value']
        activeDirs.extend( dirContents['SubDirs'] )
        allFiles.update( dirContents['Files'] )
    self.log.info( "Found %d files" % len( allFiles ) )
    return S_OK( allFiles.keys() )

  def cleanTransformationLogFiles( self, directory ):
    ''' clean up transformation logs from directory :directory:

    :param self: self reference
    :param str directory: folder name
    '''
    self.log.info( "Removing log files found in the directory %s" % directory )
    res = self.replicaManager.removeStorageDirectory( directory, self.logSE, singleDirectory = True )
    if not res['OK']:
      self.log.error( "Failed to remove log files", res['Message'] )
      return res
    self.log.info( "Successfully removed transformation log directory" )
    return S_OK()

  #############################################################################
  #
  # These are the functional methods for archiving and cleaning transformations
  #

  def removeTransformationOutput( self, transID ):
    ''' This just removes any mention of the output data from the catalog and storage '''
    self.log.info( "Removing output data for transformation %s" % transID )
    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) )
      return S_OK()
    directories = res['Value']
    for directory in directories:
      if not re.search( '/LOG/', directory ):
        res = self.cleanCatalogContents( directory )
        if not res['OK']:
          return res
        res = self.cleanStorageContents( directory )
        if not res['OK']:
          return res
    self.log.info( "Removed directories in the catalog and storage for transformation" )
    # Clean ALL the possible remnants found in the metadata catalog
    res = self.cleanMetadataCatalogFiles( transID, directories )
    if not res['OK']:
      return res
    self.log.info( "Successfully removed output of transformation %d" % transID )
    # Change the status of the transformation to RemovedFiles
    res = self.transClient.setTransformationParameter( transID, 'Status', 'RemovedFiles' )
    if not res['OK']:
      self.log.error( "Failed to update status of transformation %s to RemovedFiles" % ( transID ), res['Message'] )
      return res
    self.log.info( "Updated status of transformation %s to RemovedFiles" % ( transID ) )
    return S_OK()

  def archiveTransformation( self, transID ):
    ''' This just removes job from the jobDB and the transformation DB

    :param self: self reference
    :param int transID: transformation ID
    '''
    self.log.info( "Archiving transformation %s" % transID )
    # Clean the jobs in the WMS and any failover requests found
    res = self.cleanTransformationTasks( transID )
    if not res['OK']:
      return res
    # Clean the transformation DB of the files and job information
    res = self.transClient.cleanTransformation( transID )
    if not res['OK']:
      return res
    self.log.info( "Successfully archived transformation %d" % transID )
    # Change the status of the transformation to archived
    res = self.transClient.setTransformationParameter( transID, 'Status', 'Archived' )
    if not res['OK']:
      self.log.error( "Failed to update status of transformation %s to Archived" % ( transID ), res['Message'] )
      return res
    self.log.info( "Updated status of transformation %s to Archived" % ( transID ) )
    return S_OK()

  def cleanTransformation( self, transID ):
    ''' This removes any mention of the supplied transformation
    '''
    self.log.info( "Cleaning transformation %s" % transID )
    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) )
      return S_OK()
    directories = res['Value']
    # Clean the jobs in the WMS and any failover requests found
    res = self.cleanTransformationTasks( transID )
    if not res['OK']:
      return res
    # Clean the log files for the jobs
    for directory in directories:
      if re.search( '/LOG/', directory ):
        res = self.cleanTransformationLogFiles( directory )
        if not res['OK']:
          return res
      res = self.cleanCatalogContents( directory )
      if not res['OK']:
        return res
      res = self.cleanStorageContents( directory )
      if not res['OK']:
        return res
    # Clean ALL the possible remnants found in the BK
    res = self.cleanMetadataCatalogFiles( transID, directories )
    if not res['OK']:
      return res
    # Clean the transformation DB of the files and job information
    res = self.transClient.cleanTransformation( transID )
    if not res['OK']:
      return res
    self.log.info( "Successfully cleaned transformation %d" % transID )
    # Change the status of the transformation to deleted
    res = self.transClient.setTransformationParameter( transID, 'Status', 'Deleted' )
    if not res['OK']:
      self.log.error( "Failed to update status of transformation %s to Deleted" % ( transID ), res['Message'] )
      return res
    self.log.info( "Updated status of transformation %s to Deleted" % ( transID ) )
    return S_OK()

  def cleanMetadataCatalogFiles( self, transID ):
    ''' wipe out files from catalog '''
    res = self.metadataClient.findFilesByMetadata( { self.transfidmeta : transID } )
    if not res['OK']:
      return res
    fileToRemove = res['Value']
    if not fileToRemove:
      self.log.info( 'No files found for transID %s' % transID )
      return S_OK()
    res = self.replicaManager.removeFile( fileToRemove )
    if not res['OK']:
      return res
    for lfn, reason in res['Value']['Failed'].items():
      self.log.error( "Failed to remove file found in metadata catalog", "%s %s" % ( lfn, reason ) )
    if res['Value']['Failed']:
      return S_ERROR( "Failed to remove all files found in the metadata catalog" )
    self.log.info( "Successfully removed all files found in the BK" )
    return S_OK()

  #############################################################################
  #
  # These are the methods for removing the jobs from the WMS and transformation DB
  #

  def cleanTransformationTasks( self, transID ):
    ''' clean tasks from WMS
    '''
    res = self.__getTransformationExternalIDs( transID )
    if not res['OK']:
      return res
    externalIDs = res['Value']
    if externalIDs:
      res = self.transClient.getTransformationParameters( transID, ['Type'] )
      if not res['OK']:
        self.log.error( "Failed to determine transformation type" )
        return res
      transType = res['Value']
      if transType in [ 'Replication', 'Removal' ]:
        res = self.__removeRequests( externalIDs )
      else:
        res = self.__removeWMSTasks( externalIDs )
      if not res['OK']:
        return res
    return S_OK()

  def __getTransformationExternalIDs( self, transID ):
    ''' collect all ExternalIDs for transformation :transID:

    :param self: self reference
    :param int transID: transforamtion ID
    '''
    res = self.transClient.getTransformationTasks( condDict = { 'TransformationID' : transID } )
    if not res['OK']:
      self.log.error( "Failed to get externalIDs for transformation %d" % transID, res['Message'] )
      return res
    externalIDs = [ taskDict['ExternalID'] for taskDict in res["Value"] ]
    self.log.info( "Found %d tasks for transformation" % len( externalIDs ) )
    return S_OK( externalIDs )

  def __removeRequests( self, requestIDs ):
    ''' dummy method '''
    self.log.error( "Not removing requests but should do" )
    return S_OK()

  def __removeWMSTasks( self, transJobIDs ):
    ''' wipe out jobs and their requests from the system

    TODO: should check request status, maybe FTS files as well ???

    :param self: self reference
    :param list trasnJobIDs: job IDs
    '''
    # Prevent 0 job IDs
    jobIDs = [ int( j ) for j in transJobIDs if int( j ) ]
    allRemove = True
    for jobList in breakListIntoChunks( jobIDs, 500 ):

      res = self.wmsClient.killJob( jobList )
      if res['OK']:
        self.log.info( "Successfully killed %d jobs from WMS" % len( jobList ) )
      elif ( "InvalidJobIDs" in res ) and ( "NonauthorizedJobIDs" not in res ) and ( "FailedJobIDs" not in res ):
        self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) )
      elif "NonauthorizedJobIDs" in res:
        self.log.error( "Failed to kill %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) )
        allRemove = False
      elif "FailedJobIDs" in res:
        self.log.error( "Failed to kill %s jobs" % len( res['FailedJobIDs'] ) )
        allRemove = False

      res = self.wmsClient.deleteJob( jobList )
      if res['OK']:
        self.log.info( "Successfully removed %d jobs from WMS" % len( jobList ) )
      elif ( "InvalidJobIDs" in res ) and ( "NonauthorizedJobIDs" not in res ) and ( "FailedJobIDs" not in res ):
        self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) )
      elif "NonauthorizedJobIDs" in res:
        self.log.error( "Failed to remove %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) )
        allRemove = False
      elif "FailedJobIDs" in res:
        self.log.error( "Failed to remove %s jobs" % len( res['FailedJobIDs'] ) )
        allRemove = False

    if not allRemove:
      return S_ERROR( "Failed to remove all remnants from WMS" )
    self.log.info( "Successfully removed all tasks from the WMS" )

    if not jobIDs:
      self.log.info( "JobIDs not present, unable to remove asociated requests." )
      return S_OK()

    res = self.requestClient.getRequestForJobs( jobIDs )
    if not res['OK']:
      self.log.error( "Failed to get requestID for jobs.", res['Message'] )
      return res
    failoverRequests = res['Value']
    self.log.info( "Found %d jobs with associated failover requests" % len( failoverRequests ) )
    if not failoverRequests:
      return S_OK()
    failed = 0
    for jobID, requestName in failoverRequests.items():
      # Put this check just in case, tasks must have associated jobs
      if jobID == 0 or jobID == '0':
        continue
      res = self.requestClient.deleteRequest( requestName )
      if not res['OK']:
        self.log.error( "Failed to remove request from RequestDB", res['Message'] )
        failed += 1
      else:
        self.log.verbose( "Removed request %s associated to job %d." % ( requestName, jobID ) )
    if failed:
      self.log.info( "Successfully removed %s requests" % ( len( failoverRequests ) - failed ) )
      self.log.info( "Failed to remove %s requests" % failed )
      return S_ERROR( "Failed to remove all the request from RequestDB" )
    self.log.info( "Successfully removed all the associated failover requests" )
    return S_OK()
Exemplo n.º 51
0
        lfns.append(inputFileName)

from DIRAC.Resources.Storage.StorageElement import StorageElement
import DIRAC
# Check is provided SE is OK
if targetSE != 'All':
    se = StorageElement(targetSE)
    if not se.valid:
        print se.errorReason
        print
        Script.showHelp()

from DIRAC.RequestManagementSystem.Client.RequestContainer import RequestContainer
from DIRAC.RequestManagementSystem.Client.RequestClient import RequestClient

requestClient = RequestClient()
requestType = 'removal'
requestOperation = 'replicaRemoval'
if targetSE == 'All':
    requestOperation = 'removeFile'

for lfnList in breakListIntoChunks(lfns, 100):

    oRequest = RequestContainer()
    subRequestIndex = oRequest.initiateSubRequest(requestType)['Value']
    attributeDict = {'Operation': requestOperation, 'TargetSE': targetSE}
    oRequest.setSubRequestAttributes(subRequestIndex, requestType,
                                     attributeDict)
    files = []
    for lfn in lfnList:
        files.append({'LFN': lfn})
Exemplo n.º 52
0
class TransformationCleaningAgent( AgentModule ):
  '''
  .. class:: TransformationCleaningAgent

  :param ReplicaManger replicaManager: ReplicaManager instance
  :param TransfromationClient transClient: TransfromationClient instance
  :param RequestClient requestClient: RequestClient instance
  :param FileCatalogClient metadataClient: FileCatalogClient instance

  '''

  def __init__( self, *args, **kwargs ):
    ''' c'tor
    '''
    AgentModule.__init__( self, *args, **kwargs )
    # # replica manager
    self.replicaManager = ReplicaManager()
    # # transformation client
    self.transClient = TransformationClient()
    # # wms client
    self.wmsClient = WMSClient()
    # # request client
    self.requestClient = RequestClient()
    # # file catalog clinet
    self.metadataClient = FileCatalogClient()

    # # placeholders for CS options

    # # transformations types
    self.transformationTypes = None
    # # directory locations
    self.directoryLocations = None
    # # transformation metadata
    self.transfidmeta = None
    # # archive periof in days
    self.archiveAfter = None
    # # active SEs
    self.activeStorages = None
    # # transformation log SEs
    self.logSE = None
    # # enable/disable execution
    self.enableFlag = None

  def initialize( self ):
    ''' agent initialisation

    reading and setting confing opts

    :param self: self reference
    '''
    # # shifter proxy
    self.am_setOption( 'shifterProxy', 'DataManager' )
    # # transformations types
    agentTSTypes = self.am_getOption( 'TransformationTypes', [] )
    if agentTSTypes:
      self.transformationTypes = sortList( agentTSTypes )
    else:
      dataProc = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] )
      dataManip = Operations().getValue( 'Transformations/DataManipulation', ['Replication', 'Removal'] )
      self.transformationTypes = sortList( dataProc + dataManip )
    self.log.info( "Will consider the following transformation types: %s" % str( self.transformationTypes ) )
    # # directory locations
    self.directoryLocations = sortList( self.am_getOption( 'DirectoryLocations', [ 'TransformationDB',
                                                                                   'MetadataCatalog' ] ) )
    self.log.info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) )
    # # transformation metadata
    self.transfidmeta = self.am_getOption( 'TransfIDMeta', "TransformationID" )
    self.log.info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta )
    # # archive periof in days
    self.archiveAfter = self.am_getOption( 'ArchiveAfter', 7 )  # days
    self.log.info( "Will archive Completed transformations after %d days" % self.archiveAfter )
    # # active SEs
    self.activeStorages = sortList( self.am_getOption( 'ActiveSEs', [] ) )
    self.log.info( "Will check the following storage elements: %s" % str( self.activeStorages ) )
    # # transformation log SEs
    self.logSE = self.am_getOption( 'TransformationLogSE', 'LogSE' )
    self.log.info( "Will remove logs found on storage element: %s" % self.logSE )
    # # enable/disable execution, should be using CS option Status?? with default value as 'Active'??
    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
    return S_OK()

  #############################################################################
  def execute( self ):
    ''' execution in one agent's cycle

    :param self: self reference
    '''

    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
    if not self.enableFlag == 'True':
      self.log.info( 'TransformationCleaningAgent is disabled by configuration option EnableFlag' )
      return S_OK( 'Disabled via CS flag' )

    # # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
    res = self.transClient.getTransformations( { 'Status' : 'Cleaning',
                                                 'Type' : self.transformationTypes } )
    if res['OK']:
      for transDict in res['Value']:
        # # if transformation is of type `Replication` or `Removal`, there is nothing to clean.
        # # We just archive
        if transDict[ 'Type' ] in [ 'Replication', 'Removal' ]:
          res = self.archiveTransformation( transDict['TransformationID'] )
          if not res['OK']:
            self.log.error( "Problems archiving transformation %s: %s" % ( transDict['TransformationID'],
                                                                         res['Message'] ) )
        else:
          res = self.cleanTransformation( transDict['TransformationID'] )
          if not res['OK']:
            self.log.error( "Problems cleaning transformation %s: %s" % ( transDict['TransformationID'],
                                                                        res['Message'] ) )


    # # Obtain the transformations in RemovingFiles status and (wait for it) removes the output files
    res = self.transClient.getTransformations( { 'Status' : 'RemovingFiles',
                                                 'Type' : self.transformationTypes} )
    if res['OK']:
      for transDict in res['Value']:
        res = self.removeTransformationOutput( transDict['TransformationID'] )
        if not res['OK']:
          self.log.error( "Problems removing transformation %s: %s" % ( transDict['TransformationID'],
                                                                       res['Message'] ) )

    # # Obtain the transformations in Completed status and archive if inactive for X days
    olderThanTime = datetime.utcnow() - timedelta( days = self.archiveAfter )
    res = self.transClient.getTransformations( { 'Status' : 'Completed',
                                                 'Type' : self.transformationTypes },
                                                 older = olderThanTime,
                                                 timeStamp = 'LastUpdate' )
    if res['OK']:
      for transDict in res['Value']:
        res = self.archiveTransformation( transDict['TransformationID'] )
        if not res['OK']:
          self.log.error( "Problems archiving transformation %s: %s" % ( transDict['TransformationID'],
                                                                       res['Message'] ) )
    else:
      self.log.error( "Could not get the transformations" )

    return S_OK()

  #############################################################################
  #
  # Get the transformation directories for checking
  #

  def getTransformationDirectories( self, transID ):
    ''' get the directories for the supplied transformation from the transformation system

    :param self: self reference
    :param int transID: transformation ID
    '''
    directories = []
    if 'TransformationDB' in self.directoryLocations:
      res = self.transClient.getTransformationParameters( transID, ['OutputDirectories'] )
      if not res['OK']:
        self.log.error( "Failed to obtain transformation directories", res['Message'] )
        return res
      transDirectories = res['Value'].splitlines()
      directories = self._addDirs( transID, transDirectories, directories )

    if 'MetadataCatalog' in self.directoryLocations:
      res = self.metadataClient.findDirectoriesByMetadata( {self.transfidmeta:transID} )
      if not res['OK']:
        self.log.error( "Failed to obtain metadata catalog directories", res['Message'] )
        return res
      transDirectories = res['Value']
      directories = self._addDirs( transID, transDirectories, directories )

    if not directories:
      self.log.info( "No output directories found" )
    directories = sortList( directories )
    return S_OK( directories )

  @classmethod
  def _addDirs( self, transID, newDirs, existingDirs ):
    ''' append uniqe :newDirs: list to :existingDirs: list

    :param self: self reference
    :param int transID: transformationID
    :param list newDirs: src list of paths
    :param list existingDirs: dest list of paths
    '''
    for folder in newDirs:
      transStr = str( transID ).zfill( 8 )
      if re.search( transStr, str( folder ) ):
        if not folder in existingDirs:
          existingDirs.append( folder )
    return existingDirs

  #############################################################################
  #
  # These are the methods for performing the cleaning of catalogs and storage
  #

  def cleanStorageContents( self, directory ):
    ''' delete lfn dir from all active SE

    :param self: self reference
    :param sre directory: folder name
    '''
    for storageElement in self.activeStorages:
      res = self.__removeStorageDirectory( directory, storageElement )
      if not res['OK']:
        return res
    return S_OK()

  def __removeStorageDirectory( self, directory, storageElement ):
    ''' wipe out all contents from :directory: at :storageElement:

    :param self: self reference
    :param str directory: path
    :param str storageElement: SE name
    '''
    self.log.info( 'Removing the contents of %s at %s' % ( directory, storageElement ) )
    res = self.replicaManager.getPfnForLfn( [directory], storageElement )
    if not res['OK']:
      self.log.error( "Failed to get PFN for directory", res['Message'] )
      return res
    for directory, error in res['Value']['Failed'].items():
      self.log.error( 'Failed to obtain directory PFN from LFN', '%s %s' % ( directory, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to obtain directory PFN from LFNs' )
    storageDirectory = res['Value']['Successful'].values()[0]
    res = self.replicaManager.getStorageFileExists( storageDirectory, storageElement, singleFile = True )
    if not res['OK']:
      self.log.error( "Failed to obtain existance of directory", res['Message'] )
      return res
    exists = res['Value']
    if not exists:
      self.log.info( "The directory %s does not exist at %s " % ( directory, storageElement ) )
      return S_OK()
    res = self.replicaManager.removeStorageDirectory( storageDirectory,
                                                      storageElement,
                                                      recursive = True,
                                                      singleDirectory = True )
    if not res['OK']:
      self.log.error( "Failed to remove storage directory", res['Message'] )
      return res
    self.log.info( "Successfully removed %d files from %s at %s" % ( res['Value']['FilesRemoved'],
                                                                     directory,
                                                                     storageElement ) )
    return S_OK()

  def cleanCatalogContents( self, directory ):
    ''' wipe out everything from catalog under folder :directory:

    :param self: self reference
    :params str directory: folder name
    '''
    res = self.__getCatalogDirectoryContents( [directory] )
    if not res['OK']:
      return res
    filesFound = res['Value']
    if not filesFound:
      return S_OK()
    self.log.info( "Attempting to remove %d possible remnants from the catalog and storage" % len( filesFound ) )
    res = self.replicaManager.removeFile( filesFound, force = True )
    if not res['OK']:
      return res
    for lfn, reason in res['Value']['Failed'].items():
      self.log.error( "Failed to remove file found in the catalog", "%s %s" % ( lfn, reason ) )
    if res['Value']['Failed']:
      return S_ERROR( "Failed to remove all files found in the catalog" )
    return S_OK()

  def __getCatalogDirectoryContents( self, directories ):
    ''' get catalog contents under paths :directories:

    :param self: self reference
    :param list directories: list of paths in catalog
    '''
    self.log.info( 'Obtaining the catalog contents for %d directories:' % len( directories ) )
    for directory in directories:
      self.log.info( directory )
    activeDirs = directories
    allFiles = {}
    while len( activeDirs ) > 0:
      currentDir = activeDirs[0]
      res = self.replicaManager.getCatalogListDirectory( currentDir, singleFile = True )
      activeDirs.remove( currentDir )
      if not res['OK'] and res['Message'].endswith( 'The supplied path does not exist' ):
        self.log.info( "The supplied directory %s does not exist" % currentDir )
      elif not res['OK']:
        self.log.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Message'] ) )
      else:
        dirContents = res['Value']
        activeDirs.extend( dirContents['SubDirs'] )
        allFiles.update( dirContents['Files'] )
    self.log.info( "Found %d files" % len( allFiles ) )
    return S_OK( allFiles.keys() )

  def cleanTransformationLogFiles( self, directory ):
    ''' clean up transformation logs from directory :directory:

    :param self: self reference
    :param str directory: folder name
    '''
    self.log.info( "Removing log files found in the directory %s" % directory )
    res = self.replicaManager.removeStorageDirectory( directory, self.logSE, singleDirectory = True )
    if not res['OK']:
      self.log.error( "Failed to remove log files", res['Message'] )
      return res
    self.log.info( "Successfully removed transformation log directory" )
    return S_OK()

  #############################################################################
  #
  # These are the functional methods for archiving and cleaning transformations
  #

  def removeTransformationOutput( self, transID ):
    ''' This just removes any mention of the output data from the catalog and storage '''
    self.log.info( "Removing output data for transformation %s" % transID )
    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) )
      return S_OK()
    directories = res['Value']
    for directory in directories:
      if not re.search( '/LOG/', directory ):
        res = self.cleanCatalogContents( directory )
        if not res['OK']:
          return res
        res = self.cleanStorageContents( directory )
        if not res['OK']:
          return res
    self.log.info( "Removed directories in the catalog and storage for transformation" )
    # Clean ALL the possible remnants found in the metadata catalog
    res = self.cleanMetadataCatalogFiles( transID )
    if not res['OK']:
      return res
    self.log.info( "Successfully removed output of transformation %d" % transID )
    # Change the status of the transformation to RemovedFiles
    res = self.transClient.setTransformationParameter( transID, 'Status', 'RemovedFiles' )
    if not res['OK']:
      self.log.error( "Failed to update status of transformation %s to RemovedFiles" % ( transID ), res['Message'] )
      return res
    self.log.info( "Updated status of transformation %s to RemovedFiles" % ( transID ) )
    return S_OK()

  def archiveTransformation( self, transID ):
    ''' This just removes job from the jobDB and the transformation DB

    :param self: self reference
    :param int transID: transformation ID
    '''
    self.log.info( "Archiving transformation %s" % transID )
    # Clean the jobs in the WMS and any failover requests found
    res = self.cleanTransformationTasks( transID )
    if not res['OK']:
      return res
    # Clean the transformation DB of the files and job information
    res = self.transClient.cleanTransformation( transID )
    if not res['OK']:
      return res
    self.log.info( "Successfully archived transformation %d" % transID )
    # Change the status of the transformation to archived
    res = self.transClient.setTransformationParameter( transID, 'Status', 'Archived' )
    if not res['OK']:
      self.log.error( "Failed to update status of transformation %s to Archived" % ( transID ), res['Message'] )
      return res
    self.log.info( "Updated status of transformation %s to Archived" % ( transID ) )
    return S_OK()

  def cleanTransformation( self, transID ):
    ''' This removes any mention of the supplied transformation
    '''
    self.log.info( "Cleaning transformation %s" % transID )
    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) )
      return S_OK()
    directories = res['Value']
    # Clean the jobs in the WMS and any failover requests found
    res = self.cleanTransformationTasks( transID )
    if not res['OK']:
      return res
    # Clean the log files for the jobs
    for directory in directories:
      if re.search( '/LOG/', directory ):
        res = self.cleanTransformationLogFiles( directory )
        if not res['OK']:
          return res
      res = self.cleanCatalogContents( directory )
      if not res['OK']:
        return res
      res = self.cleanStorageContents( directory )
      if not res['OK']:
        return res
    # Clean ALL the possible remnants found in the BK
    res = self.cleanMetadataCatalogFiles( transID )
    if not res['OK']:
      return res
    # Clean the transformation DB of the files and job information
    res = self.transClient.cleanTransformation( transID )
    if not res['OK']:
      return res
    self.log.info( "Successfully cleaned transformation %d" % transID )
    # Change the status of the transformation to deleted
    res = self.transClient.setTransformationParameter( transID, 'Status', 'Deleted' )
    if not res['OK']:
      self.log.error( "Failed to update status of transformation %s to Deleted" % ( transID ), res['Message'] )
      return res
    self.log.info( "Updated status of transformation %s to Deleted" % ( transID ) )
    return S_OK()

  def cleanMetadataCatalogFiles( self, transID ):
    ''' wipe out files from catalog '''
    res = self.metadataClient.findFilesByMetadata( { self.transfidmeta : transID } )
    if not res['OK']:
      return res
    fileToRemove = res['Value']
    if not fileToRemove:
      self.log.info( 'No files found for transID %s' % transID )
      return S_OK()
    res = self.replicaManager.removeFile( fileToRemove, force = True )
    if not res['OK']:
      return res
    for lfn, reason in res['Value']['Failed'].items():
      self.log.error( "Failed to remove file found in metadata catalog", "%s %s" % ( lfn, reason ) )
    if res['Value']['Failed']:
      return S_ERROR( "Failed to remove all files found in the metadata catalog" )
    self.log.info( "Successfully removed all files found in the BK" )
    return S_OK()

  #############################################################################
  #
  # These are the methods for removing the jobs from the WMS and transformation DB
  #

  def cleanTransformationTasks( self, transID ):
    ''' clean tasks from WMS
    '''
    res = self.__getTransformationExternalIDs( transID )
    if not res['OK']:
      return res
    externalIDs = res['Value']
    if externalIDs:
      res = self.transClient.getTransformationParameters( transID, ['Type'] )
      if not res['OK']:
        self.log.error( "Failed to determine transformation type" )
        return res
      transType = res['Value']
      if transType in [ 'Replication', 'Removal' ]:
        res = self.__removeRequests( externalIDs )
      else:
        res = self.__removeWMSTasks( externalIDs )
      if not res['OK']:
        return res
    return S_OK()

  def __getTransformationExternalIDs( self, transID ):
    ''' collect all ExternalIDs for transformation :transID:

    :param self: self reference
    :param int transID: transforamtion ID
    '''
    res = self.transClient.getTransformationTasks( condDict = { 'TransformationID' : transID } )
    if not res['OK']:
      self.log.error( "Failed to get externalIDs for transformation %d" % transID, res['Message'] )
      return res
    externalIDs = [ taskDict['ExternalID'] for taskDict in res["Value"] ]
    self.log.info( "Found %d tasks for transformation" % len( externalIDs ) )
    return S_OK( externalIDs )

  def __removeRequests( self, requestIDs ):
    ''' dummy method '''
    self.log.error( "Not removing requests but should do" )
    return S_OK()

  def __removeWMSTasks( self, transJobIDs ):
    ''' wipe out jobs and their requests from the system

    TODO: should check request status, maybe FTS files as well ???

    :param self: self reference
    :param list trasnJobIDs: job IDs
    '''
    # Prevent 0 job IDs
    jobIDs = [ int( j ) for j in transJobIDs if int( j ) ]
    allRemove = True
    for jobList in breakListIntoChunks( jobIDs, 500 ):

      res = self.wmsClient.killJob( jobList )
      if res['OK']:
        self.log.info( "Successfully killed %d jobs from WMS" % len( jobList ) )
      elif ( "InvalidJobIDs" in res ) and ( "NonauthorizedJobIDs" not in res ) and ( "FailedJobIDs" not in res ):
        self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) )
      elif "NonauthorizedJobIDs" in res:
        self.log.error( "Failed to kill %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) )
        allRemove = False
      elif "FailedJobIDs" in res:
        self.log.error( "Failed to kill %s jobs" % len( res['FailedJobIDs'] ) )
        allRemove = False

      res = self.wmsClient.deleteJob( jobList )
      if res['OK']:
        self.log.info( "Successfully removed %d jobs from WMS" % len( jobList ) )
      elif ( "InvalidJobIDs" in res ) and ( "NonauthorizedJobIDs" not in res ) and ( "FailedJobIDs" not in res ):
        self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) )
      elif "NonauthorizedJobIDs" in res:
        self.log.error( "Failed to remove %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) )
        allRemove = False
      elif "FailedJobIDs" in res:
        self.log.error( "Failed to remove %s jobs" % len( res['FailedJobIDs'] ) )
        allRemove = False

    if not allRemove:
      return S_ERROR( "Failed to remove all remnants from WMS" )
    self.log.info( "Successfully removed all tasks from the WMS" )

    if not jobIDs:
      self.log.info( "JobIDs not present, unable to remove asociated requests." )
      return S_OK()

    res = self.requestClient.getRequestForJobs( jobIDs )
    if not res['OK']:
      self.log.error( "Failed to get requestID for jobs.", res['Message'] )
      return res
    failoverRequests = res['Value']
    self.log.info( "Found %d jobs with associated failover requests" % len( failoverRequests ) )
    if not failoverRequests:
      return S_OK()
    failed = 0
    for jobID, requestName in failoverRequests.items():
      # Put this check just in case, tasks must have associated jobs
      if jobID == 0 or jobID == '0':
        continue
      res = self.requestClient.deleteRequest( requestName )
      if not res['OK']:
        self.log.error( "Failed to remove request from RequestDB", res['Message'] )
        failed += 1
      else:
        self.log.verbose( "Removed request %s associated to job %d." % ( requestName, jobID ) )
    if failed:
      self.log.info( "Successfully removed %s requests" % ( len( failoverRequests ) - failed ) )
      self.log.info( "Failed to remove %s requests" % failed )
      return S_ERROR( "Failed to remove all the request from RequestDB" )
    self.log.info( "Successfully removed all the associated failover requests" )
    return S_OK()
Exemplo n.º 53
0
class SEvsLFCAgent( AgentModule ):

  def initialize( self ):

    self.RequestDBClient = RequestClient()
    self.ReplicaManager = ReplicaManager()

    # This sets the Default Proxy to used as that defined under 
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    return S_OK()

  def execute( self ):

    IntegrityDB = RPCClient( 'DataManagement/DataIntegrity' )

    res = self.RequestDBClient.getRequest( 'integrity' )
    if not res['OK']:
      gLogger.info( "SEvsLFCAgent.execute: Failed to get request from database." )
      return S_OK()
    elif not res['Value']:
      gLogger.info( "SEvsLFCAgent.execute: No requests to be executed found." )
      return S_OK()
    requestString = res['Value']['requestString']
    requestName = res['Value']['requestName']
    sourceServer = res['Value']['Server']
    gLogger.info( "SEvsLFCAgent.execute: Obtained request %s" % requestName )
    oRequest = RequestContainer( request = requestString )

    ################################################
    # Find the number of sub-requests from the request
    res = oRequest.getNumSubRequests( 'integrity' )
    if not res['OK']:
      errStr = "SEvsLFCAgent.execute: Failed to obtain number of integrity subrequests."
      gLogger.error( errStr, res['Message'] )
      return S_OK()
    gLogger.info( "SEvsLFCAgent.execute: Found %s sub requests." % res['Value'] )

    ################################################
    # For all the sub-requests in the request
    for ind in range( res['Value'] ):
      gLogger.info( "SEvsLFCAgent.execute: Processing sub-request %s." % ind )
      subRequestAttributes = oRequest.getSubRequestAttributes( ind, 'integrity' )['Value']
      if subRequestAttributes['Status'] == 'Waiting':
        subRequestFiles = oRequest.getSubRequestFiles( ind, 'integrity' )['Value']
        operation = subRequestAttributes['Operation']

        ################################################
        #  If the sub-request is a lfcvsse operation
        if operation == 'SEvsLFC':
          gLogger.info( "SEvsLFCAgent.execute: Attempting to execute %s sub-request." % operation )
          storageElementName = subRequestAttributes['StorageElement']
          for subRequestFile in subRequestFiles:
            if subRequestFile['Status'] == 'Waiting':
              lfn = subRequestFile['LFN']
              storageElement = StorageElement( storageElementName )
              res = storageElement.isValid()
              if not res['OK']:
                errStr = "SEvsLFCAgent.execute: Failed to instantiate destination StorageElement."
                gLogger.error( errStr, storageElement )
              else:
                res = storageElement.getPfnForLfn( lfn )
                if not res['OK']:
                  gLogger.info( 'shit bugger do something.' )
                else:
                  oNamespaceBrowser = NamespaceBrowser( res['Value'] )
                  # Loop over all the directories and sub-directories
                  while ( oNamespaceBrowser.isActive() ):
                    currentDir = oNamespaceBrowser.getActiveDir()

                    gLogger.info( "SEvsLFCAgent.execute: Attempting to list the contents of %s." % currentDir )
                    res = storageElement.listDirectory( currentDir )
                    if not res['Value']['Successful'].has_key( currentDir ):
                      gLogger.error( "SEvsLFCAgent.execute: Failed to list the directory contents.", "%s %s" % ( currentDir, res['Value']['Successful']['Failed'][currentDir] ) )
                      subDirs = [currentDir]
                    else:
                      subDirs = []
                      files = {}
                      for surl, surlDict in res['Value']['Successful'][currentDir]['Files'].items():
                        pfnRes = storageElement.getPfnForProtocol( surl, 'SRM2', withPort = False )
                        surl = pfnRes['Value']
                        files[surl] = surlDict
                      for surl, surlDict in res['Value']['Successful'][currentDir]['SubDirs'].items():
                        pfnRes = storageElement.getPfnForProtocol( surl, 'SRM2', withPort = False )
                        surl = pfnRes['Value']
                        subDirs.append( surl )

                      #subDirs = res['Value']['Successful'][currentDir]['SubDirs']
                      gLogger.info( "SEvsLFCAgent.execute: Successfully obtained %s sub-directories." % len( subDirs ) )
                      #files = res['Value']['Successful'][currentDir]['Files']
                      gLogger.info( "SEvsLFCAgent.execute: Successfully obtained %s files." % len( files ) )

                      selectedLfns = []
                      lfnPfnDict = {}
                      pfnSize = {}

                      for pfn, pfnDict in files.items():
                        res = storageElement.getPfnPath( pfn )
                        if not res['OK']:
                          gLogger.error( "SEvsLFCAgent.execute: Failed to get determine LFN from pfn.", "%s %s" % ( pfn, res['Message'] ) )
                          fileMetadata = {'Prognosis':'NonConventionPfn', 'LFN':'', 'PFN':pfn, 'StorageElement':storageElementName, 'Size':pfnDict['Size']}
                          res = IntegrityDB.insertProblematic( AGENT_NAME, fileMetadata )
                          if res['OK']:
                            gLogger.info( "SEvsLFCAgent.execute: Successfully added to IntegrityDB." )
                            gLogger.error( "Change the status in the LFC,ProcDB...." )
                          else:
                            gLogger.error( "Shit, f**k, bugger. Add the failover." )
                        else:
                          lfn = res['Value']
                          selectedLfns.append( lfn )
                          lfnPfnDict[lfn] = pfn
                          pfnSize[pfn] = pfnDict['Size']

                      res = self.ReplicaManager.getCatalogFileMetadata( selectedLfns )
                      if not res['OK']:
                        subDirs = [currentDir]
                      else:
                        for lfn in res['Value']['Failed'].keys():
                          gLogger.error( "SEvsLFCAgent.execute: Failed to get metadata.", "%s %s" % ( lfn, res['Value']['Failed'][lfn] ) )
                          pfn = lfnPfnDict[lfn]
                          fileMetadata = {'Prognosis':'SEPfnNoLfn', 'LFN':lfn, 'PFN':pfn, 'StorageElement':storageElementName, 'Size':pfnSize[pfn]}
                          res = IntegrityDB.insertProblematic( AGENT_NAME, fileMetadata )
                          if res['OK']:
                            gLogger.info( "SEvsLFCAgent.execute: Successfully added to IntegrityDB." )
                            gLogger.error( "Change the status in the LFC,ProcDB...." )
                          else:
                            gLogger.error( "Shit, f**k, bugger. Add the failover." )

                        for lfn, lfnDict in res['Value']['Successful'].items():
                          pfn = lfnPfnDict[lfn]
                          storageSize = pfnSize[pfn]
                          catalogSize = lfnDict['Size']
                          if int( catalogSize ) == int( storageSize ):
                            gLogger.info( "SEvsLFCAgent.execute: Catalog and storage sizes match.", "%s %s" % ( pfn, storageElementName ) )
                            gLogger.info( "Change the status in the LFC" )
                          elif int( storageSize ) == 0:
                            gLogger.error( "SEvsLFCAgent.execute: Physical file size is 0.", "%s %s" % ( pfn, storageElementName ) )
                            fileMetadata = {'Prognosis':'ZeroSizePfn', 'LFN':lfn, 'PFN':pfn, 'StorageElement':storageElementName}
                            res = IntegrityDB.insertProblematic( AGENT_NAME, fileMetadata )
                            if res['OK']:
                              gLogger.info( "SEvsLFCAgent.execute: Successfully added to IntegrityDB." )
                              gLogger.error( "Change the status in the LFC,ProcDB...." )
                            else:
                              gLogger.error( "Shit, f**k, bugger. Add the failover." )
                          else:
                            gLogger.error( "SEvsLFCAgent.execute: Catalog and storage size mis-match.", "%s %s" % ( pfn, storageElementName ) )
                            fileMetadata = {'Prognosis':'PfnSizeMismatch', 'LFN':lfn, 'PFN':pfn, 'StorageElement':storageElementName}
                            res = IntegrityDB.insertProblematic( AGENT_NAME, fileMetadata )
                            if res['OK']:
                              gLogger.info( "SEvsLFCAgent.execute: Successfully added to IntegrityDB." )
                              gLogger.error( "Change the status in the LFC,ProcDB...." )
                            else:
                              gLogger.error( "Shit, f**k, bugger. Add the failover." )

                        res = self.ReplicaManager.getCatalogReplicas( selectedLfns )
                        if not res['OK']:
                          subDirs = [currentDir]
                        else:
                          for lfn in res['Value']['Failed'].keys():
                            gLogger.error( "SEvsLFCAgent.execute: Failed to get replica information.", "%s %s" % ( lfn, res['Value']['Failed'][lfn] ) )
                            pfn = lfnPfnDict[lfn]
                            fileMetadata = {'Prognosis':'PfnNoReplica', 'LFN':lfn, 'PFN':pfn, 'StorageElement':storageElementName, 'Size':pfnSize[pfn]}
                            res = IntegrityDB.insertProblematic( AGENT_NAME, fileMetadata )
                            if res['OK']:
                              gLogger.info( "SEvsLFCAgent.execute: Successfully added to IntegrityDB." )
                              gLogger.error( "Change the status in the LFC,ProcDB...." )
                            else:
                              gLogger.error( "Shit, f**k, bugger. Add the failover." )

                          for lfn, repDict in res['Value']['Successful'].items():
                            pfn = lfnPfnDict[lfn]
                            registeredPfns = repDict.values()
                            if not pfn in registeredPfns:
                              gLogger.error( "SEvsLFCAgent.execute: SE PFN not registered.", "%s %s" % ( lfn, pfn ) )
                              fileMetadata = {'Prognosis':'PfnNoReplica', 'LFN':lfn, 'PFN':pfn, 'StorageElement':storageElementName}
                              res = IntegrityDB.insertProblematic( AGENT_NAME, fileMetadata )
                              if res['OK']:
                                gLogger.info( "SEvsLFCAgent.execute: Successfully added to IntegrityDB." )
                                gLogger.error( "Change the status in the LFC,ProcDB...." )
                              else:
                                gLogger.error( "Shit, f**k, bugger. Add the failover." )
                            else:
                              gLogger.info( "SEvsLFCAgent.execute: SE Pfn verified.", pfn )

                    oNamespaceBrowser.updateDirs( subDirs )
                  oRequest.setSubRequestFileAttributeValue( ind, 'integrity', lfn, 'Status', 'Done' )

        ################################################
        #  If the sub-request is none of the above types
        else:
          gLogger.info( "SEvsLFCAgent.execute: Operation not supported.", operation )

        ################################################
        #  Determine whether there are any active files
        if oRequest.isSubRequestEmpty( ind, 'integrity' )['Value']:
          oRequest.setSubRequestStatus( ind, 'integrity', 'Done' )

      ################################################
      #  If the sub-request is already in terminal state
      else:
        gLogger.info( "SEvsLFCAgent.execute: Sub-request %s is status '%s' and  not to be executed." % ( ind, subRequestAttributes['Status'] ) )

    ################################################
    #  Generate the new request string after operation
    requestString = oRequest.toXML()['Value']
    res = self.RequestDBClient.updateRequest( requestName, requestString, sourceServer )

    return S_OK()
Exemplo n.º 54
0
   try:
     shutil.copy(appTar,"%s%s" % (final_path, os.path.basename(appTar)))
   except Exception, x:
     gLogger.error("Could not copy because %s" % x)
     return S_ERROR("Could not copy because %s" % x)
 elif path.find("http://") > -1:
   gLogger.error("path %s was not foreseen, location not known, upload to location yourself, and publish in CS manually" % path)
   return S_ERROR()
 else:
   lfnpath = "%s%s" % (path, os.path.basename(appTar))
   res = rm.putAndRegister(lfnpath, appTar, ops.getValue('Software/BaseStorageElement',"CERN-SRM"))
   if not res['OK']:
     return res
   request = RequestContainer()
   request.setCreationTime()
   requestClient = RequestClient()
   request.setRequestName('copy_%s' % os.path.basename(appTar).replace(".tgz","").replace(".tar.gz",""))
   request.setSourceComponent('ReplicateILCSoft')
   copies_at = ops.getValue('Software/CopiesAt',[])
   index_copy = 0
   for copies in copies_at:
     res = request.addSubRequest({'Attributes':{'Operation' : 'replicateAndRegister',
                                                'TargetSE' : copies,
                                                'ExecutionOrder' : index_copy},
                                  'Files':[{'LFN':lfnpath}]},
                                  'transfer')
     #res = rm.replicateAndRegister("%s%s"%(path,appTar),"IN2P3-SRM")
     if not res['OK']:
       return res
     index_copy += 1
   requestxml = request.toXML()['Value']
Exemplo n.º 55
0
class RemovalAgent(AgentModule, RequestAgentMixIn):
    """
    This Agent takes care of executing "removal" request from the RequestManagement system
  """
    def __init__(self, *args):
        """
    Initialize the base class and define some extra data members
    """
        AgentModule.__init__(self, *args)
        self.requestDBClient = None
        self.replicaManager = None
        self.maxNumberOfThreads = 4
        self.maxRequestsInQueue = 100
        self.threadPool = None
        self.timeOutCounter = 0
        self.pendingRequests = True

    def initialize(self):
        """
      Called by the framework upon startup, before any cycle (execute method bellow)
    """
        self.requestDBClient = RequestClient()
        # the RequestAgentMixIn needs the capitalized version, until is is fixed keep this.
        self.RequestDBClient = self.requestDBClient
        self.replicaManager = ReplicaManager()

        gMonitor.registerActivity("Iteration", "Agent Loops", "RemovalAgent",
                                  "Loops/min", gMonitor.OP_SUM)
        gMonitor.registerActivity("Execute", "Request Processed",
                                  "RemovalAgent", "Requests/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("Done", "Request Completed", "RemovalAgent",
                                  "Requests/min", gMonitor.OP_SUM)

        gMonitor.registerActivity("PhysicalRemovalAtt",
                                  "Physical removals attempted",
                                  "RemovalAgent", "Removal/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("PhysicalRemovalDone",
                                  "Successful physical removals",
                                  "RemovalAgent", "Removal/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("PhysicalRemovalFail",
                                  "Failed physical removals", "RemovalAgent",
                                  "Removal/min", gMonitor.OP_SUM)
        gMonitor.registerActivity("PhysicalRemovalSize",
                                  "Physically removed size", "RemovalAgent",
                                  "Bytes", gMonitor.OP_ACUM)

        gMonitor.registerActivity("ReplicaRemovalAtt",
                                  "Replica removal attempted", "RemovalAgent",
                                  "Removal/min", gMonitor.OP_SUM)
        gMonitor.registerActivity("ReplicaRemovalDone",
                                  "Successful replica removals",
                                  "RemovalAgent", "Removal/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("ReplicaRemovalFail",
                                  "Failed replica removals", "RemovalAgent",
                                  "Removal/min", gMonitor.OP_SUM)

        gMonitor.registerActivity("RemoveFileAtt", "File removal attempted",
                                  "RemovalAgent", "Removal/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("RemoveFileDone", "File removal done",
                                  "RemovalAgent", "Removal/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("RemoveFileFail", "File removal failed",
                                  "RemovalAgent", "Removal/min",
                                  gMonitor.OP_SUM)

        self.maxNumberOfThreads = self.am_getOption('NumberOfThreads',
                                                    self.maxNumberOfThreads)
        self.maxRequestsInQueue = self.am_getOption('RequestsInQueue',
                                                    self.maxRequestsInQueue)
        self.threadPool = ThreadPool(1, self.maxNumberOfThreads,
                                     self.maxRequestsInQueue)

        # Set the ThreadPool in daemon mode to process new ThreadedJobs as they are inserted
        self.threadPool.daemonize()

        self.maxRequests = self.am_getOption('MaxRequestsPerCycle', 1200.)

        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/DataManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')

        return S_OK()

    def execute(self):
        """
    Fill the TreadPool with ThreadJobs
    """
        self.pendingRequests = True
        self.maxRequests = min(
            10000., self.am_getOption('MaxRequestsPerCycle', self.maxRequests))
        requestCounter = 0
        while self.pendingRequests:
            if requestCounter > self.maxRequests:
                break
            requestCounter += 1
            requestExecutor = ThreadedJob(self.executeRequest)
            ret = self.threadPool.queueJob(requestExecutor)
            if not ret['OK']:
                break
            time.sleep(0.1)

        if self.timeOutCounter:
            gLogger.error('Timeouts during removal execution:',
                          self.timeOutCounter)

        return S_OK()

    def executeRequest(self):
        """
    Do the actual work in the Thread
    """
        ################################################
        # Get a request from request DB
        gMonitor.addMark("Iteration", 1)
        res = self.requestDBClient.getRequest('removal')
        if not res['OK']:
            gLogger.info(
                "RemovalAgent.execute: Failed to get request from database.")
            return S_OK()
        elif not res['Value']:
            gLogger.info(
                "RemovalAgent.execute: No requests to be executed found.")
            self.pendingRequests = False
            return S_OK()
        requestString = res['Value']['RequestString']
        requestName = res['Value']['RequestName']
        sourceServer = res['Value']['Server']

        jobID = 0
        try:
            jobID = int(res['Value']['JobID'])
        except:
            gLogger.warn(
                "RemovalAgent.execute: JobID not present or malformed in request '%s', will use 0 instead."
                % requestName)

        gLogger.info("RemovalAgent.execute: Obtained request %s" % requestName)

        try:

            result = self.requestDBClient.getCurrentExecutionOrder(
                requestName, sourceServer)
            if result['OK']:
                currentOrder = result['Value']
            else:
                gLogger.error('Can not get the request execution order')
                self.requestDBClient.updateRequest(requestName, requestString,
                                                   sourceServer)
                return S_OK('Can not get the request execution order')

            oRequest = RequestContainer(request=requestString)

            ################################################
            # Find the number of sub-requests from the request
            res = oRequest.getNumSubRequests('removal')
            if not res['OK']:
                errStr = "RemovalAgent.execute: Failed to obtain number of removal subrequests."
                gLogger.error(errStr, res['Message'])
                return S_OK()
            gLogger.info("RemovalAgent.execute: Found %s sub requests." %
                         res['Value'])

            ################################################
            # For all the sub-requests in the request
            modified = False
            for ind in range(res['Value']):
                gMonitor.addMark("Execute", 1)
                gLogger.info(
                    "RemovalAgent.execute: Processing sub-request %s." % ind)
                subRequestAttributes = oRequest.getSubRequestAttributes(
                    ind, 'removal')['Value']
                subExecutionOrder = int(subRequestAttributes['ExecutionOrder'])
                subStatus = subRequestAttributes['Status']
                if subStatus == 'Waiting' and subExecutionOrder <= currentOrder:
                    subRequestFiles = oRequest.getSubRequestFiles(
                        ind, 'removal')['Value']
                    operation = subRequestAttributes['Operation']

                    ################################################
                    #  If the sub-request is a physical removal operation
                    if operation == 'physicalRemoval':
                        gLogger.info(
                            "RemovalAgent.execute: Attempting to execute %s sub-request."
                            % operation)
                        diracSEs = subRequestAttributes['TargetSE'].split(',')
                        physicalFiles = []
                        pfnToLfn = {}
                        for subRequestFile in subRequestFiles:
                            if subRequestFile['Status'] == 'Waiting':
                                pfn = str(subRequestFile['PFN'])
                                lfn = str(subRequestFile['LFN'])
                                pfnToLfn[pfn] = lfn
                                physicalFiles.append(pfn)
                        gMonitor.addMark('PhysicalRemovalAtt',
                                         len(physicalFiles))
                        failed = {}
                        errMsg = {}
                        for diracSE in diracSEs:
                            res = self.replicaManager.removeStorageFile(
                                physicalFiles, diracSE)
                            if res['OK']:
                                for pfn in res['Value']['Failed'].keys():
                                    if not failed.has_key(pfn):
                                        failed[pfn] = {}
                                    failed[pfn][diracSE] = res['Value'][
                                        'Failed'][pfn]
                            else:
                                errMsg[diracSE] = res['Message']
                                for pfn in physicalFiles:
                                    if not failed.has_key(pfn):
                                        failed[pfn] = {}
                                    failed[pfn][diracSE] = 'Completely'
                        # Now analyse the results
                        failedPFNs = failed.keys()
                        pfnsOK = [
                            pfn for pfn in physicalFiles
                            if not pfn in failedPFNs
                        ]
                        gMonitor.addMark('PhysicalRemovalDone', len(pfnsOK))
                        for pfn in pfnsOK:
                            gLogger.info(
                                "RemovalAgent.execute: Successfully removed %s at %s"
                                % (pfn, str(diracSEs)))
                            res = oRequest.setSubRequestFileAttributeValue(
                                ind, 'removal', pfnToLfn[pfn], 'Status',
                                'Done')
                            if not res['OK']:
                                gLogger.error(
                                    "RemovalAgent.execute: Error setting status to %s for %s"
                                    % ('Done', pfnToLfn[pfn]))
                            modified = True
                        if failed:
                            gMonitor.addMark('PhysicalRemovalFail',
                                             len(failedPFNs))
                            for pfn in failedPFNs:
                                for diracSE in failed[pfn].keys():
                                    if type(failed[pfn]
                                            [diracSE]) in StringTypes:
                                        if re.search(
                                                'no such file or directory',
                                                failed[pfn][diracSE].lower()):
                                            gLogger.info(
                                                "RemovalAgent.execute: File did not exist.",
                                                pfn)
                                            res = oRequest.setSubRequestFileAttributeValue(
                                                ind, 'removal', pfnToLfn[pfn],
                                                'Status', 'Done')
                                            if not res['OK']:
                                                gLogger.error(
                                                    "RemovalAgent.execute: Error setting status to %s for %s"
                                                    % ('Done', pfnToLfn[pfn]))
                                            modified = True
                                        else:
                                            gLogger.info(
                                                "RemovalAgent.execute: Failed to remove file.",
                                                "%s at %s - %s" %
                                                (pfn, diracSE,
                                                 failed[pfn][diracSE]))
                        if errMsg:
                            for diracSE in errMsg.keys():
                                errStr = "RemovalAgent.execute: Completely failed to remove replicas. At %s", diracSE
                                gLogger.error(errStr, errMsg[diracSE])

                    ################################################
                    #  If the sub-request is a physical removal operation
                    elif operation == 'removeFile':
                        gLogger.info(
                            "RemovalAgent.execute: Attempting to execute %s sub-request."
                            % operation)
                        lfns = []
                        for subRequestFile in subRequestFiles:
                            if subRequestFile['Status'] == 'Waiting':
                                lfn = str(subRequestFile['LFN'])
                                lfns.append(lfn)
                        gMonitor.addMark('RemoveFileAtt', len(lfns))
                        res = self.replicaManager.removeFile(lfns)
                        if res['OK']:
                            gMonitor.addMark(
                                'RemoveFileDone',
                                len(res['Value']['Successful'].keys()))
                            for lfn in res['Value']['Successful'].keys():
                                gLogger.info(
                                    "RemovalAgent.execute: Successfully removed %s."
                                    % lfn)
                                result = oRequest.setSubRequestFileAttributeValue(
                                    ind, 'removal', lfn, 'Status', 'Done')
                                if not result['OK']:
                                    gLogger.error(
                                        "RemovalAgent.execute: Error setting status to %s for %s"
                                        % ('Done', lfn))
                                modified = True
                            gMonitor.addMark(
                                'RemoveFileFail',
                                len(res['Value']['Failed'].keys()))
                            for lfn in res['Value']['Failed'].keys():
                                if type(res['Value']['Failed']
                                        [lfn]) in StringTypes:
                                    if re.search(
                                            'no such file or directory',
                                            res['Value']['Failed']
                                        [lfn].lower()):
                                        gLogger.info(
                                            "RemovalAgent.execute: File did not exist.",
                                            lfn)
                                        result = oRequest.setSubRequestFileAttributeValue(
                                            ind, 'removal', lfn, 'Status',
                                            'Done')
                                        if not result['OK']:
                                            gLogger.error(
                                                "RemovalAgent.execute: Error setting status to %s for %s"
                                                % ('Done', lfn))
                                        modified = True
                                    else:
                                        gLogger.info(
                                            "RemovalAgent.execute: Failed to remove file:",
                                            "%s %s" %
                                            (lfn, res['Value']['Failed'][lfn]))
                        else:
                            gMonitor.addMark('RemoveFileFail', len(lfns))
                            errStr = "RemovalAgent.execute: Completely failed to remove files files."
                            gLogger.error(errStr, res['Message'])

                    ################################################
                    #  If the sub-request is a physical removal operation
                    elif operation == 'replicaRemoval':
                        gLogger.info(
                            "RemovalAgent.execute: Attempting to execute %s sub-request."
                            % operation)
                        diracSEs = subRequestAttributes['TargetSE'].split(',')
                        lfns = []
                        for subRequestFile in subRequestFiles:
                            if subRequestFile['Status'] == 'Waiting':
                                lfn = str(subRequestFile['LFN'])
                                lfns.append(lfn)
                        gMonitor.addMark('ReplicaRemovalAtt', len(lfns))

                        failed = {}
                        errMsg = {}
                        for diracSE in diracSEs:
                            res = self.replicaManager.removeReplica(
                                diracSE, lfns)
                            if res['OK']:
                                for lfn in res['Value']['Failed'].keys():
                                    errorMessage = str(
                                        res['Value']['Failed'][lfn])
                                    if errorMessage.find(
                                            'Write access not permitted for this credential.'
                                    ) != -1:
                                        if self.__getProxyAndRemoveReplica(
                                                diracSE, lfn):
                                            continue
                                    if errorMessage.find(
                                            'seconds timeout for "__gfal_wrapper" call'
                                    ) != -1:
                                        self.timeOutCounter += 1
                                    if not failed.has_key(lfn):
                                        failed[lfn] = {}
                                    failed[lfn][diracSE] = res['Value'][
                                        'Failed'][lfn]
                            else:
                                errMsg[diracSE] = res['Message']
                                for lfn in lfns:
                                    if not failed.has_key(lfn):
                                        failed[lfn] = {}
                                    failed[lfn][diracSE] = 'Completely'
                        # Now analyse the results
                        failedLFNs = failed.keys()
                        lfnsOK = [lfn for lfn in lfns if not lfn in failedLFNs]
                        gMonitor.addMark('ReplicaRemovalDone', len(lfnsOK))
                        for lfn in lfnsOK:
                            gLogger.info(
                                "RemovalAgent.execute: Successfully removed %s at %s"
                                % (lfn, str(diracSEs)))
                            res = oRequest.setSubRequestFileAttributeValue(
                                ind, 'removal', lfn, 'Status', 'Done')
                            if not res['OK']:
                                gLogger.error(
                                    "RemovalAgent.execute: Error setting status to %s for %s"
                                    % ('Done', lfn))
                            modified = True
                        if failed:
                            gMonitor.addMark('PhysicalRemovalFail',
                                             len(failedLFNs))
                            for lfn in failedLFNs:
                                for diracSE in failed[lfn].keys():
                                    if type(failed[lfn]
                                            [diracSE]) in StringTypes:
                                        if re.search(
                                                'no such file or directory',
                                                failed[lfn][diracSE].lower()):
                                            gLogger.info(
                                                "RemovalAgent.execute: File did not exist.",
                                                lfn)
                                            res = oRequest.setSubRequestFileAttributeValue(
                                                ind, 'removal', lfn, 'Status',
                                                'Done')
                                            if not res['OK']:
                                                gLogger.error(
                                                    "RemovalAgent.execute: Error setting status to %s for %s"
                                                    % ('Done', lfn))
                                            modified = True
                                        else:
                                            gLogger.info(
                                                "RemovalAgent.execute: Failed to remove file.",
                                                "%s at %s - %s" %
                                                (lfn, diracSE,
                                                 failed[lfn][diracSE]))
                        if errMsg:
                            for diracSE in errMsg.keys():
                                errStr = "RemovalAgent.execute: Completely failed to remove replicas. At %s", diracSE
                                gLogger.error(errStr, errMsg[diracSE])

                    ################################################
                    #  If the sub-request is a request to the online system to retransfer
                    elif operation == 'reTransfer':
                        gLogger.info(
                            "RemovalAgent.execute: Attempting to execute %s sub-request."
                            % operation)
                        diracSE = subRequestAttributes['TargetSE']
                        for subRequestFile in subRequestFiles:
                            if subRequestFile['Status'] == 'Waiting':
                                pfn = str(subRequestFile['PFN'])
                                lfn = str(subRequestFile['LFN'])
                                res = self.replicaManager.onlineRetransfer(
                                    diracSE, pfn)
                                if res['OK']:
                                    if res['Value']['Successful'].has_key(pfn):
                                        gLogger.info(
                                            "RemovalAgent.execute: Successfully requested retransfer of %s."
                                            % pfn)
                                        result = oRequest.setSubRequestFileAttributeValue(
                                            ind, 'removal', lfn, 'Status',
                                            'Done')
                                        if not result['OK']:
                                            gLogger.error(
                                                "RemovalAgent.execute: Error setting status to %s for %s"
                                                % ('Done', lfn))
                                        modified = True
                                    else:
                                        errStr = "RemovalAgent.execute: Failed to request retransfer."
                                        gLogger.error(
                                            errStr, "%s %s %s" %
                                            (pfn, diracSE,
                                             res['Value']['Failed'][pfn]))
                                else:
                                    errStr = "RemovalAgent.execute: Completely failed to request retransfer."
                                    gLogger.error(errStr, res['Message'])
                            else:
                                gLogger.info(
                                    "RemovalAgent.execute: File already completed."
                                )

                    ################################################
                    #  If the sub-request is none of the above types
                    else:
                        gLogger.error(
                            "RemovalAgent.execute: Operation not supported.",
                            operation)

                    ################################################
                    #  Determine whether there are any active files
                    if oRequest.isSubRequestEmpty(ind, 'removal')['Value']:
                        oRequest.setSubRequestStatus(ind, 'removal', 'Done')
                        gMonitor.addMark("Done", 1)

                ################################################
                #  If the sub-request is already in terminal state
                else:
                    gLogger.info(
                        "RemovalAgent.execute:",
                        "Sub-request %s is status '%s' and not to be executed."
                        % (ind, subRequestAttributes['Status']))

            ################################################
            #  Generate the new request string after operation
            newrequestString = oRequest.toXML()['Value']
        except:
            # if something fails return the original request back to the server
            res = self.requestDBClient.updateRequest(requestName,
                                                     requestString,
                                                     sourceServer)
            return S_OK()

        res = self.requestDBClient.updateRequest(requestName, newrequestString,
                                                 sourceServer)

        if modified and jobID:
            result = self.finalizeRequest(requestName, jobID, sourceServer)

        return S_OK()

    def __getProxyAndRemoveReplica(self, diracSE, lfn):
        """
    get a proxy from the owner of the file and try to remove it
    returns True if it succeeds, False otherwise
    """

        result = self.replicaManager.getCatalogDirectoryMetadata(
            lfn, singleFile=True)
        if not result['OK']:
            gLogger.error("Could not get metadata info", result['Message'])
            return False
        ownerRole = result['Value']['OwnerRole']
        ownerDN = result['Value']['OwnerDN']
        if ownerRole[0] != "/":
            ownerRole = "/%s" % ownerRole

        userProxy = ''
        for ownerGroup in Registry.getGroupsWithVOMSAttribute(ownerRole):
            result = gProxyManager.downloadVOMSProxy(
                ownerDN,
                ownerGroup,
                limited=True,
                requiredVOMSAttribute=ownerRole)
            if not result['OK']:
                gLogger.verbose(
                    'Failed to retrieve voms proxy for %s : %s:' %
                    (ownerDN, ownerRole), result['Message'])
                continue
            userProxy = result['Value']
            gLogger.verbose("Got proxy for %s@%s [%s]" %
                            (ownerDN, ownerGroup, ownerRole))
            break
        if not userProxy:
            return False

        result = userProxy.dumpAllToFile()
        if not result['OK']:
            gLogger.verbose(result['Message'])
            return False

        upFile = result['Value']
        prevProxyEnv = os.environ['X509_USER_PROXY']
        os.environ['X509_USER_PROXY'] = upFile

        try:
            res = self.replicaManager.removeReplica(diracSE, lfn)
            if res['OK'] and lfn in res['Value']['Successful']:
                gLogger.verbose('Removed %s from %s' % (lfn, diracSE))
                return True
        finally:
            os.environ['X509_USER_PROXY'] = prevProxyEnv
            os.unlink(upFile)

        return False

    def finalize(self):
        """
    Called by the Agent framework to cleanly end execution.
    In this case this module will wait until all pending ThreadedJbos in the
    ThreadPool get executed
    """

        self.threadPool.processAllResults()
        return S_OK()
Exemplo n.º 56
0
class DISETForwardingAgent( AgentModule ):

  def initialize( self ):

    self.RequestDBClient = RequestClient()
    backend = self.am_getOption( 'Backend', '' )
    self.RequestDB = False
    if backend == 'mysql':
      from DIRAC.RequestManagementSystem.DB.RequestDBMySQL import RequestDBMySQL
      requestDB = RequestDBMySQL()
      if requestDB._connected:
        self.RequestDB = requestDB

    gMonitor.registerActivity( "Iteration", "Agent Loops", "DISETForwardingAgent", "Loops/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "Attempted", "Request Processed", "DISETForwardingAgent", "Requests/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "Successful", "Request Forward Successful", "DISETForwardingAgent", "Requests/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "Failed", "Request Forward Failed", "DISETForwardingAgent", "Requests/min", gMonitor.OP_SUM )

    #self.local = PathFinder.getServiceURL( "RequestManagement/localURL" )
    #if not self.local:
    #  self.local = AgentModule.am_getOption( self, 'localURL', '' )
    #if not self.local:
    #  errStr = 'The RequestManagement/localURL option must be defined.'
    #  gLogger.fatal( errStr )
    #  return S_ERROR( errStr )
    return S_OK()

  def execute( self ):
    """ The main execute method
    """
    self.requestsPerCycle = self.am_getOption( 'RequestsPerCycle', 10 )
    count = 0
    while count < self.requestsPerCycle:
      gLogger.verbose( 'Executing request #%d in this cycle' % count )
      result = self.execute_request()
      if not result['OK']:
        return result
      count += 1

    return S_OK()

  def execute_request( self ):
    """ Takes one DISET request and forward it to the destination service
    """
    gMonitor.addMark( "Iteration", 1 )
    if self.RequestDB:
      res = self.RequestDB.getRequest( 'diset' )
    else:
      res = self.RequestDBClient.getRequest( 'diset' )
    if not res['OK']:
      gLogger.error( "DISETForwardingAgent.execute: Failed to get request from database." )
      return S_OK()
    elif not res['Value']:
      gLogger.info( "DISETForwardingAgent.execute: No requests to be executed found." )
      return S_OK()

    gMonitor.addMark( "Attempted", 1 )
    requestString = res['Value']['RequestString']
    requestName = res['Value']['RequestName']
    try:
      jobID = int( res['Value']['JobID'] )
    except:
      jobID = 0
    gLogger.info( "DISETForwardingAgent.execute: Obtained request %s" % requestName )

    if self.RequestDB:
      result = self.RequestDB._getRequestAttribute( 'RequestID', requestName = requestName )
      if not result['OK']:
        return S_OK( 'Can not get the request execution order' )
      requestID = result['Value']
      result = self.RequestDB.getCurrentExecutionOrder( requestID )
    else:
      result = self.RequestDBClient.getCurrentExecutionOrder( requestName )
    if result['OK']:
      currentOrder = result['Value']
    else:
      return S_OK( 'Can not get the request execution order' )

    oRequest = RequestContainer( request = requestString )
    requestAttributes = oRequest.getRequestAttributes()['Value']

    ################################################
    # Find the number of sub-requests from the request
    res = oRequest.getNumSubRequests( 'diset' )
    if not res['OK']:
      errStr = "DISETForwardingAgent.execute: Failed to obtain number of diset subrequests."
      gLogger.error( errStr, res['Message'] )
      return S_OK()

    gLogger.info( "DISETForwardingAgent.execute: Found %s sub requests for job %s" % ( res['Value'], jobID ) )
    ################################################
    # For all the sub-requests in the request
    modified = False
    for ind in range( res['Value'] ):
      subRequestAttributes = oRequest.getSubRequestAttributes( ind, 'diset' )['Value']
      subExecutionOrder = int( subRequestAttributes['ExecutionOrder'] )
      subStatus = subRequestAttributes['Status']
      gLogger.info( "DISETForwardingAgent.execute: Processing sub-request %s with execution order %d" % ( ind, subExecutionOrder ) )
      if subStatus == 'Waiting' and subExecutionOrder <= currentOrder:
        operation = subRequestAttributes['Operation']
        gLogger.info( "DISETForwardingAgent.execute: Attempting to forward %s type." % operation )
        rpcStubString = subRequestAttributes['Arguments']
        rpcStub, length = DEncode.decode( rpcStubString )
        res = executeRPCStub( rpcStub )
        if res['OK']:
          gLogger.info( "DISETForwardingAgent.execute: Successfully forwarded." )
          oRequest.setSubRequestStatus( ind, 'diset', 'Done' )
          gMonitor.addMark( "Successful", 1 )
          modified = True
        elif res['Message'] == 'No Matching Job':
          gLogger.warn( "DISETForwardingAgent.execute: No corresponding job found. Setting to done." )
          oRequest.setSubRequestStatus( ind, 'diset', 'Done' )
        else:
          gLogger.error( "DISETForwardingAgent.execute: Failed to forward request.", res['Message'] )
      else:
        gLogger.info( "DISETForwardingAgent.execute: Sub-request %s is status '%s' and  not to be executed." % ( ind, subRequestAttributes['Status'] ) )

    ################################################
    #  Generate the new request string after operation
    requestString = oRequest.toXML()['Value']
    if self.RequestDB:
      res = self.RequestDB.updateRequest( requestName, requestString )
    else:
      res = self.RequestDBClient.updateRequest( requestName, requestString )
    if res['OK']:
      gLogger.info( "DISETForwardingAgent.execute: Successfully updated request." )
    else:
      gLogger.error( "DISETForwardingAgent.execute: Failed to update request" )

    if modified and jobID:
      result = self.RequestDBClient.finalizeRequest( requestName, jobID )

    return S_OK()