Exemplo n.º 1
0
class PropagationCommand(Command):
    """Command that derives a site's status from the statuses of its elements.

    The site name is read from ``self.args['site']``.
    """

    def __init__(self, args=None, clients=None):
        """Create the ResourceStatusClient, then run the base-class constructor.

        :param args: forwarded unchanged to ``Command.__init__``
        :param clients: forwarded unchanged to ``Command.__init__``
        """
        self.rssClient = ResourceStatusClient()
        super(PropagationCommand, self).__init__(args, clients)

    def doNew(self, masterParams=None):
        """Do nothing and return ``S_OK()``; ``masterParams`` is ignored."""
        return S_OK()

    def doCache(self):
        """Work out the status to propagate to the site.

        Each element returned by ``CSHelpers.getSiteElements`` is queried with
        ``selectStatusElement("Resource", "Status", ...)``; an element for
        which the query returns no rows is counted as ``Active``.

        :return: ``S_ERROR`` if no site was given, the failing query result if
                 a status lookup fails, otherwise ``S_OK`` wrapping a dict with
                 the keys ``Status`` (``Active``, ``Degraded`` or ``Banned``)
                 and ``Reason``.
        """
        # .get() so that a missing key ends in the S_ERROR below instead of an
        # uncaught KeyError.
        site = self.args.get('site')
        if not site:
            return S_ERROR('site was not found in args')

        elements = CSHelpers.getSiteElements(site)

        # A failed element lookup is not reported as an error: it falls
        # through to the 'Banned' result at the bottom.
        if elements['OK']:
            statusList = []
            for element in elements['Value']:
                status = self.rssClient.selectStatusElement(
                    "Resource",
                    "Status",
                    element,
                    meta={'columns': ['Status']})
                if not status['OK']:
                    return status

                if status['Value']:
                    # First column of the first returned row
                    statusList.append(status['Value'][0][0])
                else:  # forcing in the case the resource has no status (yet)
                    statusList.append('Active')

            # 'Active' takes precedence over 'Degraded'
            for candidate in ('Active', 'Degraded'):
                if candidate in statusList:
                    return S_OK({
                        'Status': candidate,
                        'Reason': 'An element that belongs to the site is %s' % candidate
                    })

        return S_OK({
            'Status': 'Banned',
            'Reason': 'There is no Active element in the site'
        })

    def doMaster(self):
        """Do nothing and return ``S_OK()``."""
        return S_OK()
Exemplo n.º 2
0
class PropagationCommand(Command):
    """Command that derives a site's status from the statuses of its elements.

    The site name is read from ``self.args["name"]``.
    """

    def __init__(self, args=None, clients=None):
        """Create the ResourceStatusClient, then run the base-class constructor.

        :param args: forwarded unchanged to ``Command.__init__``
        :param clients: forwarded unchanged to ``Command.__init__``
        """
        self.rssClient = ResourceStatusClient()
        super(PropagationCommand, self).__init__(args, clients)

    def doNew(self, masterParams=None):
        """Do nothing and return ``S_OK()``; ``masterParams`` is ignored."""
        return S_OK()

    def doCache(self):
        """Work out the status to propagate to the site.

        Each element returned by ``CSHelpers.getSiteElements`` is queried with
        ``selectStatusElement("Resource", "Status", ...)``; an element for
        which the query returns no rows is counted as ``Active``.

        :return: ``S_ERROR`` if no site name was given, the failing query
                 result if a status lookup fails, otherwise ``S_OK`` wrapping a
                 dict with the keys ``Status`` (``Active``, ``Degraded`` or
                 ``Banned``) and ``Reason``.
        """
        # .get() so that a missing key ends in the S_ERROR below instead of an
        # uncaught KeyError.
        site = self.args.get("name")
        if not site:
            return S_ERROR("site was not found in args")

        elements = CSHelpers.getSiteElements(site)

        # A failed element lookup is not reported as an error: it falls
        # through to the "Banned" result at the bottom.
        if elements["OK"]:
            statusList = []
            for element in elements["Value"]:
                status = self.rssClient.selectStatusElement(
                    "Resource",
                    "Status",
                    element,
                    meta={"columns": ["Status"]})
                if not status["OK"]:
                    return status

                if status["Value"]:
                    # First column of the first returned row
                    statusList.append(status["Value"][0][0])
                else:  # forcing in the case the resource has no status (yet)
                    statusList.append("Active")

            # "Active" takes precedence over "Degraded"
            for candidate in ("Active", "Degraded"):
                if candidate in statusList:
                    return S_OK({
                        "Status": candidate,
                        "Reason": "An element that belongs to the site is %s" % candidate
                    })

        return S_OK({
            "Status": "Banned",
            "Reason": "There is no Active element in the site"
        })

    def doMaster(self):
        """Do nothing and return ``S_OK()``."""
        return S_OK()
Exemplo n.º 3
0
    def getSiteMaskLogging(self, site=None, printOutput=False):
        """Retrieves site mask logging information.

        Queries the Site History entries with statusType 'ComputingAccess' and
        groups them by full site name.

        Example usage:

        >>> diracAdmin.getSiteMaskLogging('LCG.AUVER.fr')
        {'OK': True, 'Value': {...}}

        :param site: site name to restrict the query to; when None the
                     entries of every site name are kept
        :param printOutput: when True, also print the collected entries
        :returns: S_OK wrapping a dict mapping each site name to a list of
                  (status, reason, dateEffective, author, dateTokenExpiration)
                  tuples, or the S_ERROR of the failing validation / query
        """
        result = self.__checkSiteIsValid(site)
        if not result['OK']:
            return result

        rssClient = ResourceStatusClient()
        result = rssClient.selectStatusElement('Site',
                                               'History',
                                               name=site,
                                               statusType='ComputingAccess')
        if not result['OK']:
            return result

        siteDict = {}
        for logTuple in result['Value']:
            # Each row is expected to carry exactly these ten columns, in this
            # order; the underscore-prefixed ones are not used here.
            (status, reason, siteName, dateEffective, dateTokenExpiration,
             _eType, _sType, _eID, _lastCheckTime, author) = logTuple

            # Kept in its own variable so the query result being iterated is
            # not rebound in the middle of the loop.
            fullNames = getSiteFullNames(siteName)
            if not fullNames['OK']:
                # Rows whose site name cannot be resolved are skipped silently
                continue

            for sName in fullNames['Value']:
                if site is None or (site and site == sName):
                    siteDict.setdefault(sName, []).append(
                        (status, reason, dateEffective, author,
                         dateTokenExpiration))

        if printOutput:
            # Single-argument print(...) behaves the same as the print
            # statement under Python 2 and is also valid Python 3.
            if site:
                print('\nSite Mask Logging Info for %s\n' % site)
            else:
                print('\nAll Site Mask Logging Info\n')

            # The loop variable must not be called `site`: doing so made the
            # `if not site` test below look at the dict key instead of the
            # argument, so the per-site header was never printed.
            for sName, tupleList in siteDict.items():
                if not site:
                    print('\n===> %s\n' % sName)
                for tup in tupleList:
                    print(str(tup[0]).ljust(8) + str(tup[1]).ljust(20) +
                          '( ' + str(tup[2]) + ' )  "' + str(tup[3]) + '"')
                print(' ')

        return S_OK(siteDict)
Exemplo n.º 4
0
  def getSiteMaskLogging( self, site = None, printOutput = False ):
    """Retrieves site mask logging information.

       Queries the Site History entries with statusType 'ComputingAccess' and
       groups them by full site name.

       Example usage:

       >>> diracAdmin.getSiteMaskLogging('LCG.AUVER.fr')
       {'OK': True, 'Value': {...}}

       :param site: site name to restrict the query to; when None the entries
                    of every site name are kept
       :param printOutput: when True, also print the collected entries
       :returns: S_OK wrapping a dict mapping each site name to a list of
                 (status, reason, dateEffective, author, dateTokenExpiration)
                 tuples, or the S_ERROR of the failing validation / query
    """
    result = self.__checkSiteIsValid( site )
    if not result['OK']:
      return result

    rssClient = ResourceStatusClient()
    result = rssClient.selectStatusElement( 'Site', 'History', name = site,
                                            statusType = 'ComputingAccess' )
    if not result['OK']:
      return result

    siteDict = {}
    for logTuple in result['Value']:
      # Each row is expected to carry exactly these ten columns, in this order;
      # the underscore-prefixed ones are not used here.
      ( status, reason, siteName, dateEffective, dateTokenExpiration,
        _eType, _sType, _eID, _lastCheckTime, author ) = logTuple

      # Kept in its own variable so the query result being iterated is not
      # rebound in the middle of the loop.
      fullNames = getSiteFullNames( siteName )
      if not fullNames['OK']:
        # Rows whose site name cannot be resolved are skipped silently
        continue

      for sName in fullNames['Value']:
        if site is None or ( site and site == sName ):
          siteDict.setdefault( sName, [] ).append(
              ( status, reason, dateEffective, author, dateTokenExpiration ) )

    if printOutput:
      # Single-argument print(...) behaves the same as the print statement
      # under Python 2 and is also valid Python 3.
      if site:
        print( '\nSite Mask Logging Info for %s\n' % site )
      else:
        print( '\nAll Site Mask Logging Info\n' )

      # The loop variable must not be called `site`: doing so made the
      # `if not site` test below look at the dict key instead of the argument,
      # so the per-site header was never printed.
      for sName, tupleList in siteDict.items():
        if not site:
          print( '\n===> %s\n' % sName )
        for tup in tupleList:
          print( str( tup[0] ).ljust( 8 ) + str( tup[1] ).ljust( 20 ) +
                 '( ' + str( tup[2] ) + ' )  "' + str( tup[3] ) + '"' )
        print( ' ' )

    return S_OK( siteDict )
Exemplo n.º 5
0
class PropagationCommand(Command):
  """Command that derives a site's status from the statuses of its elements.

  The site name is read from ``self.args['site']``.
  """

  def __init__(self, args=None, clients=None):
    """Create the ResourceStatusClient, then run the base-class constructor.

    :param args: forwarded unchanged to ``Command.__init__``
    :param clients: forwarded unchanged to ``Command.__init__``
    """
    self.rssClient = ResourceStatusClient()
    super(PropagationCommand, self).__init__(args, clients)

  def doNew(self, masterParams=None):
    """Do nothing and return ``S_OK()``; ``masterParams`` is ignored."""
    return S_OK()

  def doCache(self):
    """Work out the status to propagate to the site.

    Each element returned by ``CSHelpers.getSiteElements`` is queried with
    ``selectStatusElement("Resource", "Status", ...)``; an element for which
    the query returns no rows is counted as ``Active``.

    :return: ``S_ERROR`` if no site was given, the failing query result if a
             status lookup fails, otherwise ``S_OK`` wrapping a dict with the
             keys ``Status`` (``Active``, ``Degraded`` or ``Banned``) and
             ``Reason``.
    """
    # .get() so that a missing key ends in the S_ERROR below instead of an
    # uncaught KeyError.
    site = self.args.get('site')
    if not site:
      return S_ERROR('site was not found in args')

    elements = CSHelpers.getSiteElements(site)

    # A failed element lookup is not reported as an error: it falls through to
    # the 'Banned' result at the bottom.
    if elements['OK']:
      statusList = []
      for element in elements['Value']:
        status = self.rssClient.selectStatusElement("Resource", "Status", element, meta={'columns': ['Status']})
        if not status['OK']:
          return status

        if status['Value']:
          # First column of the first returned row
          statusList.append(status['Value'][0][0])
        else:  # forcing in the case the resource has no status (yet)
          statusList.append('Active')

      # 'Active' takes precedence over 'Degraded'
      for candidate in ('Active', 'Degraded'):
        if candidate in statusList:
          return S_OK({'Status': candidate, 'Reason': 'An element that belongs to the site is %s' % candidate})

    return S_OK({'Status': 'Banned', 'Reason': 'There is no Active element in the site'})

  def doMaster(self):
    """Do nothing and return ``S_OK()``."""
    return S_OK()
Exemplo n.º 6
0
class TokenAgent( AgentModule ):
  '''
    TokenAgent is in charge of checking tokens assigned on resources.
    Notifications are sent to those users owning expiring tokens.
  '''

  # Default number of hours ahead of expiration at which a token is picked up
  __notifyHours = 12

  # Token owner used by RSS itself: entries owned by it are skipped, and
  # expired tokens are handed back to it
  __rssToken = 'rs_svc'

  # Default admin mail
  __adminMail = None

  def __init__( self, *args, **kwargs ):
    ''' c'tor: sets the defaults; the clients are created in initialize()
    '''

    AgentModule.__init__( self, *args, **kwargs )

    self.notifyHours = self.__notifyHours
    self.adminMail = self.__adminMail

    self.rsClient = None
    self.rmClient = None
    self.noClient = None

    self.tokenDict = None
    self.diracAdmin = None

  def initialize( self ):
    ''' TokenAgent initialization
        Uses the ProductionManager shifterProxy to modify the ResourceStatus DB

        :return: S_OK
    '''
    self.am_setOption( 'shifterProxy', 'ProductionManager' )

    self.notifyHours = self.am_getOption( 'notifyHours', self.notifyHours )

    self.rsClient = ResourceStatusClient()
    self.rmClient = ResourceManagementClient()
    self.noClient = NotificationClient()

    self.diracAdmin = DiracAdmin()

    return S_OK()

  def execute( self ):
    '''
      Looks for user tokens. If they are expired, or expiring, it notifies users.

      Failures are logged and never propagated: the method always returns S_OK.
    '''

    # Initialized here, as it is needed empty at the beginning of the execution
    self.tokenDict = {}

    # FIXME: probably this can be obtained from RssConfiguration instead
    elements = ( 'Site', 'Resource', 'Node' )

    for element in elements:

      self.log.info( 'Processing %s' % element )

      interestingTokens = self._getInterestingTokens( element )
      if not interestingTokens[ 'OK' ]:
        self.log.error( interestingTokens[ 'Message' ] )
        continue

      processTokens = self._processTokens( element, interestingTokens[ 'Value' ] )
      if not processTokens[ 'OK' ]:
        self.log.error( processTokens[ 'Message' ] )

    # NOTE(review): _notifyOfTokens is not defined in the visible part of this
    # class - confirm it exists where this class is used.
    notificationResult = self._notifyOfTokens()
    if not notificationResult[ 'OK' ]:
      self.log.error( notificationResult[ 'Message' ] )

    return S_OK()

  ## Protected methods #########################################################

  def _getInterestingTokens( self, element ):
    '''
      Given an element, picks all the entries with TokenExpiration < now + X<hours>
      If the TokenOwner is not the rssToken ( rs_svc ), it is selected.

      :param element: name of the table family ( Site, Resource or Node )
      :return: S_OK( list of dicts, one per selected row, keyed by column
               name ) or the failing query result
    '''

    tokenExpLimit = datetime.utcnow() + timedelta( hours = self.notifyHours )

    tokenElements = self.rsClient.selectStatusElement( element, 'Status',
                                                       meta = { 'older' : ( 'TokenExpiration', tokenExpLimit ) } )

    if not tokenElements[ 'OK' ]:
      return tokenElements

    tokenColumns = tokenElements[ 'Columns' ]

    interestingTokens = []

    for row in tokenElements[ 'Value' ]:

      # Rows come back as plain sequences; pair them with the column names
      tokenElement = dict( zip( tokenColumns, row ) )

      if tokenElement[ 'TokenOwner' ] != self.__rssToken:
        interestingTokens.append( tokenElement )

    return S_OK( interestingTokens )

  def _processTokens( self, element, tokenElements ):
    '''
      Given an element and a list of interesting token elements, updates the
      database if the token is expired, logs a message and adds the token to
      self.tokenDict, grouped by token owner.

      :param element: name of the table family ( Site, Resource or Node )
      :param tokenElements: list of dicts as built by _getInterestingTokens
      :return: S_OK, or S_ERROR on the first malformed entry / failed update
               ( the remaining entries are then not processed )
    '''

    never = datetime.max

    for tokenElement in tokenElements:

      try:
        name = tokenElement[ 'Name' ]
        statusType = tokenElement[ 'StatusType' ]
        status = tokenElement[ 'Status' ]
        tokenOwner = tokenElement[ 'TokenOwner' ]
        tokenExpiration = tokenElement[ 'TokenExpiration' ]
      except KeyError as e:
        # 'as' form: valid from Python 2.6 on and in Python 3, unlike the
        # comma form
        return S_ERROR( e )

      # If token has already expired
      if tokenExpiration < datetime.utcnow():
        _msg = '%s with statusType "%s" and owner %s EXPIRED'
        self.log.info( _msg % ( name, statusType, tokenOwner ) )

        # Hand the element back to the RSS token, with no expiration
        result = self.rsClient.addOrModifyStatusElement( element, 'Status', name = name,
                                                         statusType = statusType,
                                                         tokenOwner = self.__rssToken,
                                                         tokenExpiration = never )
        if not result[ 'OK' ]:
          return result

      else:
        _msg = '%s with statusType "%s" and owner %s -> %s'
        self.log.info( _msg % ( name, statusType, tokenOwner, tokenExpiration ) )

      # Expired and expiring tokens alike are recorded under their
      # ( original ) owner
      self.tokenDict.setdefault( tokenOwner, [] ).append(
          [ tokenOwner, element, name, statusType, status, tokenExpiration ] )

    return S_OK()
Exemplo n.º 7
0
class SummarizeLogsAgent(AgentModule):
  """ SummarizeLogsAgent as extension of AgentModule.

  Moves the entries of the <element>Log tables to the <element>History tables,
  keeping only the entries where the Status or the TokenOwner changes.
  """

  def __init__(self, *args, **kwargs):
    """ Constructor.

    The client is created later, in initialize().
    """

    AgentModule.__init__(self, *args, **kwargs)

    self.rsClient = None

  def initialize(self):
    """ Standard initialize.

    :return: S_OK

    """

    self.rsClient = ResourceStatusClient()
    return S_OK()

  def execute(self):
    """ execute ( main method )

    The execute method runs over the three families of tables ( Site, Resource and
    Node ) performing identical operations. First, selects all logs for a given
    family ( and keeps track of which one is the last row ID ). It summarizes the
    logs and finally, deletes the logs from the database.

    Failures are logged and never propagated.

    :return: S_OK

    """

    # loop over the tables
    for element in ('Site', 'Resource', 'Node'):

      self.log.info('Summarizing %s' % element)

      # get all logs to be summarized
      selectLogElements = self._summarizeLogs(element)
      if not selectLogElements['OK']:
        self.log.error(selectLogElements['Message'])
        continue

      lastID, logElements = selectLogElements['Value']

      # logElements is a dictionary of key-value pairs as follows:
      # ( name, statusType ) : list( logs )
      # items() rather than iteritems(): available on Python 2 and 3 alike.
      for key, logs in logElements.items():

        sumResult = self._registerLogs(element, key, logs)
        if not sumResult['OK']:
          self.log.error(sumResult['Message'])

      # NOTE(review): the deletion below runs even when some _registerLogs call
      # above failed, so the logs of that key are not kept for a retry.
      if lastID is not None:
        self.log.info('Deleting %sLog till ID %s' % (element, lastID))
        deleteResult = self.rsClient.deleteStatusElement(element, 'Log',
                                                         meta={'older': ('ID', lastID)})
        if not deleteResult['OK']:
          self.log.error(deleteResult['Message'])

    return S_OK()

  def _summarizeLogs(self, element):
    """ given an element, selects all logs in table <element>Log.

    Consecutive logs of the same ( name, statusType ) that change neither the
    Status nor the TokenOwner are discarded.

    :Parameters:
      **element** - `string`
        name of the table family ( either Site, Resource or Node )

    :return: S_OK( ( lastID, dictOfLogs ) ) / S_ERROR, where lastID is the ID
      of the last selected row ( None if there are no rows ) and dictOfLogs
      maps ( name, statusType ) to the list of kept log dictionaries

    """

    selectResults = self.rsClient.selectStatusElement(element, 'Log')

    if not selectResults['OK']:
      return selectResults

    selectedItems = {}

    rows = selectResults['Value']
    if not rows:
      return S_OK((None, selectedItems))

    selectColumns = selectResults['Columns']

    # The ID of the last returned row is taken as the deletion boundary
    latestID = dict(zip(selectColumns, rows[-1]))['ID']

    for row in rows:

      elementDict = dict(zip(selectColumns, row))

      key = (elementDict['Name'], elementDict['StatusType'])

      if key not in selectedItems:
        selectedItems[key] = [elementDict]
        continue

      previous = selectedItems[key][-1]

      # If there are no changes on the Status or the TokenOwner with respect
      # the previous one, discards the log.
      if previous['Status'] != elementDict['Status'] or previous['TokenOwner'] != elementDict['TokenOwner']:
        selectedItems[key].append(elementDict)

    return S_OK((latestID, selectedItems))

  def _registerLogs(self, element, key, logs):
    """ Given an element, a key - which is a tuple ( <name>, <statusType> )
    and a list of dictionaries, this method inserts them on the <element>History
    table. Before inserting them, checks whether the first one is or is not on
    the <element>History table. If it is, it is not inserted.

    When the <element>History table has no entry yet for the key, every log is
    inserted.

    :Parameters:
      **element** - `string`
        name of the table family ( either Site, Resource and Node )
      **key** - `tuple`
        tuple with the name of the element and the statusType
      **logs** - `list`
        list of dictionaries containing the logs ( the first entry may be
        removed from this list in place )

     :return: S_OK / S_ERROR

    """

    if not logs:
      return S_OK()

    # Undo key
    name, statusType = key

    selectedRes = self.rsClient.selectStatusElement(element, 'History', name,
                                                    statusType,
                                                    meta={'columns': ['Status', 'TokenOwner'],
                                                          'limit': 1,
                                                          'order': ('DateEffective', 'desc')})

    if not selectedRes['OK']:
      return selectedRes
    history = selectedRes['Value']

    # With an empty history nothing can be a duplicate, so all the logs are
    # inserted. Returning here instead would drop them for good, because the
    # caller deletes them from the Log table afterwards.
    if history:
      # We want from the <element>History table the last Status, and TokenOwner
      lastStatus, lastToken = None, None
      try:
        lastStatus = history[0][0]
        lastToken = history[0][1]
      except IndexError:
        pass

      # If the first of the selected items has a different status than the latest
      # on the history, we keep it, otherwise we remove it.
      if logs[0]['Status'] == lastStatus and logs[0]['TokenOwner'] == lastToken:
        logs.pop(0)

    if logs:
      self.log.info('%s ( %s ):' % (name, statusType))
      self.log.debug(logs)

    for selectedItemDict in logs:

      res = self.__logToHistoryTable(element, selectedItemDict)
      if not res['OK']:
        return res

    return S_OK()

  def __logToHistoryTable(self, element, elementDict):
    """ Given an element and a dictionary with all the arguments, this method
    inserts a new entry on the <element>History table

    :Parameters:
      **element** - `string`
        name of the table family ( either Site, Resource and Node )
      **elementDict** - `dict`
        dictionary returned from the DB to be inserted on the History table;
        missing keys are passed on as None

    :return: S_OK / S_ERROR

    """

    name = elementDict.get('Name')
    statusType = elementDict.get('StatusType')
    status = elementDict.get('Status')
    elementType = elementDict.get('ElementType')
    reason = elementDict.get('Reason')
    dateEffective = elementDict.get('DateEffective')
    lastCheckTime = elementDict.get('LastCheckTime')
    tokenOwner = elementDict.get('TokenOwner')
    tokenExpiration = elementDict.get('TokenExpiration')

    self.log.info('  %s %s %s %s' % (status, dateEffective, tokenOwner, reason))

    return self.rsClient.insertStatusElement(element, 'History', name, statusType,
                                             status, elementType, reason,
                                             dateEffective, lastCheckTime,
                                             tokenOwner, tokenExpiration)
Exemplo n.º 8
0
class TokenAgent(AgentModule):
    """
    TokenAgent is in charge of checking tokens assigned on resources.
    Notifications are sent to those users owning expiring tokens.
    """

    # Default number of hours ahead of expiration at which a token is picked up
    __notifyHours = 12

    # Token owner used by RSS itself: entries owned by it are skipped, and
    # expired tokens are handed back to it
    __rssToken = "rs_svc"

    # Default admin mail
    __adminMail = None

    def __init__(self, *args, **kwargs):
        """c'tor: sets the defaults; the clients are created in initialize()."""

        AgentModule.__init__(self, *args, **kwargs)

        self.notifyHours = self.__notifyHours
        self.adminMail = self.__adminMail

        self.rsClient = None
        self.tokenDict = None
        self.diracAdmin = None

    def initialize(self):
        """TokenAgent initialization.

        Reads the ``notifyHours`` and ``adminMail`` options and creates the
        clients.

        :return: S_OK
        """

        self.notifyHours = self.am_getOption("notifyHours", self.notifyHours)
        self.adminMail = self.am_getOption("adminMail", self.adminMail)

        self.rsClient = ResourceStatusClient()
        self.diracAdmin = DiracAdmin()

        return S_OK()

    def execute(self):
        """
        Looks for user tokens. If they are expired, or expiring, it notifies users.

        Failures are logged and never propagated: the method always returns S_OK.
        """

        # Initialized here, as it is needed empty at the beginning of the execution
        self.tokenDict = {}

        # FIXME: probably this can be obtained from RssConfiguration instead
        elements = ("Site", "Resource", "Node")

        for element in elements:

            self.log.info("Processing %s" % element)

            interestingTokens = self._getInterestingTokens(element)
            if not interestingTokens["OK"]:
                self.log.error(interestingTokens["Message"])
                continue

            processTokens = self._processTokens(element, interestingTokens["Value"])
            if not processTokens["OK"]:
                self.log.error(processTokens["Message"])

        # NOTE(review): _notifyOfTokens is not defined in the visible part of
        # this class - confirm it exists where this class is used.
        notificationResult = self._notifyOfTokens()
        if not notificationResult["OK"]:
            self.log.error(notificationResult["Message"])

        return S_OK()

    ## Protected methods #########################################################

    def _getInterestingTokens(self, element):
        """
        Given an element, picks all the entries with TokenExpiration < now + X<hours>
        If the TokenOwner is not the rssToken ( rs_svc ), it is selected.

        :param element: name of the table family ( Site, Resource or Node )
        :return: S_OK( list of dicts, one per selected row, keyed by column
                 name ) or the failing query result
        """

        tokenExpLimit = datetime.utcnow() + timedelta(hours=self.notifyHours)

        tokenElements = self.rsClient.selectStatusElement(
            element, "Status", meta={"older": ("TokenExpiration", tokenExpLimit)}
        )

        if not tokenElements["OK"]:
            return tokenElements

        tokenColumns = tokenElements["Columns"]

        interestingTokens = []

        for row in tokenElements["Value"]:

            # Rows come back as plain sequences; pair them with the column names
            tokenElement = dict(zip(tokenColumns, row))

            if tokenElement["TokenOwner"] != self.__rssToken:
                interestingTokens.append(tokenElement)

        return S_OK(interestingTokens)

    def _processTokens(self, element, tokenElements):
        """
        Given an element and a list of interesting token elements, updates the
        database if the token is expired, logs a message and adds the token to
        self.tokenDict, grouped by token owner.

        :param element: name of the table family ( Site, Resource or Node )
        :param tokenElements: list of dicts as built by _getInterestingTokens
        :return: S_OK, or S_ERROR on the first malformed entry / failed update
                 ( the remaining entries are then not processed )
        """

        never = datetime.max

        for tokenElement in tokenElements:

            try:
                name = tokenElement["Name"]
                statusType = tokenElement["StatusType"]
                status = tokenElement["Status"]
                tokenOwner = tokenElement["TokenOwner"]
                tokenExpiration = tokenElement["TokenExpiration"]
            except KeyError as e:
                # 'as' form: valid from Python 2.6 on and in Python 3, unlike
                # the comma form
                return S_ERROR(e)

            # If token has already expired
            if tokenExpiration < datetime.utcnow():
                _msg = '%s with statusType "%s" and owner %s EXPIRED'
                self.log.info(_msg % (name, statusType, tokenOwner))

                # Hand the element back to the RSS token, with no expiration
                result = self.rsClient.addOrModifyStatusElement(
                    element,
                    "Status",
                    name=name,
                    statusType=statusType,
                    tokenOwner=self.__rssToken,
                    tokenExpiration=never,
                )
                if not result["OK"]:
                    return result

            else:
                _msg = '%s with statusType "%s" and owner %s -> %s'
                self.log.info(_msg % (name, statusType, tokenOwner, tokenExpiration))

            # Expired and expiring tokens alike are recorded under their
            # ( original ) owner
            self.tokenDict.setdefault(tokenOwner, []).append(
                [tokenOwner, element, name, statusType, status, tokenExpiration]
            )

        return S_OK()
Exemplo n.º 9
0
class SummarizeLogsAgent(AgentModule):
    """ SummarizeLogsAgent as extension of AgentModule.

    Moves the entries of the <element>Log tables to the <element>History
    tables, keeping only the entries where the Status or the TokenOwner
    changes.
    """

    def __init__(self, *args, **kwargs):
        """ Constructor.

        The client is created later, in initialize().
        """

        AgentModule.__init__(self, *args, **kwargs)

        self.rsClient = None

    def initialize(self):
        """ Standard initialize.

        :return: S_OK

        """

        self.rsClient = ResourceStatusClient()
        return S_OK()

    def execute(self):
        """ execute ( main method )

        The execute method runs over the three families of tables ( Site,
        Resource and Node ) performing identical operations. First, selects all
        logs for a given family ( and keeps track of which one is the last row
        ID ). It summarizes the logs and finally, deletes the logs from the
        database.

        Failures are logged and never propagated.

        :return: S_OK

        """

        # loop over the tables
        for element in ('Site', 'Resource', 'Node'):

            self.log.info('Summarizing %s' % element)

            # get all logs to be summarized
            selectLogElements = self._summarizeLogs(element)
            if not selectLogElements['OK']:
                self.log.error(selectLogElements['Message'])
                continue

            lastID, logElements = selectLogElements['Value']

            # logElements is a dictionary of key-value pairs as follows:
            # ( name, statusType ) : list( logs )
            # items() rather than iteritems(): available on Python 2 and 3.
            for key, logs in logElements.items():

                sumResult = self._registerLogs(element, key, logs)
                if not sumResult['OK']:
                    self.log.error(sumResult['Message'])

            # NOTE(review): the deletion below runs even when some
            # _registerLogs call above failed, so the logs of that key are not
            # kept for a retry.
            if lastID is not None:
                self.log.info('Deleting %sLog till ID %s' % (element, lastID))
                deleteResult = self.rsClient.deleteStatusElement(
                    element, 'Log', meta={'older': ['ID', lastID]})
                if not deleteResult['OK']:
                    self.log.error(deleteResult['Message'])

        return S_OK()

    def _summarizeLogs(self, element):
        """ given an element, selects all logs in table <element>Log.

        Consecutive logs of the same ( name, statusType ) that change neither
        the Status nor the TokenOwner are discarded.

        :Parameters:
          **element** - `string`
            name of the table family ( either Site, Resource or Node )

        :return: S_OK( ( lastID, dictOfLogs ) ) / S_ERROR, where lastID is the
          ID of the last selected row ( None if there are no rows ) and
          dictOfLogs maps ( name, statusType ) to the list of kept log
          dictionaries

        """

        selectResults = self.rsClient.selectStatusElement(element, 'Log')

        if not selectResults['OK']:
            return selectResults

        selectedItems = {}

        rows = selectResults['Value']
        if not rows:
            return S_OK((None, selectedItems))

        selectColumns = selectResults['Columns']

        # The ID of the last returned row is taken as the deletion boundary
        latestID = dict(zip(selectColumns, rows[-1]))['ID']

        for row in rows:

            elementDict = dict(zip(selectColumns, row))

            key = (elementDict['Name'], elementDict['StatusType'])

            if key not in selectedItems:
                selectedItems[key] = [elementDict]
                continue

            previous = selectedItems[key][-1]

            # If there are no changes on the Status or the TokenOwner with
            # respect the previous one, discards the log.
            if (previous['Status'] != elementDict['Status']
                    or previous['TokenOwner'] != elementDict['TokenOwner']):
                selectedItems[key].append(elementDict)

        return S_OK((latestID, selectedItems))

    def _registerLogs(self, element, key, logs):
        """ Given an element, a key - which is a tuple ( <name>, <statusType> )
        and a list of dictionaries, this method inserts them on the
        <element>History table. Before inserting them, checks whether the first
        one is or is not on the <element>History table. If it is, it is not
        inserted.

        When the <element>History table has no entry yet for the key, every log
        is inserted.

        :Parameters:
          **element** - `string`
            name of the table family ( either Site, Resource and Node )
          **key** - `tuple`
            tuple with the name of the element and the statusType
          **logs** - `list`
            list of dictionaries containing the logs ( the first entry may be
            removed from this list in place )

        :return: S_OK / S_ERROR

        """

        if not logs:
            return S_OK()

        # Undo key
        name, statusType = key

        selectedRes = self.rsClient.selectStatusElement(
            element,
            'History',
            name,
            statusType,
            meta={
                'columns': ['Status', 'TokenOwner'],
                'limit': 1,
                'order': ['DateEffective', 'desc']
            })

        if not selectedRes['OK']:
            return selectedRes
        history = selectedRes['Value']

        # With an empty history nothing can be a duplicate, so ALL the logs are
        # inserted by the loop at the bottom. The previous dedicated loop for
        # this case returned from inside its body, inserting only the first
        # log.
        if history:
            # We want from the <element>History table the last Status, and
            # TokenOwner
            lastStatus, lastToken = None, None
            try:
                lastStatus = history[0][0]
                lastToken = history[0][1]
            except IndexError:
                pass

            # If the first of the selected items has a different status than
            # the latest on the history, we keep it, otherwise we remove it.
            if (logs[0]['Status'] == lastStatus
                    and logs[0]['TokenOwner'] == lastToken):
                logs.pop(0)

        if logs:
            self.log.info('%s ( %s ):' % (name, statusType))
            self.log.debug(logs)

        for selectedItemDict in logs:

            res = self.__logToHistoryTable(element, selectedItemDict)
            if not res['OK']:
                return res

        return S_OK()

    def __logToHistoryTable(self, element, elementDict):
        """ Given an element and a dictionary with all the arguments, this
        method inserts a new entry on the <element>History table

        :Parameters:
          **element** - `string`
            name of the table family ( either Site, Resource and Node )
          **elementDict** - `dict`
            dictionary returned from the DB to be inserted on the History
            table; missing keys are passed on as None

        :return: S_OK / S_ERROR

        """

        name = elementDict.get('Name')
        statusType = elementDict.get('StatusType')
        # FIXME: the 'VO' entry of elementDict is not forwarded - not sure
        # whether it should be
        status = elementDict.get('Status')
        elementType = elementDict.get('ElementType')
        reason = elementDict.get('Reason')
        dateEffective = elementDict.get('DateEffective')
        lastCheckTime = elementDict.get('LastCheckTime')
        tokenOwner = elementDict.get('TokenOwner')
        tokenExpiration = elementDict.get('TokenExpiration')

        self.log.info('  %s %s %s %s' %
                      (status, dateEffective, tokenOwner, reason))

        return self.rsClient.insertStatusElement(
            element=element,
            tableType='History',
            name=name,
            statusType=statusType,
            status=status,
            elementType=elementType,
            reason=reason,
            dateEffective=dateEffective,
            lastCheckTime=lastCheckTime,
            tokenOwner=tokenOwner,
            tokenExpiration=tokenExpiration)
Exemplo n.º 10
0
class ResourceStatus( object ):
  """
  ResourceStatus helper that connects to CS if RSS flag is not Active. It keeps
  the connection to the db / server as an object member, to avoid creating a new
  one massively.
  """

  __metaclass__ = DIRACSingleton

  def __init__( self, rssFlag = None ):
    """
    Constructor: creates the logger, the configuration helpers, the RSS client
    and the RSS cache.

    :param rssFlag: when not None it is stored as-is and decides between RSS
                    ( truthy ) and CS ( falsy ) in get/setElementStatus; when
                    None the mode is read from the RSS configuration.
    """
    self.log = gLogger.getSubLogger( self.__class__.__name__ )
    self.rssConfig = RssConfiguration()
    self.__opHelper = Operations()
    self.rssClient = ResourceStatusClient()

    # __getMode reads self.rssConfig and may reset self.rssClient, so it can
    # only be called once both attributes exist.
    self.rssFlag = self.__getMode() if rssFlag is None else rssFlag

    # The cache lifetime comes from the CS, so that it can be tuned.
    # RSSCache only affects the calls directed to RSS, if using the CS it is not used.
    self.rssCache = RSSCache( int( self.rssConfig.getConfigCache() ), self.__updateRssCache )

  def getElementStatus( self, elementName, elementType, statusType = None, default = None ):
    """
    Helper function, tries to get information from the RSS for the given
    Element, otherwise, it gets it from the CS.

    :param elementName: name of the element
    :type elementName: str
    :param elementType: type of the element (StorageElement, ComputingElement, FTS, Catalog)
    :type elementType: str
    :param statusType: type of the status (meaningful only when elementType==StorageElement).
                       When empty, all four access types are used for a StorageElement
                       and ['all'] for every other element type.
    :type statusType: None, str, list
    :param default: default value (only forwarded to the CS lookup, i.e. when RSS is inactive)
    :type default: str
    :return: S_OK/S_ERROR
    :rtype: dict

    :Example:
    >>> getElementStatus('CE42', 'ComputingElement')
        S_OK( { 'CE42': { 'all': 'Active' } } } )
    >>> getElementStatus('SE1', 'StorageElement', 'ReadAccess')
        S_OK( { 'SE1': { 'ReadAccess': 'Banned' } } } )
    >>> getElementStatus('SE1', 'ThisIsAWrongElementType', 'ReadAccess')
        S_ERROR( xyz.. )
    >>> getElementStatus('ThisIsAWrongName', 'StorageElement', 'WriteAccess')
        S_ERROR( xyz.. )
    >>> getElementStatus('A_file_catalog', 'Catalog')
        S_OK( { 'A_file_catalog': { 'all': 'Active' } } } )
    >>> getElementStatus('SE1', 'StorageElement', ['ReadAccess', 'WriteAccess'])
        S_OK( { 'SE1': { 'ReadAccess': 'Banned' , 'WriteAccess': 'Active'} } } )
    >>> getElementStatus('SE1', 'StorageElement')
        S_OK( { 'SE1': { 'ReadAccess': 'Probing' ,
                         'WriteAccess': 'Active',
                         'CheckAccess': 'Degraded',
                         'RemoveAccess': 'Banned'} } } )
    """

    allowedParameters = ["StorageElement", "ComputingElement", "FTS", "Catalog"]

    if elementType not in allowedParameters:
      return S_ERROR("%s is not in the list of the allowed parameters: %s" % (elementType, allowedParameters))

    # Apply defaults: only StorageElements have several status types, every
    # other allowed element type has the single status type 'all'.
    if not statusType:
      if elementType == "StorageElement":
        statusType = ['ReadAccess', 'WriteAccess', 'CheckAccess', 'RemoveAccess']
      else:
        statusType = ['all']

    if self.rssFlag:
      return self.__getRSSElementStatus( elementName, elementType, statusType )
    return self.__getCSElementStatus( elementName, elementType, statusType, default )

  def setElementStatus( self, elementName, elementType, statusType, status, reason = None, tokenOwner = None ):
    """ Sets the status of an element, on the RSS when the RSS flag is set and
    on the CS otherwise.

    :param elementName: name of the element
    :type elementName: str
    :param elementType: type of the element (StorageElement, ComputingElement, FTS, Catalog)
    :type elementType: str
    :param statusType: type of the status (meaningful only when elementType==StorageElement)
    :type statusType: str
    :param status: status to be set
    :type status: str
    :param reason: reason for setting the status (only forwarded to the RSS)
    :type reason: str
    :param tokenOwner: owner of the token (only forwarded to the RSS)
    :type tokenOwner: str
    :return: S_OK/S_ERROR
    :rtype: dict

    :Example:
    >>> setElementStatus('CE42', 'ComputingElement', 'all', 'Active')
        S_OK(  xyz.. )
    >>> setElementStatus('SE1', 'StorageElement', 'ReadAccess', 'Banned')
        S_OK(  xyz.. )
    """

    # CS mode: reason and tokenOwner have no counterpart there and are dropped.
    if not self.rssFlag:
      return self.__setCSElementStatus( elementName, elementType, statusType, status )

    return self.__setRSSElementStatus( elementName, elementType, statusType, status, reason, tokenOwner )

################################################################################

  def __updateRssCache( self ):
    """ Method used to update the rssCache.

        It will try 5 times to contact the RSS before giving up. Between two
        trials it waits ( trial number )^2 seconds and, after every failed
        trial, replaces the client with a new one.

        :return: S_OK( cache dictionary built from the selected rows ) or the
                 S_ERROR returned by the last failed trial
    """

    meta = { 'columns' : [ 'Name', 'ElementType', 'StatusType', 'Status' ] }
    maxTrials = 5

    for ti in range( maxTrials ):
      rawCache = self.rssClient.selectStatusElement( 'Resource', 'Status', meta = meta )
      if rawCache['OK']:
        break
      self.log.warn( "Can't get resource's status", rawCache['Message'] + "; trial %d" % ti )
      # No point in waiting after the last trial: we are giving up anyway.
      if ti < maxTrials - 1:
        sleep( math.pow( ti, 2 ) )
      # Start the next trial ( or the next cache refresh ) with a fresh client.
      self.rssClient = ResourceStatusClient()

    if not rawCache[ 'OK' ]:
      return rawCache
    return S_OK( getCacheDictFromRawData( rawCache[ 'Value' ] ) )

################################################################################

  def __getRSSElementStatus( self, elementName, elementType, statusType ):
    """ Looks up the status of the element(s) in the RSS cache and returns
        whatever the cache lookup returns; the result is also written to the
        debug log.

        As documented by the original authors, the cache is a copy of the DB
        table, so an item missing from the cache is most likely missing from
        the DB too. The exception is an element just added to the CS: until
        the cache is refreshed ( <cacheLifeTime> ) it will not be found.

    :param elementName: name of the element
    :type elementName: str
    :param elementType: type of the element (StorageElement, ComputingElement, FTS, Catalog)
    :type elementType: str
    :param statusType: type of the status (meaningful only when elementType==StorageElement,
                       otherwise it is 'all' or ['all'])
    :type statusType: str, list
    """

    matched = self.rssCache.match( elementName, elementType, statusType )

    for debugEntry in ( '__getRSSElementStatus', matched ):
      self.log.debug( debugEntry )

    return matched

  def __getCSElementStatus( self, elementName, elementType, statusType, default ):
    """ Gets from the CS the Element status

    :param elementName: name of the element, or list of names
    :type elementName: str, list
    :param elementType: type of the element (StorageElement, ComputingElement, FTS, Catalog)
    :type elementType: str
    :param statusType: type of the status (meaningful only when elementType==StorageElement;
                       for a Catalog it is replaced by ['Status'])
    :type statusType: str, list
    :param default: default value, only used when nothing could be looked up
                    ( i.e. no element name or no status type was given )
    :type default: None, str
    :return: S_OK( { elementName : { statusType : status } } ) / S_ERROR
    """

    # Normalise first, so that every branch below accepts a single name as well as a list of names
    if not isinstance( elementName, list ):
      elementName = [ elementName ]

    # DIRAC doesn't store the status of ComputingElements nor FTS in the CS, so here we can just return 'Active'
    if elementType in ('ComputingElement', 'FTS'):
      return S_OK( dict( ( element, { 'all': 'Active' } ) for element in elementName ) )

    # If we are here it is because elementType is either 'StorageElement' or 'Catalog'
    if elementType == 'StorageElement':
      cs_path = "/Resources/StorageElements"
    elif elementType == 'Catalog':
      cs_path = "/Resources/FileCatalogs"
      statusType = ['Status']

    if not isinstance( statusType, list ):
      statusType = [ statusType ]

    result = {}
    for element in elementName:

      for sType in statusType:
        # Look in standard location, 'Active' by default
        res = gConfig.getValue( "%s/%s/%s" % ( cs_path, element, sType ), 'Active' )
        result.setdefault( element, {} )[sType] = res

    if result:
      return S_OK( result )

    if default is not None:
      defList = [ [ el, statusType, default ] for el in elementName ]
      return S_OK( getDictFromList( defList ) )

    _msg = "Element '%s', with statusType '%s' is unknown for CS."
    return S_ERROR( DErrno.ERESUNK, _msg % ( elementName, statusType ) )

  def __setRSSElementStatus( self, elementName, elementType, statusType, status, reason, tokenOwner ):
    """
    Sets on the RSS the status of the element and, on success, refreshes the
    cache. The token set together with the status expires one day from now ( UTC ).

    :return: the result of rssClient.addOrModifyStatusElement
    """

    expiration = datetime.utcnow() + timedelta( days = 1 )

    self.rssCache.acquireLock()
    try:
      res = self.rssClient.addOrModifyStatusElement( 'Resource', 'Status',
                                                     name = elementName,
                                                     elementType = elementType,
                                                     status = status,
                                                     statusType = statusType,
                                                     reason = reason,
                                                     tokenOwner = tokenOwner,
                                                     tokenExpiration = expiration )

      if res[ 'OK' ]:
        self.rssCache.refreshCache()
      else:
        gLogger.warn( 'RSS: %s' % ( 'Error updating Element (%s,%s,%s)' % ( elementName, statusType, status ) ) )

      return res

    finally:
      # The lock is released on every exit path, exceptions included.
      self.rssCache.releaseLock()

  def __setCSElementStatus( self, elementName, elementType, statusType, status ):
    """
    Sets on the CS the Elements status

    The option is written to <cs_path>/<elementName>/<statusType>, which is the
    location the CS lookup of this class ( __getCSElementStatus ) reads from.

    :param elementName: name of the element
    :param elementType: type of the element; nothing is done for ComputingElement and FTS
    :param statusType: type of the status, validated against the status types
                       configured for the element type ( for a Catalog the option
                       written is always 'Status' )
    :param status: value to be written
    :return: S_OK() when nothing has to be done, S_ERROR for an invalid
             statusType, otherwise the result of committing the change to the CS
    """

    # DIRAC doesn't store the status of ComputingElements nor FTS in the CS, so here we can just do nothing
    if elementType in ('ComputingElement', 'FTS'):
      return S_OK()

    # If we are here it is because elementType is either 'StorageElement' or 'Catalog'
    statuses = self.rssConfig.getConfigStatusType( elementType )
    if statusType not in statuses:
      gLogger.error( "%s is not a valid statusType" % statusType )
      return S_ERROR( "%s is not a valid statusType: %s" % ( statusType, statuses ) )

    if elementType == 'StorageElement':
      cs_path = "/Resources/StorageElements"
    elif elementType == 'Catalog':
      cs_path = "/Resources/FileCatalogs"
      #FIXME: This a probably outdated location (new one is in /Operations/[]/Services/Catalogs)
      # but needs to be VO-aware
      statusType = 'Status'

    csAPI = CSAPI()
    # Same layout as the one used when reading: no elementType level in the path,
    # otherwise the value written here would never be read back.
    csAPI.setOption( "%s/%s/%s" % ( cs_path, elementName, statusType ), status )

    res = csAPI.commitChanges()
    if not res[ 'OK' ]:
      gLogger.warn( 'CS: %s' % res[ 'Message' ] )

    return res

  def __getMode( self ):
    """
      Reads the state defined on the RSSConfiguration. If it is 'Active' we
      use RSS ( making sure there is a client ), if not, we use CS and the
      client is dropped.

      :return: True for RSS, False for CS
    """

    if self.rssConfig.getConfigState() != 'Active':
      # CS mode: no RSS client is kept
      self.rssClient = None
      return False

    if self.rssClient is None:
      self.rssClient = ResourceStatusClient()
    return True

  def isStorageElementAlwaysBanned( self, seName, statusType ):
    """ Checks if the AlwaysBanned policy is applied to the SE given as parameter

    :param seName : string, name of the SE
    :param statusType : ReadAccess, WriteAccess, RemoveAccess, CheckAccess

    :returns: S_OK(True/False), or the S_ERROR obtained while getting the policies
    """

    policies = getPoliciesThatApply( {'name' : seName, 'statusType' : statusType} )
    if policies['OK']:
      # Banned as soon as one of the applicable policies is of type 'AlwaysBanned'
      policyTypes = [policy['type'] for policy in policies['Value']]
      return S_OK( 'AlwaysBanned' in policyTypes )

    self.log.error( "isStorageElementAlwaysBanned: unable to get the information", policies['Message'] )
    return policies
Exemplo n.º 11
0
class PEP:
    """ PEP ( Policy Enforcement Point )
  """
    def __init__(self, clients=None):
        """ Constructor

        examples:
          >>> pep = PEP()
          >>> pep1 = PEP( { 'ResourceStatusClient' : ResourceStatusClient() } )
          >>> pep2 = PEP( { 'ResourceStatusClient' : ResourceStatusClient(), 'ClientY' : None } )

        :Parameters:
          **clients** - [ None, `dict` ]
            dictionary with clients to be used in the commands issued by the policies.
            If not defined, the commands will import them. It is a measure to avoid
            opening the same connection every time a policy is evaluated.

        """

        clients = {} if clients is None else clients

        # The PEP itself needs two clients: ResourceStatusClient and
        # ResourceManagementClient. An entry present in `clients` is used as it
        # is; a new client is only created when the key is missing.
        self.rsClient = (clients['ResourceStatusClient']
                         if 'ResourceStatusClient' in clients
                         else ResourceStatusClient())
        self.rmClient = (clients['ResourceManagementClient']
                         if 'ResourceManagementClient' in clients
                         else ResourceManagementClient())

        self.clients = clients
        # The PDP gets the very same dictionary, to be used on the Commands
        self.pdp = PDP(clients)

    def enforce(self, decisionParams):
        """ Passes decisionParams to the PDP, takes its decision and runs every
        action listed in the combined policy result.

        The PDP answer is a dictionary with three keys: the decision parameters
        it used ( `decissionParams` ), all the policies evaluated
        ( `singlePolicyResults` ) and the computed final result
        ( `policyCombinedResult` ).

        To know more about decisionParams, please read PDP.setup where the
        decisionParams are sanitized.

        examples:
           >>> pep.enforce( { 'element' : 'Site', 'name' : 'MySite' } )
           >>> pep.enforce( { 'element' : 'Resource', 'name' : 'myce.domain.ch' } )

        :Parameters:
          **decisionParams** - `dict`
            dictionary with the parameters that will be used to match policies.

        :return: S_OK( PDP answer ), or S_ERROR if the PDP fails or the element
                 has been modified in the meantime. Actions that cannot be
                 imported or that fail are logged, they do not make enforce fail.

        """

        # Setup PDP with new parameters dictionary
        self.pdp.setup(decisionParams)

        # Run policies, get decision, get actions to apply
        pdpAnswer = self.pdp.takeDecision()
        if not pdpAnswer['OK']:
            gLogger.error(
                'PEP: Something went wrong, not enforcing policies for %s' %
                decisionParams)
            return pdpAnswer
        decisions = pdpAnswer['Value']

        # From here on we work with the decision parameters as used by the PDP
        decisionParams = decisions['decissionParams']
        combinedResult = decisions['policyCombinedResult']
        singleResults = decisions['singlePolicyResults']

        # The policies have run and the actions are about to be executed:
        # last check that nobody changed the element in the meantime.
        freshness = self.__isNotUpdated(decisionParams)
        if not freshness['OK']:
            return freshness

        actionsPath = 'DIRAC.ResourceStatusSystem.PolicySystem.Actions.%s'

        for actionName, actionType in combinedResult['PolicyAction']:

            # The module and the class inside it share the name of the action type
            try:
                actionModule = Utils.voimport(actionsPath % actionType)
            except ImportError:
                gLogger.error('Error importing %s action' % actionType)
                continue

            try:
                actionClass = getattr(actionModule, actionType)
            except AttributeError:
                gLogger.error('Error importing %s action class' % actionType)
                continue

            actionObj = actionClass(actionName, decisionParams, combinedResult,
                                    singleResults, self.clients)

            gLogger.debug((actionName, actionType))

            outcome = actionObj.run()
            if not outcome['OK']:
                gLogger.error(outcome['Message'])

        return S_OK(decisions)

    def __isNotUpdated(self, decisionParams):
        """ Checks that the element still exists exactly as it was passed to the PEP.

        While the PEP is processing an element, a user may update its status
        through the web interface or the CLI; the PEP would then overwrite
        whatever the user had set. This check is not perfect, as a user action
        can still happen while the actions are being executed, so it has to be
        re-evaluated if an action ever takes long to run.

        :Parameters:
          **decisionParams** - `dict`
            dictionary with the parameters that will be used to match policies;
            it must contain the keys `element` and `active`

        :return: S_OK / S_ERROR

        """

        # Work on a copy, without the keys that cannot be passed as kwargs
        selectParams = decisionParams.copy()
        for notAKwarg in ('element', 'active'):
            selectParams.pop(notAKwarg)

        # All remaining parameters are used as selection criteria
        currentRow = self.rsClient.selectStatusElement(
            decisionParams['element'], 'Status', **selectParams)
        if not currentRow['OK']:
            return currentRow

        # We expect an exact match: a non empty selection means nothing changed
        if currentRow['Value']:
            return S_OK()

        msg = '%(name)s  ( %(status)s / %(statusType)s ) has been updated after PEP started running'
        return S_ERROR(msg % selectParams)


#...............................................................................
#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF
Exemplo n.º 12
0
class ElementInspectorAgent( AgentModule ):
  '''
    The ElementInspector agent is a generic agent used to check the elements
    of one of the elementTypes ( e.g. Site, Resource, Node ).

    This Agent takes care of the Elements. In order to do so, it gathers
    the eligible ones and then evaluates their statuses with the PEP.
  '''

  # Max number of worker threads by default
  __maxNumberOfThreads = 5
  # ElementType, to be defined among Site, Resource or Node
  __elementType = None
  # Inspection freqs, defaults, the lower, the higher priority to be checked.
  # Error state usually means there is a glitch somewhere, so it has the highest
  # priority.
  __checkingFreqs = { 'Default' : 
                       { 
                         'Active' : 60, 'Degraded' : 30,  'Probing' : 30, 
                         'Banned' : 30, 'Unknown'  : 15,  'Error'   : 15 
                         } 
                     }
  # queue size limit to stop feeding
  __limitQueueFeeder = 15
  
  def __init__( self, *args, **kwargs ):
    ''' c'tor: initializes the base agent and sets every member to its
        class-level default; the real objects are created in initialize.
    '''

    AgentModule.__init__( self, *args, **kwargs )

    # Objects created later on, in initialize()
    self.elementsToBeChecked = None
    self.threadPool          = None
    self.rsClient            = None
    self.clients             = {}

    # Tunable values, starting from the class defaults
    self.limitQueueFeeder   = self.__limitQueueFeeder
    self.checkingFreqs      = self.__checkingFreqs
    self.elementType        = self.__elementType
    self.maxNumberOfThreads = self.__maxNumberOfThreads

  def initialize( self ):
    ''' Standard initialize: reads the agent options, then creates the queue of
        elements, the thread pool and the clients handed over to the PEP.
    '''

    # Every option defaults to the value currently held by the attribute of the same name
    for optionName in ( 'maxNumberOfThreads', 'elementType', 'checkingFreqs', 'limitQueueFeeder' ):
      setattr( self, optionName, self.am_getOption( optionName, getattr( self, optionName ) ) )

    self.elementsToBeChecked = Queue.Queue()
    # Minimum and maximum size of the pool are the same
    self.threadPool = ThreadPool( self.maxNumberOfThreads, self.maxNumberOfThreads )

    self.rsClient = ResourceStatusClient()
    self.clients[ 'ResourceStatusClient' ] = self.rsClient
    self.clients[ 'ResourceManagementClient' ] = ResourceManagementClient()

    return S_OK()
  
  def execute( self ):
    ''' One agent cycle: selects the elements whose last check is older than
        the checking frequency of their status, puts them on the queue and
        starts the threads needed to process them.

        :return: S_OK, or the S_ERROR obtained while selecting the elements
    '''

    # If there are more than limitQueueFeeder elements still waiting on the
    # queue we do not add anything in this cycle; the threads keep running and
    # processing items from the queue on background.

    qsize = self.elementsToBeChecked.qsize()
    if qsize > self.limitQueueFeeder:
      self.log.warn( 'Queue not empty ( %s > %s ), skipping feeding loop' % ( qsize, self.limitQueueFeeder ) )
      return S_OK()

    # We get all the elements, then we filter.
    elements = self.rsClient.selectStatusElement( self.elementType, 'Status' )
    if not elements[ 'OK' ]:
      self.log.error( elements[ 'Message' ] )
      return elements

    utcnow = datetime.datetime.utcnow().replace( microsecond = 0 )

    # filter elements by Type
    for element in elements[ 'Value' ]:

      # Maybe an overkill, but this way I have NEVER again to worry about order
      # of elements returned by mySQL on tuples
      elemDict = dict( zip( elements[ 'Columns' ], element ) )

      # We skip the elements with token different than "rs_svc"
      if elemDict[ 'TokenOwner' ] != 'rs_svc':
        self.log.info( 'Skipping %s ( %s ) with token %s' % ( elemDict[ 'Name' ],
                                                              elemDict[ 'StatusType' ],
                                                              elemDict[ 'TokenOwner' ]
                                                             ))
        continue

      # Element types without their own frequencies use the 'Default' ones
      if elemDict[ 'ElementType' ] not in self.checkingFreqs:
        timeToNextCheck = self.checkingFreqs[ 'Default' ][ elemDict[ 'Status' ] ]
      else:
        timeToNextCheck = self.checkingFreqs[ elemDict[ 'ElementType' ] ][ elemDict[ 'Status' ] ]

      if utcnow - datetime.timedelta( minutes = timeToNextCheck ) > elemDict[ 'LastCheckTime' ]:

        # We are not checking if the item is already on the queue or not. It may
        # be there, but in any case, it is not a big problem.

        # Same dictionary, with the first letter of every key in lower case,
        # plus the 'element' key.
        lowerElementDict = { 'element' : self.elementType }
        for key, value in elemDict.items():
          lowerElementDict[ key[0].lower() + key[1:] ] = value

        # We add lowerElementDict to the queue
        self.elementsToBeChecked.put( lowerElementDict )
        self.log.verbose( '%s # "%s" # "%s" # %s # %s' % ( elemDict[ 'Name' ],
                                                           elemDict[ 'ElementType' ],
                                                           elemDict[ 'StatusType' ],
                                                           elemDict[ 'Status' ],
                                                           elemDict[ 'LastCheckTime' ]) )

    # Measure size of the queue, more or less, to know how many threads should
    # we start !
    queueSize      = self.elementsToBeChecked.qsize()
    # One thread per ( started ) batch of 30 elements; 30 could have been other
    # number.. but it works reasonably well. Explicit floor division, so that
    # the result is an integer whatever the division mode in use.
    threadsToStart = max( min( self.maxNumberOfThreads, ( queueSize // 30 ) + 1 ), 1 )
    threadsRunning = self.threadPool.numWorkingThreads()

    self.log.info( 'Needed %d threads to process %d elements' % ( threadsToStart, queueSize ) )
    if threadsRunning:
      self.log.info( 'Already %d threads running' % threadsRunning )
      threadsToStart = max( 0, threadsToStart - threadsRunning )
      self.log.info( 'Starting %d threads to process %d elements' % ( threadsToStart, queueSize ) )

    # It may happen that we start two threads, 0 and 1. 1 goes DOWN, but 0 keeps
    # running. In next loop we will start a new thread, and will be called 0
    # again. To have a mechanism to see which thread is where, we append the
    # cycle number before the threadId.
    cycle = self._AgentModule__moduleProperties[ 'cyclesDone' ]

    for _x in xrange( threadsToStart ):
      threadId = '%s_%s' % ( cycle, _x )
      jobUp = self.threadPool.generateJobAndQueueIt( self._execute, args = ( threadId, ) )
      if not jobUp[ 'OK' ]:
        self.log.error( jobUp[ 'Message' ] )

    return S_OK()

  def finalize( self ):
    ''' Waits until every element put on the queue has been reported as done.
    '''

    self.log.info( 'draining queue... blocking until empty' )

    # join() only returns once task_done() has been called for every queued item
    self.elementsToBeChecked.join()

    return S_OK()
        
## Private methods #############################################################        
        
  def _execute( self, threadNumber ):
    '''
      Method run by the thread pool. It enters a loop until there are no elements
      on the queue. On each iteration, it evaluates the policies for such element
      and enforces the necessary actions. If there are no more elements in the
      queue, the loop is finished.

      :param threadNumber: identifier of the thread, only used on the log messages
      :return: S_OK, once the queue is found empty
    '''

    tHeader = '%sJob%s' % ( '* '*30, threadNumber )

    self.log.info( '%s UP' % tHeader )

    pep = PEP( clients = self.clients )

    while True:

      try:
        element = self.elementsToBeChecked.get_nowait()
      except Queue.Empty:
        # Nothing left to do: this is the only way out of the loop
        self.log.info( '%s DOWN' % tHeader )
        return S_OK()

      try:

        self.log.info( '%s ( %s / %s ) being processed' % ( element[ 'name' ],
                                                            element[ 'status' ],
                                                            element[ 'statusType' ] ) )

        resEnforce = pep.enforce( element )
        if not resEnforce[ 'OK' ]:
          self.log.error( resEnforce[ 'Message' ] )
        else:
          resEnforce = resEnforce[ 'Value' ]

          oldStatus  = resEnforce[ 'decissionParams' ][ 'status' ]
          statusType = resEnforce[ 'decissionParams' ][ 'statusType' ]
          newStatus  = resEnforce[ 'policyCombinedResult' ][ 'Status' ]
          reason     = resEnforce[ 'policyCombinedResult' ][ 'Reason' ]

          if oldStatus != newStatus:
            self.log.info( '%s (%s) is now %s ( %s ), before %s' % ( element[ 'name' ],
                                                                     statusType,
                                                                     newStatus,
                                                                     reason,
                                                                     oldStatus ) )

      finally:
        # Used together with join ! It must be called for every item taken from
        # the queue, even if its processing raised, or finalize() would block
        # forever.
        self.elementsToBeChecked.task_done()
Exemplo n.º 13
0
class Synchronizer(object):
    '''
    Every time there is a successful write on the CS, Synchronizer().sync() is
    executed. It updates the database with the values on the CS.

    '''

    def __init__(self, rStatus=None, rManagement=None, defaultStatus="Unknown"):
        '''
        Constructor. Warms up the local CS, stores the clients and resolves the
        token owner used for the entries written by the synchronizer.

        :Parameters:
          **rStatus** - client or None
            client used for the status tables; a ResourceStatusClient is
            created when omitted.
          **rManagement** - client or None
            client used for the management tables; a ResourceManagementClient
            is created when omitted.
          **defaultStatus** - `string`
            status given to the elements added by the synchronizer.
        '''

        # Warm up local CS
        CSHelpers.warmUp()

        # Clients handed in by the caller must be kept: previously the
        # attributes were only assigned when the argument was None, so an
        # injected client left self.rStatus / self.rManagement undefined.
        if rStatus is None:
            rStatus = ResourceStatusClient()
        if rManagement is None:
            rManagement = ResourceManagementClient()
        self.rStatus = rStatus
        self.rManagement = rManagement
        self.defaultStatus = defaultStatus

        self.rssConfig = RssConfiguration()

        # Fall back to "rs_svc" when no proxy information is available
        self.tokenOwner = "rs_svc"
        result = getProxyInfo()
        if result['OK']:
            self.tokenOwner = result['Value']['username']

    def sync(self, _eventName, _params):
        '''
        Entry point of the synchronizer. Runs, in this order, the
        synchronization of Sites, Resources and Nodes. A failing step is only
        logged: the remaining steps still run and the method always succeeds.

        examples:
          >>> s.sync( None, None )
              S_OK()

        :Parameters:
          **_eventName** - any
            ignored, present because the caller passes it.
          **_params** - any
            ignored, present because the caller passes it.

        :return: S_OK
        '''

        for step in (self._syncSites, self._syncResources, self._syncNodes):
            outcome = step()
            if not outcome['OK']:
                gLogger.error(outcome['Message'])

        return S_OK()

    ## Protected methods #########################################################

    def _syncSites(self):
        '''
        Sync sites: compares the sites declared in the CS with the ones stored
        in the DB, removes the DB entries that are not in the CS and adds the
        CS sites that are missing from the DB.

        :return: S_OK, or the first unsuccessful result met on the way.
        '''

        gLogger.info('-- Synchronizing sites --')

        # Site names as declared in the CS
        csQuery = CSHelpers.getSites()
        if not csQuery['OK']:
            return csQuery
        csNames = csQuery['Value']

        gLogger.verbose('%s sites found in CS' % len(csNames))

        # Site names currently stored in RSS
        dbQuery = self.rStatus.selectStatusElement('Site', 'Status',
                                                   meta={'columns': ['Name']})
        if not dbQuery['OK']:
            return dbQuery
        dbNames = [row[0] for row in dbQuery['Value']]

        # In the DB but not (anymore) in the CS
        obsolete = list(set(dbNames).difference(set(csNames)))
        gLogger.verbose('%s sites to be deleted' % len(obsolete))

        for staleSite in obsolete:
            removal = self.rStatus._extermineStatusElement('Site', staleSite)
            gLogger.verbose('Deleting site %s' % staleSite)
            if not removal['OK']:
                return removal

        # In the CS but not (yet) in the DB
        missing = list(set(csNames).difference(set(dbNames)))
        gLogger.verbose('%s site entries to be added' % len(missing))

        for newSite in missing:
            insertion = self.rStatus.addIfNotThereStatusElement(
                'Site', 'Status',
                name=newSite,
                statusType='all',
                status=self.defaultStatus,
                elementType='Site',
                tokenOwner=self.tokenOwner,
                reason='Synchronized')
            if not insertion['OK']:
                return insertion

        return S_OK()

    def _syncResources(self):
        '''
        Sync resources: compares CS with DB and does the necessary modifications
        ( StorageElements, FTS, FileCatalogs and ComputingElements ), then
        removes from the management DB the resources no longer in the CS.

        Each step is independent: a failure is logged and the next step runs.

        :return: S_OK
        '''

        gLogger.info('-- Synchronizing Resources --')

        # ( banner logged before the step, step to run ), in execution order
        steps = (
            ('-> StorageElements', self.__syncStorageElements),
            ('-> FTS', self.__syncFTS),
            ('-> FileCatalogs', self.__syncFileCatalogs),
            ('-> ComputingElements', self.__syncComputingElements),
            ('-> removing resources that no longer exist in the CS',
             self.__removeNonExistingResourcesFromRM),
        )

        for banner, step in steps:
            gLogger.verbose(banner)
            outcome = step()
            if not outcome['OK']:
                gLogger.error(outcome['Message'])

        # FIXME: VOMS

        return S_OK()

    def _syncNodes(self):
        '''
        Sync nodes: compares CS with DB and does the necessary modifications
        ( Queues ). A failure is logged but does not make the method fail.

        :return: S_OK
        '''
        gLogger.info('-- Synchronizing Nodes --')

        gLogger.verbose('-> Queues')
        outcome = self.__syncQueues()
        if outcome['OK']:
            return S_OK()

        gLogger.error(outcome['Message'])
        return S_OK()

    ## Private methods ###########################################################

    def __removeNonExistingResourcesFromRM(self):
        '''
        Remove resources from DowntimeCache table that no longer exist in the CS.

        The set of known resources is made of the storage element hosts plus,
        when they can be retrieved, the FTS3 servers and the computing
        elements. Every DowntimeCache row whose first column is not in that set
        is deleted.

        :return: S_OK, or the first unsuccessful result met on the way.
        '''

        if not getServiceURL("ResourceStatus/ResourceManagement"):
            gLogger.verbose(
                'ResourceManagement is not installed, skipping removal of non existing resources...')
            return S_OK()

        sesHosts = CSHelpers.getStorageElementsHosts()
        if not sesHosts['OK']:
            return sesHosts

        # Private set: the list returned by the helper is no longer extended in
        # place, and the membership test below is not a linear scan per row.
        # Assumes the names are hashable (strings) - the other sync methods
        # already put the computing element names in a set.
        resources = set(sesHosts['Value'])

        # NOTE(review): failures of the two lookups below are silently ignored,
        # as before; the entries of that family are then seen as "not in CS".
        ftsServer = getFTS3Servers()
        if ftsServer['OK']:
            resources.update(ftsServer['Value'])

        ce = CSHelpers.getComputingElements()
        if ce['OK']:
            resources.update(ce['Value'])

        downtimes = self.rManagement.selectDowntimeCache()

        if not downtimes['OK']:
            return downtimes

        # Remove hosts that no longer exist in the CS
        for host in downtimes['Value']:
            name = host[0]
            gLogger.verbose('Checking if %s is still in the CS' % name)
            if name in resources:
                continue

            gLogger.verbose('%s is no longer in CS, removing entry...' % name)
            result = self.rManagement.deleteDowntimeCache(name=name)

            if not result['OK']:
                return result

        return S_OK()

    def __syncComputingElements(self):
        '''
        Sync ComputingElements: compares CS with DB and does the necessary
        modifications. Entries of the DB that are not in the CS are removed,
        then every ( name, statusType ) pair missing from the DB is added with
        the default status.

        :return: S_OK, or the first unsuccessful result met on the way.
        '''

        csQuery = CSHelpers.getComputingElements()
        if not csQuery['OK']:
            return csQuery
        csNames = csQuery['Value']

        gLogger.verbose('%s Computing elements found in CS' % len(csNames))

        dbQuery = self.rStatus.selectStatusElement('Resource', 'Status',
                                                   elementType='ComputingElement',
                                                   meta={'columns': ['Name']})
        if not dbQuery['OK']:
            return dbQuery
        dbNames = [row[0] for row in dbQuery['Value']]

        # ComputingElements that are in DB but not in CS
        obsolete = list(set(dbNames).difference(set(csNames)))
        gLogger.verbose('%s Computing elements to be deleted' % len(obsolete))

        # Delete computing elements
        for staleName in obsolete:
            removal = self.rStatus._extermineStatusElement('Resource', staleName)
            gLogger.verbose('... %s' % staleName)
            if not removal['OK']:
                return removal

        statusTypes = self.rssConfig.getConfigStatusType('ComputingElement')

        # Read the DB again, after the deletions, this time per status type
        pairQuery = self.rStatus.selectStatusElement('Resource', 'Status',
                                                     elementType='ComputingElement',
                                                     meta={'columns': ['Name', 'StatusType']})
        if not pairQuery['OK']:
            return pairQuery
        storedPairs = set((row[0], row[1]) for row in pairQuery['Value'])

        # For each ( ce, statusType ) tuple not present in the DB, add it.
        expectedPairs = set((name, statusType)
                            for name in csNames for statusType in statusTypes)
        missing = list(expectedPairs.difference(storedPairs))

        gLogger.debug('%s Computing elements entries to be added' % len(missing))

        for name, statusType in missing:
            insertion = self.rStatus.addIfNotThereStatusElement(
                'Resource', 'Status',
                name=name,
                statusType=statusType,
                status=self.defaultStatus,
                elementType='ComputingElement',
                tokenOwner=self.tokenOwner,
                reason='Synchronized')
            if not insertion['OK']:
                return insertion

        return S_OK()

    def __syncFileCatalogs(self):
        '''
        Sync FileCatalogs: compares CS with DB and does the necessary
        modifications. Entries of the DB that are not in the CS are removed,
        then every ( name, statusType ) pair missing from the DB is added with
        the default status.

        :return: S_OK, or the first unsuccessful result met on the way.
        '''

        csQuery = CSHelpers.getFileCatalogs()
        if not csQuery['OK']:
            return csQuery
        csNames = csQuery['Value']

        gLogger.verbose('%s File catalogs found in CS' % len(csNames))

        dbQuery = self.rStatus.selectStatusElement('Resource', 'Status',
                                                   elementType='Catalog',
                                                   meta={'columns': ['Name']})
        if not dbQuery['OK']:
            return dbQuery
        dbNames = [row[0] for row in dbQuery['Value']]

        # File catalogs that are in DB but not in CS
        obsolete = list(set(dbNames).difference(set(csNames)))
        gLogger.verbose('%s File catalogs to be deleted' % len(obsolete))

        # Delete file catalogs
        for staleName in obsolete:
            removal = self.rStatus._extermineStatusElement('Resource', staleName)
            gLogger.verbose('... %s' % staleName)
            if not removal['OK']:
                return removal

        statusTypes = self.rssConfig.getConfigStatusType('Catalog')

        # Read the DB again, after the deletions, this time per status type
        pairQuery = self.rStatus.selectStatusElement('Resource', 'Status',
                                                     elementType='Catalog',
                                                     meta={'columns': ['Name', 'StatusType']})
        if not pairQuery['OK']:
            return pairQuery
        storedPairs = set((row[0], row[1]) for row in pairQuery['Value'])

        # For each ( catalog, statusType ) tuple not present in the DB, add it.
        expectedPairs = set((name, statusType)
                            for name in csNames for statusType in statusTypes)
        missing = list(expectedPairs.difference(storedPairs))

        gLogger.verbose('%s File catalogs entries to be added' % len(missing))

        for name, statusType in missing:
            insertion = self.rStatus.addIfNotThereStatusElement(
                'Resource', 'Status',
                name=name,
                statusType=statusType,
                status=self.defaultStatus,
                elementType='Catalog',
                tokenOwner=self.tokenOwner,
                reason='Synchronized')
            if not insertion['OK']:
                return insertion

        return S_OK()

    def __syncFTS(self):
        '''
        Sync FTS: compares CS with DB and does the necessary modifications.
        Entries of the DB that are not in the CS are removed, then every
        ( name, statusType ) pair missing from the DB is added with the default
        status.

        :return: S_OK, or the first unsuccessful result met on the way.
        '''

        csQuery = CSHelpers.getFTS()
        if not csQuery['OK']:
            return csQuery
        csNames = csQuery['Value']

        gLogger.verbose('%s FTS endpoints found in CS' % len(csNames))

        dbQuery = self.rStatus.selectStatusElement('Resource', 'Status',
                                                   elementType='FTS',
                                                   meta={'columns': ['Name']})
        if not dbQuery['OK']:
            return dbQuery
        dbNames = [row[0] for row in dbQuery['Value']]

        # FTS endpoints that are in DB but not in CS
        obsolete = list(set(dbNames).difference(set(csNames)))
        gLogger.verbose('%s FTS endpoints to be deleted' % len(obsolete))

        # Delete FTS endpoints
        for staleName in obsolete:
            removal = self.rStatus._extermineStatusElement('Resource', staleName)
            gLogger.verbose('... %s' % staleName)
            if not removal['OK']:
                return removal

        statusTypes = self.rssConfig.getConfigStatusType('FTS')

        # Read the DB again, after the deletions, this time per status type
        pairQuery = self.rStatus.selectStatusElement('Resource', 'Status',
                                                     elementType='FTS',
                                                     meta={'columns': ['Name', 'StatusType']})
        if not pairQuery['OK']:
            return pairQuery
        storedPairs = set((row[0], row[1]) for row in pairQuery['Value'])

        # For each ( fts, statusType ) tuple not present in the DB, add it.
        expectedPairs = set((name, statusType)
                            for name in csNames for statusType in statusTypes)
        missing = list(expectedPairs.difference(storedPairs))

        gLogger.verbose('%s FTS endpoints entries to be added' % len(missing))

        for name, statusType in missing:
            insertion = self.rStatus.addIfNotThereStatusElement(
                'Resource', 'Status',
                name=name,
                statusType=statusType,
                status=self.defaultStatus,
                elementType='FTS',
                tokenOwner=self.tokenOwner,
                reason='Synchronized')
            if not insertion['OK']:
                return insertion

        return S_OK()

    def __syncStorageElements(self):
        '''
        Sync StorageElements: compares CS with DB and does the necessary
        modifications. Entries of the DB that are not in the CS are removed,
        then every ( name, statusType ) pair missing from the DB is added with
        the default status.

        :return: S_OK, or the first unsuccessful result met on the way.
        '''

        csQuery = CSHelpers.getStorageElements()
        if not csQuery['OK']:
            return csQuery
        csNames = csQuery['Value']

        gLogger.verbose('%s storage elements found in CS' % len(csNames))

        dbQuery = self.rStatus.selectStatusElement('Resource', 'Status',
                                                   elementType='StorageElement',
                                                   meta={'columns': ['Name']})
        if not dbQuery['OK']:
            return dbQuery
        dbNames = [row[0] for row in dbQuery['Value']]

        # StorageElements that are in DB but not in CS
        obsolete = list(set(dbNames).difference(set(csNames)))
        gLogger.verbose('%s storage elements to be deleted' % len(obsolete))

        # Delete storage elements
        for staleName in obsolete:
            removal = self.rStatus._extermineStatusElement('Resource', staleName)
            gLogger.verbose('... %s' % staleName)
            if not removal['OK']:
                return removal

        statusTypes = self.rssConfig.getConfigStatusType('StorageElement')

        # Read the DB again, after the deletions, this time per status type
        pairQuery = self.rStatus.selectStatusElement('Resource', 'Status',
                                                     elementType='StorageElement',
                                                     meta={'columns': ['Name', 'StatusType']})
        if not pairQuery['OK']:
            return pairQuery
        storedPairs = set((row[0], row[1]) for row in pairQuery['Value'])

        # For each ( se, statusType ) tuple not present in the DB, add it.
        expectedPairs = set((name, statusType)
                            for name in csNames for statusType in statusTypes)
        missing = list(expectedPairs.difference(storedPairs))

        gLogger.verbose('%s storage element entries to be added' % len(missing))

        for name, statusType in missing:
            insertion = self.rStatus.addIfNotThereStatusElement(
                'Resource', 'Status',
                name=name,
                statusType=statusType,
                status=self.defaultStatus,
                elementType='StorageElement',
                tokenOwner=self.tokenOwner,
                reason='Synchronized')
            if not insertion['OK']:
                return insertion

        return S_OK()

    def __syncQueues(self):
        '''
        Sync Queues: compares CS with DB and does the necessary modifications.
        Entries of the 'Node' family that are not in the CS are removed, then
        every ( name, statusType ) pair missing from the DB is added with the
        default status.

        :return: S_OK, or the first unsuccessful result met on the way.
        '''

        csQuery = CSHelpers.getQueues()
        if not csQuery['OK']:
            return csQuery
        csNames = csQuery['Value']

        gLogger.verbose('%s Queues found in CS' % len(csNames))

        dbQuery = self.rStatus.selectStatusElement('Node', 'Status',
                                                   elementType='Queue',
                                                   meta={'columns': ['Name']})
        if not dbQuery['OK']:
            return dbQuery
        dbNames = [row[0] for row in dbQuery['Value']]

        # Queues that are in DB but not in CS
        obsolete = list(set(dbNames).difference(set(csNames)))
        gLogger.verbose('%s Queues to be deleted' % len(obsolete))

        # Delete queues
        for staleName in obsolete:
            removal = self.rStatus._extermineStatusElement('Node', staleName)
            gLogger.verbose('... %s' % staleName)
            if not removal['OK']:
                return removal

        statusTypes = self.rssConfig.getConfigStatusType('Queue')

        # Read the DB again, after the deletions, this time per status type
        pairQuery = self.rStatus.selectStatusElement('Node', 'Status',
                                                     elementType='Queue',
                                                     meta={'columns': ['Name', 'StatusType']})
        if not pairQuery['OK']:
            return pairQuery
        storedPairs = set((row[0], row[1]) for row in pairQuery['Value'])

        # For each ( queue, statusType ) tuple not present in the DB, add it.
        expectedPairs = set((name, statusType)
                            for name in csNames for statusType in statusTypes)
        missing = list(expectedPairs.difference(storedPairs))

        gLogger.verbose('%s Queue entries to be added' % len(missing))

        for name, statusType in missing:
            insertion = self.rStatus.addIfNotThereStatusElement(
                'Node', 'Status',
                name=name,
                statusType=statusType,
                status=self.defaultStatus,
                elementType='Queue',
                tokenOwner=self.tokenOwner,
                reason='Synchronized')
            if not insertion['OK']:
                return insertion

        return S_OK()
Exemplo n.º 14
0
class SiteStatus(object):
    """
  RSS helper to interact with the 'Site' family on the DB. It provides the most
  demanded functions and a cache to avoid hitting the server too often.

  It provides four methods to interact with the site statuses:
  * getSiteStatuses
  * isUsableSite
  * getUsableSites
  * getSites
  """

    __metaclass__ = DIRACSingleton

    def __init__(self):
        """
        Constructor: sets up the logger, the configuration helpers, the RSS
        flag and the client used by the methods of this class.
        """

        # Sub-logger named after the class
        self.log = gLogger.getSubLogger(self.__class__.__name__)
        self.rssConfig = RssConfiguration()
        self.__opHelper = Operations()
        # When true the methods query the RSS client, otherwise they go through
        # the WMSAdministrator RPC service
        self.rssFlag = ResourceStatus().rssFlag
        self.rsClient = ResourceStatusClient()

    def getSiteStatuses(self, siteNamesList=None):
        """
        Method that queries the status of the sites in a given list.
        If the input is None ( or empty ), it is interpreted as * ( all ).

        The source of the information depends on `self.rssFlag`: the RSS client
        when set, the WMSAdministrator service otherwise.

        If match is positive, the output looks like:
        {
         'test1.test1.org': 'Active',
         'test2.test2.org': 'Banned',
        }

        examples
          >>> siteStatus.getSiteStatuses( ['test1.test1.uk', 'test2.test2.net', 'test3.test3.org'] )
              S_OK( { 'test1.test1.org': 'Active', 'test2.test2.net': 'Banned', 'test3.test3.org': 'Active' }  )
          >>> siteStatus.getSiteStatuses( None )
              S_OK( { 'test1.test1.org': 'Active',
                      'test2.test2.net': 'Banned',
                      ...
                    }
                  )

        Sites of the list for which nothing is found are left out of the
        output; the first failing query is returned as is.

        :Parameters:
          **siteNamesList** - `list`
            name(s) of the sites to be matched

        :return: S_OK() || S_ERROR()
        """

        if not siteNamesList:
            # No filter: everything comes from a single query
            if self.rssFlag:
                everything = self.rsClient.selectStatusElement(
                    'Site', 'Status', meta={'columns': ['Name', 'Status']})
            else:
                everything = RPCClient(
                    'WorkloadManagement/WMSAdministrator').getSiteMaskStatus()

            if not everything['OK']:
                return everything

            return S_OK(dict(everything['Value']))

        statuses = {}

        for siteName in siteNamesList:

            if self.rssFlag:
                answer = self.rsClient.selectStatusElement(
                    'Site',
                    'Status',
                    name=siteName,
                    meta={'columns': ['Status']})
            else:
                answer = RPCClient('WorkloadManagement/WMSAdministrator'
                                   ).getSiteMaskStatus(siteName)

            if not answer['OK']:
                return answer

            found = answer['Value']
            if not found:
                # if one of the listed elements does not exist continue
                continue

            # RSS answers with rows, of which the first column of the first
            # row is used; the other service value is stored as it comes
            statuses[siteName] = found[0][0] if self.rssFlag else found

        return S_OK(statuses)

    def isUsableSite(self, siteName):
        """
        Tells whether a site is usable. The boolean is wrapped in S_OK:
        status Active or Degraded gives True, anything else ( including a site
        for which nothing is found ) gives False. A failing query is returned
        as is.

        examples
          >>> siteStatus.isUsableSite( 'test1.test1.org' )
              S_OK( True )
          >>> siteStatus.isUsableSite( 'test2.test2.org' )
              S_OK( False ) # May be banned
          >>> siteStatus.isUsableSite( 'NotExists' )
              S_OK( False )

        :Parameters:
          **siteName** - `string`
            name of the site to be matched

        :return: S_OK() || S_ERROR()
        """

        if self.rssFlag:
            answer = self.rsClient.selectStatusElement(
                'Site', 'Status', name=siteName, meta={'columns': ['Status']})
        else:
            answer = RPCClient('WorkloadManagement/WMSAdministrator'
                               ).getSiteMaskStatus(siteName)

        if not answer['OK']:
            return answer

        found = answer['Value']
        if not found:
            # Site does not exist, so it is not usable
            return S_OK(False)

        status = found[0][0] if self.rssFlag else found

        return S_OK(status in ('Active', 'Degraded'))

    def getUsableSites(self, siteNamesList=None):
        """
        Returns, in a list, the sites that are usable, i.e. whose status is
        either Active or Degraded.

        Without a list of names: with RSS, the Active sites followed by the
        Degraded ones; without RSS, whatever the WMSAdministrator site mask
        returns. With a list of names, each site is checked individually and
        the ones for which nothing is found are skipped.

        examples
          >>> siteStatus.getUsableSites( ['test1.test1.uk', 'test2.test2.net', 'test3.test3.org'] )
              S_OK( ['test1.test1.uk', 'test3.test3.org'] )
          >>> siteStatus.getUsableSites( None )
              S_OK( ['test1.test1.uk', 'test3.test3.org', 'test4.test4.org', 'test5.test5.org', ...] )

        :Parameters:
          **siteNamesList** - `List`
            name(s) of the sites to be matched

        :return: S_OK() || S_ERROR()
        """

        if not siteNamesList:

            if not self.rssFlag:
                mask = RPCClient(
                    'WorkloadManagement/WMSAdministrator').getSiteMask()
                if not mask['OK']:
                    return mask

                return S_OK(mask['Value'])

            # One query per usable status, results concatenated in this order
            usable = []
            for wanted in ('Active', 'Degraded'):
                query = self.rsClient.selectStatusElement(
                    'Site',
                    'Status',
                    status=wanted,
                    meta={'columns': ['Name']})
                if not query['OK']:
                    return query

                usable = usable + [row[0] for row in query['Value']]

            return S_OK(usable)

        usableSites = []

        for siteName in siteNamesList:

            if self.rssFlag:
                answer = self.rsClient.selectStatusElement(
                    'Site',
                    'Status',
                    name=siteName,
                    meta={'columns': ['Status']})
            else:
                answer = RPCClient('WorkloadManagement/WMSAdministrator'
                                   ).getSiteMaskStatus(siteName)

            if not answer['OK']:
                return answer

            found = answer['Value']
            if not found:
                # if one of the listed elements does not exist continue
                continue

            status = found[0][0] if self.rssFlag else found

            if status in ('Active', 'Degraded'):
                usableSites.append(siteName)

        return S_OK(usableSites)

    def getSites(self, siteState='Active'):
        """
        By default, it gets the currently active site list. The state is
        matched case-insensitively ( it is capitalized first ); 'All' selects
        every site, any other value must be one of Active, Banned, Degraded,
        Probing, Error or Unknown.

        examples
          >>> siteStatus.getSites()
              S_OK( ['test1.test1.uk', 'test3.test3.org'] )
          >>> siteStatus.getSites( 'Active' )
              S_OK( ['test1.test1.uk', 'test3.test3.org'] )
          >>> siteStatus.getSites( 'Banned' )
              S_OK( ['test0.test0.uk', ... ] )
          >>> siteStatus.getSites( 'All' )
              S_OK( ['test1.test1.uk', 'test3.test3.org', 'test4.test4.org', 'test5.test5.org'...] )
          >>> siteStatus.getSites( None )
              S_ERROR( ... )

        :Parameters:
          **siteState** - `String`
            state of the sites to be matched

        :return: S_OK() || S_ERROR()
        """

        if not siteState:
            return S_ERROR(DErrno.ERESUNK, 'siteState parameter is empty')

        # fix case sensitive string
        state = siteState.capitalize()

        if state == 'All':
            # no filter on the state: return everything
            if self.rssFlag:
                answer = self.rsClient.selectStatusElement(
                    'Site', 'Status', meta={'columns': ['Name']})
            else:
                answer = RPCClient(
                    'WorkloadManagement/WMSAdministrator').getSiteMask('All')

        else:
            if state not in ('Active', 'Banned', 'Degraded',
                             'Probing', 'Error', 'Unknown'):
                return S_ERROR(errno.EINVAL,
                               'Not a valid status, parameter rejected')

            if self.rssFlag:
                answer = self.rsClient.selectStatusElement(
                    'Site',
                    'Status',
                    status=state,
                    meta={'columns': ['Name']})
            else:
                answer = RPCClient('WorkloadManagement/WMSAdministrator'
                                   ).getSiteMask(state)

        if not answer['OK']:
            return answer

        if not self.rssFlag:
            # The service value is handed back untouched
            return S_OK(answer['Value'])

        # RSS answers with rows: keep the first column of each
        return S_OK([row[0] for row in answer['Value']])

    def setSiteStatus(self, site, status, comment='No comment'):
        """
        Set the status of a site through the RSS client ( 'Site' / 'Status' ).
        The entry is modified with the user of the current proxy as token owner
        and a token expiring one day after the current UTC time.

        examples
          >>> siteStatus.setSiteStatus( 'site1.test.test', 'Banned' )
              S_OK()
          >>> siteStatus.setSiteStatus( 'site1.test.test', None )
              S_ERROR( ... )

        :Parameters:
          **site** - `String`
            the site whose status is going to be set
          **status** - `String`
            the new status, matched case-insensitively; one of Active, Banned,
            Degraded, Probing, Error or Unknown
          **comment** - `String`
            reason for the change

        :return: S_OK() || S_ERROR()
        """

        if not status:
            return S_ERROR(DErrno.ERESUNK, 'status parameter is empty')

        # fix case sensitive string
        status = status.capitalize()
        if status not in ('Active', 'Banned', 'Degraded',
                          'Probing', 'Error', 'Unknown'):
            return S_ERROR(errno.EINVAL,
                           'Not a valid status, parameter rejected')

        proxyInfo = getProxyInfo()
        if not proxyInfo['OK']:
            return S_ERROR("Unable to get user proxy info %s " %
                           proxyInfo['Message'])
        owner = proxyInfo['Value']['username']

        expiry = datetime.utcnow() + timedelta(days=1)

        outcome = self.rsClient.modifyStatusElement(
            'Site',
            'Status',
            status=status,
            name=site,
            tokenExpiration=expiry,
            reason=comment,
            tokenOwner=owner)

        if outcome['OK']:
            # The value of the modification is not propagated
            return S_OK()

        return outcome
Exemplo n.º 15
0
class SummarizeLogsAgent(AgentModule):
    """SummarizeLogsAgent as extension of AgentModule.

    On every cycle the rows of the <element>Log tables are condensed and
    copied to the <element>History tables, the processed Log rows are deleted
    and History rows older than ``self.months`` (counted as 30-day months)
    are removed.
    """

    def __init__(self, *args, **kwargs):
        """Constructor."""

        AgentModule.__init__(self, *args, **kwargs)

        # The client is created in initialize()
        self.rsClient = None
        # Default retention of the History tables; see the "Months" option
        self.months = 36

    def initialize(self):
        """Standard initialize.

        Creates the ResourceStatusClient and reads the "Months" agent option
        (the current value of ``self.months`` is used as default).

        :return: S_OK

        """

        self.rsClient = ResourceStatusClient()
        self.months = self.am_getOption("Months", self.months)
        return S_OK()

    def execute(self):
        """execute (main method)

        The execute method runs over the three families of tables (Site, Resource and
        Node) performing identical operations. First, selects all logs for a given
        family (and keeps track of which one is the last row ID). It summarizes the
        logs and finally, deletes the logs from the database.

        At last, this agent removes older entries from the history tables of
        every family (only when ``self.months`` is non-zero).

        Errors of the individual steps are logged and do not abort the cycle.

        :return: S_OK
        """

        elements = ("Site", "Resource", "Node")

        # loop over the tables
        for element in elements:

            self.log.info("Summarizing %s" % element)

            # get all logs to be summarized
            selectLogElements = self._summarizeLogs(element)
            if not selectLogElements["OK"]:
                self.log.error(selectLogElements["Message"])
                continue

            lastID, logElements = selectLogElements["Value"]

            # logElements is a dictionary of key-value pairs as follows:
            # (name, statusType) : list(logs)
            for key, logs in logElements.items():

                sumResult = self._registerLogs(element, key, logs)
                if not sumResult["OK"]:
                    self.log.error(sumResult["Message"])

            if lastID is not None:
                self.log.info("Deleting %sLog till ID %s" % (element, lastID))
                deleteResult = self.rsClient.deleteStatusElement(
                    element, "Log", meta={"older": ["ID", lastID]})
                if not deleteResult["OK"]:
                    self.log.error(deleteResult["Message"])

        # The clean-up used to run once after the loop, i.e. only for the last
        # family ("Node"). Every History table has to be trimmed.
        if self.months:
            for element in elements:
                self._removeOldHistoryEntries(element, self.months)

        return S_OK()

    def _summarizeLogs(self, element):
        """given an element, selects all logs in table <element>Log.

        Consecutive rows of the same (Name, StatusType) pair that change neither
        the Status nor the TokenOwner are dropped.

        :param str element: name of the table family (either Site, Resource or Node)
        :return: S_OK((lastID, logsPerKey)) / S_ERROR, where lastID is the ID of the
                 last selected row (None if there are no rows) and logsPerKey maps
                 (name, statusType) to the list of kept log dictionaries
        """

        selectResults = self.rsClient.selectStatusElement(element, "Log")

        if not selectResults["OK"]:
            return selectResults

        selectedItems = {}

        if not selectResults["Value"]:
            return S_OK((None, selectedItems))

        selectColumns = selectResults["Columns"]
        rows = selectResults["Value"]

        # rows is not empty at this point: the last row carries the latest ID
        latestID = dict(zip(selectColumns, rows[-1]))["ID"]

        for row in rows:

            elementDict = dict(zip(selectColumns, row))
            key = (elementDict["Name"], elementDict["StatusType"])

            keptLogs = selectedItems.setdefault(key, [])
            if not keptLogs:
                keptLogs.append(elementDict)
                continue

            # If there are no changes on the Status or the TokenOwner with respect
            # the previous one, discards the log.
            previous = keptLogs[-1]
            if (previous["Status"] != elementDict["Status"]
                    or previous["TokenOwner"] != elementDict["TokenOwner"]):
                keptLogs.append(elementDict)

        return S_OK((latestID, selectedItems))

    def _registerLogs(self, element, key, logs):
        """Given an element, a key - which is a tuple (<name>, <statusType>)
        and a list of dictionaries, this method inserts them on the <element>History
        table. Before inserting them, checks whether the first one is or is not on
        the <element>History table. If it is, it is not inserted.

        The list passed in ``logs`` is not modified.

        :param str element: name of the table family (either Site, Resource or Node)
        :param tuple key: tuple with the name of the element and the statusType
        :param list logs: list of dictionaries containing the logs

        :return: S_OK / S_ERROR
        """

        if not logs:
            return S_OK()

        # Undo key
        name, statusType = key

        selectedRes = self.rsClient.selectStatusElement(
            element,
            "History",
            name,
            statusType,
            meta={
                "columns": ["Status", "TokenOwner"],
                "limit": 1,
                "order": ["DateEffective", "desc"]
            },
        )

        if not selectedRes["OK"]:
            return selectedRes
        selectedRes = selectedRes["Value"]

        if not selectedRes:
            # Nothing on the History table yet: every log has to be inserted.
            # (The success return used to sit inside the loop, so only the
            # first log was ever written.)
            for selectedItemDict in logs:
                res = self.__logToHistoryTable(element, selectedItemDict)
                if not res["OK"]:
                    return res
            return S_OK()

        # We want from the <element>History table the last Status, and TokenOwner
        lastStatus, lastToken = None, None
        try:
            lastStatus = selectedRes[0][0]
            lastToken = selectedRes[0][1]
        except IndexError:
            # Row shorter than expected: compare against whatever could be read
            pass

        # If the first of the selected items has a different status than the latest
        # on the history, we keep it, otherwise we remove it.
        # Slicing (instead of pop) leaves the caller's list untouched.
        if logs[0]["Status"] == lastStatus and logs[0]["TokenOwner"] == lastToken:
            logs = logs[1:]

        if logs:
            self.log.info("%s (%s):" % (name, statusType))
            self.log.debug(logs)

        for selectedItemDict in logs:

            res = self.__logToHistoryTable(element, selectedItemDict)
            if not res["OK"]:
                return res

        return S_OK()

    def __logToHistoryTable(self, element, elementDict):
        """Given an element and a dictionary with all the arguments, this method
        inserts a new entry on the <element>History table

        :param str element: name of the table family (either Site, Resource or Node)
        :param dict elementDict: dictionary returned from the DB to be inserted on the History table

        :return: S_OK / S_ERROR
        """

        name = elementDict.get("Name")
        statusType = elementDict.get("StatusType")
        # vo = elementDict.get('VO')  # FIXME: not sure about it
        status = elementDict.get("Status")
        elementType = elementDict.get("ElementType")
        reason = elementDict.get("Reason")
        dateEffective = elementDict.get("DateEffective")
        lastCheckTime = elementDict.get("LastCheckTime")
        tokenOwner = elementDict.get("TokenOwner")
        tokenExpiration = elementDict.get("TokenExpiration")

        self.log.info("  %s %s %s %s" %
                      (status, dateEffective, tokenOwner, reason))

        return self.rsClient.insertStatusElement(
            element=element,
            tableType="History",
            name=name,
            statusType=statusType,
            status=status,
            elementType=elementType,
            reason=reason,
            dateEffective=dateEffective,
            lastCheckTime=lastCheckTime,
            tokenOwner=tokenOwner,
            tokenExpiration=tokenExpiration,
        )

    def _removeOldHistoryEntries(self, element, months):
        """Delete entries older than period

        A month is counted as 30 days; the threshold is computed from the
        current UTC time truncated to the second.

        :param str element: name of the table family (either Site, Resource or Node)
        :param int months: number of months

        :return: S_OK / S_ERROR (the result of the delete call; a failure is
                 also logged here)
        """
        toRemove = datetime.utcnow().replace(microsecond=0) - timedelta(
            days=30 * months)
        self.log.info("Removing history entries", "older than %s" % toRemove)

        deleteResult = self.rsClient.deleteStatusElement(
            element, "History", meta={"older": ["DateEffective", toRemove]})
        if not deleteResult["OK"]:
            self.log.error(deleteResult["Message"])
        return deleteResult
Exemplo n.º 16
0
class TokenAgent(AgentModule):
    """
    TokenAgent is in charge of checking tokens assigned on resources.
    Notifications are sent to those users owning expiring tokens.
    """

    # Rss token: owner name used for elements that are under RSS control
    __rssToken = "rs_svc"

    def __init__(self, *args, **kwargs):
        """c'tor"""

        AgentModule.__init__(self, *args, **kwargs)

        # Defaults, overridable through the agent options read in initialize()
        self.notifyHours = 12
        self.adminMail = ""

        self.rsClient = None
        # tokenOwner -> list of [owner, element, name, statusType, status, expiration]
        self.tokenDict = {}
        self.diracAdmin = None

    def initialize(self):
        """TokenAgent initialization

        Reads the "notifyHours" and "adminMail" options and creates the
        ResourceStatusClient and DiracAdmin helpers.

        :return: S_OK
        """

        self.notifyHours = self.am_getOption("notifyHours", self.notifyHours)
        self.adminMail = self.am_getOption("adminMail", self.adminMail)

        self.rsClient = ResourceStatusClient()
        self.diracAdmin = DiracAdmin()

        return S_OK()

    def execute(self):
        """
        Looks for user tokens. If they are expired, or expiring, it notifies users.

        Errors on one family of elements are logged and the next family is
        processed; the method always returns S_OK.
        """

        # Initialized here, as it is needed empty at the beginning of the execution
        self.tokenDict = {}

        for element in ("Site", "Resource", "Node"):

            self.log.info("Processing %s" % element)

            interestingTokens = self._getInterestingTokens(element)
            if not interestingTokens["OK"]:
                self.log.error(interestingTokens["Message"])
                continue
            interestingTokens = interestingTokens["Value"]

            processTokens = self._processTokens(element, interestingTokens)
            if not processTokens["OK"]:
                self.log.error(processTokens["Message"])

        notificationResult = self._notifyOfTokens()
        if not notificationResult["OK"]:
            self.log.error(notificationResult["Message"])

        return S_OK()

    def _getInterestingTokens(self, element):
        """
        Given an element, picks all the entries with TokenExpiration < now + X<hours>
        If the TokenOwner is not the rssToken ( rs_svc ), it is selected.

        :param str element: name of the table family (Site, Resource or Node)
        :return: S_OK( list of row dictionaries ) / S_ERROR
        """

        tokenExpLimit = datetime.utcnow() + timedelta(hours=self.notifyHours)

        tokenElements = self.rsClient.selectStatusElement(
            element,
            "Status",
            meta={"older": ["TokenExpiration", tokenExpLimit]})

        if not tokenElements["OK"]:
            return tokenElements

        tokenColumns = tokenElements["Columns"]

        # Turn every row into a dictionary and drop those owned by RSS itself
        rows = (dict(zip(tokenColumns, row)) for row in tokenElements["Value"])
        interestingTokens = [
            row for row in rows if row["TokenOwner"] != self.__rssToken
        ]

        return S_OK(interestingTokens)

    def _processTokens(self, element, tokenElements):
        """
        Given an element and a list of interesting token elements, updates the
        database if the token is expired, logs a message and adds the token to
        self.tokenDict ( keyed by token owner ) for the later notification.

        :param str element: name of the table family (Site, Resource or Node)
        :param list tokenElements: row dictionaries as returned by _getInterestingTokens
        :return: S_OK / S_ERROR ( processing stops at the first error )
        """

        never = datetime.max

        for tokenElement in tokenElements:

            try:
                name = tokenElement["Name"]
                statusType = tokenElement["StatusType"]
                status = tokenElement["Status"]
                tokenOwner = tokenElement["TokenOwner"]
                tokenExpiration = tokenElement["TokenExpiration"]
            except KeyError as e:
                return S_ERROR(e)

            # If token has already expired
            if tokenExpiration < datetime.utcnow():
                _msg = '%s with statusType "%s" and owner %s EXPIRED'
                self.log.info(_msg % (name, statusType, tokenOwner))

                # Hand the element back to RSS with a never-expiring token
                result = self.rsClient.addOrModifyStatusElement(
                    element,
                    "Status",
                    name=name,
                    statusType=statusType,
                    tokenOwner=self.__rssToken,
                    tokenExpiration=never,
                )
                if not result["OK"]:
                    return result

            else:
                _msg = '%s with statusType "%s" and owner %s -> %s'
                self.log.info(_msg %
                              (name, statusType, tokenOwner, tokenExpiration))

            self.tokenDict.setdefault(tokenOwner, []).append([
                tokenOwner, element, name, statusType, status, tokenExpiration
            ])

        return S_OK()

    def _notifyOfTokens(self):
        """
        Splits interesting tokens between expired and expiring. Also splits them
        among users. It ends sending notifications to the users, and a summary
        of all tokens to self.adminMail when it is set.

        :return: S_OK, or the result of the admin notification when one is sent
        """

        now = datetime.utcnow()

        adminExpired = []
        adminExpiring = []

        for tokenOwner, tokenLists in self.tokenDict.items():

            expired = []
            expiring = []

            for tokenList in tokenLists:

                # Index 5 is the token expiration ( see _processTokens )
                if tokenList[5] < now:
                    expired.append(tokenList)
                    adminExpired.append(tokenList)
                else:
                    expiring.append(tokenList)
                    adminExpiring.append(tokenList)

            resNotify = self._notify(tokenOwner, expired, expiring)
            if not resNotify["OK"]:
                self.log.error("Failed to notify token owner",
                               resNotify["Message"])

        if (adminExpired or adminExpiring) and self.adminMail:
            return self._notify(self.adminMail, adminExpired, adminExpiring)

        return S_OK()

    def _notify(self, tokenOwner, expired, expiring):
        """
        Given a token owner and a list of expired and expiring tokens, sends an
        email to the user.

        :param str tokenOwner: recipient passed to DiracAdmin.sendMail
        :param list expired: token lists whose expiration is already in the past
        :param list expiring: token lists that are about to expire
        :return: result of sendMail, or S_ERROR if the mail could not be sent
        """

        subject = "RSS token summary for tokenOwner %s" % tokenOwner

        mail = "\nEXPIRED tokens ( RSS has taken control of them )\n"
        for tokenList in expired:

            mail += " ".join([str(x) for x in tokenList])
            mail += "\n"

        # Appended ( it used to be assigned, which wiped the EXPIRED section )
        mail += "\nEXPIRING tokens ( RSS will take control of them )\n"
        for tokenList in expiring:

            mail += " ".join([str(x) for x in tokenList])
            mail += "\n"

        mail += "\n\n You can extend for another 24 hours using the web interface (Set token -> Acquire)\n"
        mail += " Or you can use the dirac-rss-set-token script\n\n"
        mail += "Through the same interfaces you can release the token any time\n"

        resEmail = self.diracAdmin.sendMail(tokenOwner, subject, mail)
        if not resEmail["OK"]:
            return S_ERROR('Cannot send email to user "%s"' % tokenOwner)

        return resEmail
Exemplo n.º 17
0
def setToken(user):
    """
    Acquire, extend or release the RSS token of the elements selected through
    the script switches.

    The matching (StatusType, TokenOwner) rows are read from the <element>Status
    table and each of them is modified individually, setting the reason, the new
    token owner and the new token expiration.

    :param str user: name that becomes the token owner when the token is acquired
    :return: S_OK / S_ERROR ( the first failing DB call stops the processing )
    """

    client = ResourceStatusClient()

    elementFamily = switchDict["element"]
    elementName = switchDict["name"]
    requestedStatusType = switchDict["statusType"]

    # The modify method does not discover the StatusTypes ( an optional
    # parameter of this script ), so they are fetched from the DB first and
    # then the elements are modified one by one.
    selection = client.selectStatusElement(
        elementFamily,
        "Status",
        name=elementName,
        statusType=requestedStatusType,
        meta={"columns": ["StatusType", "TokenOwner"]},
    )
    if not selection["OK"]:
        return selection

    rows = selection["Value"]
    if not rows:
        # An empty list means the elements are not on the DB
        subLogger.warn("Nothing found for %s, %s, %s" % (elementFamily, elementName, requestedStatusType))
        return S_OK()

    if switchDict["releaseToken"] != False:
        # Releasing: the token goes back to RSS and never expires
        newTokenOwner, tokenExpiration = "rs_svc", datetime.max
    else:
        # Acquiring / extending: the token is valid for one day from now
        newTokenOwner = user
        tokenExpiration = datetime.utcnow().replace(microsecond=0) + timedelta(days=1)

    subLogger.info("New token : %s until %s" % (newTokenOwner, tokenExpiration))

    for currentStatusType, currentOwner in rows:

        # Tell when the token is held by somebody who is neither the caller nor RSS
        if currentOwner not in (user, "rs_svc"):
            subLogger.info("%s(%s) belongs to the user: %s" % (switchDict["name"], currentStatusType, currentOwner))

        outcome = client.modifyStatusElement(
            switchDict["element"],
            "Status",
            name=switchDict["name"],
            statusType=currentStatusType,
            reason=switchDict["reason"],
            tokenOwner=newTokenOwner,
            tokenExpiration=tokenExpiration,
        )
        if not outcome["OK"]:
            return outcome

        if currentOwner == newTokenOwner:
            action = "(extended)"
        elif newTokenOwner == "rs_svc":
            action = "(released)"
        else:
            action = "(aquired from %s)" % currentOwner

        subLogger.info("%s:%s %s" % (switchDict["name"], currentStatusType, action))

    return S_OK()
Exemplo n.º 18
0
class ResourceStatus(object):
  """
  ResourceStatus helper that connects to CS if RSS flag is not Active. It keeps
  the connection to the db / server as an object member, to avoid creating a new
  one massively.
  """

  __metaclass__ = DIRACSingleton

  def __init__(self, rssFlag=None):
    """
    Constructor, initializes the rssClient.

    :param rssFlag: True to use the RSS, False to use the CS; when None the
                    mode is read from the RSS configuration
    :type rssFlag: None, bool
    """
    self.log = gLogger.getSubLogger(self.__class__.__name__)
    self.rssConfig = RssConfiguration()
    self.__opHelper = Operations()
    self.rssClient = ResourceStatusClient()
    self.rssFlag = rssFlag
    if rssFlag is None:
      self.rssFlag = self.__getMode()

    cacheLifeTime = int(self.rssConfig.getConfigCache())

    # RSSCache only affects the calls directed to RSS, if using the CS it is not used.
    self.rssCache = RSSCache(cacheLifeTime, self.__updateRssCache)

  def getElementStatus(self, elementName, elementType, statusType=None, default=None):
    """
    Helper function, tries to get information from the RSS for the given
    Element, otherwise, it gets it from the CS.

    :param elementName: name of the element or list of element names
    :type elementName: str, list
    :param elementType: type of the element (StorageElement, ComputingElement, FTS, Catalog)
    :type elementType: str
    :param statusType: type of the status (meaningful only when elementType==StorageElement)
    :type statusType: None, str, list
    :param default: default value (meaningful only when rss is InActive)
    :type default: str
    :return: S_OK/S_ERROR
    :rtype: dict

    :Example:

    >>> getElementStatus('CE42', 'ComputingElement')
        S_OK( { 'CE42': { 'all': 'Active' } } )
    >>> getElementStatus('SE1', 'StorageElement', 'ReadAccess')
        S_OK( { 'SE1': { 'ReadAccess': 'Banned' } } )
    >>> getElementStatus('SE1', 'ThisIsAWrongElementType', 'ReadAccess')
        S_ERROR( xyz.. )
    >>> getElementStatus('ThisIsAWrongName', 'StorageElement', 'WriteAccess')
        S_ERROR( xyz.. )
    >>> getElementStatus('A_file_catalog', 'FileCatalog')
        S_OK( { 'A_file_catalog': { 'all': 'Active' } } )
    >>> getElementStatus('SE1', 'StorageElement', ['ReadAccess', 'WriteAccess'])
        S_OK( { 'SE1': { 'ReadAccess': 'Banned' , 'WriteAccess': 'Active'} } )
    >>> getElementStatus('SE1', 'StorageElement')
        S_OK( { 'SE1': { 'ReadAccess': 'Probing' ,
                         'WriteAccess': 'Active',
                         'CheckAccess': 'Degraded',
                         'RemoveAccess': 'Banned'} } )
    >>> getElementStatus(['CE1', 'CE2'], 'ComputingElement')
        S_OK( {'CE1': {'all': 'Active'},
               'CE2': {'all': 'Probing'}} )
    """

    allowedParameters = ["StorageElement", "ComputingElement", "FTS", "Catalog"]

    if elementType not in allowedParameters:
      return S_ERROR("%s in not in the list of the allowed parameters: %s" % (elementType, allowedParameters))

    # Apply defaults
    if not statusType:
      if elementType == "StorageElement":
        statusType = ['ReadAccess', 'WriteAccess', 'CheckAccess', 'RemoveAccess']
      else:
        # ComputingElement, FTS and Catalog only have the 'all' status type
        statusType = ['all']

    if self.rssFlag:
      return self.__getRSSElementStatus(elementName, elementType, statusType)
    return self.__getCSElementStatus(elementName, elementType, statusType, default)

  def setElementStatus(self, elementName, elementType, statusType, status, reason=None, tokenOwner=None):
    """ Tries set information in RSS and in CS.

    :param elementName: name of the element
    :type elementName: str
    :param elementType: type of the element (StorageElement, ComputingElement, FTS, Catalog)
    :type elementType: str
    :param statusType: type of the status (meaningful only when elementType==StorageElement)
    :type statusType: str
    :param status: status to be set
    :type status: str
    :param reason: reason for setting the status (used only when rss is Active)
    :type reason: str
    :param tokenOwner: owner of the token (meaningful only when rss is Active)
    :type tokenOwner: str
    :return: S_OK/S_ERROR
    :rtype: dict

    :Example:

    >>> setElementStatus('CE42', 'ComputingElement', 'all', 'Active')
        S_OK(  xyz.. )
    >>> setElementStatus('SE1', 'StorageElement', 'ReadAccess', 'Banned')
        S_OK(  xyz.. )
    """

    if self.rssFlag:
      return self.__setRSSElementStatus(elementName, elementType, statusType, status, reason, tokenOwner)
    return self.__setCSElementStatus(elementName, elementType, statusType, status)

################################################################################

  def __updateRssCache(self):
    """ Method used to update the rssCache.

        It will try 5 times to contact the RSS before giving up. After a
        failed trial it sleeps trial**2 seconds and creates a new client.

    :return: S_OK( cache dictionary ) / S_ERROR of the last trial
    """

    meta = {'columns': ['Name', 'ElementType', 'StatusType', 'Status']}

    for ti in range(5):
      rawCache = self.rssClient.selectStatusElement('Resource', 'Status', meta=meta)
      if rawCache['OK']:
        break
      self.log.warn("Can't get resource's status", rawCache['Message'] + "; trial %d" % ti)
      sleep(math.pow(ti, 2))
      self.rssClient = ResourceStatusClient()

    if not rawCache['OK']:
      return rawCache
    return S_OK(getCacheDictFromRawData(rawCache['Value']))

################################################################################

  def __getRSSElementStatus(self, elementName, elementType, statusType):
    """ Gets from the cache or the RSS the Elements status. The cache is a
        copy of the DB table. If it is not on the cache, most likely is not going
        to be on the DB.

        There is one exception: item just added to the CS, e.g. new Element.
        The period between it is added to the DB and the changes are propagated
        to the cache will be inconsistent, but not dangerous. Just wait <cacheLifeTime>
        minutes.

    :param elementName: name of the element or list of element names
    :type elementName: str, list
    :param elementType: type of the element (StorageElement, ComputingElement, FTS, Catalog)
    :type elementType: str
    :param statusType: type of the status (meaningful only when elementType==StorageElement,
                       otherwise it is 'all' or ['all'])
    :type statusType: str, list
    :return: whatever the cache match returns
    """

    cacheMatch = self.rssCache.match(elementName, elementType, statusType)

    self.log.debug('__getRSSElementStatus')
    self.log.debug(cacheMatch)

    return cacheMatch

  def __getCSElementStatus(self, elementName, elementType, statusType, default):
    """ Gets from the CS the Element status

    :param elementName: name of the element or list of element names
    :type elementName: str, list
    :param elementType: type of the element (StorageElement, ComputingElement, FTS, Catalog)
    :type elementType: str
    :param statusType: type of the status (meaningful only when elementType==StorageElement)
    :type statusType: str, list
    :param default: default value
    :type default: None, str
    :return: S_OK( { name: { statusType: status } } ) / S_ERROR
    """

    # DIRAC doesn't store the status of ComputingElements nor FTS in the CS, so here we can just return 'Active'
    if elementType in ('ComputingElement', 'FTS'):
      # A list of names is accepted as well: a list cannot be used as a
      # dictionary key, so one entry per name is built.
      names = elementName if isinstance(elementName, list) else [elementName]
      return S_OK(dict((name, {'all': 'Active'}) for name in names))

    # If we are here it is because elementType is either 'StorageElement' or 'Catalog'
    if elementType == 'StorageElement':
      cs_path = "/Resources/StorageElements"
    elif elementType == 'Catalog':
      cs_path = "/Resources/FileCatalogs"
      statusType = ['Status']

    if not isinstance(elementName, list):
      elementName = [elementName]

    if not isinstance(statusType, list):
      statusType = [statusType]

    result = {}
    for element in elementName:

      for sType in statusType:
        # Look in standard location, 'Active' by default
        res = gConfig.getValue("%s/%s/%s" % (cs_path, element, sType), 'Active')
        result.setdefault(element, {})[sType] = res

    if result:
      return S_OK(result)

    # Only reached when elementName or statusType is an empty list
    if default is not None:
      defList = [[el, statusType, default] for el in elementName]
      return S_OK(getDictFromList(defList))

    _msg = "Element '%s', with statusType '%s' is unknown for CS."
    return S_ERROR(DErrno.ERESUNK, _msg % (elementName, statusType))

  def __setRSSElementStatus(self, elementName, elementType, statusType, status, reason, tokenOwner):
    """
    Sets on the RSS the Elements status

    The token expires one day from now (UTC). The cache lock is held during
    the update and the cache is refreshed after a successful one.

    :return: S_OK / S_ERROR as returned by the RSS client
    """

    expiration = datetime.utcnow() + timedelta(days=1)

    self.rssCache.acquireLock()
    try:
      res = self.rssClient.addOrModifyStatusElement('Resource', 'Status', name=elementName,
                                                    elementType=elementType, status=status,
                                                    statusType=statusType, reason=reason,
                                                    tokenOwner=tokenOwner, tokenExpiration=expiration)

      if res['OK']:
        self.rssCache.refreshCache()
      else:
        _msg = 'Error updating Element (%s,%s,%s)' % (elementName, statusType, status)
        gLogger.warn('RSS: %s' % _msg)

      return res

    finally:
      # Release lock, no matter what.
      self.rssCache.releaseLock()

  def __setCSElementStatus(self, elementName, elementType, statusType, status):
    """
    Sets on the CS the Elements status

    :return: S_OK / S_ERROR ( result of committing the change to the CS )
    """

    # DIRAC doesn't store the status of ComputingElements nor FTS in the CS, so here we can just do nothing
    if elementType in ('ComputingElement', 'FTS'):
      return S_OK()

    # If we are here it is because elementType is either 'StorageElement' or 'Catalog'
    statuses = self.rssConfig.getConfigStatusType(elementType)
    if statusType not in statuses:
      gLogger.error("%s is not a valid statusType" % statusType)
      return S_ERROR("%s is not a valid statusType: %s" % (statusType, statuses))

    if elementType == 'StorageElement':
      cs_path = "/Resources/StorageElements"
    elif elementType == 'Catalog':
      cs_path = "/Resources/FileCatalogs"
      # FIXME: This a probably outdated location (new one is in /Operations/[]/Services/Catalogs)
      # but needs to be VO-aware
      statusType = 'Status'

    # NOTE(review): this path contains elementType, while __getCSElementStatus
    # reads "<cs_path>/<element>/<statusType>" - confirm which layout is right.
    csAPI = CSAPI()
    csAPI.setOption("%s/%s/%s/%s" % (cs_path, elementName, elementType, statusType), status)

    res = csAPI.commitChanges()
    if not res['OK']:
      gLogger.warn('CS: %s' % res['Message'])

    return res

  def __getMode(self):
    """
      Gets flag defined (or not) on the RSSConfiguration.
      If defined as 'Active', we use RSS, if not, we use the CS when possible (and WMS for Sites).

      When the RSS is not used the rssClient member is reset to None.

    :return: True if the RSS state is 'Active', False otherwise
    """

    res = self.rssConfig.getConfigState()

    if res == 'Active':
      if self.rssClient is None:
        self.rssClient = ResourceStatusClient()
      return True

    self.rssClient = None
    return False

  def isStorageElementAlwaysBanned(self, seName, statusType):
    """ Checks if the AlwaysBanned policy is applied to the SE given as parameter

    :param seName: string, name of the SE
    :param statusType: ReadAccess, WriteAccess, RemoveAccess, CheckAccess

    :returns: S_OK(True/False), or S_ERROR if the policies cannot be retrieved
    """

    res = getPoliciesThatApply({'name': seName, 'statusType': statusType})
    if not res['OK']:
      self.log.error("isStorageElementAlwaysBanned: unable to get the information", res['Message'])
      return res

    isAlwaysBanned = 'AlwaysBanned' in [policy['type'] for policy in res['Value']]

    return S_OK(isAlwaysBanned)
Exemplo n.º 19
0
class SummarizeLogsAgent( AgentModule ):
  """ SummarizeLogsAgent as extension of AgentModule.

  On every cycle it goes through the <element>Log tables ( Site, Resource and
  Node ), discards consecutive log entries that change neither the Status nor
  the TokenOwner, copies the remaining ones to the matching <element>History
  table and finally deletes the processed rows from the <element>Log table.

  """

  def __init__( self, *args, **kwargs ):
    """ Constructor.

    The ResourceStatusClient is only declared here, it is created on initialize.

    """

    AgentModule.__init__( self, *args, **kwargs )

    self.rsClient = None


  def initialize( self ):
    """ Standard initialize. Creates the ResourceStatusClient used by the agent.

    :return: S_OK

    """

    self.rsClient = ResourceStatusClient()
    return S_OK()


  def execute( self ):
    """ execute ( main method )

    The execute method runs over the three families of tables ( Site, Resource and
    Node ) performing identical operations. First, selects all logs for a given
    family ( and keeps track of which one is the last row ID ). It summarizes the
    logs and finally, deletes the logs from the database.

    Errors are logged and the loop carries on with the next key / family, so
    a failure on one of them never aborts the whole cycle.

    :return: S_OK

    """

    # loop over the tables
    for element in ( 'Site', 'Resource', 'Node' ):

      self.log.info( 'Summarizing %s' % element )

      # get all logs to be summarized
      selectLogElements = self._summarizeLogs( element )
      if not selectLogElements[ 'OK' ]:
        self.log.error( selectLogElements[ 'Message' ] )
        continue

      lastID, logElements = selectLogElements[ 'Value' ]

      # logElements is a dictionary of key-value pairs as follows:
      # ( name, statusType ) : list( logs )
      for key, logs in logElements.items():

        sumResult = self._registerLogs( element, key, logs )
        if not sumResult[ 'OK' ]:
          self.log.error( sumResult[ 'Message' ] )

      # lastID is None only when the Log table returned no rows at all
      if lastID is not None:
        self.log.info( 'Deleting %sLog till ID %s' % ( element, lastID ) )
        deleteResult = self.rsClient.deleteStatusElement( element, 'Log',
                                                          meta = { 'older' : ( 'ID', lastID ) } )
        if not deleteResult[ 'OK' ]:
          self.log.error( deleteResult[ 'Message' ] )

    return S_OK()


  #.............................................................................


  def _summarizeLogs( self, element ):
    """ given an element, selects all logs in table <element>Log and groups them
    by ( name, statusType ). Within each group, a log is kept only if its Status
    or its TokenOwner differs from the previously kept log of the same group.

    :Parameters:
      **element** - `string`
        name of the table family ( either Site, Resource and Node )

    :return: S_OK( ( lastID, logsDict ) ) / S_ERROR
      lastID is the ID of the last row returned by the select ( None if there
      are no rows ) and logsDict maps ( name, statusType ) to a list of logs,
      each log being a dictionary column -> value.

    """

    selectResults = self.rsClient.selectStatusElement( element, 'Log' )

    if not selectResults[ 'OK' ]:
      return selectResults

    selectedItems = {}
    selectColumns = selectResults[ 'Columns' ]
    selectResults = selectResults[ 'Value' ]

    latestID = None
    if selectResults:
      latestID = dict( zip( selectColumns, selectResults[ -1 ] ) )[ 'ID' ]

    for selectResult in selectResults:

      elementDict = dict( zip( selectColumns, selectResult ) )

      key = ( elementDict[ 'Name' ], elementDict[ 'StatusType' ] )

      if key not in selectedItems:
        selectedItems[ key ] = [ elementDict ]
        continue

      lastStatus = selectedItems[ key ][ -1 ][ 'Status' ]
      lastToken  = selectedItems[ key ][ -1 ][ 'TokenOwner' ]

      # If there are no changes on the Status or the TokenOwner with respect
      # the previous one, discards the log.
      if lastStatus != elementDict[ 'Status' ] or lastToken != elementDict[ 'TokenOwner' ]:
        selectedItems[ key ].append( elementDict )

    return S_OK( ( latestID, selectedItems ) )


  def _registerLogs( self, element, key, logs ):
    """ Given an element, a key - which is a tuple ( <name>, <statusType> )
    and a list of dictionaries, this method inserts them on the <element>History
    table. Before inserting them, checks whether the first one is or is not on
    the <element>History table. If it is, it is not inserted. It also checks
    whether the LastCheckTime parameter of the first log to be inserted is
    larger than the last history log LastCheckTime. If not, it means an agent
    cycle has been interrupted and we can run into inconsistencies. It aborts to
    prevent more dramatic results.

    The list passed as `logs` is not modified.

    :Parameters:
      **element** - `string`
        name of the table family ( either Site, Resource and Node )
      **key** - `tuple`
        tuple with the name of the element and the statusType
      **logs** - `list`
        list of dictionaries containing the logs

    :return: S_OK / S_ERROR

    """

    # Nothing to register, do not even bother the DB
    if not logs:
      return S_OK()

    # Undo key
    name, statusType = key

    selectedRes = self.rsClient.selectStatusElement( element, 'History', name,
                                                     statusType,
                                                     meta = { 'columns' : [ 'Status', 'LastCheckTime', 'TokenOwner' ],
                                                              'limit'   : 1,
                                                              'order'   : 'LastCheckTime DESC' } )

    if not selectedRes[ 'OK' ]:
      return selectedRes
    selectedRes = selectedRes[ 'Value' ]

    # We want from the <element>History table the last Status, LastCheckTime
    # and TokenOwner
    lastStatus, lastCheckTime, lastToken = None, None, None
    if selectedRes:
      lastStatus, lastCheckTime, lastToken = selectedRes[ 0 ]

    # Sanity check to avoid running if an agent cycle has been stopped
    if lastCheckTime and logs[ 0 ][ 'LastCheckTime' ] < lastCheckTime:
      return S_ERROR( 'Overlapping data. Seems the DB has not been cleared properly' )

    # If the first of the selected items has the same status and token owner as
    # the latest entry on the history, it is already there: skip it. A slice is
    # used so that the list owned by the caller is left untouched.
    if logs[ 0 ][ 'Status' ] == lastStatus and logs[ 0 ][ 'TokenOwner' ] == lastToken:
      logs = logs[ 1: ]

    if logs:
      self.log.info( '%s ( %s )' % ( name, statusType ) )

    for selectedItemDict in logs:

      res = self.__logToHistoryTable( element, selectedItemDict )
      if not res[ 'OK' ]:
        return res

    return S_OK()


  def __logToHistoryTable( self, element, elementDict ):
    """ Given an element and a dictionary with all the arguments, this method
    inserts a new entry on the <element>History table

    :Parameters:
      **element** - `string`
        name of the table family ( either Site, Resource and Node )
      **elementDict** - `dict`
        dictionary returned from the DB to be inserted on the History table

    :return: S_OK / S_ERROR
      S_ERROR if any of the expected columns is missing on elementDict,
      otherwise whatever insertStatusElement returns.

    """

    try:

      name            = elementDict[ 'Name' ]
      statusType      = elementDict[ 'StatusType' ]
      status          = elementDict[ 'Status' ]
      elementType     = elementDict[ 'ElementType' ]
      reason          = elementDict[ 'Reason' ]
      dateEffective   = elementDict[ 'DateEffective' ]
      lastCheckTime   = elementDict[ 'LastCheckTime' ]
      tokenOwner      = elementDict[ 'TokenOwner' ]
      tokenExpiration = elementDict[ 'TokenExpiration' ]

    except KeyError as e:
      # S_ERROR gets a readable message, not the exception object itself
      return S_ERROR( 'Missing key in log entry: %s' % e )

    self.log.info( '  %(Status)s %(DateEffective)s %(TokenOwner)s %(Reason)s' % elementDict )

    return self.rsClient.insertStatusElement( element, 'History', name, statusType,
                                              status, elementType, reason,
                                              dateEffective, lastCheckTime,
                                              tokenOwner, tokenExpiration )

#...............................................................................
#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF
Exemplo n.º 20
0
class TokenAgent(AgentModule):
    '''
    TokenAgent is in charge of checking tokens assigned on resources.
    Notifications are sent to those users owning expiring tokens.
    '''

    # Hours to notify a user
    __notifyHours = 12

    # Rss token
    __rssToken = 'rs_svc'

    # Admin mail
    __adminMail = None

    def __init__(self, *args, **kwargs):
        ''' c'tor

        Sets the defaults for the configurable options; clients and the token
        dictionary are only declared here and filled in later.
        '''

        AgentModule.__init__(self, *args, **kwargs)

        self.notifyHours = self.__notifyHours
        self.adminMail = self.__adminMail

        self.rsClient = None
        self.tokenDict = None
        self.diracAdmin = None

    def initialize(self):
        ''' TokenAgent initialization

        Reads the options 'notifyHours' and 'adminMail' ( falling back to the
        current values ) and creates the clients.

        :return: S_OK
        '''

        self.notifyHours = self.am_getOption('notifyHours', self.notifyHours)
        self.adminMail = self.am_getOption('adminMail', self.adminMail)

        self.rsClient = ResourceStatusClient()
        self.diracAdmin = DiracAdmin()

        return S_OK()

    def execute(self):
        '''
        Looks for user tokens. If they are expired, or expiring, it notifies users.

        Errors on one family of elements are logged and do not prevent the
        remaining families from being processed.

        :return: S_OK
        '''

        # Initialized here, as it is needed empty at the beginning of the execution
        self.tokenDict = {}

        # FIXME: probably this can be obtained from RssConfiguration instead
        elements = ('Site', 'Resource', 'Node')

        for element in elements:

            self.log.info('Processing %s' % element)

            interestingTokens = self._getInterestingTokens(element)
            if not interestingTokens['OK']:
                self.log.error(interestingTokens['Message'])
                continue
            interestingTokens = interestingTokens['Value']

            processTokens = self._processTokens(element, interestingTokens)
            if not processTokens['OK']:
                self.log.error(processTokens['Message'])
                continue

        # NOTE(review): _notifyOfTokens is not defined in the visible part of
        # this class - confirm it exists in the complete module.
        notificationResult = self._notifyOfTokens()
        if not notificationResult['OK']:
            self.log.error(notificationResult['Message'])

        return S_OK()

    ## Protected methods #########################################################

    def _getInterestingTokens(self, element):
        '''
        Given an element, picks all the entries with TokenExpiration < now + X<hours>
        If the TokenOwner is not the rssToken ( rs_svc ), it is selected.

        :return: S_OK( list of dictionaries column -> value ) / S_ERROR
        '''

        tokenExpLimit = datetime.utcnow() + timedelta(hours=self.notifyHours)

        tokenElements = self.rsClient.selectStatusElement(
            element,
            'Status',
            meta={'older': ('TokenExpiration', tokenExpLimit)})

        if not tokenElements['OK']:
            return tokenElements

        tokenColumns = tokenElements['Columns']
        tokenElements = tokenElements['Value']

        interestingTokens = []

        for tokenElement in tokenElements:

            tokenElement = dict(zip(tokenColumns, tokenElement))

            # Tokens owned by the RSS itself are of no interest
            if tokenElement['TokenOwner'] != self.__rssToken:
                interestingTokens.append(tokenElement)

        return S_OK(interestingTokens)

    def _processTokens(self, element, tokenElements):
        '''
        Given an element and a list of interesting token elements, updates the
        database if the token is expired ( the token goes back to the rssToken
        with no expiration ), logs a message and adds the token to
        self.tokenDict, indexed by its owner.

        It stops at the first token that is missing a key or that cannot be
        updated on the database.

        :return: S_OK / S_ERROR
        '''

        never = datetime.max

        for tokenElement in tokenElements:

            try:
                name = tokenElement['Name']
                statusType = tokenElement['StatusType']
                status = tokenElement['Status']
                tokenOwner = tokenElement['TokenOwner']
                tokenExpiration = tokenElement['TokenExpiration']
            except KeyError as e:
                # S_ERROR gets a readable message, not the exception object itself
                return S_ERROR('Missing key in token element: %s' % e)

            # If token has already expired
            if tokenExpiration < datetime.utcnow():
                _msg = '%s with statusType "%s" and owner %s EXPIRED'
                self.log.info(_msg % (name, statusType, tokenOwner))

                result = self.rsClient.addOrModifyStatusElement(
                    element,
                    'Status',
                    name=name,
                    statusType=statusType,
                    tokenOwner=self.__rssToken,
                    tokenExpiration=never)
                if not result['OK']:
                    return result

            else:
                _msg = '%s with statusType "%s" and owner %s -> %s'
                self.log.info(_msg %
                              (name, statusType, tokenOwner, tokenExpiration))

            # Expired and expiring tokens are both recorded under their owner
            self.tokenDict.setdefault(tokenOwner, []).append([
                tokenOwner, element, name, statusType, status, tokenExpiration
            ])

        return S_OK()
Exemplo n.º 21
0
class PEP:
  """ PEP ( Policy Enforcement Point )

  Asks the PDP for a decision on an element and runs the actions that come
  with that decision.
  """

  def __init__(self, clients=None):
    """ Constructor

    examples:
      >>> pep = PEP()
      >>> pep1 = PEP( { 'ResourceStatusClient' : ResourceStatusClient() } )
      >>> pep2 = PEP( { 'ResourceStatusClient' : ResourceStatusClient(), 'ClientY' : None } )

    :Parameters:
      **clients** - [ None, `dict` ]
        dictionary with clients to be used in the commands issued by the policies.
        If not defined, the commands will import them. It is a measure to avoid
        opening the same connection every time a policy is evaluated.

    """

    if clients is None:
      clients = {}

    # The PEP itself needs two clients: ResourceStatusClient and
    # ResourceManagementClient. Whatever is stored under the key is taken
    # as it is; a new client is built only when the key is absent.
    self.rsClient = (clients['ResourceStatusClient']
                     if 'ResourceStatusClient' in clients else ResourceStatusClient())
    self.rmClient = (clients['ResourceManagementClient']
                     if 'ResourceManagementClient' in clients else ResourceManagementClient())

    self.clients = clients
    # The PDP gets the very same clients, to be used on the Commands
    self.pdp = PDP(clients)

  def enforce(self, decisionParams):
    """ Given a dictionary with decisionParams, it is passed to the PDP, which
    will return ( in case there is a/are positive match/es ) a dictionary containing
    three key-pair values: the original decisionParams ( `decisionParams` ), all
    the policies evaluated ( `singlePolicyResults` ) and the computed final result
    ( `policyCombinedResult` ).

    To know more about decisionParams, please read PDP.setup where the decisionParams
    are sanitized.

    examples:
       >>> pep.enforce( { 'element' : 'Site', 'name' : 'MySite' } )
       >>> pep.enforce( { 'element' : 'Resource', 'name' : 'myce.domain.ch' } )

    :Parameters:
      **decisionParams** - `dict`
        dictionary with the parameters that will be used to match policies.

    :return: S_OK( dictionary returned by the PDP ) once the actions have been
      run ( a failing action is only logged ); the failed result of the PDP or
      of the last-minute check otherwise.

    """

    # Feed the PDP with the new parameters and ask for a decision
    self.pdp.setup(decisionParams)

    decisionRes = self.pdp.takeDecision()
    if not decisionRes['OK']:
      gLogger.error('PEP: Something went wrong, not enforcing policies for %s' % decisionParams)
      return decisionRes
    decisions = decisionRes['Value']

    # From here on we work with the decision parameters as the PDP used them
    decisionParams = decisions['decissionParams']
    policyCombinedResult = decisions['policyCombinedResult']
    singlePolicyResults = decisions['singlePolicyResults']

    # The policies have been evaluated and the actions are about to be executed:
    # make sure nobody changed the element in the meantime.
    freshnessCheck = self.__isNotUpdated(decisionParams)
    if not freshnessCheck['OK']:
      return freshnessCheck

    for policyActionName, policyActionType in policyCombinedResult['PolicyAction']:

      # The module holding the action ...
      try:
        actionMod = Utils.voimport('DIRAC.ResourceStatusSystem.PolicySystem.Actions.%s' % policyActionType)
      except ImportError:
        gLogger.error('Error importing %s action' % policyActionType)
        continue

      # ... and the class of the same name inside it
      try:
        action = getattr(actionMod, policyActionType)
      except AttributeError:
        gLogger.error('Error importing %s action class' % policyActionType)
        continue

      actionObj = action(policyActionName, decisionParams, policyCombinedResult,
                         singlePolicyResults, self.clients)

      gLogger.debug((policyActionName, policyActionType))

      actionResult = actionObj.run()
      if not actionResult['OK']:
        gLogger.error(actionResult['Message'])

    return S_OK(decisions)

  def __isNotUpdated(self, decisionParams):
    """ Checks for the existence of the element as it was passed to the PEP. It may
    happen that while the element is being processed by the PEP a user, through the
    web interface or the CLI, has updated the status for this particular element. As
    a result, the PEP would overwrite whatever the user had set. This check is not
    perfect, as a user action can still happen while executing the actions, but
    the probability is close to 0. However, if there is an action that takes seconds
    to be executed, this must be re-evaluated. !

    :Parameters:
      **decisionParams** - `dict`
        dictionary with the parameters that will be used to match policies

    :return: S_OK / S_ERROR

    """

    # Work on a copy, without the keys that cannot be passed as kwargs
    selectParams = decisionParams.copy()
    for notAKwarg in ('element', 'active'):
      del selectParams[notAKwarg]

    # We expect to have an exact match. If not, then something has changed and
    # we cannot proceed with the actions.
    unchangedRow = self.rsClient.selectStatusElement(decisionParams['element'],
                                                     'Status', **selectParams)
    if not unchangedRow['OK']:
      return unchangedRow

    if unchangedRow['Value']:
      return S_OK()

    msg = '%(name)s  ( %(status)s / %(statusType)s ) has been updated after PEP started running'
    return S_ERROR(msg % selectParams)

#...............................................................................
#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF
Exemplo n.º 22
0
class SiteStatus(object):
  """
  RSS helper to interact with the 'Site' family on the DB. It provides the most
  demanded functions and a cache to avoid hitting the server too often.

  It provides these methods to interact with the site statuses:
  * getSiteStatuses
  * getUsableSites
  * getSites
  * setSiteStatus
  """

  __metaclass__ = DIRACSingleton

  def __init__(self):
    """
    Constructor, initializes the rssClient.
    """

    self.log = gLogger.getSubLogger(self.__class__.__name__)
    self.rssConfig = RssConfiguration()
    self.__opHelper = Operations()
    self.rssFlag = ResourceStatus().rssFlag
    self.rsClient = ResourceStatusClient()

    # We can set CacheLifetime and CacheHistory from CS, so that we can tune them.
    cacheLifeTime = int(self.rssConfig.getConfigCache())

    # RSSCache only affects the calls directed to RSS, if using the CS it is not used.
    self.rssCache = RSSCache(cacheLifeTime, self.__updateRssCache)

  def __updateRssCache(self):
    """ Method used to update the rssCache.

        It will try 5 times to contact the RSS before giving up. After each
        failed trial it sleeps ( trial number squared ) seconds and replaces
        the client with a brand new one.

    :return: S_OK( cache dictionary ) or the last failed result
    """

    meta = {'columns': ['Name', 'Status']}

    for ti in range(5):
      rawCache = self.rsClient.selectStatusElement('Site', 'Status', meta=meta)
      if rawCache['OK']:
        break
      self.log.warn("Can't get resource's status", rawCache['Message'] + "; trial %d" % ti)
      sleep(math.pow(ti, 2))
      self.rsClient = ResourceStatusClient()

    if not rawCache['OK']:
      return rawCache
    return S_OK(getCacheDictFromRawData(rawCache['Value']))

  def getSiteStatuses(self, siteNames=None):
    """
    Method that queries the database for status of the sites in a given list.
    A single string site name may also be provided as "siteNames"
    If the input is None, it is interpreted as * ( all ).

    If the RSS flag is set the answer comes from the RSS cache, otherwise the
    WMSAdministrator service is asked for the site mask.

    If match is positive, the output looks like:
    {
     'test1.test1.org': 'Active',
     'test2.test2.org': 'Banned',
    }

    examples
      >>> siteStatus.getSiteStatuses( ['test1.test1.uk', 'test2.test2.net', 'test3.test3.org'] )
          S_OK( { 'test1.test1.org': 'Active', 'test2.test2.net': 'Banned', 'test3.test3.org': 'Active' }  )
      >>> siteStatus.getSiteStatuses( 'NotExists')
          S_ERROR( ... ))
      >>> siteStatus.getSiteStatuses( None )
          S_OK( { 'test1.test1.org': 'Active',
                  'test2.test2.net': 'Banned', },
                  ...
                }
              )

    :Parameters:
      **siteNames** - `list` or `str`
        name(s) of the sites to be matched

    :return: S_OK() || S_ERROR()
    """

    if self.rssFlag:
      return self.__getRSSSiteStatus(siteNames)

    wmsAdmin = RPCClient('WorkloadManagement/WMSAdministrator')

    if not siteNames:
      # No selection: a single call returns the status of every site
      result = wmsAdmin.getSiteMaskStatus()
      if not result['OK']:
        return result
      return S_OK(result['Value'])

    if isinstance(siteNames, basestring):
      siteNames = [siteNames]

    siteStatusDict = {}
    for siteName in siteNames:
      result = wmsAdmin.getSiteMaskStatus(siteName)
      if not result['OK']:
        return result
      siteStatusDict[siteName] = result['Value']

    return S_OK(siteStatusDict)

  def __getRSSSiteStatus(self, siteName=None):
    """ Gets from the cache or the RSS the Sites status. The cache is a
        copy of the DB table. If it is not on the cache, most likely is not going
        to be on the DB.

        There is one exception: item just added to the CS, e.g. new Element.
        The period between it is added to the DB and the changes are propagated
        to the cache will be inconsistent, but not dangerous. Just wait <cacheLifeTime>
        minutes.

    :param siteName: name of the site
    :type siteName: str

    :return: whatever rssCache.match returns
    """

    cacheMatch = self.rssCache.match(siteName, '', '')

    self.log.debug('__getRSSSiteStatus')
    self.log.debug(cacheMatch)

    return cacheMatch

  def getUsableSites(self, siteNames=None):
    """
    Returns, in a list, all sites that are usable, meaning that their
    status is either Active or Degraded.

    examples
      >>> siteStatus.getUsableSites( ['test1.test1.uk', 'test2.test2.net', 'test3.test3.org'] )
          S_OK( ['test1.test1.uk', 'test3.test3.org'] )
      >>> siteStatus.getUsableSites( None )
          S_OK( ['test1.test1.uk', 'test3.test3.org', 'test4.test4.org', 'test5.test5.org', ...] )
      >>> siteStatus.getUsableSites( 'NotExists' )
          S_ERROR( ... )

    :Parameters:
      **siteNames** - `List` or `str`
        name(s) of the sites to be matched

    :return: S_OK() || S_ERROR()
    """

    siteStatusDictRes = self.getSiteStatuses(siteNames)
    if not siteStatusDictRes['OK']:
      return siteStatusDictRes

    usableSites = [siteName for siteName, siteStatus in siteStatusDictRes['Value'].items()
                   if siteStatus in ('Active', 'Degraded')]

    return S_OK(usableSites)

  def getSites(self, siteState='Active'):
    """
    By default, it gets the currently active site list

    examples
      >>> siteStatus.getSites()
          S_OK( ['test1.test1.uk', 'test3.test3.org'] )
      >>> siteStatus.getSites( 'Active' )
          S_OK( ['test1.test1.uk', 'test3.test3.org'] )
      >>> siteStatus.getSites( 'Banned' )
          S_OK( ['test0.test0.uk', ... ] )
      >>> siteStatus.getSites( 'All' )
          S_OK( ['test1.test1.uk', 'test3.test3.org', 'test4.test4.org', 'test5.test5.org'...] )
      >>> siteStatus.getSites( None )
          S_ERROR( ... )

    :Parameters:
      **siteState** - `String`
        state of the sites to be matched ( case insensitive ), or 'All'

    :return: S_OK() || S_ERROR()
    """

    if not siteState:
      return S_ERROR(DErrno.ERESUNK, 'siteState parameter is empty')

    siteStatusDictRes = self.getSiteStatuses()
    if not siteStatusDictRes['OK']:
      return siteStatusDictRes

    # fix case sensitive string
    siteState = siteState.capitalize()

    if siteState == 'All':
      # no filtering requested, return everything
      return S_OK(list(siteStatusDictRes['Value']))

    allowedStateList = ['Active', 'Banned', 'Degraded', 'Probing', 'Error', 'Unknown']
    if siteState not in allowedStateList:
      return S_ERROR(errno.EINVAL, 'Not a valid status, parameter rejected')

    siteList = [siteName for siteName, siteStatus in siteStatusDictRes['Value'].items()
                if siteStatus == siteState]

    return S_OK(siteList)

  def setSiteStatus(self, site, status, comment='No comment'):
    """
    Set the status of a site.

    If the RSS flag is set, the 'Site' 'Status' element is modified through
    the ResourceStatusClient, taking a token valid for one day on behalf of
    the user found in the proxy, and the cache is refreshed. Otherwise the
    site is allowed ( Active, Degraded ) or banned ( any other valid status )
    through the WMSAdministrator service.

    examples
      >>> siteStatus.setSiteStatus( 'site1.test.test', 'Banned' )
          S_OK()
      >>> siteStatus.setSiteStatus( 'site1.test.test', None )
          S_ERROR( ... )

    :Parameters:
      **site** - `String`
        the site whose status is going to be changed
      **status** - `String`
        new status of the site ( case insensitive ), one of Active, Banned,
        Degraded, Probing, Error, Unknown
      **comment** - `String`
        reason for the change

    :return: S_OK() || S_ERROR()
    """

    if not status:
      return S_ERROR(DErrno.ERESUNK, 'status parameter is empty')

    # fix case sensitive string
    status = status.capitalize()
    allowedStateList = ['Active', 'Banned', 'Degraded', 'Probing', 'Error', 'Unknown']

    if status not in allowedStateList:
      return S_ERROR(errno.EINVAL, 'Not a valid status, parameter rejected')

    if self.rssFlag:
      result = getProxyInfo()
      if result['OK']:
        tokenOwner = result['Value']['username']
      else:
        return S_ERROR("Unable to get user proxy info %s " % result['Message'])

      tokenExpiration = datetime.utcnow() + timedelta(days=1)

      self.rssCache.acquireLock()
      try:
        result = self.rsClient.modifyStatusElement('Site', 'Status', status=status, name=site,
                                                   tokenExpiration=tokenExpiration, reason=comment,
                                                   tokenOwner=tokenOwner)
        if result['OK']:
          self.rssCache.refreshCache()
        else:
          _msg = 'Error updating status of site %s to %s' % (site, status)
          gLogger.warn('RSS: %s' % _msg)

      # Release lock, no matter what.
      finally:
        self.rssCache.releaseLock()

    else:
      # The service has to be told which site is affected and why: calling
      # allowSite / banSite with no arguments never names the site.
      wmsAdmin = RPCClient('WorkloadManagement/WMSAdministrator')
      if status in ['Active', 'Degraded']:
        result = wmsAdmin.allowSite(site, comment)
      else:
        result = wmsAdmin.banSite(site, comment)

    return result
Exemplo n.º 23
0
class Synchronizer:
    """
    Every time there is a successful write on the CS, Synchronizer().sync() is
    executed. It updates the database with the values on the CS.

    """
    def __init__(self,
                 rStatus=None,
                 rManagement=None,
                 defaultStatus="Unknown"):
        """
        Prepare the clients and defaults used by the synchronization methods.

        :param rStatus: client used for the status tables; a new
            ResourceStatusClient is created when None.
        :param rManagement: client used for the DowntimeCache table; a new
            ResourceManagementClient is created when None.
        :param defaultStatus: status given to the elements added to the DB.
        """

        # Warm up local CS
        CSHelpers.warmUp()

        # Keep the clients handed in by the caller. They used to be dropped,
        # which left the attributes unset and broke every later use of them.
        self.rStatus = ResourceStatusClient() if rStatus is None else rStatus
        self.rManagement = ResourceManagementClient() if rManagement is None else rManagement
        self.defaultStatus = defaultStatus

        self.rssConfig = RssConfiguration()

        # this just sets the main owner, "rs_svc" just mean "RSS service"
        self.tokenOwner = "rs_svc"

        # if we are running this script as a user (from a CLI),
        # the username found in the proxy will be used as tokenOwner
        result = getProxyInfo()
        if result["OK"]:
            self.tokenOwner = result["Value"]["username"]

    def sync(self, _eventName, _params):
        """
        Entry point of the synchronizer: runs the Sites, Resources and Nodes
        synchronization one after the other. A failing step is logged as an
        error and does not prevent the following steps from running.

        examples:
          >>> s.sync( None, None )
              S_OK()

        :Parameters:
          **_eventName** - any
            unused, only present because the caller passes it.
          **_params** - any
            unused, only present because the caller passes it.

        :return: S_OK
        """

        for syncStep in (self._syncSites, self._syncResources, self._syncNodes):
            outcome = syncStep()
            if not outcome["OK"]:
                gLogger.error(outcome["Message"])

        return S_OK()

    def _syncSites(self):
        """
        Align the sites stored in the RSS DB with the sites declared in the CS.

        Sites found only in the DB are removed; sites found only in the CS are
        added with the default status.

        :return: S_OK(), or the first failing result
        """

        gLogger.info("-- Synchronizing sites --")

        # what the CS knows
        csResult = getSites()
        if not csResult["OK"]:
            return csResult
        csSites = csResult["Value"]

        gLogger.verbose("%s sites found in CS" % len(csSites))

        # what RSS knows
        dbResult = self.rStatus.selectStatusElement(
            "Site", "Status", meta={"columns": ["Name"]})
        if not dbResult["OK"]:
            return dbResult
        dbSites = [row[0] for row in dbResult["Value"]]

        # in the DB, but not (anymore) in the CS
        obsolete = set(dbSites) - set(csSites)
        gLogger.verbose("%s sites to be deleted" % len(obsolete))

        for obsoleteSite in obsolete:
            removal = self.rStatus._extermineStatusElement("Site", obsoleteSite)
            gLogger.verbose("Deleting site %s" % obsoleteSite)
            if not removal["OK"]:
                return removal

        # in the CS, but not (yet) in the DB
        missing = set(csSites) - set(dbSites)
        gLogger.verbose("%s site entries to be added" % len(missing))

        for missingSite in missing:
            insertion = self.rStatus.addIfNotThereStatusElement(
                "Site",
                "Status",
                name=missingSite,
                statusType="all",
                status=self.defaultStatus,
                elementType="Site",
                tokenOwner=self.tokenOwner,
                reason="Synchronized",
            )
            if not insertion["OK"]:
                return insertion

        return S_OK()

    def _syncResources(self):
        """
        Run every resource synchronization in turn: StorageElements, FTS,
        FileCatalogs, ComputingElements and finally the clean-up of resources
        that no longer exist in the CS. A failing step is logged as an error
        and the remaining steps are still executed.

        :return: S_OK
        """

        gLogger.info("-- Synchronizing Resources --")

        steps = (
            ("-> StorageElements", self.__syncStorageElements),
            ("-> FTS", self.__syncFTS),
            ("-> FileCatalogs", self.__syncFileCatalogs),
            ("-> ComputingElements", self.__syncComputingElements),
            ("-> removing resources that no longer exist in the CS",
             self.__removeNonExistingResourcesFromRM),
        )

        for banner, step in steps:
            gLogger.verbose(banner)
            outcome = step()
            if not outcome["OK"]:
                gLogger.error(outcome["Message"])

        return S_OK()

    def _syncNodes(self):
        """
        Synchronize the nodes ( Queues ). A failure is logged as an error.

        :return: S_OK
        """
        gLogger.info("-- Synchronizing Nodes --")

        gLogger.verbose("-> Queues")
        outcome = self.__syncQueues()
        if outcome["OK"]:
            return S_OK()

        gLogger.error(outcome["Message"])
        return S_OK()

    def __removeNonExistingResourcesFromRM(self):
        """
        Drop the DowntimeCache entries whose name is not among the storage
        element hosts, the FTS3 servers or the computing elements of the CS.

        Nothing is done when no URL is found for the ResourceManagement
        service. A failing FTS3 or CE lookup is ignored: its names are then
        simply missing from the list of known resources.

        :return: S_OK(), or the first failing result
        """

        if not getServiceURL("ResourceStatus/ResourceManagement"):
            gLogger.verbose(
                "ResourceManagement is not installed, skipping removal of non existing resources..."
            )
            return S_OK()

        hostsResult = getStorageElementsHosts()
        if not hostsResult["OK"]:
            return hostsResult
        # NOTE: this is the very list returned by the helper, extended in place
        knownResources = hostsResult["Value"]

        ftsResult = getFTS3Servers(hostOnly=True)
        if ftsResult["OK"]:
            knownResources.extend(ftsResult["Value"])

        ceResult = getCESiteMapping()
        if ceResult["OK"]:
            knownResources.extend(list(ceResult["Value"]))

        cached = self.rManagement.selectDowntimeCache()
        if not cached["OK"]:
            return cached

        for entry in cached["Value"]:
            gLogger.verbose("Checking if %s is still in the CS" % entry[0])
            if entry[0] in knownResources:
                continue

            gLogger.verbose("%s is no longer in CS, removing entry..." % entry[0])
            removal = self.rManagement.deleteDowntimeCache(name=entry[0])
            if not removal["OK"]:
                return removal

        return S_OK()

    def __syncComputingElements(self):
        """
        Align the ComputingElement entries of the Resource status table with
        the CS.

        CEs found only in the DB are removed; every ( CE, statusType ) pair
        missing from the DB is added with the default status.

        :return: S_OK(), or the first failing result
        """

        mapping = getCESiteMapping()
        if not mapping["OK"]:
            return mapping
        csNames = list(mapping["Value"])

        gLogger.verbose("%s Computing elements found in CS" % len(csNames))

        selection = self.rStatus.selectStatusElement(
            "Resource", "Status",
            elementType="ComputingElement",
            meta={"columns": ["Name"]})
        if not selection["OK"]:
            return selection
        dbNames = [row[0] for row in selection["Value"]]

        # computing elements known to the DB but gone from the CS
        stale = set(dbNames) - set(csNames)
        gLogger.verbose("%s Computing elements to be deleted" % len(stale))

        for staleName in stale:
            removal = self.rStatus._extermineStatusElement("Resource", staleName)
            gLogger.verbose("... %s" % staleName)
            if not removal["OK"]:
                return removal

        statusTypes = self.rssConfig.getConfigStatusType("ComputingElement")

        # read the table again, now that the stale entries are gone
        selection = self.rStatus.selectStatusElement(
            "Resource", "Status",
            elementType="ComputingElement",
            meta={"columns": ["Name", "StatusType"]})
        if not selection["OK"]:
            return selection
        dbPairs = [(row[0], row[1]) for row in selection["Value"]]

        # every ( ce, statusType ) pair the CS calls for and the DB lacks
        wantedPairs = [(ceName, statusType) for ceName in csNames
                       for statusType in statusTypes]
        missing = set(wantedPairs) - set(dbPairs)

        gLogger.debug("%s Computing elements entries to be added" % len(missing))

        for ceName, statusType in missing:
            insertion = self.rStatus.addIfNotThereStatusElement(
                "Resource",
                "Status",
                name=ceName,
                statusType=statusType,
                status=self.defaultStatus,
                elementType="ComputingElement",
                tokenOwner=self.tokenOwner,
                reason="Synchronized",
            )
            if not insertion["OK"]:
                return insertion

        return S_OK()

    def __syncFileCatalogs(self):
        """
        Align the Catalog entries of the Resource status table with the CS.

        Catalogs found only in the DB are removed; every
        ( catalog, statusType ) pair missing from the DB is added with the
        default status.

        :return: S_OK(), or the first failing result
        """

        csResult = CSHelpers.getFileCatalogs()
        if not csResult["OK"]:
            return csResult
        csNames = csResult["Value"]

        gLogger.verbose("%s File catalogs found in CS" % len(csNames))

        selection = self.rStatus.selectStatusElement(
            "Resource", "Status",
            elementType="Catalog",
            meta={"columns": ["Name"]})
        if not selection["OK"]:
            return selection
        dbNames = [row[0] for row in selection["Value"]]

        # catalogs known to the DB but gone from the CS
        stale = set(dbNames) - set(csNames)
        gLogger.verbose("%s File catalogs to be deleted" % len(stale))

        for staleName in stale:
            removal = self.rStatus._extermineStatusElement("Resource", staleName)
            gLogger.verbose("... %s" % staleName)
            if not removal["OK"]:
                return removal

        statusTypes = self.rssConfig.getConfigStatusType("Catalog")

        # read the table again, now that the stale entries are gone
        selection = self.rStatus.selectStatusElement(
            "Resource", "Status",
            elementType="Catalog",
            meta={"columns": ["Name", "StatusType"]})
        if not selection["OK"]:
            return selection
        dbPairs = [(row[0], row[1]) for row in selection["Value"]]

        # every ( catalog, statusType ) pair the CS calls for and the DB lacks
        wantedPairs = [(catalogName, statusType) for catalogName in csNames
                       for statusType in statusTypes]
        missing = set(wantedPairs) - set(dbPairs)

        gLogger.verbose("%s File catalogs entries to be added" % len(missing))

        for catalogName, statusType in missing:
            insertion = self.rStatus.addIfNotThereStatusElement(
                "Resource",
                "Status",
                name=catalogName,
                statusType=statusType,
                status=self.defaultStatus,
                elementType="Catalog",
                tokenOwner=self.tokenOwner,
                reason="Synchronized",
            )
            if not insertion["OK"]:
                return insertion

        return S_OK()

    def __syncFTS(self):
        """
        Align the FTS entries of the Resource status table with the CS.

        FTS endpoints found only in the DB are removed; every
        ( endpoint, statusType ) pair missing from the DB is added with the
        default status.

        :return: S_OK(), or the first failing result
        """

        csResult = CSHelpers.getFTS()
        if not csResult["OK"]:
            return csResult
        csNames = csResult["Value"]

        gLogger.verbose("%s FTS endpoints found in CS" % len(csNames))

        selection = self.rStatus.selectStatusElement(
            "Resource", "Status",
            elementType="FTS",
            meta={"columns": ["Name"]})
        if not selection["OK"]:
            return selection
        dbNames = [row[0] for row in selection["Value"]]

        # endpoints known to the DB but gone from the CS
        stale = set(dbNames) - set(csNames)
        gLogger.verbose("%s FTS endpoints to be deleted" % len(stale))

        for staleName in stale:
            removal = self.rStatus._extermineStatusElement("Resource", staleName)
            gLogger.verbose("... %s" % staleName)
            if not removal["OK"]:
                return removal

        statusTypes = self.rssConfig.getConfigStatusType("FTS")

        # read the table again, now that the stale entries are gone
        selection = self.rStatus.selectStatusElement(
            "Resource", "Status",
            elementType="FTS",
            meta={"columns": ["Name", "StatusType"]})
        if not selection["OK"]:
            return selection
        dbPairs = [(row[0], row[1]) for row in selection["Value"]]

        # every ( endpoint, statusType ) pair the CS calls for and the DB lacks
        wantedPairs = [(ftsName, statusType) for ftsName in csNames
                       for statusType in statusTypes]
        missing = set(wantedPairs) - set(dbPairs)

        gLogger.verbose("%s FTS endpoints entries to be added" % len(missing))

        for ftsName, statusType in missing:
            insertion = self.rStatus.addIfNotThereStatusElement(
                "Resource",
                "Status",
                name=ftsName,
                statusType=statusType,
                status=self.defaultStatus,
                elementType="FTS",
                tokenOwner=self.tokenOwner,
                reason="Synchronized",
            )
            if not insertion["OK"]:
                return insertion

        return S_OK()

    def __syncStorageElements(self):
        """
        Align the StorageElement entries of the Resource status table with the
        storage elements reported by DMSHelpers.

        SEs found only in the DB are removed; every ( SE, statusType ) pair
        missing from the DB is added with the default status.

        :return: S_OK(), or the first failing result
        """

        csNames = DMSHelpers().getStorageElements()

        gLogger.verbose("%s storage elements found in CS" % len(csNames))

        selection = self.rStatus.selectStatusElement(
            "Resource", "Status",
            elementType="StorageElement",
            meta={"columns": ["Name"]})
        if not selection["OK"]:
            return selection
        dbNames = [row[0] for row in selection["Value"]]

        # storage elements known to the DB but gone from the CS
        stale = set(dbNames) - set(csNames)
        gLogger.verbose("%s storage elements to be deleted" % len(stale))

        for staleName in stale:
            removal = self.rStatus._extermineStatusElement("Resource", staleName)
            gLogger.verbose("... %s" % staleName)
            if not removal["OK"]:
                return removal

        statusTypes = self.rssConfig.getConfigStatusType("StorageElement")

        # read the table again, now that the stale entries are gone
        selection = self.rStatus.selectStatusElement(
            "Resource", "Status",
            elementType="StorageElement",
            meta={"columns": ["Name", "StatusType"]})
        if not selection["OK"]:
            return selection
        dbPairs = [(row[0], row[1]) for row in selection["Value"]]

        # every ( se, statusType ) pair the CS calls for and the DB lacks
        wantedPairs = [(seName, statusType) for seName in csNames
                       for statusType in statusTypes]
        missing = set(wantedPairs) - set(dbPairs)

        gLogger.verbose("%s storage element entries to be added" % len(missing))

        for seName, statusType in missing:
            insertion = self.rStatus.addIfNotThereStatusElement(
                "Resource",
                "Status",
                name=seName,
                statusType=statusType,
                status=self.defaultStatus,
                elementType="StorageElement",
                tokenOwner=self.tokenOwner,
                reason="Synchronized",
            )
            if not insertion["OK"]:
                return insertion

        return S_OK()

    def __syncQueues(self):
        """
        Align the Queue entries of the Node status table with the CS.

        Queues found only in the DB are removed; every ( queue, statusType )
        pair missing from the DB is added with the default status.

        :return: S_OK(), or the first failing result
        """

        csResult = CSHelpers.getQueuesRSS()
        if not csResult["OK"]:
            return csResult
        csNames = csResult["Value"]

        gLogger.verbose("%s Queues found in CS" % len(csNames))

        selection = self.rStatus.selectStatusElement(
            "Node", "Status",
            elementType="Queue",
            meta={"columns": ["Name"]})
        if not selection["OK"]:
            return selection
        dbNames = [row[0] for row in selection["Value"]]

        # queues known to the DB but gone from the CS
        stale = set(dbNames) - set(csNames)
        gLogger.verbose("%s Queues to be deleted" % len(stale))

        for staleName in stale:
            removal = self.rStatus._extermineStatusElement("Node", staleName)
            gLogger.verbose("... %s" % staleName)
            if not removal["OK"]:
                return removal

        statusTypes = self.rssConfig.getConfigStatusType("Queue")

        # read the table again, now that the stale entries are gone
        selection = self.rStatus.selectStatusElement(
            "Node", "Status",
            elementType="Queue",
            meta={"columns": ["Name", "StatusType"]})
        if not selection["OK"]:
            return selection
        dbPairs = [(row[0], row[1]) for row in selection["Value"]]

        # every ( queue, statusType ) pair the CS calls for and the DB lacks
        wantedPairs = [(queueName, statusType) for queueName in csNames
                       for statusType in statusTypes]
        missing = set(wantedPairs) - set(dbPairs)

        gLogger.verbose("%s Queue entries to be added" % len(missing))

        for queueName, statusType in missing:
            insertion = self.rStatus.addIfNotThereStatusElement(
                "Node",
                "Status",
                name=queueName,
                statusType=statusType,
                status=self.defaultStatus,
                elementType="Queue",
                tokenOwner=self.tokenOwner,
                reason="Synchronized",
            )
            if not insertion["OK"]:
                return insertion

        return S_OK()
Exemplo n.º 24
0
class Synchronizer(object):
    '''
  Every time there is a successful write on the CS, Synchronizer().sync() is
  executed. It updates the database with the values on the CS.

  '''
    def __init__(self,
                 rStatus=None,
                 rManagement=None,
                 defaultStatus="Unknown"):
        '''
        Prepare the clients and defaults used by the synchronization methods.

        :param rStatus: client used for the status tables; a new
            ResourceStatusClient is created when None.
        :param rManagement: client used for the DowntimeCache table; a new
            ResourceManagementClient is created when None.
        :param defaultStatus: status given to the elements added to the DB.
        '''

        # Warm up local CS
        CSHelpers.warmUp()

        # Keep the clients handed in by the caller. They used to be dropped,
        # which left the attributes unset and broke every later use of them.
        self.rStatus = ResourceStatusClient() if rStatus is None else rStatus
        self.rManagement = ResourceManagementClient() if rManagement is None else rManagement
        self.defaultStatus = defaultStatus

        self.rssConfig = RssConfiguration()

        # this just sets the main owner, "rs_svc" just mean "RSS service"
        self.tokenOwner = "rs_svc"

        # if we are running this script as a user (from a CLI),
        # the username found in the proxy will be used as tokenOwner
        result = getProxyInfo()
        if result['OK']:
            self.tokenOwner = result['Value']['username']

    def sync(self, _eventName, _params):
        '''
        Entry point of the synchronizer: runs the Sites, Resources and Nodes
        synchronization one after the other. A failing step is logged as an
        error and does not prevent the following steps from running.

        examples:
          >>> s.sync( None, None )
              S_OK()

        :Parameters:
          **_eventName** - any
            unused, only present because the caller passes it.
          **_params** - any
            unused, only present because the caller passes it.

        :return: S_OK
        '''

        for syncStep in (self._syncSites, self._syncResources, self._syncNodes):
            outcome = syncStep()
            if not outcome['OK']:
                gLogger.error(outcome['Message'])

        return S_OK()

    def _syncSites(self):
        '''
        Align the sites stored in the RSS DB with the sites declared in the CS.

        Sites found only in the DB are removed; sites found only in the CS are
        added with the default status.

        :return: S_OK(), or the first failing result
        '''

        gLogger.info('-- Synchronizing sites --')

        # what the CS knows
        csResult = getSites()
        if not csResult['OK']:
            return csResult
        csSites = csResult['Value']

        gLogger.verbose('%s sites found in CS' % len(csSites))

        # what RSS knows
        dbResult = self.rStatus.selectStatusElement(
            'Site', 'Status', meta={'columns': ['Name']})
        if not dbResult['OK']:
            return dbResult
        dbSites = [row[0] for row in dbResult['Value']]

        # in the DB, but not (anymore) in the CS
        obsolete = set(dbSites) - set(csSites)
        gLogger.verbose('%s sites to be deleted' % len(obsolete))

        for obsoleteSite in obsolete:
            removal = self.rStatus._extermineStatusElement('Site', obsoleteSite)
            gLogger.verbose('Deleting site %s' % obsoleteSite)
            if not removal['OK']:
                return removal

        # in the CS, but not (yet) in the DB
        missing = set(csSites) - set(dbSites)
        gLogger.verbose('%s site entries to be added' % len(missing))

        for missingSite in missing:
            insertion = self.rStatus.addIfNotThereStatusElement(
                'Site',
                'Status',
                name=missingSite,
                statusType='all',
                status=self.defaultStatus,
                elementType='Site',
                tokenOwner=self.tokenOwner,
                reason='Synchronized')
            if not insertion['OK']:
                return insertion

        return S_OK()

    def _syncResources(self):
        '''
        Run every resource synchronization in turn: StorageElements, FTS,
        FileCatalogs, ComputingElements and finally the clean-up of resources
        that no longer exist in the CS. A failing step is logged as an error
        and the remaining steps are still executed.

        :return: S_OK
        '''

        gLogger.info('-- Synchronizing Resources --')

        steps = (
            ('-> StorageElements', self.__syncStorageElements),
            ('-> FTS', self.__syncFTS),
            ('-> FileCatalogs', self.__syncFileCatalogs),
            ('-> ComputingElements', self.__syncComputingElements),
            ('-> removing resources that no longer exist in the CS',
             self.__removeNonExistingResourcesFromRM),
        )

        for banner, step in steps:
            gLogger.verbose(banner)
            outcome = step()
            if not outcome['OK']:
                gLogger.error(outcome['Message'])

        # FIXME: VOMS

        return S_OK()

    def _syncNodes(self):
        '''
        Synchronize the nodes ( Queues ). A failure is logged as an error.

        :return: S_OK
        '''
        gLogger.info('-- Synchronizing Nodes --')

        gLogger.verbose('-> Queues')
        outcome = self.__syncQueues()
        if outcome['OK']:
            return S_OK()

        gLogger.error(outcome['Message'])
        return S_OK()

    def __removeNonExistingResourcesFromRM(self):
        '''
        Drop the DowntimeCache entries whose name is not among the storage
        element hosts, the FTS3 servers or the computing elements of the CS.

        Nothing is done when no URL is found for the ResourceManagement
        service. A failing FTS3 or CE lookup is ignored: its names are then
        simply missing from the list of known resources.

        :return: S_OK(), or the first failing result
        '''

        if not getServiceURL("ResourceStatus/ResourceManagement"):
            gLogger.verbose(
                'ResourceManagement is not installed, skipping removal of non existing resources...'
            )
            return S_OK()

        hostsResult = getStorageElementsHosts()
        if not hostsResult['OK']:
            return hostsResult
        # NOTE: this is the very list returned by the helper, extended in place
        knownResources = hostsResult['Value']

        ftsResult = getFTS3Servers(hostOnly=True)
        if ftsResult['OK']:
            knownResources.extend(ftsResult['Value'])

        ceResult = CSHelpers.getComputingElements()
        if ceResult['OK']:
            knownResources.extend(ceResult['Value'])

        cached = self.rManagement.selectDowntimeCache()
        if not cached['OK']:
            return cached

        for entry in cached['Value']:
            gLogger.verbose('Checking if %s is still in the CS' % entry[0])
            if entry[0] in knownResources:
                continue

            gLogger.verbose('%s is no longer in CS, removing entry...' % entry[0])
            removal = self.rManagement.deleteDowntimeCache(name=entry[0])
            if not removal['OK']:
                return removal

        return S_OK()

    def __syncComputingElements(self):
        '''
        Align the ComputingElement entries of the Resource status table with
        the CS.

        CEs found only in the DB are removed; every ( CE, statusType ) pair
        missing from the DB is added with the default status.

        :return: S_OK(), or the first failing result
        '''

        csResult = CSHelpers.getComputingElements()
        if not csResult['OK']:
            return csResult
        csNames = csResult['Value']

        gLogger.verbose('%s Computing elements found in CS' % len(csNames))

        selection = self.rStatus.selectStatusElement(
            'Resource', 'Status',
            elementType='ComputingElement',
            meta={'columns': ['Name']})
        if not selection['OK']:
            return selection
        dbNames = [row[0] for row in selection['Value']]

        # computing elements known to the DB but gone from the CS
        stale = set(dbNames) - set(csNames)
        gLogger.verbose('%s Computing elements to be deleted' % len(stale))

        for staleName in stale:
            removal = self.rStatus._extermineStatusElement('Resource', staleName)
            gLogger.verbose('... %s' % staleName)
            if not removal['OK']:
                return removal

        statusTypes = self.rssConfig.getConfigStatusType('ComputingElement')

        # read the table again, now that the stale entries are gone
        selection = self.rStatus.selectStatusElement(
            'Resource', 'Status',
            elementType='ComputingElement',
            meta={'columns': ['Name', 'StatusType']})
        if not selection['OK']:
            return selection
        dbPairs = [(row[0], row[1]) for row in selection['Value']]

        # every ( ce, statusType ) pair the CS calls for and the DB lacks
        wantedPairs = [(ceName, statusType) for ceName in csNames
                       for statusType in statusTypes]
        missing = set(wantedPairs) - set(dbPairs)

        gLogger.debug('%s Computing elements entries to be added' % len(missing))

        for ceName, statusType in missing:
            insertion = self.rStatus.addIfNotThereStatusElement(
                'Resource',
                'Status',
                name=ceName,
                statusType=statusType,
                status=self.defaultStatus,
                elementType='ComputingElement',
                tokenOwner=self.tokenOwner,
                reason='Synchronized')
            if not insertion['OK']:
                return insertion

        return S_OK()

    def __syncFileCatalogs(self):
        '''
        Align the Catalog entries of the Resource status table with the CS.

        Catalogs found only in the DB are removed; every
        ( catalog, statusType ) pair missing from the DB is added with the
        default status.

        :return: S_OK(), or the first failing result
        '''

        csResult = CSHelpers.getFileCatalogs()
        if not csResult['OK']:
            return csResult
        csNames = csResult['Value']

        gLogger.verbose('%s File catalogs found in CS' % len(csNames))

        selection = self.rStatus.selectStatusElement(
            'Resource', 'Status',
            elementType='Catalog',
            meta={'columns': ['Name']})
        if not selection['OK']:
            return selection
        dbNames = [row[0] for row in selection['Value']]

        # catalogs known to the DB but gone from the CS
        stale = set(dbNames) - set(csNames)
        gLogger.verbose('%s File catalogs to be deleted' % len(stale))

        for staleName in stale:
            removal = self.rStatus._extermineStatusElement('Resource', staleName)
            gLogger.verbose('... %s' % staleName)
            if not removal['OK']:
                return removal

        statusTypes = self.rssConfig.getConfigStatusType('Catalog')

        # read the table again, now that the stale entries are gone
        selection = self.rStatus.selectStatusElement(
            'Resource', 'Status',
            elementType='Catalog',
            meta={'columns': ['Name', 'StatusType']})
        if not selection['OK']:
            return selection
        dbPairs = [(row[0], row[1]) for row in selection['Value']]

        # every ( catalog, statusType ) pair the CS calls for and the DB lacks
        wantedPairs = [(catalogName, statusType) for catalogName in csNames
                       for statusType in statusTypes]
        missing = set(wantedPairs) - set(dbPairs)

        gLogger.verbose('%s File catalogs entries to be added' % len(missing))

        for catalogName, statusType in missing:
            insertion = self.rStatus.addIfNotThereStatusElement(
                'Resource',
                'Status',
                name=catalogName,
                statusType=statusType,
                status=self.defaultStatus,
                elementType='Catalog',
                tokenOwner=self.tokenOwner,
                reason='Synchronized')
            if not insertion['OK']:
                return insertion

        return S_OK()

    def __syncFTS(self):
        '''
        Sync FTS: compares the FTS endpoints in the CS with the ones in the DB.

        Endpoints present in the DB but absent from the CS are removed, and
        every ( name, statusType ) pair derived from the CS that the DB lacks
        is added with `self.defaultStatus`.

        :return: S_OK() on success, otherwise the first failing result
        '''

        csResult = CSHelpers.getFTS()
        if not csResult['OK']:
            return csResult
        csEndpoints = csResult['Value']

        gLogger.verbose('%s FTS endpoints found in CS' % len(csEndpoints))

        dbResult = self.rStatus.selectStatusElement(
            'Resource',
            'Status',
            elementType='FTS',
            meta={'columns': ['Name']})
        if not dbResult['OK']:
            return dbResult
        dbEndpoints = [row[0] for row in dbResult['Value']]

        # FTS endpoints that are in the DB but not in the CS
        obsolete = list(set(dbEndpoints).difference(set(csEndpoints)))
        gLogger.verbose('%s FTS endpoints to be deleted' % len(obsolete))

        # Remove the obsolete endpoints one by one, stopping at the first error
        for endpoint in obsolete:
            removal = self.rStatus._extermineStatusElement(
                'Resource', endpoint)
            gLogger.verbose('... %s' % endpoint)
            if not removal['OK']:
                return removal

        statusTypes = self.rssConfig.getConfigStatusType('FTS')

        dbResult = self.rStatus.selectStatusElement(
            'Resource',
            'Status',
            elementType='FTS',
            meta={'columns': ['Name', 'StatusType']})
        if not dbResult['OK']:
            return dbResult
        knownPairs = [(row[0], row[1]) for row in dbResult['Value']]

        # Every ( endpoint, statusType ) pair expected from the CS; the ones
        # the DB does not hold yet are added below.
        expectedPairs = [(endpoint, statusType) for endpoint in csEndpoints
                         for statusType in statusTypes]
        missing = list(set(expectedPairs).difference(set(knownPairs)))

        gLogger.verbose('%s FTS endpoints entries to be added' %
                        len(missing))

        for name, statusType in missing:
            addition = self.rStatus.addIfNotThereStatusElement(
                'Resource',
                'Status',
                name=name,
                statusType=statusType,
                status=self.defaultStatus,
                elementType='FTS',
                tokenOwner=self.tokenOwner,
                reason='Synchronized')
            if not addition['OK']:
                return addition

        return S_OK()

    def __syncStorageElements(self):
        '''
        Sync StorageElements: compares the storage elements in the CS with
        the ones in the DB.

        Storage elements present in the DB but absent from the CS are
        removed, and every ( name, statusType ) pair derived from the CS that
        the DB lacks is added with `self.defaultStatus`.

        :return: S_OK() on success, otherwise the first failing result
        '''

        csNames = DMSHelpers().getStorageElements()

        gLogger.verbose('%s storage elements found in CS' % len(csNames))

        dbResult = self.rStatus.selectStatusElement(
            'Resource',
            'Status',
            elementType='StorageElement',
            meta={'columns': ['Name']})
        if not dbResult['OK']:
            return dbResult
        dbNames = [row[0] for row in dbResult['Value']]

        # Storage elements that are in the DB but not in the CS
        obsolete = list(set(dbNames).difference(set(csNames)))
        gLogger.verbose('%s storage elements to be deleted' % len(obsolete))

        # Remove the obsolete ones one by one, stopping at the first error
        for seName in obsolete:
            removal = self.rStatus._extermineStatusElement(
                'Resource', seName)
            gLogger.verbose('... %s' % seName)
            if not removal['OK']:
                return removal

        statusTypes = self.rssConfig.getConfigStatusType('StorageElement')

        dbResult = self.rStatus.selectStatusElement(
            'Resource',
            'Status',
            elementType='StorageElement',
            meta={'columns': ['Name', 'StatusType']})
        if not dbResult['OK']:
            return dbResult
        knownPairs = [(row[0], row[1]) for row in dbResult['Value']]

        # Every ( se, statusType ) pair expected from the CS; the ones the DB
        # does not hold yet are added below.
        expectedPairs = [(seName, statusType) for seName in csNames
                         for statusType in statusTypes]
        missing = list(set(expectedPairs).difference(set(knownPairs)))

        gLogger.verbose('%s storage element entries to be added' %
                        len(missing))

        for name, statusType in missing:
            addition = self.rStatus.addIfNotThereStatusElement(
                'Resource',
                'Status',
                name=name,
                statusType=statusType,
                status=self.defaultStatus,
                elementType='StorageElement',
                tokenOwner=self.tokenOwner,
                reason='Synchronized')
            if not addition['OK']:
                return addition

        return S_OK()

    def __syncQueues(self):
        '''
        Sync Queues: compares the queues in the CS with the ones in the DB.

        Queues present in the DB ( 'Node' family ) but absent from the CS are
        removed, and every ( name, statusType ) pair derived from the CS that
        the DB lacks is added with `self.defaultStatus`.

        :return: S_OK() on success, otherwise the first failing result
        '''

        csResult = CSHelpers.getQueuesRSS()
        if not csResult['OK']:
            return csResult
        csQueues = csResult['Value']

        gLogger.verbose('%s Queues found in CS' % len(csQueues))

        dbResult = self.rStatus.selectStatusElement(
            'Node',
            'Status',
            elementType='Queue',
            meta={'columns': ['Name']})
        if not dbResult['OK']:
            return dbResult
        dbQueues = [row[0] for row in dbResult['Value']]

        # Queues that are in the DB but not in the CS
        obsolete = list(set(dbQueues).difference(set(csQueues)))
        gLogger.verbose('%s Queues to be deleted' % len(obsolete))

        # Remove the obsolete queues one by one, stopping at the first error
        for queueName in obsolete:
            removal = self.rStatus._extermineStatusElement('Node', queueName)
            gLogger.verbose('... %s' % queueName)
            if not removal['OK']:
                return removal

        statusTypes = self.rssConfig.getConfigStatusType('Queue')

        dbResult = self.rStatus.selectStatusElement(
            'Node',
            'Status',
            elementType='Queue',
            meta={'columns': ['Name', 'StatusType']})
        if not dbResult['OK']:
            return dbResult
        knownPairs = [(row[0], row[1]) for row in dbResult['Value']]

        # Every ( queue, statusType ) pair expected from the CS; the ones the
        # DB does not hold yet are added below.
        expectedPairs = [(queueName, statusType) for queueName in csQueues
                         for statusType in statusTypes]
        missing = list(set(expectedPairs).difference(set(knownPairs)))

        gLogger.verbose('%s Queue entries to be added' % len(missing))

        for name, statusType in missing:
            addition = self.rStatus.addIfNotThereStatusElement(
                'Node',
                'Status',
                name=name,
                statusType=statusType,
                status=self.defaultStatus,
                elementType='Queue',
                tokenOwner=self.tokenOwner,
                reason='Synchronized')
            if not addition['OK']:
                return addition

        return S_OK()
Exemplo n.º 25
0
class SummarizeLogsAgent( AgentModule ):
  '''
    Agent that reads the recent entries of the <element>Log tables and copies
    to the <element>History tables only those that represent a change of
    status, for the elements Site, Resource and Node.
  '''

  # Date format in database
  __dateFormat = '%Y-%m-%d %H:%M:%S'

  def __init__( self, *args, **kwargs ):
    ''' c'tor
    '''

    AgentModule.__init__( self, *args, **kwargs )

    # Instantiated in initialize()
    self.rsClient = None

  def initialize( self ):
    ''' Standard initialize: instantiates the ResourceStatusClient.

        :return: S_OK()
    '''

    self.rsClient = ResourceStatusClient()

    return S_OK()

  def execute( self ):
    '''
      For every element family, selects the status changes logged during the
      last hour and writes them to the corresponding History table. Errors are
      logged and never propagated.

      :return: S_OK()
    '''

    # FIXME: probably this can be obtained from RssConfiguration instead
    elements = ( 'Site', 'Resource', 'Node' )

    # We do not want neither minutes, nor seconds nor microseconds
    thisHour = datetime.utcnow().replace( minute = 0, second = 0, microsecond = 0 )

    for element in elements:

      self.log.info( 'Summarizing %s' % element )

      selectLogElements = self._selectLogElements( element, thisHour )
      if not selectLogElements[ 'OK' ]:
        self.log.error( selectLogElements[ 'Message' ] )
        continue
      selectLogElements = selectLogElements[ 'Value' ]

      for selectedKey, selectedItem in selectLogElements.items():

        sRes = self._logSelectedLogElement( element, selectedKey, selectedItem, thisHour )
        if not sRes[ 'OK' ]:
          # The first failure stops the processing of this element family
          self.log.error( sRes[ 'Message' ] )
          break

    return S_OK()

  def _selectLogElements( self, element, thisHour ):
    '''
      For a given element, selects all the entries on the <element>Log table
      with LastCheckTime > <lastHour>. It groups them by tuples of
      ( <name>, <statusType> ) and keeps only the statuses that represent
      a change in the status.

      :param element: element family, used as table prefix
      :param thisHour: datetime marking the upper limit of the time window;
                       the lower limit is one hour earlier
      :return: S_OK( { ( name, statusType ) : [ elementDict, ... ] } ) | S_ERROR
    '''

    lastHour = thisHour - timedelta( hours = 1 )

    selectResults = self.rsClient.selectStatusElement( element, 'Log',
                                                       meta = { 'newer' : ( 'LastCheckTime', lastHour ) } )
    if not selectResults[ 'OK' ]:
      return selectResults

    selectedItems = {}
    selectColumns = selectResults[ 'Columns' ]
    selectResults = selectResults[ 'Value' ]

    for selectResult in selectResults:

      elementDict = dict( zip( selectColumns, selectResult ) )

      # Entries newer than the window upper limit are left for the next run
      if elementDict[ 'LastCheckTime' ] > thisHour:
        continue

      key = ( elementDict[ 'Name' ], elementDict[ 'StatusType' ] )

      if key not in selectedItems:
        selectedItems[ key ] = [ elementDict ]
      else:
        # Only keep the entry if it differs from the last one kept
        lastStatus = selectedItems[ key ][ -1 ][ 'Status' ]
        if lastStatus != elementDict[ 'Status' ]:
          selectedItems[ key ].append( elementDict )

    return S_OK( selectedItems )

  def _logSelectedLogElement( self, element, selectedKey, selectedItem, thisHour ):
    '''
      Given an element, a selectedKey - which is a tuple ( <name>, <statusType> )
      and a list of dictionaries, this method inserts them. Before inserting
      them, checks whether the first one is or is not on the <element>History
      table. If it is, it is not inserted.

      :param element: element family, used as table prefix
      :param selectedKey: tuple ( <name>, <statusType> )
      :param selectedItem: non-empty list of dictionaries, one per status change
      :param thisHour: datetime marking the upper limit of the time window
      :return: S_OK() | S_ERROR
    '''

    name, statusType = selectedKey

    selectedRes = self.rsClient.selectStatusElement( element, 'History', name,
                                                     statusType,
                                                     meta = { 'columns' : [ 'Status', 'LastCheckTime' ] } )

    if not selectedRes[ 'OK' ]:
      return selectedRes
    selectedRes = selectedRes[ 'Value' ]

    selectedStatus = None
    if selectedRes:

      # Get the last selectedRes, which will be the newest one. Each selectedRes
      # is a tuple, in this case, containing two elements - Status, LastCheckTime
      # NOTE(review): this assumes the rows come back sorted by time - confirm.
      selectedStatus, selectedLastTime = selectedRes[ -1 ]

      if selectedLastTime > thisHour - timedelta( hours = 1 ):
        return S_ERROR( 'The agent has run once on this time span, skipping' )

    # If the first of the selected items has a different status than the latest
    # on the history, we add it.
    if selectedItem[ 0 ][ 'Status' ] != selectedStatus:

      res = self._logToHistoryTable( element, selectedItem[ 0 ] )
      if not res[ 'OK' ]:
        return res

    # The remaining items are status changes by construction, all are inserted
    for selectedItemDict in selectedItem[ 1: ]:

      res = self._logToHistoryTable( element, selectedItemDict )
      if not res[ 'OK' ]:
        return res

    return S_OK()

  def _logToHistoryTable( self, element, elementDict ):
    '''
      Given an element and a dictionary with all the arguments, this method
      inserts a new entry on the <element>History table

      :param element: element family, used as table prefix
      :param elementDict: dictionary with the columns of the entry to insert
      :return: result of insertStatusElement, or S_ERROR if a column is missing
    '''

    try:

      name = elementDict[ 'Name' ]
      statusType = elementDict[ 'StatusType' ]
      status = elementDict[ 'Status' ]
      elementType = elementDict[ 'ElementType' ]
      reason = elementDict[ 'Reason' ]
      dateEffective = elementDict[ 'DateEffective' ]
      lastCheckTime = elementDict[ 'LastCheckTime' ]
      tokenOwner = elementDict[ 'TokenOwner' ]
      tokenExpiration = elementDict[ 'TokenExpiration' ]

    # "as" form is valid on Python 2.6+ and Python 3
    except KeyError as e:
      return S_ERROR( e )

    return self.rsClient.insertStatusElement( element, 'History', name, statusType,
                                              status, elementType, reason,
                                              dateEffective, lastCheckTime,
                                              tokenOwner, tokenExpiration )
Exemplo n.º 26
0
class ElementInspectorAgent(AgentModule):
    '''
    The ElementInspector agent is a generic agent used to check the elements
    of one of the elementTypes ( e.g. Site, Resource, Node ).

    This Agent takes care of the Elements. In order to do so, it gathers
    the eligible ones and then evaluates their statuses with the PEP.
    '''

    # Max number of worker threads by default
    __maxNumberOfThreads = 5
    # ElementType, to be defined among Site, Resource or Node
    __elementType = None
    # Inspection freqs, defaults, the lower, the higher priority to be checked.
    # Error state usually means there is a glitch somewhere, so it has the highest
    # priority.
    __checkingFreqs = {
        'Default': {
            'Active': 60,
            'Degraded': 30,
            'Probing': 30,
            'Banned': 30,
            'Unknown': 15,
            'Error': 15
        }
    }
    # queue size limit to stop feeding
    __limitQueueFeeder = 15

    def __init__(self, *args, **kwargs):
        ''' c'tor
        '''

        AgentModule.__init__(self, *args, **kwargs)

        # members initialization

        self.maxNumberOfThreads = self.__maxNumberOfThreads
        self.elementType = self.__elementType
        self.checkingFreqs = self.__checkingFreqs
        self.limitQueueFeeder = self.__limitQueueFeeder

        # Instantiated in initialize()
        self.elementsToBeChecked = None
        self.threadPool = None
        self.rsClient = None
        self.clients = {}

    def initialize(self):
        ''' Standard initialize.

        Reads the agent options ( falling back to the class defaults ),
        creates the queue and the thread pool and instantiates the clients
        handed over to the PEP.

        :return: S_OK()
        '''

        self.maxNumberOfThreads = self.am_getOption('maxNumberOfThreads',
                                                    self.maxNumberOfThreads)
        self.elementType = self.am_getOption('elementType', self.elementType)
        self.checkingFreqs = self.am_getOption('checkingFreqs',
                                               self.checkingFreqs)
        self.limitQueueFeeder = self.am_getOption('limitQueueFeeder',
                                                  self.limitQueueFeeder)

        self.elementsToBeChecked = Queue.Queue()
        self.threadPool = ThreadPool(self.maxNumberOfThreads,
                                     self.maxNumberOfThreads)

        self.rsClient = ResourceStatusClient()

        self.clients['ResourceStatusClient'] = self.rsClient
        self.clients['ResourceManagementClient'] = ResourceManagementClient()
        self.clients['PilotsDB'] = PilotAgentsDB()

        return S_OK()

    def execute(self):
        '''
        Feeds the queue with the elements that are due for a check and starts
        the worker threads needed to process them.

        :return: S_OK(), or the failing result of the elements query
        '''

        # If there are more than limitQueueFeeder elements still waiting in
        # the queue, we do not add anything on this cycle; the threads keep
        # running and processing items from the queue on background.

        qsize = self.elementsToBeChecked.qsize()
        if qsize > self.limitQueueFeeder:
            self.log.warn(
                'Queue not empty ( %s > %s ), skipping feeding loop' %
                (qsize, self.limitQueueFeeder))
            return S_OK()

        # We get all the elements, then we filter.
        elements = self.rsClient.selectStatusElement(self.elementType,
                                                     'Status')
        if not elements['OK']:
            self.log.error(elements['Message'])
            return elements

        utcnow = datetime.datetime.utcnow().replace(microsecond=0)

        # filter elements by Type
        for element in elements['Value']:

            # Maybe an overkill, but this way I have NEVER again to worry about order
            # of elements returned by mySQL on tuples
            elemDict = dict(zip(elements['Columns'], element))

            # We skip the elements with token different than "rs_svc"
            if elemDict['TokenOwner'] != 'rs_svc':
                self.log.info('Skipping %s ( %s ) with token %s' %
                              (elemDict['Name'], elemDict['StatusType'],
                               elemDict['TokenOwner']))
                continue

            # Element types without their own frequencies use the defaults
            if elemDict['ElementType'] not in self.checkingFreqs:
                timeToNextCheck = self.checkingFreqs['Default'][
                    elemDict['Status']]
            else:
                timeToNextCheck = self.checkingFreqs[elemDict['ElementType']][
                    elemDict['Status']]

            if utcnow - datetime.timedelta(
                    minutes=timeToNextCheck) > elemDict['LastCheckTime']:

                # We are not checking if the item is already on the queue or not. It may
                # be there, but in any case, it is not a big problem.

                # Same keys as the DB columns, with the first letter lowered
                lowerElementDict = {'element': self.elementType}
                for key, value in elemDict.items():
                    lowerElementDict[key[0].lower() + key[1:]] = value

                # We add lowerElementDict to the queue
                self.elementsToBeChecked.put(lowerElementDict)
                self.log.verbose('%s # "%s" # "%s" # %s # %s' %
                                 (elemDict['Name'], elemDict['ElementType'],
                                  elemDict['StatusType'], elemDict['Status'],
                                  elemDict['LastCheckTime']))

        # Measure size of the queue, more or less, to know how many threads should
        # we start !
        queueSize = self.elementsToBeChecked.qsize()
        # 30, could have been other number.. but it works reasonably well. ( +1 to get ceil )
        # Floor division keeps the result an integer on any Python version.
        threadsToStart = max(
            min(self.maxNumberOfThreads, (queueSize // 30) + 1), 1)
        threadsRunning = self.threadPool.numWorkingThreads()

        self.log.info('Needed %d threads to process %d elements' %
                      (threadsToStart, queueSize))
        if threadsRunning:
            self.log.info('Already %d threads running' % threadsRunning)
            threadsToStart = max(0, threadsToStart - threadsRunning)
            self.log.info('Starting %d threads to process %d elements' %
                          (threadsToStart, queueSize))

        # It may happen that we start two threads, 0 and 1. 1 goes DOWN, but 0 keeps
        # running. In next loop we will start a new thread, and will be called 0
        # again. To have a mechanism to see which thread is where, we append the
        # cycle number before the threadId.
        cycle = self._AgentModule__moduleProperties['cyclesDone']

        for _x in range(threadsToStart):
            threadId = '%s_%s' % (cycle, _x)
            jobUp = self.threadPool.generateJobAndQueueIt(self._execute,
                                                          args=(threadId, ))
            if not jobUp['OK']:
                self.log.error(jobUp['Message'])

        return S_OK()

    def finalize(self):
        '''
        Blocks until every element taken from the queue has been marked as
        done by the worker threads.

        :return: S_OK()
        '''

        self.log.info('draining queue... blocking until empty')
        # block until all tasks are done
        self.elementsToBeChecked.join()

        return S_OK()

## Private methods #############################################################

    def _execute(self, threadNumber):
        '''
        Method run by the thread pool. It enters a loop until there are no elements
        on the queue. On each iteration, it evaluates the policies for such element
        and enforces the necessary actions. If there are no more elements in the
        queue, the loop is finished.

        :param threadNumber: identifier used on the log messages of this thread
        :return: S_OK() once the queue is found empty
        '''

        tHeader = '%sJob%s' % ('* ' * 30, threadNumber)

        self.log.info('%s UP' % tHeader)

        pep = PEP(clients=self.clients)

        while True:

            # The only way out of the loop: the queue has been emptied
            try:
                element = self.elementsToBeChecked.get_nowait()
            except Queue.Empty:
                self.log.info('%s DOWN' % tHeader)
                return S_OK()

            try:
                self.log.info(
                    '%s ( %s / %s ) being processed' %
                    (element['name'], element['status'],
                     element['statusType']))

                resEnforce = pep.enforce(element)
                if not resEnforce['OK']:
                    self.log.error(resEnforce['Message'])
                else:
                    resEnforce = resEnforce['Value']

                    oldStatus = resEnforce['decissionParams']['status']
                    statusType = resEnforce['decissionParams']['statusType']
                    newStatus = resEnforce['policyCombinedResult']['Status']
                    reason = resEnforce['policyCombinedResult']['Reason']

                    if oldStatus != newStatus:
                        self.log.info('%s (%s) is now %s ( %s ), before %s' %
                                      (element['name'], statusType, newStatus,
                                       reason, oldStatus))
            finally:
                # Used together with join ! Must run even if the processing
                # raised, otherwise finalize() would block forever.
                self.elementsToBeChecked.task_done()
Exemplo n.º 27
0
class Statistics(object):
    """
    Statistics class that provides helpers to extract information from the
    database more easily.
    """

    def __init__(self):
        """
        Constructor, instantiates the ResourceStatusClient.
        """

        self.rsClient = ResourceStatusClient()

    def getElementHistory(self,
                          element,
                          elementName,
                          statusType,
                          oldAs=None,
                          newAs=None):
        """
        Returns the succession of statuses and the dates since they are
        effective, for the time interval [ oldAs, newAs ].

        The entries are tuples of which the first element is the Status and
        the second one the time-stamp since it is effective. The first entry
        returned may be older than <oldAs>: it is the status that was in place
        when the window started.

        :Parameters:
          **element** - `str`
            element family ( either Site, Resource or Node )
          **elementName** - `str`
            element name
          **statusType** - `str`
            status type of the element <elementName> (e.g. 'all', 'ReadAccess',... )
          **oldAs** - [ None, `datetime` ]
            start point of the time window. If not specified, datetime.min
            is used.
          **newAs** - [ None, `datetime` ]
            end point of the time window. If not specified, datetime.utcnow
            is used.

        :return: S_OK( [ (StatusA, datetimeA),(StatusB,datetimeB) ] ) | S_ERROR
        """

        # Only the elements known to getValidElements are accepted
        if element not in getValidElements():
            return S_ERROR('"%s" is not a valid element' % element)

        # FIXME: the whole history is fetched; if it is long, this query is
        # going to be rather heavy...
        query = self.rsClient.selectStatusElement(
            element,
            'History',
            name=elementName,
            statusType=statusType,
            meta={'columns': ['Status', 'DateEffective']})
        if not query['OK']:
            return query
        records = query['Value']

        if not records:
            return S_OK([])

        # Window limits, falling back to the widest possible window
        oldAs = oldAs or datetime.datetime.min
        newAs = newAs or datetime.datetime.utcnow()

        # Sanity check: no funny time windows
        if oldAs > newAs:
            return S_ERROR("oldAs (%s) > newAs (%s)" % (oldAs, newAs))

        # The window finishes before the first point in the history
        firstRecord = records[0]
        if firstRecord[1] > newAs:
            return S_OK([])

        # The loop below relies on the records being SORTED by time-stamp.
        history = [firstRecord]
        for record in records[1:]:

            # Newer than the upper limit of the window: we are done
            if record[1] > newAs:
                break

            if record[1] <= oldAs:
                # Not newer than the lower limit: keep only the closest one
                history = [record]
            else:
                history.append(record)

        return S_OK(history)

    def getElementStatusAt(self, element, elementName, statusType, statusTime):
        """
        Returns the status of the <element><elementName><statusType> at the
        given time <statusTime>. If not known, returns an empty list. If known,
        returns a list with two elements: Status and time since it is effective.

        :Parameters:
          **element** - `str`
            element family ( either Site, Resource or Node )
          **elementName** - `str`
            element name
          **statusType** - `str`
            status type of the element <elementName> (e.g. 'all', 'ReadAccess',... )
          **statusTime** - `datetime`
            datetime when we want to know the status of <element><elementName><statusType>

        :return: S_OK( [ StatusA, datetimeA ] ) | S_ERROR
        """

        # A window of zero length around statusTime
        historyRes = self.getElementHistory(element, elementName, statusType,
                                            statusTime, statusTime)
        if not historyRes['OK']:
            return historyRes

        statusAt = historyRes['Value']
        if statusAt:
            statusAt = list(statusAt[0])

        return S_OK(statusAt)

    def getElementStatusTotalTimes(self,
                                   element,
                                   elementName,
                                   statusType,
                                   oldAs=None,
                                   newAs=None):
        """
        Returns a dictionary with all the statuses given by getValidStatus as
        keys, and as values the time that <element><elementName><statusType>
        held each of them within the window [ oldAs, newAs ], as computed by
        timedelta_to_seconds.

        :Parameters:
          **element** - `str`
            element family ( either Site, Resource or Node )
          **elementName** - `str`
            element name
          **statusType** - `str`
            status type of the element <elementName> (e.g. 'all', 'ReadAccess',... )
          **oldAs** - [ None, `datetime` ]
            start point of the time window. If not specified, datetime.min
            is used.
          **newAs** - [ None, `datetime` ]
            end point of the time window. If not specified, datetime.utcnow
            is used. Values in the future are cut down to datetime.utcnow.

        :return: S_OK( { StatusA : secondsA, StatusB : secondsB } ) | S_ERROR
        """

        # Gets all history within the window
        historyRes = self.getElementHistory(element, elementName, statusType,
                                            oldAs, newAs)
        if not historyRes['OK']:
            return historyRes
        history = historyRes['Value']

        # Dictionary to be returned, every status starts at zero
        statusCounter = dict.fromkeys(getValidStatus()['Value'], 0)

        # Without history, all the counters stay at zero
        if not history:
            return S_OK(statusCounter)

        # Set defaults
        oldAs = oldAs or datetime.datetime.min
        newAs = newAs or datetime.datetime.utcnow()

        # If users are not behaving well, we force newAs to not be in the future.
        newAs = min(newAs, datetime.datetime.utcnow())

        # Each entry lasts until the next one starts
        for current, following in zip(history, history[1:]):

            # The time taken as base must not be older than the lower limit of
            # the window. Only the first entry can be, but checking always is
            # harmless and cleaner than an if-else.
            statusCounter[current[0]] += timedelta_to_seconds(
                following[1] - max(current[1], oldAs))

        # The pairing above never accounts for the last entry, which lasts
        # until the end of the window. This is also the case when the history
        # has one entry only.
        lastRecord = history[-1]
        statusCounter[lastRecord[0]] += timedelta_to_seconds(
            newAs - max(lastRecord[1], oldAs))

        return S_OK(statusCounter)
Exemplo n.º 28
0
def setToken( user ):
  '''
    Sets or releases the token of the elements selected through switchDict.

    The elements matching the element family, name and statusType(s) in
    switchDict are read from the DB and modified one by one, writing the reason,
    the new token owner and the token expiration.

    :param user: name written as token owner when the token is not released
    :return: S_OK() | the first failing result
  '''

  rssClient = ResourceStatusClient()

  family      = switchDict[ 'element' ]
  elementName = switchDict[ 'name' ]
  statusTypes = switchDict[ 'statusType' ]

  # The modify method does not discover the StatusTypes ( an optional parameter
  # in this script ), so they are read from the DB and modified one by one.
  selection = rssClient.selectStatusElement( family, 'Status',
                                             name       = elementName,
                                             statusType = statusTypes,
                                             meta = { 'columns' : [ 'StatusType', 'TokenOwner' ] } )
  if not selection[ 'OK' ]:
    return selection
  rows = selection[ 'Value' ]

  # An empty list means they do not exist on the DB !
  if not rows:
    subLogger.warn( 'Nothing found for %s, %s, %s' % ( family, elementName, statusTypes ) )
    return S_OK()

  releasing = switchDict[ 'releaseToken' ] != False
  if releasing:
    # The token goes back to RSS, and never expires
    tokenExpiration = datetime.max
    newTokenOwner   = 'rs_svc'
  else:
    # The token is given to the user for one day
    tokenExpiration = datetime.utcnow().replace( microsecond = 0 ) + timedelta( days = 1 )
    newTokenOwner   = user

  subLogger.info( 'New token : %s until %s' % ( newTokenOwner, tokenExpiration ) )

  for statusType, tokenOwner in rows:

    # The token is owned by someone else than the issuer of the command or RSS:
    # it is reported, and modified anyway.
    if tokenOwner not in ( user, 'rs_svc' ):
      subLogger.info( '%s(%s) belongs to the user: %s' % ( elementName, statusType, tokenOwner ) )

    # does the job
    modification = rssClient.modifyStatusElement( family, 'Status',
                                                  name       = elementName,
                                                  statusType = statusType,
                                                  reason     = switchDict[ 'reason'],
                                                  tokenOwner = newTokenOwner,
                                                  tokenExpiration = tokenExpiration )
    if not modification[ 'OK' ]:
      return modification

    if tokenOwner == newTokenOwner:
      msg = '(extended)'
    elif newTokenOwner == 'rs_svc':
      msg = '(released)'
    else:
      msg = '(aquired from %s)' % tokenOwner

    subLogger.info( '%s:%s %s' % ( elementName, statusType, msg ) )

  return S_OK()
Exemplo n.º 29
0
class SiteStatus(object):
    """
    RSS helper to interact with the 'Site' family on the DB. It provides the most
    demanded functions and a cache to avoid hitting the server too often.

    Public methods:
    * getSiteStatuses
    * getUsableSites
    * getSites
    * setSiteStatus
    """

    # Python 2 metaclass hook; presumably makes the helper a singleton
    # ( see DIRACSingleton ) - TODO confirm.
    __metaclass__ = DIRACSingleton

    # Statuses accepted by getSites and setSiteStatus
    _allowedStatuses = ('Active', 'Banned', 'Degraded', 'Probing', 'Error', 'Unknown')

    def __init__(self):
        """
        Constructor, initializes the clients, reads the RSS flag and creates
        the RSSCache that is refreshed through __updateRssCache.
        """

        self.log = gLogger.getSubLogger(self.__class__.__name__)
        self.rssConfig = RssConfiguration()
        self.__opHelper = Operations()
        self.rssFlag = ResourceStatus().rssFlag
        self.rsClient = ResourceStatusClient()

        cacheLifeTime = int(self.rssConfig.getConfigCache())

        # RSSCache only affects the calls directed to RSS, if using the CS it is not used.
        self.rssCache = RSSCache(cacheLifeTime, self.__updateRssCache)

    def __updateRssCache(self):
        """
        Method used to update the rssCache.

        It tries up to 5 times to select ( Name, Status ) from the Site Status
        table. After each failed trial it sleeps trial**2 seconds and creates a
        fresh ResourceStatusClient before trying again.

        :return: S_OK( getCacheDictFromRawData( rows ) ) or the last failed result
        """

        meta = {'columns': ['Name', 'Status']}

        for ti in xrange(5):
            rawCache = self.rsClient.selectStatusElement('Site',
                                                         'Status',
                                                         meta=meta)
            if rawCache['OK']:
                break
            self.log.warn("Can't get resource's status",
                          rawCache['Message'] + "; trial %d" % ti)
            sleep(math.pow(ti, 2))
            self.rsClient = ResourceStatusClient()

        if not rawCache['OK']:
            return rawCache
        return S_OK(getCacheDictFromRawData(rawCache['Value']))

    def getSiteStatuses(self, siteNames=None):
        """
        Method that queries the status of the sites in a given list.
        A single string site name may also be provided as "siteNames".
        If the input is None ( or empty ), it is interpreted as * ( all ).

        If the RSS flag is set the answer comes from the RSS cache, otherwise
        the WMSAdministrator site mask is queried.

        If match is positive, the output looks like:
        {
         'test1.test1.org': 'Active',
         'test2.test2.org': 'Banned',
        }

        examples
          >>> siteStatus.getSiteStatuses( ['test1.test1.uk', 'test2.test2.net', 'test3.test3.org'] )
              S_OK( { 'test1.test1.uk': 'Active', 'test2.test2.net': 'Banned', 'test3.test3.org': 'Active' } )
          >>> siteStatus.getSiteStatuses( 'NotExists' )
              S_ERROR( ... )
          >>> siteStatus.getSiteStatuses( None )
              S_OK( { 'test1.test1.uk': 'Active',
                      'test2.test2.net': 'Banned',
                      ...
                    }
                  )

        :Parameters:
          **siteNames** - `list` or `str`
            name(s) of the sites to be matched

        :return: S_OK() || S_ERROR()
        """

        if self.rssFlag:
            return self.__getRSSSiteStatus(siteNames)

        wmsAdmin = RPCClient('WorkloadManagement/WMSAdministrator')

        if not siteNames:
            # No selection: ask for the whole site mask in a single call
            result = wmsAdmin.getSiteMaskStatus()
            if not result['OK']:
                return result
            return S_OK(result['Value'])

        if isinstance(siteNames, basestring):
            siteNames = [siteNames]

        siteStatusDict = {}
        for siteName in siteNames:
            result = wmsAdmin.getSiteMaskStatus(siteName)
            if not result['OK']:
                return result
            siteStatusDict[siteName] = result['Value']

        return S_OK(siteStatusDict)

    def __getRSSSiteStatus(self, siteName=None):
        """
        Gets from the cache or the RSS the Sites status. The cache is a
        copy of the DB table. If it is not on the cache, most likely is not going
        to be on the DB.

        There is one exception: item just added to the CS, e.g. new Element.
        The period between it is added to the DB and the changes are propagated
        to the cache will be inconsistent, but not dangerous. Just wait <cacheLifeTime>
        minutes.

        :param siteName: name(s) of the site(s), forwarded as is to RSSCache.match
        :type siteName: str

        :return: whatever RSSCache.match returns
        """

        cacheMatch = self.rssCache.match(siteName, '', '')

        self.log.debug('__getRSSSiteStatus')
        self.log.debug(cacheMatch)

        return cacheMatch

    def getUsableSites(self, siteNames=None):
        """
        Returns, in a list, all sites whose status is either Active or Degraded.

        examples
          >>> siteStatus.getUsableSites( ['test1.test1.uk', 'test2.test2.net', 'test3.test3.org'] )
              S_OK( ['test1.test1.uk', 'test3.test3.org'] )
          >>> siteStatus.getUsableSites( None )
              S_OK( ['test1.test1.uk', 'test3.test3.org', 'test4.test4.org', 'test5.test5.org', ...] )
          >>> siteStatus.getUsableSites( 'NotExists' )
              S_ERROR( ... )

        :Parameters:
          **siteNames** - `List` or `str`
            name(s) of the sites to be matched

        :return: S_OK() || S_ERROR()
        """

        siteStatusDictRes = self.getSiteStatuses(siteNames)
        if not siteStatusDictRes['OK']:
            return siteStatusDictRes

        siteStatusList = [
            site for site, status in siteStatusDictRes['Value'].iteritems()
            if status in ['Active', 'Degraded']
        ]

        return S_OK(siteStatusList)

    def getSites(self, siteState='Active'):
        """
        By default, it gets the currently active site list.

        examples
          >>> siteStatus.getSites()
              S_OK( ['test1.test1.uk', 'test3.test3.org'] )
          >>> siteStatus.getSites( 'Active' )
              S_OK( ['test1.test1.uk', 'test3.test3.org'] )
          >>> siteStatus.getSites( 'Banned' )
              S_OK( ['test0.test0.uk', ... ] )
          >>> siteStatus.getSites( 'All' )
              S_OK( ['test1.test1.uk', 'test3.test3.org', 'test4.test4.org', 'test5.test5.org'...] )
          >>> siteStatus.getSites( None )
              S_ERROR( ... )

        :Parameters:
          **siteState** - `String`
            state of the sites to be matched ( case insensitive ), or 'All'

        :return: S_OK() || S_ERROR()
        """

        if not siteState:
            return S_ERROR(DErrno.ERESUNK, 'siteState parameter is empty')

        siteStatusDictRes = self.getSiteStatuses()
        if not siteStatusDictRes['OK']:
            return siteStatusDictRes

        # fix case sensitive string
        siteState = siteState.capitalize()

        if siteState == 'All':
            # no filtering requested, return every known site
            return S_OK(list(siteStatusDictRes['Value']))

        if siteState not in self._allowedStatuses:
            return S_ERROR(errno.EINVAL,
                           'Not a valid status, parameter rejected')

        siteList = [
            site for site, status in siteStatusDictRes['Value'].iteritems()
            if status == siteState
        ]

        return S_OK(siteList)

    def setSiteStatus(self, site, status, comment='No comment'):
        """
        Set the status of a site, either in the Site Status table of RSS
        ( RSS flag set ) or through the WMSAdministrator site mask.

        examples
          >>> siteStatus.setSiteStatus( 'site1.test.test', 'Banned' )
              S_OK()
          >>> siteStatus.setSiteStatus( 'site1.test.test', None )
              S_ERROR( ... )

        :Parameters:
          **site** - `String`
            the site whose status is going to be changed
          **status** - `String`
            new status ( case insensitive ), one of Active, Banned, Degraded,
            Probing, Error, Unknown
          **comment** - `String`
            reason for the change

        :return: S_OK() || S_ERROR()
        """

        if not status:
            return S_ERROR(DErrno.ERESUNK, 'status parameter is empty')

        # fix case sensitive string
        status = status.capitalize()

        if status not in self._allowedStatuses:
            return S_ERROR(errno.EINVAL,
                           'Not a valid status, parameter rejected')

        if self.rssFlag:
            result = getProxyInfo()
            if result['OK']:
                tokenOwner = result['Value']['username']
            else:
                return S_ERROR("Unable to get user proxy info %s " %
                               result['Message'])

            # The user issuing the change owns the element for one day
            tokenExpiration = datetime.utcnow() + timedelta(days=1)

            self.rssCache.acquireLock()
            try:
                result = self.rsClient.modifyStatusElement(
                    'Site',
                    'Status',
                    status=status,
                    name=site,
                    tokenExpiration=tokenExpiration,
                    reason=comment,
                    tokenOwner=tokenOwner)
                if result['OK']:
                    self.rssCache.refreshCache()
                else:
                    _msg = 'Error updating status of site %s to %s' % (site,
                                                                       status)
                    gLogger.warn('RSS: %s' % _msg)

            # Release lock, no matter what.
            finally:
                self.rssCache.releaseLock()

        else:
            # The site mask only knows allowed / banned. The site and the
            # comment must be forwarded, otherwise the service has no way to
            # know which site is meant.
            wmsAdmin = RPCClient('WorkloadManagement/WMSAdministrator')
            if status in ['Active', 'Degraded']:
                result = wmsAdmin.allowSite(site, comment)
            else:
                result = wmsAdmin.banSite(site, comment)

        return result
Exemplo n.º 30
0
def setToken(user):
  '''
    Acquire, extend or release the token of the element(s) selected on the
    command line ( read from the module level switchDict ).

    The matching rows are read from the <element>Status table and each of them
    is modified, one by one, with the new token owner, the token expiration
    and the given reason.

    :param user: name of the user issuing the command, new owner of the token
                 unless it is being released
    :return: S_OK() || S_ERROR()
  '''

  client = ResourceStatusClient()

  # The modify method works on a single ( name, statusType ) pair and does not
  # discover the StatusTypes by itself ( optional parameter of this script ),
  # so they are read from the DB first and then processed one at a time.
  selection = client.selectStatusElement(switchDict['element'], 'Status',
                                         name=switchDict['name'],
                                         statusType=switchDict['statusType'],
                                         meta={'columns': ['StatusType', 'TokenOwner']})
  if not selection['OK']:
    return selection

  rows = selection['Value']

  # An empty selection means the element(s) are not on the DB
  if not rows:
    subLogger.warn('Nothing found for %s, %s, %s' % (switchDict['element'],
                                                     switchDict['name'],
                                                     switchDict['statusType']))
    return S_OK()

  if not switchDict['releaseToken']:
    # Token taken ( or extended ) by the user for the requested number of days
    startTime = datetime.utcnow().replace(microsecond=0)
    tokenExpiration = startTime + timedelta(days=int(switchDict['days']))
    newTokenOwner = user
  else:
    # Token given back to RSS, without expiration
    tokenExpiration = datetime.max
    newTokenOwner = 'rs_svc'

  subLogger.always('New token: %s --- until %s' % (newTokenOwner, tokenExpiration))

  for statusType, currentOwner in rows:

    # Let the operator know the token is held by somebody else
    if currentOwner not in (user, 'rs_svc'):
      subLogger.info('%s(%s) belongs to the user: %s' % (switchDict['name'], statusType, currentOwner))

    modification = client.modifyStatusElement(switchDict['element'], 'Status',
                                              name=switchDict['name'],
                                              statusType=statusType,
                                              reason=switchDict['reason'],
                                              tokenOwner=newTokenOwner,
                                              tokenExpiration=tokenExpiration)
    if not modification['OK']:
      return modification

    if currentOwner == newTokenOwner:
      outcome = '(extended)'
    elif newTokenOwner == 'rs_svc':
      outcome = '(released)'
    else:
      outcome = '(aquired from %s)' % currentOwner

    subLogger.info('%s:%s %s' % (switchDict['name'], statusType, outcome))

  return S_OK()
Exemplo n.º 31
0
class LogStatusAction(BaseAction):
    """
    Action that registers on the database a new entry on the <element>Status table.
    It adds or modifies if the record exists on the table.
    """
    def __init__(self,
                 name,
                 decisionParams,
                 enforcementResult,
                 singlePolicyResults,
                 clients=None):
        """
        Forwards all the arguments to BaseAction and picks the
        ResourceStatusClient from `clients` when provided, creating a new one
        otherwise.
        """

        super(LogStatusAction,
              self).__init__(name, decisionParams, enforcementResult,
                             singlePolicyResults, clients)

        if clients is not None and "ResourceStatusClient" in clients:
            self.rsClient = clients["ResourceStatusClient"]
        else:
            self.rsClient = ResourceStatusClient()

    def run(self):
        """
        Checks it has the parameters it needs and tries to addOrModify in the
        database.

        If the enforcement result carries VO == "all", every row of the element
        owned by "rs_svc" is selected and the update is applied once per VO
        found; the result of the last update is returned. Otherwise a single
        update is done with the VO of the enforcement result ( possibly None ).

        :return: result of addOrModifyStatusElement, or S_ERROR if a mandatory
                 parameter is None, the selection fails or it returns no rows
        """
        # Minor security checks

        element = self.decisionParams["element"]
        if element is None:
            return S_ERROR("element should not be None")

        name = self.decisionParams["name"]
        if name is None:
            return S_ERROR("name should not be None")

        statusType = self.decisionParams["statusType"]
        if statusType is None:
            return S_ERROR("statusType should not be None")

        status = self.enforcementResult["Status"]
        if status is None:
            return S_ERROR("status should not be None")

        elementType = self.decisionParams["elementType"]
        if elementType is None:
            return S_ERROR("elementType should not be None")

        reason = self.enforcementResult["Reason"]
        if reason is None:
            return S_ERROR("reason should not be None")

        vo = self.enforcementResult.get("VO")
        # Truncate reason to fit in database column
        reason = (reason[:508] + "..") if len(reason) > 508 else reason

        if vo != "all":
            return self.rsClient.addOrModifyStatusElement(
                element,
                "Status",
                name=name,
                statusType=statusType,
                vO=vo,
                status=status,
                elementType=elementType,
                reason=reason,
            )

        # VO = 'all' (non-VO aware policy) for a combined policy affects all VOs for a given site or resource,
        resSelect = self.rsClient.selectStatusElement(
            element,
            "Status",
            name=name,
            statusType=None,
            vO=None,
            status=None,
            elementType=None,
            reason=None,
            dateEffective=None,
            lastCheckTime=None,
            tokenOwner="rs_svc",
            tokenExpiration=None,
            meta=None,
        )
        if not resSelect["OK"]:
            self.log.error("Could not obtain all VO rows for element: %s" %
                           element)
            return resSelect

        voColumnIndex = resSelect["Columns"].index("VO")

        # Stays None when the selection is empty, so that case can be reported
        # instead of failing on an unbound variable at return time.
        resLogUpdate = None
        for row in resSelect["Value"]:
            resLogUpdate = self.rsClient.addOrModifyStatusElement(
                element,
                "Status",
                name=name,
                statusType=statusType,
                vO=row[voColumnIndex],
                status=status,
                elementType=elementType,
                reason=reason,
            )
            self.log.debug("Update result", resLogUpdate)

        if resLogUpdate is None:
            return S_ERROR("No rows found for %s %s, nothing updated" %
                           (element, name))

        return resLogUpdate
Exemplo n.º 32
0
class ElementInspectorAgent( AgentModule ):
  """ ElementInspectorAgent

  The ElementInspector agent is a generic agent used to check the elements
  of one of the elementTypes ( e.g. Site, Resource, Node ).

  This Agent takes care of the Elements. In order to do so, it gathers
  the eligible ones and then evaluates their statuses with the PEP.

  """

  # Max number of worker threads by default
  __maxNumberOfThreads = 15

  # Inspection freqs ( minutes between two checks ), defaults; the lower, the
  # higher priority to be checked. Error state usually means there is a glitch
  # somewhere, so it has the highest priority.
  __checkingFreqs = {
                     'Active'   : 20,
                     'Degraded' : 20,
                     'Probing'  : 20,
                     'Banned'   : 15,
                     'Unknown'  : 10,
                     'Error'    : 5
                    }


  def __init__( self, *args, **kwargs ):
    """ c'tor
    """

    AgentModule.__init__( self, *args, **kwargs )

    # ElementType, to be defined among Site, Resource or Node
    self.elementType         = ''
    self.elementsToBeChecked = None
    self.threadPool          = None
    self.rsClient            = None
    self.clients             = {}


  def initialize( self ):
    """ Standard initialize.

    Creates the thread pool and the clients, and reads the `elementType`
    option.

    :return: S_OK() || S_ERROR() if no elementType is configured
    """

    maxNumberOfThreads = self.am_getOption( 'maxNumberOfThreads', self.__maxNumberOfThreads )
    self.threadPool    = ThreadPool( maxNumberOfThreads, maxNumberOfThreads )

    self.elementType = self.am_getOption( 'elementType', self.elementType )
    self.rsClient    = ResourceStatusClient()

    self.clients[ 'ResourceStatusClient' ]     = self.rsClient
    self.clients[ 'ResourceManagementClient' ] = ResourceManagementClient()

    if not self.elementType:
      return S_ERROR( 'Missing elementType' )

    return S_OK()

  def execute( self ):
    """ execute

    This is the main method of the agent. It gets the elements from the Database
    which are eligible to be re-checked, calculates how many threads should be
    started and spawns them. Each thread will get an element from the queue until
    it is empty. At the end, the method will join the queue such that the agent
    will not terminate a cycle until all elements have been processed.

    :return: S_OK() || S_ERROR() if the elements cannot be retrieved
    """

    # Gets elements to be checked ( returns a Queue )
    elementsToBeChecked = self.getElementsToBeChecked()
    if not elementsToBeChecked[ 'OK' ]:
      self.log.error( elementsToBeChecked[ 'Message' ] )
      return elementsToBeChecked
    self.elementsToBeChecked = elementsToBeChecked[ 'Value' ]

    queueSize   = self.elementsToBeChecked.qsize()
    pollingTime = self.am_getPollingTime()

    # Assigns number of threads on the fly such that we exhaust the PollingTime
    # without having to spawn too many threads. We assume 10 seconds per element
    # to be processed ( actually, it takes something like 1 sec per element ):
    # numberOfThreads = elements * 10(s/element) / pollingTime
    numberOfThreads = int( math.ceil( queueSize * 10. / pollingTime ) )

    self.log.info( 'Needed %d threads to process %d elements' % ( numberOfThreads, queueSize ) )

    for _x in xrange( numberOfThreads ):
      jobUp = self.threadPool.generateJobAndQueueIt( self._execute )
      if not jobUp[ 'OK' ]:
        self.log.error( jobUp[ 'Message' ] )

    self.log.info( 'blocking until all elements have been processed' )
    # block until all tasks are done
    self.elementsToBeChecked.join()
    self.log.info( 'done')

    return S_OK()


  def getElementsToBeChecked( self ):
    """ getElementsToBeChecked

    This method gets all the rows in the <self.elementType>Status table, and then
    discards entries with TokenOwner != rs_svc. On top of that, there are check
    frequencies that are applied: depending on the current status of the element,
    they will be checked more or less often.

    Every selected row is put on the queue as a dictionary whose keys are the
    column names with the first letter in lower case, plus the key 'element'.

    :return: S_OK( Queue ) || S_ERROR()
    """

    toBeChecked = Queue.Queue()

    # We get all the elements, then we filter.
    elements = self.rsClient.selectStatusElement( self.elementType, 'Status' )
    if not elements[ 'OK' ]:
      return elements

    utcnow = datetime.datetime.utcnow().replace( microsecond = 0 )

    # filter elements by Type
    for element in elements[ 'Value' ]:

      # Maybe an overkill, but this way I have NEVER again to worry about order
      # of elements returned by mySQL on tuples
      elemDict = dict( zip( elements[ 'Columns' ], element ) )

      # This if-clause skips all the elements that should not be checked yet
      timeToNextCheck = self.__checkingFreqs[ elemDict[ 'Status' ] ]
      if utcnow <= elemDict[ 'LastCheckTime' ] + datetime.timedelta( minutes = timeToNextCheck ):
        continue

      # We skip the elements with token different than "rs_svc"
      if elemDict[ 'TokenOwner' ] != 'rs_svc':
        self.log.verbose( 'Skipping %s ( %s ) with token %s' % ( elemDict[ 'Name' ],
                                                                 elemDict[ 'StatusType' ],
                                                                 elemDict[ 'TokenOwner' ]
                                                               ))
        continue

      # We are not checking if the item is already on the queue or not. It may
      # be there, but in any case, it is not a big problem.

      lowerElementDict = { 'element' : self.elementType }
      for key, value in elemDict.items():
        lowerElementDict[ key[0].lower() + key[1:] ] = value

      # We add lowerElementDict to the queue
      toBeChecked.put( lowerElementDict )
      self.log.verbose( '%s # "%s" # "%s" # %s # %s' % ( elemDict[ 'Name' ],
                                                         elemDict[ 'ElementType' ],
                                                         elemDict[ 'StatusType' ],
                                                         elemDict[ 'Status' ],
                                                         elemDict[ 'LastCheckTime' ]) )
    return S_OK( toBeChecked )


  # Private methods ............................................................

  def _execute( self ):
    """
      Method run by the thread pool. It enters a loop until there are no elements
      on the queue. On each iteration, it evaluates the policies for such element
      and enforces the necessary actions. If there are no more elements in the
      queue, the loop is finished.

      Whatever happens while processing an element, it is always marked as done
      on the queue: execute() joins the queue, so a missing task_done() would
      block the agent cycle forever.

      :return: S_OK()
    """

    pep = PEP( clients = self.clients )

    while True:

      try:
        element = self.elementsToBeChecked.get_nowait()
      except Queue.Empty:
        return S_OK()

      try:
        self.__enforceElement( pep, element )
      except Exception as excp:
        # One faulty element must not kill the worker nor starve the queue
        self.log.error( 'Exception during policy enforcement', repr( excp ) )
      finally:
        # Used together with join !
        self.elementsToBeChecked.task_done()

  def __enforceElement( self, pep, element ):
    """ Runs the policy enforcement for one element and logs status changes.
    """

    self.log.verbose( '%s ( %s / %s ) being processed' % ( element[ 'name' ],
                                                           element[ 'status' ],
                                                           element[ 'statusType' ] ) )

    resEnforce = pep.enforce( element )
    if not resEnforce[ 'OK' ]:
      self.log.error( 'Failed policy enforcement', resEnforce[ 'Message' ] )
      return

    resEnforce = resEnforce[ 'Value' ]

    oldStatus  = resEnforce[ 'decissionParams' ][ 'status' ]
    statusType = resEnforce[ 'decissionParams' ][ 'statusType' ]
    newStatus  = resEnforce[ 'policyCombinedResult' ][ 'Status' ]
    reason     = resEnforce[ 'policyCombinedResult' ][ 'Reason' ]

    if oldStatus != newStatus:
      self.log.info( '%s (%s) is now %s ( %s ), before %s' % ( element[ 'name' ],
                                                               statusType,
                                                               newStatus,
                                                               reason,
                                                               oldStatus ) )
Exemplo n.º 33
0
class SummarizeLogsAgent( AgentModule ):
  """ SummarizeLogsAgent as extension of AgentModule.

  Condenses the rows of the <element>Log tables into the <element>History
  tables, keeping only the entries where the Status or the TokenOwner change.
  """

  def __init__( self, *args, **kwargs ):
    """ Constructor.

    """

    AgentModule.__init__( self, *args, **kwargs )

    self.rsClient = None


  def initialize( self ):
    """ Standard initialize.

    :return: S_OK

    """

    self.rsClient = ResourceStatusClient()
    return S_OK()


  def execute( self ):
    """ execute ( main method )

    The execute method runs over the three families of tables ( Site, Resource and
    Node ) performing identical operations. First, selects all logs for a given
    family ( and keeps track of which one is the last row ID ). It summarizes the
    logs and finally, deletes the logs from the database.

    Errors are logged and do not stop the processing of the remaining keys or
    families.

    :return: S_OK

    """

    # loop over the tables
    for element in ( 'Site', 'Resource', 'Node' ):

      self.log.info( 'Summarizing %s' % element )

      # get all logs to be summarized
      selectLogElements = self._summarizeLogs( element )
      if not selectLogElements[ 'OK' ]:
        self.log.error( selectLogElements[ 'Message' ] )
        continue

      lastID, logElements = selectLogElements[ 'Value' ]

      # logElements is a dictionary of key-value pairs as follows:
      # ( name, statusType ) : list( logs )
      for key, logs in logElements.items():

        sumResult = self._registerLogs( element, key, logs )
        if not sumResult[ 'OK' ]:
          self.log.error( sumResult[ 'Message' ] )

      if lastID is not None:
        self.log.info( 'Deleting %sLog till ID %s' % ( element, lastID ) )
        deleteResult = self.rsClient.deleteStatusElement( element, 'Log',
                                                          meta = { 'older' : ( 'ID', lastID ) } )
        if not deleteResult[ 'OK' ]:
          self.log.error( deleteResult[ 'Message' ] )

    return S_OK()


  #.............................................................................


  def _summarizeLogs( self, element ):
    """ given an element, selects all logs in table <element>Log.

    The logs are grouped by ( name, statusType ); within each group a log is
    kept only if its Status or TokenOwner differ from the previously kept one.

    :Parameters:
      **element** - `string`
        name of the table family ( either Site, Resource and Node )

    :return: S_OK( ( lastID, dictOfLogs ) ) / S_ERROR, where lastID is the ID
             of the last selected row ( None if there are no rows )

    """

    selectResults = self.rsClient.selectStatusElement( element, 'Log' )

    if not selectResults[ 'OK' ]:
      return selectResults

    selectedItems = {}
    selectColumns = selectResults[ 'Columns' ]
    selectResults = selectResults[ 'Value' ]

    latestID = None
    if selectResults:
      latestID = dict( zip( selectColumns, selectResults[ -1 ] ) )[ 'ID' ]

    for selectResult in selectResults:

      elementDict = dict( zip( selectColumns, selectResult ) )

      key = ( elementDict[ 'Name' ], elementDict[ 'StatusType' ] )

      if key not in selectedItems:
        selectedItems[ key ] = [ elementDict ]
      else:
        lastStatus = selectedItems[ key ][ -1 ][ 'Status' ]
        lastToken  = selectedItems[ key ][ -1 ][ 'TokenOwner' ]

        # If there are no changes on the Status or the TokenOwner with respect
        # the previous one, discards the log.
        if lastStatus != elementDict[ 'Status' ] or lastToken != elementDict[ 'TokenOwner' ]:
          selectedItems[ key ].append( elementDict )

    return S_OK( ( latestID, selectedItems ) )


  def _registerLogs( self, element, key, logs ):
    """ Given an element, a key - which is a tuple ( <name>, <statusType> )
    and a list of dictionaries, this method inserts them on the <element>History
    table. Before inserting them, checks whether the first one is or is not on
    the <element>History table. If it is, it is not inserted ( and it is removed
    from `logs` in place ). It also checks whether the LastCheckTime parameter
    of the first log to be inserted is larger than the last history log
    LastCheckTime. If not, it means an agent cycle has been interrupted and we
    can run into inconsistencies. It returns S_ERROR without inserting anything
    to prevent more dramatic results.

    :Parameters:
      **element** - `string`
        name of the table family ( either Site, Resource and Node )
      **key** - `tuple`
        tuple with the name of the element and the statusType
      **logs** - `list`
        non empty list of dictionaries containing the logs

     :return: S_OK / S_ERROR

    """

    # Undo key
    name, statusType = key

    selectedRes = self.rsClient.selectStatusElement( element, 'History', name,
                                                     statusType,
                                                     meta = { 'columns' : [ 'Status', 'LastCheckTime', 'TokenOwner' ],
                                                              'limit'   : 1,
                                                              'order'   : ('LastCheckTime', 'DESC') } )

    if not selectedRes[ 'OK' ]:
      return selectedRes
    selectedRes = selectedRes[ 'Value' ]

    # We want from the <element>History table the last Status, LastCheckTime
    # and TokenOwner
    lastStatus, lastCheckTime, lastToken = None, None, None
    if selectedRes:
      lastStatus, lastCheckTime, lastToken = selectedRes[ 0 ]

    # Sanity check to avoid running if an agent cycle has been stopped
    if lastCheckTime and logs[ 0 ][ 'LastCheckTime' ] < lastCheckTime:
      return S_ERROR( 'Overlapping data. Seems the DB has not been cleared properly' )

    # If the first of the selected items has the same status and token as the
    # latest on the history, it is already registered: we drop it.
    if logs[ 0 ][ 'Status' ] == lastStatus and logs[ 0 ][ 'TokenOwner' ] == lastToken:
      del logs[ 0 ]

    if logs:
      self.log.info( '%s ( %s )' % ( name, statusType ) )

    for selectedItemDict in logs:

      res = self.__logToHistoryTable( element, selectedItemDict )
      if not res[ 'OK' ]:
        return res

    return S_OK()


  def __logToHistoryTable( self, element, elementDict ):
    """ Given an element and a dictionary with all the arguments, this method
    inserts a new entry on the <element>History table

    :Parameters:
      **element** - `string`
        name of the table family ( either Site, Resource and Node )
      **elementDict** - `dict`
        dictionary returned from the DB to be inserted on the History table

    :return: S_OK / S_ERROR ( also if a column is missing in elementDict )

    """

    try:

      name            = elementDict[ 'Name' ]
      statusType      = elementDict[ 'StatusType' ]
      status          = elementDict[ 'Status' ]
      elementType     = elementDict[ 'ElementType' ]
      reason          = elementDict[ 'Reason' ]
      dateEffective   = elementDict[ 'DateEffective' ]
      lastCheckTime   = elementDict[ 'LastCheckTime' ]
      tokenOwner      = elementDict[ 'TokenOwner' ]
      tokenExpiration = elementDict[ 'TokenExpiration' ]

    # "except X as e" is valid from Python 2.6 on, the comma form is not valid
    # on Python 3
    except KeyError as e:
      return S_ERROR( e )

    self.log.info( '  %(Status)s %(DateEffective)s %(TokenOwner)s %(Reason)s' % elementDict )

    return self.rsClient.insertStatusElement( element, 'History', name, statusType,
                                              status, elementType, reason,
                                              dateEffective, lastCheckTime,
                                              tokenOwner, tokenExpiration )
Exemplo n.º 34
0
class Statistics( object ):
  """
  Statistics class that provides helpers to extract information from the database
  more easily.
  """

  def __init__( self ):
    """
    Constructor. Creates the ResourceStatusClient used by all the queries.
    """

    self.rsClient = ResourceStatusClient()

  def getElementHistory( self, element, elementName, statusType,
                         oldAs = None, newAs = None ):
    """
    Returns the succession of statuses and the dates since they are effective. The
    values are comprised in the time interval [ oldAs, newAs ]. If not specified,
    all values up to the present are returned.

    It returns a list of rows, of which the first element is the Status and the
    second one the time-stamp since it is effective. Note that the time-stamps will
    not necessarily match the time window: the first row returned may be older
    than oldAs, as it is the status that was in place when the window started.

    :Parameters:
      **element** - `str`
        element family ( either Site, Resource or Node )
      **elementName** - `str`
        element name
      **statusType** - `str`
        status type of the element <elementName> (e.g. 'all', 'ReadAccess',... )
      **oldAs** - [ None, `datetime` ]
        datetime with the start point for the time window. If not specified,
        datetime.datetime.min is used.
      **newAs** - [ None, `datetime` ]
        datetime with the end point for the time window. If not specified,
        datetime.datetime.utcnow() is used.

    :return: S_OK( [ (StatusA, datetimeA),(StatusB,datetimeB) ] ) | S_ERROR
    """

    # Checks we are not passing a silly element ( we only accept Site, Resource and Node )
    if element not in getValidElements():
      return S_ERROR( '"%s" is not a valid element' % element )

    # FIXME: read below
    # Gets all elements in history. If the history is long, this query is going to
    # be rather heavy...
    result = self.rsClient.selectStatusElement( element, 'History', name = elementName,
                                                statusType = statusType,
                                                meta = { 'columns' : [ 'Status', 'DateEffective' ] } )
    if not result[ 'OK' ]:
      return result
    result = result[ 'Value' ]

    if not result:
      return S_OK( [] )

    # To avoid making exceptions in the for-loop, we feed history with the first
    # item in the results
    history = [ result[ 0 ] ]

    # Sets defaults: the window is [ datetime.min, utcnow ] unless told otherwise.
    oldAs = oldAs or datetime.datetime.min
    newAs = newAs or datetime.datetime.utcnow()

    # Sanity check: no funny time windows
    if oldAs > newAs:
      return S_ERROR( "oldAs (%s) > newAs (%s)" % ( oldAs, newAs ) )

    # This avoids that the window finishes before having the first point in the
    # history.
    if history[ 0 ][ 1 ] > newAs:
      return S_OK( [] )

    # Iterate starting from the second element in the list. The loop relies on
    # the rows being SORTED by time-stamp ( oldest first ): both the break and
    # the buffering of the closest point to oldAs depend on it.
    # NOTE(review): the query above does not request any ordering explicitly -
    # confirm the client returns the rows chronologically.
    for historyElement in result[ 1: ]:

      # If the point is newer than the superior limit of the window, we are done.
      if historyElement[ 1 ] > newAs:
        break
      # If the point is older than the window lower limit, we buffer it. We just
      # want the closest point to the lower limit.
      elif historyElement[ 1 ] <= oldAs:
        history = [ historyElement ]
      # Otherwise, we add it to the history
      else:
        history.append( historyElement )

    return S_OK( history )

  def getElementStatusAt( self, element, elementName, statusType, statusTime ):
    """
    Returns the status of the <element><elementName><statusType> at the given
    time <statusTime>. If not known, will return an empty list. If known, will
    return a list with two elements: Status and time since it is effective.

    :Parameters:
      **element** - `str`
        element family ( either Site, Resource or Node )
      **elementName** - `str`
        element name
      **statusType** - `str`
        status type of the element <elementName> (e.g. 'all', 'ReadAccess',... )
      **statusTime** - `datetime`
        datetime when we want to know the status of <element><elementName><statusType>

    :return: S_OK( [ StatusA, datetimeA ] ) | S_OK( [] ) | S_ERROR
    """

    # A window that starts and ends at statusTime keeps only the status in place
    # at that moment.
    result = self.getElementHistory( element, elementName, statusType, statusTime, statusTime )
    if not result[ 'OK' ]:
      return result
    result = result[ 'Value' ]

    if result:
      result = list( result[ 0 ] )

    return S_OK( result )

  def getElementStatusTotalTimes( self, element, elementName, statusType,
                                  oldAs = None, newAs = None ):
    """
    Returns a dictionary with all the possible statuses as keys and as values the
    number of seconds that <element><elementName><statusType> hold it for a time
    window between [ oldAs, newAs ]. If oldAs is not defined, it is considered
    as datetime.min. If newAs is not defined, it is considered datetime.utcnow.

    :Parameters:
      **element** - `str`
        element family ( either Site, Resource or Node )
      **elementName** - `str`
        element name
      **statusType** - `str`
        status type of the element <elementName> (e.g. 'all', 'ReadAccess',... )
      **oldAs** - [ None, `datetime` ]
        datetime with the start point for the time window. If not specified,
        datetime.datetime.min is used.
      **newAs** - [ None, `datetime` ]
        datetime with the end point for the time window. If not specified, or
        if it lies in the future, datetime.datetime.utcnow() is used.

    :return: S_OK( { StatusA : secondsA, StatusB : secondsB } ) | S_ERROR
    """

    # Gets all history within the window
    result = self.getElementHistory( element, elementName, statusType, oldAs, newAs )
    if not result[ 'OK' ]:
      return result
    result = result[ 'Value' ]

    # Propagate the error instead of failing on a missing 'Value' key.
    validStatuses = getValidStatus()
    if not validStatuses[ 'OK' ]:
      return validStatuses

    # Dictionary to be returned, one zeroed counter per valid status
    statusCounter = dict.fromkeys( validStatuses[ 'Value' ], 0 )

    # If history is empty, return the zeroed dictionary
    if not result:
      return S_OK( statusCounter )

    # Set defaults
    oldAs = oldAs or datetime.datetime.min
    newAs = newAs or datetime.datetime.utcnow()

    # If users are not behaving well, we force newAs to not be in the future.
    newAs = min( newAs, datetime.datetime.utcnow() )

    # Iterate over the results in consecutive pairs: each status lasts until the
    # next one becomes effective.
    for current, following in zip( result, result[ 1: ] ):

      # Make sure the time taken as base is not older than the lower limit of
      # the window. In principle, this should be only checked on the first element,
      # but it is harmless anyway and cleaner than the if-else.
      startingPoint = max( current[ 1 ], oldAs )

      # Get number of seconds and add them
      statusCounter[ current[ 0 ] ] += timedelta_to_seconds( following[ 1 ] - startingPoint )

    # The pairwise iteration does not take into account the last row, which lasts
    # until the end of the window. This also covers the case where there is only
    # one row in the history.
    lastStatus, lastTime = result[ -1 ][ 0 ], result[ -1 ][ 1 ]
    statusCounter[ lastStatus ] += timedelta_to_seconds( newAs - max( lastTime, oldAs ) )

    return S_OK( statusCounter )
Exemplo n.º 35
0
def setToken(user):
    """
    Acquire, extend or release the token of the selected status entries.

    The entries are looked up in the "Status" table using the element, name,
    statusType and VO taken from the module-level ``switchDict``. Each entry
    found is then modified with the new token owner, the new token expiration
    and the reason given in ``switchDict``.

    :param user: name to be set as token owner when the token is not released
    :return: S_OK() on success (also when nothing matches), otherwise the
        failing result of the select / modify call
    """

    client = ResourceStatusClient()

    # The modify method does not discover the StatusTypes by itself (they are
    # optional in this script), so they are read from the DB first and the
    # entries are modified one by one.
    selection = client.selectStatusElement(
        switchDict["element"],
        "Status",
        name=switchDict["name"],
        statusType=switchDict["statusType"],
        vO=switchDict["VO"],
        meta={"columns": ["StatusType", "TokenOwner"]},
    )
    if not selection["OK"]:
        return selection

    rows = selection["Value"]

    # An empty selection means the entries do not exist on the DB.
    if not rows:
        subLogger.warn(
            "Nothing found for %s, %s, %s %s"
            % (
                switchDict["element"],
                switchDict["name"],
                switchDict["VO"],
                switchDict["statusType"],
            )
        )
        return S_OK()

    if not switchDict["releaseToken"]:
        # Acquiring / extending: valid for the requested number of days.
        issuedAt = datetime.utcnow().replace(microsecond=0)
        tokenExpiration = issuedAt + timedelta(days=int(switchDict["days"]))
        newTokenOwner = user
    else:
        # Releasing: the token is handed over to "rs_svc" with no expiration.
        tokenExpiration = datetime.max
        newTokenOwner = "rs_svc"

    subLogger.always("New token: %s --- until %s" % (newTokenOwner, tokenExpiration))

    for statusType, currentOwner in rows:

        # Let the operator know when the token is held by somebody else.
        if not (currentOwner == user or currentOwner == "rs_svc"):
            subLogger.info(
                "%s(%s) belongs to the user: %s"
                % (switchDict["name"], statusType, currentOwner)
            )

        modification = client.modifyStatusElement(
            switchDict["element"],
            "Status",
            name=switchDict["name"],
            statusType=statusType,
            reason=switchDict["reason"],
            tokenOwner=newTokenOwner,
            vO=switchDict["VO"],
            tokenExpiration=tokenExpiration,
        )
        if not modification["OK"]:
            return modification

        # Describe what happened to the token for the summary line.
        if currentOwner == newTokenOwner:
            outcome = "(extended)"
        elif newTokenOwner != "rs_svc":
            outcome = "(aquired from %s)" % currentOwner
        else:
            outcome = "(released)"

        subLogger.info(
            "name:%s, VO:%s statusType:%s %s"
            % (switchDict["name"], switchDict["VO"], statusType, outcome)
        )

    return S_OK()
Exemplo n.º 36
0
class TokenAgent( AgentModule ):
  '''
    TokenAgent is in charge of checking tokens assigned on resources.
    Notifications are sent to those users owning expiring tokens.
  '''

  # Rss token: owner name used for the entries that are not held by a user
  __rssToken = 'rs_svc'

  def __init__( self, *args, **kwargs ):
    ''' c'tor

      Only sets defaults; the clients are created in initialize().
    '''

    AgentModule.__init__( self, *args, **kwargs )

    # Tokens expiring within this number of hours are reported
    self.notifyHours = 12
    # Optional address that receives a summary with all the tokens
    self.adminMail = ''

    self.rsClient = None
    # { tokenOwner : [ [ tokenOwner, element, name, statusType, status, tokenExpiration ], ... ] }
    self.tokenDict = {}
    self.diracAdmin = None

  def initialize( self ):
    ''' TokenAgent initialization

      Reads the 'notifyHours' and 'adminMail' options and creates the clients.

      :return: S_OK()
    '''

    self.notifyHours = self.am_getOption( 'notifyHours', self.notifyHours )
    self.adminMail   = self.am_getOption( 'adminMail', self.adminMail )

    self.rsClient = ResourceStatusClient()
    self.diracAdmin = DiracAdmin()

    return S_OK()

  def execute( self ):
    '''
      Looks for user tokens. If they are expired, or expiring, it notifies users.

      Errors are logged and do not stop the execution: a failure on one element
      family does not prevent the others from being processed.

      :return: S_OK()
    '''

    # Initialized here, as it is needed empty at the beginning of the execution
    self.tokenDict = {}

    elements = ( 'Site', 'Resource', 'Node' )

    for element in elements:

      self.log.info( 'Processing %s' % element )

      interestingTokens = self._getInterestingTokens( element )
      if not interestingTokens[ 'OK' ]:
        self.log.error( interestingTokens[ 'Message' ] )
        continue
      interestingTokens = interestingTokens[ 'Value' ]

      processTokens = self._processTokens( element, interestingTokens )
      if not processTokens[ 'OK' ]:
        self.log.error( processTokens[ 'Message' ] )

    notificationResult = self._notifyOfTokens()
    if not notificationResult[ 'OK' ]:
      self.log.error( notificationResult[ 'Message' ] )

    return S_OK()

  def _getInterestingTokens( self, element ):
    '''
      Given an element, picks all the entries with TokenExpiration < now + X<hours>
      If the TokenOwner is not the rssToken ( rs_svc ), it is selected.

      :return: S_OK( [ dict, ... ] ) with one dictionary ( column name -> value )
        per selected entry, or the failing result of the select call
    '''

    tokenExpLimit = datetime.utcnow() + timedelta( hours = self.notifyHours )

    tokenElements = self.rsClient.selectStatusElement( element, 'Status',
                                                       meta = { 'older' : ( 'TokenExpiration', tokenExpLimit ) } )

    if not tokenElements[ 'OK' ]:
      return tokenElements

    tokenColumns = tokenElements[ 'Columns' ]

    interestingTokens = []

    for tokenValues in tokenElements[ 'Value' ]:

      # Rows come as plain sequences, turn them into { column : value }
      tokenElement = dict( zip( tokenColumns, tokenValues ) )

      if tokenElement[ 'TokenOwner' ] != self.__rssToken:
        interestingTokens.append( tokenElement )

    return S_OK( interestingTokens )

  def _processTokens( self, element, tokenElements ):
    '''
      Given an element and a list of interesting token elements, updates the
      database if the token is expired ( the token is given back to the rssToken
      with no expiration ), logs a message and adds the token to self.tokenDict,
      grouped by token owner, to be notified later on.

      :return: S_OK(), S_ERROR if a token element lacks an expected key, or the
        failing result of the database update. Processing stops at the first error.
    '''

    never = datetime.max

    for tokenElement in tokenElements:

      try:
        name = tokenElement[ 'Name' ]
        statusType = tokenElement[ 'StatusType' ]
        status = tokenElement[ 'Status' ]
        tokenOwner = tokenElement[ 'TokenOwner' ]
        tokenExpiration = tokenElement[ 'TokenExpiration' ]
      except KeyError as e:
        return S_ERROR( e )

      # If token has already expired
      if tokenExpiration < datetime.utcnow():
        _msg = '%s with statusType "%s" and owner %s EXPIRED'
        self.log.info( _msg % ( name, statusType, tokenOwner ) )

        result = self.rsClient.addOrModifyStatusElement( element, 'Status', name = name,
                                                         statusType = statusType,
                                                         tokenOwner = self.__rssToken,
                                                         tokenExpiration = never )
        if not result[ 'OK' ]:
          return result

      else:
        _msg = '%s with statusType "%s" and owner %s -> %s'
        self.log.info( _msg % ( name, statusType, tokenOwner, tokenExpiration ) )

      # The original owner and expiration are kept, they are what gets notified
      tokenList = [ tokenOwner, element, name, statusType, status, tokenExpiration ]
      self.tokenDict.setdefault( tokenOwner, [] ).append( tokenList )

    return S_OK()

  def _notifyOfTokens( self ):
    '''
      Splits interesting tokens between expired and expiring. Also splits them
      among users. It ends sending notifications to the users and, if adminMail
      is set and there is anything to report, a summary to adminMail.

      :return: the result of the admin notification if one is sent, S_OK()
        otherwise. Failures notifying the token owners are only logged.
    '''

    now = datetime.utcnow()

    adminExpired = []
    adminExpiring = []

    for tokenOwner, tokenLists in self.tokenDict.items():

      expired = []
      expiring = []

      for tokenList in tokenLists:

        # Index 5 is the tokenExpiration ( see _processTokens )
        if tokenList[ 5 ] < now:
          expired.append( tokenList )
          adminExpired.append( tokenList )
        else:
          expiring.append( tokenList )
          adminExpiring.append( tokenList )

      resNotify = self._notify( tokenOwner, expired, expiring )
      if not resNotify[ 'OK' ]:
        self.log.error( 'Failed to notify token owner', resNotify[ 'Message' ] )

    if ( adminExpired or adminExpiring ) and self.adminMail:
      return self._notify( self.adminMail, adminExpired, adminExpiring )

    return S_OK()

  def _notify( self, tokenOwner, expired, expiring ):
    '''
      Given a token owner and a list of expired and expiring tokens, sends an
      email to the user.

      :return: the result of diracAdmin.sendMail, or S_ERROR if it failed
    '''

    subject = 'RSS token summary for tokenOwner %s' % tokenOwner

    mail = '\nEXPIRED tokens ( RSS has taken control of them )\n'
    for tokenList in expired:

      mail += ' '.join( [ str( x ) for x in tokenList ] )
      mail += '\n'

    # This section must be appended: a plain assignment here would throw away
    # the EXPIRED section built above.
    mail += '\nEXPIRING tokens ( RSS will take control of them )\n'
    for tokenList in expiring:

      mail += ' '.join( [ str( x ) for x in tokenList ] )
      mail += '\n'

    mail += "\n\n You can extend for another 24 hours using the web interface (Set token -> Acquire)\n"
    mail += " Or you can use the dirac-rss-set-token script\n\n"
    mail += "Through the same interfaces you can release the token any time\n"

    # FIXME: you can re-take control of them using this or that...

    resEmail = self.diracAdmin.sendMail( tokenOwner, subject, mail )
    if not resEmail[ 'OK' ]:
      return S_ERROR( 'Cannot send email to user "%s"' % tokenOwner )

    return resEmail
Exemplo n.º 37
0
class SummarizeLogsAgent(AgentModule):
    '''
    Agent that copies the status changes found on the <element>Log tables
    into the <element>History tables.
    '''

    # Date format in database
    __dateFormat = '%Y-%m-%d %H:%M:%S'

    def __init__(self, *args, **kwargs):
        ''' c'tor

        Only sets defaults; the client is created in initialize().
        '''

        AgentModule.__init__(self, *args, **kwargs)

        self.rsClient = None

    def initialize(self):
        ''' Standard initialize. Creates the ResourceStatusClient.

        :return: S_OK()
        '''

        self.rsClient = ResourceStatusClient()

        return S_OK()

    def execute(self):
        '''
        For each element family, selects the log entries of the last full hour
        and inserts the status changes on the History table. Errors are logged;
        an error while inserting stops the processing of the current family
        only.

        :return: S_OK()
        '''

        # FIXME: probably this can be obtained from RssConfiguration instead
        elements = ('Site', 'Resource', 'Node')

        # We do not want neither minutes, nor seconds nor microseconds
        thisHour = datetime.utcnow().replace(minute=0, second=0, microsecond=0)

        for element in elements:

            self.log.info('Summarizing %s' % element)

            selectLogElements = self._selectLogElements(element, thisHour)
            if not selectLogElements['OK']:
                self.log.error(selectLogElements['Message'])
                continue
            selectLogElements = selectLogElements['Value']

            for selectedKey, selectedItem in selectLogElements.items():

                sRes = self._logSelectedLogElement(element, selectedKey,
                                                   selectedItem, thisHour)
                if not sRes['OK']:
                    self.log.error(sRes['Message'])
                    break

        return S_OK()

    def _selectLogElements(self, element, thisHour):
        '''
        For a given element, selects the entries on the <element>Log table
        with LastCheckTime newer than <thisHour - 1 hour> and discards those
        with LastCheckTime after <thisHour>. It groups them by tuples of
        ( <name>, <statusType> ) and keeps only the entries that represent
        a change in the status with respect to the previous one kept.

        :return: S_OK( { ( name, statusType ) : [ elementDict, ... ] } ) or
            the failing result of the select call
        '''

        lastHour = thisHour - timedelta(hours=1)

        selectResults = self.rsClient.selectStatusElement(
            element, 'Log', meta={'newer': ('LastCheckTime', lastHour)})
        if not selectResults['OK']:
            return selectResults

        selectedItems = {}
        selectColumns = selectResults['Columns']

        for selectResult in selectResults['Value']:

            # Rows come as plain sequences, turn them into { column : value }
            elementDict = dict(zip(selectColumns, selectResult))

            # Entries of the hour in progress are left for the next run
            if elementDict['LastCheckTime'] > thisHour:
                continue

            key = (elementDict['Name'], elementDict['StatusType'])

            if key not in selectedItems:
                selectedItems[key] = [elementDict]
            elif selectedItems[key][-1]['Status'] != elementDict['Status']:
                selectedItems[key].append(elementDict)

        return S_OK(selectedItems)

    def _logSelectedLogElement(self, element, selectedKey, selectedItem,
                               thisHour):
        '''
        Given an element, a selectedKey - which is a tuple ( <name>, <statusType> )
        and a list of dictionaries, this method inserts them. Before inserting
        them, checks whether the status of the first one is the latest status
        on the <element>History table. If it is, the first one is not inserted.

        :return: S_OK(), S_ERROR if the History table already has an entry
            for this time span, or the failing result of a DB call
        '''

        name, statusType = selectedKey

        selectedRes = self.rsClient.selectStatusElement(
            element,
            'History',
            name,
            statusType,
            meta={'columns': ['Status', 'LastCheckTime']})

        if not selectedRes['OK']:
            return selectedRes
        selectedRes = selectedRes['Value']

        selectedStatus = None
        if selectedRes:

            # Get the last selectedRes, which will be the newest one. Each selectedRes
            # is a tuple, in this case, containing two elements - Status, LastCheckTime
            selectedStatus, selectedLastTime = selectedRes[-1]

            if selectedLastTime > thisHour - timedelta(hours=1):
                return S_ERROR(
                    'The agent has run once on this time span, skipping')

        # The first of the selected items is only added if its status differs
        # from the latest one on the history; the remaining ones always are.
        if selectedItem[0]['Status'] != selectedStatus:
            itemsToInsert = selectedItem
        else:
            itemsToInsert = selectedItem[1:]

        for selectedItemDict in itemsToInsert:

            res = self._logToHistoryTable(element, selectedItemDict)
            if not res['OK']:
                return res

        return S_OK()

    def _logToHistoryTable(self, element, elementDict):
        '''
        Given an element and a dictionary with all the arguments, this method
        inserts a new entry on the <element>History table. The dictionary must
        provide the keys Name, StatusType, Status, ElementType, Reason,
        DateEffective, LastCheckTime, TokenOwner and TokenExpiration.

        :return: the result of rsClient.insertStatusElement, or S_ERROR if
            any of the keys is missing
        '''

        try:

            name = elementDict['Name']
            statusType = elementDict['StatusType']
            status = elementDict['Status']
            elementType = elementDict['ElementType']
            reason = elementDict['Reason']
            dateEffective = elementDict['DateEffective']
            lastCheckTime = elementDict['LastCheckTime']
            tokenOwner = elementDict['TokenOwner']
            tokenExpiration = elementDict['TokenExpiration']

        # "except X as e" is accepted by Python >= 2.6 and Python 3, whereas
        # the former "except X, e" spelling is a SyntaxError on Python 3.
        except KeyError as e:
            return S_ERROR(e)

        return self.rsClient.insertStatusElement(element, 'History', name,
                                                 statusType, status,
                                                 elementType, reason,
                                                 dateEffective, lastCheckTime,
                                                 tokenOwner, tokenExpiration)