Пример #1
0
def node_lock_acquire(context, node_id, action_id, engine=None, forced=False):
    """Try to lock the specified node.

    :param context: the context used for DB operations;
    :param node_id: ID of the node to be locked.
    :param action_id: ID of the action that attempts to lock the node.
    :param engine: ID of the engine that attempts to lock the node.
    :param forced: set to True to cancel current action that owns the lock,
                   if any.
    :returns: True if lock is acquired, or False otherwise.
    """
    # Step 1: try lock the node - if the returned owner_id is the
    #         action id, it was a success
    owner = db_api.node_lock_acquire(node_id, action_id)
    if action_id == owner:
        return True

    # Step 2: retry using global configuration options
    retries = cfg.CONF.lock_retry_times
    retry_interval = cfg.CONF.lock_retry_interval

    while retries > 0:
        scheduler.sleep(retry_interval)
        LOG.debug('Acquire lock for node %s again' % node_id)
        owner = db_api.node_lock_acquire(node_id, action_id)
        if action_id == owner:
            return True
        retries = retries - 1

    # Step 3: Last resort is 'forced locking', only needed when retry failed
    if forced:
        owner = db_api.node_lock_steal(node_id, action_id)
        return action_id == owner

    # if this node lock by dead engine
    action = db_api.action_get(context, owner)
    if (action and action.owner and action.owner != engine
            and is_engine_dead(context, action.owner)):
        LOG.info(
            _LI('The node %(n)s is locked by dead action %(a)s, '
                'try to steal the lock.'), {
                    'n': node_id,
                    'a': owner
                })
        reason = _('Engine died when executing this action.')
        db_api.action_mark_failed(context,
                                  action.id,
                                  time.time(),
                                  reason=reason)
        db_api.node_lock_steal(node_id, action_id)
        return True

    LOG.error(
        _LE('Node is already locked by action %(old)s, '
            'action %(new)s failed grabbing the lock'), {
                'old': owner,
                'new': action_id
            })

    return False
Пример #2
0
def cluster_lock_acquire(context, cluster_id, action_id, engine=None,
                         scope=CLUSTER_SCOPE, forced=False):
    """Try to lock the specified cluster.

    :param cluster_id: ID of the cluster to be locked.
    :param action_id: ID of the action which wants to lock the cluster.
    :param engine: ID of the engine which wants to lock the cluster.
    :param scope: scope of lock, could be cluster wide lock, or node-wide
                  lock.
    :param forced: set to True to cancel current action that owns the lock,
                   if any.
    :returns: True if lock is acquired, or False otherwise.
    """

    # Step 1: try lock the cluster - if the returned owner_id is the
    #         action id, it was a success
    owners = db_api.cluster_lock_acquire(cluster_id, action_id, scope)
    if action_id in owners:
        return True

    # Step 2: retry using global configuration options
    retries = cfg.CONF.lock_retry_times
    retry_interval = cfg.CONF.lock_retry_interval

    while retries > 0:
        scheduler.sleep(retry_interval)
        LOG.debug('Acquire lock for cluster %s again' % cluster_id)
        owners = db_api.cluster_lock_acquire(cluster_id, action_id, scope)
        if action_id in owners:
            return True
        retries = retries - 1

    # Step 3: Last resort is 'forced locking', only needed when retry failed
    if forced:
        owners = db_api.cluster_lock_steal(cluster_id, action_id)
        return action_id in owners

    # Will reach here only because scope == CLUSTER_SCOPE
    action = db_api.action_get(context, owners[0])
    if (action and action.owner and action.owner != engine and
            is_engine_dead(context, action.owner)):
        LOG.info(_LI('The cluster %(c)s is locked by dead action %(a)s, '
                     'try to steal the lock.'), {
            'c': cluster_id,
            'a': owners[0]
        })
        reason = _('Engine died when executing this action.')
        db_api.action_mark_failed(context, action.id, time.time(),
                                  reason=reason)
        owners = db_api.cluster_lock_steal(cluster_id, action_id)
        return action_id in owners

    LOG.error(_LE('Cluster is already locked by action %(old)s, '
                  'action %(new)s failed grabbing the lock'),
              {'old': str(owners), 'new': action_id})

    return False
Пример #3
0
def node_lock_acquire(context, node_id, action_id, engine=None,
                      forced=False):
    """Try to lock the specified node.

    :param context: the context used for DB operations;
    :param node_id: ID of the node to be locked.
    :param action_id: ID of the action that attempts to lock the node.
    :param engine: ID of the engine that attempts to lock the node.
    :param forced: set to True to cancel current action that owns the lock,
                   if any.
    :returns: True if lock is acquired, or False otherwise.
    """
    # Step 1: try lock the node - if the returned owner_id is the
    #         action id, it was a success
    owner = db_api.node_lock_acquire(node_id, action_id)
    if action_id == owner:
        return True

    # Step 2: retry using global configuration options
    retries = cfg.CONF.lock_retry_times
    retry_interval = cfg.CONF.lock_retry_interval

    while retries > 0:
        scheduler.sleep(retry_interval)
        LOG.debug('Acquire lock for node %s again' % node_id)
        owner = db_api.node_lock_acquire(node_id, action_id)
        if action_id == owner:
            return True
        retries = retries - 1

    # Step 3: Last resort is 'forced locking', only needed when retry failed
    if forced:
        owner = db_api.node_lock_steal(node_id, action_id)
        return action_id == owner

    # if this node lock by dead engine
    action = db_api.action_get(context, owner)
    if (action and action.owner and action.owner != engine and
            is_engine_dead(context, action.owner)):
        LOG.info(_LI('The node %(n)s is locked by dead action %(a)s, '
                     'try to steal the lock.'), {
            'n': node_id,
            'a': owner
        })
        reason = _('Engine died when executing this action.')
        db_api.action_mark_failed(context, action.id, time.time(),
                                  reason=reason)
        db_api.node_lock_steal(node_id, action_id)
        return True

    LOG.error(_LE('Node is already locked by action %(old)s, '
                  'action %(new)s failed grabbing the lock'),
              {'old': owner, 'new': action_id})

    return False
Пример #4
0
    def set_status(self, result, reason=None):
        """Set action status based on return value from execute."""

        timestamp = wallclock()

        if result == self.RES_OK:
            status = self.SUCCEEDED
            db_api.action_mark_succeeded(self.context, self.id, timestamp)

        elif result == self.RES_ERROR:
            status = self.FAILED
            db_api.action_mark_failed(self.context, self.id, timestamp,
                                      reason=reason or 'ERROR')

        elif result == self.RES_TIMEOUT:
            status = self.FAILED
            db_api.action_mark_failed(self.context, self.id, timestamp,
                                      reason=reason or 'TIMEOUT')

        elif result == self.RES_CANCEL:
            status = self.CANCELLED
            db_api.action_mark_cancelled(self.context, self.id, timestamp)

        else:  # result == self.RES_RETRY:
            status = self.READY
            # Action failed at the moment, but can be retried
            # We abandon it and then notify other dispatchers to execute it
            db_api.action_abandon(self.context, self.id)

        if status == self.SUCCEEDED:
            EVENT.info(self.context, self, self.action, status, reason)
        elif status == self.READY:
            EVENT.warning(self.context, self, self.action, status, reason)
        else:
            EVENT.error(self.context, self, self.action, status, reason)

        self.status = status
        self.status_reason = reason
Пример #5
0
    def set_status(self, result, reason=None):
        '''Set action status based on return value from execute.'''

        timestamp = wallclock()

        if result == self.RES_OK:
            status = self.SUCCEEDED
            msg = _LI('Action %(name)s [%(id)s] completed with SUCCESS.')
            db_api.action_mark_succeeded(self.context, self.id, timestamp)

        elif result == self.RES_ERROR:
            status = self.FAILED
            msg = _LI('Action %(name)s [%(id)s] failed with ERROR.')
            db_api.action_mark_failed(self.context, self.id, timestamp,
                                      reason=reason or 'ERROR')

        elif result == self.RES_TIMEOUT:
            status = self.FAILED
            msg = _LI('Action %(name)s [%(id)s] failed with TIMEOUT.')
            db_api.action_mark_failed(self.context, self.id, timestamp,
                                      reason=reason or 'TIMEOUT')

        elif result == self.RES_CANCEL:
            status = self.CANCELLED
            msg = _LI('Action %(name)s [%(id)s] was cancelled.')
            db_api.action_mark_cancelled(self.context, self.id, timestamp)

        else:  # result == self.RES_RETRY:
            status = self.READY
            # Action failed at the moment, but can be retried
            # We abandon it and then notify other dispatchers to execute it
            db_api.action_abandon(self.context, self.id)
            msg = _LI('Action %(name)s [%(id)s] aborted with RETRY.')

        LOG.info(msg, {'name': self.action, 'id': self.id, 'status': status})
        self.status = status
        self.status_reason = reason
Пример #6
0
 def mark_failed(cls, context, action_id, timestamp, reason=None):
     return db_api.action_mark_failed(context, action_id, timestamp, reason)
Пример #7
0
 def mark_failed(cls, context, action_id, timestamp, reason=None):
     return db_api.action_mark_failed(context, action_id, timestamp, reason)
Пример #8
0
 def mark_failed(cls, context, action_id, timestamp):
     return db_api.action_mark_failed(context, action_id, timestamp)
Пример #9
0
def cluster_lock_acquire(context,
                         cluster_id,
                         action_id,
                         engine=None,
                         scope=CLUSTER_SCOPE,
                         forced=False):
    """Try to lock the specified cluster.

    :param cluster_id: ID of the cluster to be locked.
    :param action_id: ID of the action which wants to lock the cluster.
    :param engine: ID of the engine which wants to lock the cluster.
    :param scope: scope of lock, could be cluster wide lock, or node-wide
                  lock.
    :param forced: set to True to cancel current action that owns the lock,
                   if any.
    :returns: True if lock is acquired, or False otherwise.
    """

    # Step 1: try lock the cluster - if the returned owner_id is the
    #         action id, it was a success
    owners = db_api.cluster_lock_acquire(cluster_id, action_id, scope)
    if action_id in owners:
        return True

    # Step 2: retry using global configuration options
    retries = cfg.CONF.lock_retry_times
    retry_interval = cfg.CONF.lock_retry_interval

    while retries > 0:
        scheduler.sleep(retry_interval)
        LOG.debug('Acquire lock for cluster %s again' % cluster_id)
        owners = db_api.cluster_lock_acquire(cluster_id, action_id, scope)
        if action_id in owners:
            return True
        retries = retries - 1

    # Step 3: Last resort is 'forced locking', only needed when retry failed
    if forced:
        owners = db_api.cluster_lock_steal(cluster_id, action_id)
        return action_id in owners

    # Will reach here only because scope == CLUSTER_SCOPE
    action = db_api.action_get(context, owners[0])
    if (action and action.owner and action.owner != engine
            and is_engine_dead(context, action.owner)):
        LOG.info(
            _LI('The cluster %(c)s is locked by dead action %(a)s, '
                'try to steal the lock.'), {
                    'c': cluster_id,
                    'a': owners[0]
                })
        reason = _('Engine died when executing this action.')
        db_api.action_mark_failed(context,
                                  action.id,
                                  time.time(),
                                  reason=reason)
        owners = db_api.cluster_lock_steal(cluster_id, action_id)
        return action_id in owners

    LOG.error(
        _LE('Cluster is already locked by action %(old)s, '
            'action %(new)s failed grabbing the lock'), {
                'old': str(owners),
                'new': action_id
            })

    return False