def test_use_specified_duration(self, mock_svc):
    # The looked-up service reports an ``updated_at`` of "now"; with an
    # explicit third argument (10000) the engine must not be reported dead.
    fresh_service = mock.Mock(updated_at=timeutils.utcnow(True))
    mock_svc.return_value = fresh_service

    is_dead = utils.is_engine_dead(self.ctx, 'fake_engine_id', 10000)

    self.assertFalse(is_dead)
    # The service lookup happens exactly once, keyed by the engine ID.
    mock_svc.assert_called_once_with(self.ctx, 'fake_engine_id')
def test_engine_is_alive(self, mock_svc):
    # A service whose ``updated_at`` is "now" must not be reported dead
    # when no explicit duration is passed.
    just_updated = timeutils.utcnow(True)
    mock_svc.return_value = mock.Mock(updated_at=just_updated)

    is_dead = utils.is_engine_dead(self.ctx, 'fake_engine_id')

    self.assertFalse(is_dead)
    # The service lookup happens exactly once, keyed by the engine ID.
    mock_svc.assert_called_once_with(self.ctx, 'fake_engine_id')
def test_engine_is_dead(self, mock_service):
    # Back-date the service's last update by three periodic intervals so
    # that the engine is expected to be reported dead.
    staleness = datetime.timedelta(seconds=3 * cfg.CONF.periodic_interval)
    last_update = timeutils.utcnow(True) - staleness
    stale_service = mock.Mock(updated_at=last_update)
    mock_service.return_value = stale_service

    is_dead = utils.is_engine_dead(self.ctx, 'fake_engine_id')

    self.assertTrue(is_dead)
    # The service lookup happens exactly once, keyed by the engine ID.
    mock_service.assert_called_once_with(self.ctx, 'fake_engine_id')
def cluster_lock_acquire(context, cluster_id, action_id, engine=None,
                         scope=CLUSTER_SCOPE, forced=False):
    """Try to lock the specified cluster.

    The acquisition is attempted up to three times. If that fails, the
    lock is stolen when ``forced`` is set, or when the action currently
    holding the lock is owned by a different engine that is found dead.

    :param context: the context used for DB operations.
    :param cluster_id: ID of the cluster to be locked.
    :param action_id: ID of the action which wants to lock the cluster.
    :param engine: ID of the engine which wants to lock the cluster.
    :param scope: scope of lock, could be cluster wide lock, or node-wide
                  lock.
    :param forced: set to True to cancel current action that owns the lock,
                   if any.
    :returns: True if lock is acquired, or False otherwise.
    """
    # Keep ``owners`` bound even if every attempt below raises
    # DBDuplicateEntry; the later steps read it.
    owners = []

    # Step 1: try lock the cluster - if the returned owner_id is the
    #         action id, it was a success
    for _attempt in range(3):
        try:
            owners = cl_obj.ClusterLock.acquire(cluster_id, action_id, scope)
        except exception.DBDuplicateEntry:
            LOG.info('Duplicate entry in cluster_lock table for %(c)s. '
                     'Retrying cluster lock.',
                     {'c': cluster_id})
        else:
            if action_id in owners:
                return True
        # Random back-off (1 or 2) before the next attempt.
        eventlet.sleep(random.randrange(1, 3))

    # Step 2: Last resort is 'forced locking', only needed when retry failed
    if forced:
        owners = cl_obj.ClusterLock.steal(cluster_id, action_id)
        return action_id in owners

    # No attempt produced an owner list (e.g. each one hit a duplicate
    # entry), so there is no owning action to inspect. Report failure
    # instead of crashing on ``owners[0]`` below.
    if not owners:
        LOG.warning('Failed to acquire lock on cluster %(c)s for action '
                    '%(a)s: current lock owner could not be determined.',
                    {'c': cluster_id, 'a': action_id})
        return False

    # Step 3: check if the owner is a dead engine, if so, steal the lock.
    # Will reach here only because scope == CLUSTER_SCOPE
    action = ao.Action.get(context, owners[0])
    if (action and action.owner and action.owner != engine and
            utils.is_engine_dead(context, action.owner)):
        LOG.info('The cluster %(c)s is locked by dead action %(a)s, '
                 'try to steal the lock.',
                 {'c': cluster_id, 'a': owners[0]})
        dead_engine = action.owner
        owners = cl_obj.ClusterLock.steal(cluster_id, action_id)
        # Cleanse locks affected by the dead engine
        objects.Service.gc_by_engine(dead_engine)
        return action_id in owners

    # Only the first 8 characters of each ID are logged to keep it short.
    lock_owners = [o[:8] for o in owners]
    LOG.warning('Cluster is already locked by action %(old)s, '
                'action %(new)s failed grabbing the lock',
                {'old': str(lock_owners), 'new': action_id[:8]})

    return False
def cluster_lock_acquire(context, cluster_id, action_id, engine=None,
                         scope=CLUSTER_SCOPE, forced=False):
    """Attempt to take the lock on a cluster for an action.

    :param context: the context passed to the action lookup and the
                    engine liveness check.
    :param cluster_id: ID of the cluster to be locked.
    :param action_id: ID of the action requesting the lock.
    :param engine: ID of the engine requesting the lock.
    :param scope: scope of the lock (cluster-wide or node-wide).
    :param forced: when True, steal the lock if the plain acquisition
                   does not succeed.
    :returns: True if the action ends up holding the lock, else False.
    """
    # Step 1: plain acquisition; success means our action is among the
    # returned owners.
    holders = cl_obj.ClusterLock.acquire(cluster_id, action_id, scope)
    if action_id in holders:
        return True

    # Step 2: forced locking takes the lock away from the current holder.
    if forced:
        holders = cl_obj.ClusterLock.steal(cluster_id, action_id)
        return action_id in holders

    # Step 3: steal the lock only if the first holder belongs to another
    # engine that is dead. Per the original note, this point is reached
    # only when scope == CLUSTER_SCOPE.
    holding_action = ao.Action.get(context, holders[0])
    held_by_dead_engine = (
        holding_action
        and holding_action.owner
        and holding_action.owner != engine
        and utils.is_engine_dead(context, holding_action.owner)
    )
    if not held_by_dead_engine:
        LOG.error(
            'Cluster is already locked by action %(old)s, '
            'action %(new)s failed grabbing the lock',
            {'old': str(holders), 'new': action_id})
        return False

    LOG.info(
        'The cluster %(c)s is locked by dead action %(a)s, '
        'try to steal the lock.',
        {'c': cluster_id, 'a': holders[0]})
    dead_engine = holding_action.owner
    holders = cl_obj.ClusterLock.steal(cluster_id, action_id)
    # Cleanse locks affected by the dead engine
    objects.Service.gc_by_engine(dead_engine)
    return action_id in holders
def node_lock_acquire(context, node_id, action_id, engine=None, forced=False):
    """Attempt to take the lock on a node for an action.

    :param context: the context used for DB operations.
    :param node_id: ID of the node to be locked.
    :param action_id: ID of the action requesting the lock.
    :param engine: ID of the engine requesting the lock.
    :param forced: when True, steal the lock if the plain acquisition
                   does not succeed.
    :returns: True if lock is acquired, or False otherwise.
    """
    # Step 1: plain acquisition; success means the returned owner is us.
    holder = nl_obj.NodeLock.acquire(node_id, action_id)
    if holder == action_id:
        return True

    # Step 2: forced locking takes the lock away from the current holder.
    if forced:
        holder = nl_obj.NodeLock.steal(node_id, action_id)
        return holder == action_id

    # Step 3: steal the lock only if the holding action belongs to another
    # engine that is dead.
    holding_action = ao.Action.get(context, holder)
    held_by_dead_engine = (
        holding_action
        and holding_action.owner
        and holding_action.owner != engine
        and utils.is_engine_dead(context, holding_action.owner)
    )
    if not held_by_dead_engine:
        LOG.warning(
            'Node is already locked by action %(old)s, '
            'action %(new)s failed grabbing the lock',
            {'old': holder, 'new': action_id})
        return False

    LOG.info(
        'The node %(n)s is locked by dead action %(a)s, '
        'try to steal the lock.',
        {'n': node_id, 'a': holder})
    reason = _('Engine died when executing this action.')
    nl_obj.NodeLock.steal(node_id, action_id)
    # Mark the action that held the lock as failed, stamped with the
    # current time.
    ao.Action.mark_failed(context, holding_action.id, time.time(), reason)
    return True
def test_engine_is_none(self, mock_service):
    # When the service lookup yields nothing, the engine must be
    # reported dead.
    mock_service.return_value = None

    is_dead = utils.is_engine_dead(self.ctx, 'fake_engine_id')

    self.assertTrue(is_dead)
    # The service lookup happens exactly once, keyed by the engine ID.
    mock_service.assert_called_once_with(self.ctx, 'fake_engine_id')