def _TryAcquire(self, resource_type, resource_id, expected_status=Lock.ACQUIRED_LOCK,
                resource_data=None, detect_abandonment=False, release_lock=True,
                test_hook=None):
    """Try to acquire a lock via Lock._TryAcquire and verify the outcome.

    Waits for the acquire callback and asserts that the reported status equals
    "expected_status". Unless the attempt failed, the lock is first released
    (when "release_lock" is true) and then its id, owner, expiration (only if
    "detect_abandonment" is set) and resource data are checked.

    Returns the lock object that was handed to the acquire callback.
    """
    def _OnAcquire(lock, status):
        # Hand both values back to the self.wait() call below as one tuple.
        return self.stop((lock, status))

    Lock._TryAcquire(self._client, resource_type, resource_id, _OnAcquire,
                     resource_data=resource_data,
                     detect_abandonment=detect_abandonment,
                     test_hook=test_hook)
    acquired, status = self.wait()
    failed = status == Lock.FAILED_TO_ACQUIRE_LOCK

    if release_lock and not failed:
        self._Release(acquired)

    self.assertEqual(status, expected_status)
    if failed:
        # Nothing further to verify when the lock was not acquired.
        return acquired

    self.assertEqual(Lock.DeconstructLockId(acquired.lock_id),
                     (resource_type, resource_id))
    self.assertIsNotNone(acquired.owner_id)
    if detect_abandonment:
        # Expiration should be roughly ABANDONMENT_SECS from now (within 25%).
        self.assertAlmostEqual(acquired.expiration,
                               time.time() + Lock.ABANDONMENT_SECS,
                               delta=Lock.ABANDONMENT_SECS / 4)
    self.assertEqual(acquired.resource_data, resource_data)
    return acquired
def _Race(update_func):
    """Test hook: race against the caller before letting it update the lock.

    "lock_to_release" and "_OnAcquire" come from the enclosing scope. While
    "lock_to_release" holds fewer than two entries, a competing
    Lock.TryAcquire on ('op', 'id') is started with _OnAcquire (bound to
    "update_func") as its callback; otherwise "update_func" is invoked
    directly.
    """
    races_won = len(lock_to_release)
    if races_won > 1:
        update_func()
        return

    # races_won == 0: win race to acquire lock.
    # races_won == 1: win race to update acquire_failures.
    Lock.TryAcquire(self._client, 'op', 'id', partial(_OnAcquire, update_func))
def testScanAbandonedLocks(self):
    """Test scanning for locks which were abandoned due to server failure."""
    def _OpMethod(client, callback):
        # Count each handler invocation; stop the IO loop after the tenth one.
        self._method_count += 1
        callback()
        if self._method_count == 10:
            self.io_loop.add_callback(self.stop)

    # Acquire and abandon ten operation locks, creating a test op for each.
    for index in xrange(10):
        user_id = index / 2
        op_lock = self._AcquireOpLock(user_id=user_id)
        op_lock.Abandon(self._client, self.stop)
        self.wait()
        self._CreateTestOp(user_id=user_id, handler=_OpMethod)

    # Add an expired non-op lock to ensure that it's skipped over during scan.
    Lock.TryAcquire(self._client, LockResourceType.Job, 'v0',
                    lambda lock, status: self.stop(lock),
                    detect_abandonment=True)
    job_lock = self.wait()
    job_lock.Abandon(self._client, self.stop)
    self.wait()

    # Now scan for abandoned locks; self.wait() returns once _OpMethod has
    # scheduled self.stop on its tenth invocation.
    manager = self._CreateOpManager(handlers=[_OpMethod])
    manager._ScanAbandonedLocks()
    self.wait()
    self._RunAsync(manager.Drain)
def _ExecuteOp(self, user_id, handler, wait_for_op=True, **kwargs):
    """Create a test operation, execute it, and verify the lock is released.

    A test op is created for "user_id" with "handler" (extra keyword
    arguments are forwarded to _CreateTestOp) and executed through a newly
    created user op manager, which is stored on self.user_op_mgr. When
    "wait_for_op" is true, the op's own completion callback is also awaited.
    Afterwards the user's operation lock is queried and asserted to be absent.

    Returns the created operation.
    """
    op = self._CreateTestOp(user_id=user_id, handler=handler, **kwargs)

    # self.stop() must not run until both the "completed all" and the
    # "op wait" callbacks have been received.
    with util.Barrier(self.stop) as barrier:
        self.user_op_mgr = self._CreateUserOpManager(user_id=user_id,
                                                     handlers=[handler],
                                                     callback=barrier.Callback())
        if wait_for_op:
            op_callback = barrier.Callback()
        else:
            op_callback = None
        self.user_op_mgr.Execute(op.operation_id, op_callback)
    self.wait()

    # Ensure that lock is released.
    op_lock_id = Lock.ConstructLockId(LockResourceType.Operation, str(user_id))
    Lock.Query(self._client, op_lock_id, None, lambda lock: self.stop(lock),
               must_exist=False)
    remaining_lock = self.wait()
    self.assertIsNone(remaining_lock, 'operation lock should have been released')
    return op
def testRaceToUpdateReleasedLock(self):
    """Test case where failed lock acquirer tries to update lock after it's been released."""
    def _ReleaseThenUpdate(held_lock, update_func):
        # Release the already-held lock only after the current attempt has
        # queried the lock row and holds its own instance of the lock; the
        # attempt's update then proceeds via the release callback.
        held_lock.Release(self._client, callback=update_func)

    # First, acquire the lock and keep holding it.
    first_lock = self._TryAcquire('op', 'id', release_lock=False)

    # Now, try to acquire, expecting failure because the test hook releases
    # the first lock in the middle of the attempt.
    race_hook = partial(_ReleaseThenUpdate, first_lock)
    self._TryAcquire('op', 'id',
                     expected_status=Lock.FAILED_TO_ACQUIRE_LOCK,
                     test_hook=race_hook,
                     release_lock=False)

    # Now, check for lock row. There shouldn't be one.
    lock_row = self._RunAsync(Lock.Query, self._client,
                              Lock.ConstructLockId('op', 'id'), None,
                              must_exist=False)
    self.assertIsNone(lock_row, 'Lock row should not exist')
def _Race(update_func):
    """Test hook: race against the caller for the abandoned lock.

    "lock_to_release" and "_OnAcquire" come from the enclosing scope. While
    "lock_to_release" is still empty, a competing Lock.TryAcquire on
    ('op', 'id') is started with _OnAcquire (bound to "update_func") as its
    callback; otherwise "update_func" is invoked directly.
    """
    if len(lock_to_release) != 0:
        update_func()
        return

    # Win race to acquire the abandoned lock.
    Lock.TryAcquire(self._client, 'op', 'id', partial(_OnAcquire, update_func))
def _AddOpMethod3(client, callback):
    """Create operation with lower op id."""
    # "callback" is handed to the barrier, which both steps below feed.
    with util.Barrier(callback) as barrier:
        op_fields = self._CreateTestOpDict(user_id=1, handler=self._OpMethod)
        op_fields['operation_id'] = Operation.ConstructOperationId(1, 1)
        new_op = Operation.CreateFromKeywords(**op_fields)
        new_op.Update(client, barrier.Callback())

        # Try to acquire lock, which has side effect of incrementing
        # "acquire_failures" and triggering requery.
        Lock.TryAcquire(self._client, LockResourceType.Operation, '1',
                        barrier.Callback())
def _ScanAbandonedLocks(self):
    """Periodically scans the Locks table looking for abandoned operation locks.

    For every abandoned lock whose resource type is an operation,
    MaybeExecuteOp is called for the lock's user. This generator loops
    forever, pausing a random interval (bounded by
    _MAX_SCAN_ABANDONED_LOCKS_INTERVAL) between scans.

    TODO(Andy): Scanning for abandoned locks really should go into a
                LockManager class. See header for lock.py.
    """
    max_delay_secs = OpManager._MAX_SCAN_ABANDONED_LOCKS_INTERVAL.total_seconds()
    while True:
        # If there are too many active users, do not scan.
        if len(self._active_users) < self._MAX_USERS_OUTSTANDING:
            try:
                start_key = None
                while True:
                    # Never request more locks than there is room for new active users.
                    capacity = self._MAX_USERS_OUTSTANDING - len(self._active_users)
                    scan_limit = min(capacity, OpManager._SCAN_LIMIT)
                    abandoned_locks, start_key = yield gen.Task(Lock.ScanAbandoned,
                                                                self._client,
                                                                limit=scan_limit,
                                                                excl_start_key=start_key)

                    for abandoned in abandoned_locks:
                        resource_type, resource_id = Lock.DeconstructLockId(abandoned.lock_id)
                        if resource_type == LockResourceType.Operation:
                            user_id = int(resource_id)
                            logging.info('scanned operation lock for user %d' % user_id)

                            # Create a clean context for this operation since we're not
                            # blocking the current coroutine on it.
                            with stack_context.NullContext(), \
                                    util.ExceptionBarrier(util.LogExceptionCallback):
                                self.MaybeExecuteOp(self._client, user_id,
                                                    abandoned.resource_data)

                    # Keep iterating until all abandoned locks have been found,
                    # otherwise wait until the next scan time.
                    if start_key is None:
                        break
            except Exception:
                # A failed scan is logged and retried at the next interval.
                logging.exception('abandoned lock scan failed')

        # Wait until next scan time.
        delay_secs = random.random() * max_delay_secs
        wake_time = time.time() + delay_secs
        logging.debug('next scan in %.2fs' % delay_secs)
        yield gen.Task(IOLoop.current().add_timeout, wake_time)
def _AcquireOpLock(self, user_id, operation_id=None):
    """Try to acquire the operation lock for "user_id" and return the lock.

    "operation_id" is stored as the lock's resource data. The status reported
    to the acquire callback is ignored; only the lock is returned.
    """
    def _OnAcquire(lock, status):
        # Pass just the lock back to the self.wait() call below.
        return self.stop(lock)

    resource_id = str(user_id)
    Lock.TryAcquire(self._client, LockResourceType.Operation, resource_id,
                    _OnAcquire, resource_data=operation_id)
    return self.wait()