def setUp(self):
    """Create the job key, the mocked collaborators and the updater under test."""
    # Job identity used by every expectation helper.
    self._role, self._name, self._env = 'mesos', 'jimbob', 'test'
    self._job_key = JobKey(name=self._name, environment=self._env, role=self._role)

    self._session_key, self._lock = 'test_session', 'test_lock'

    # Collaborators are mox mocks; the proxy wraps the mocked scheduler.
    self._instance_watcher = MockObject(InstanceWatcher)
    scheduler = MockObject(scheduler_client)
    self._scheduler = scheduler
    self._scheduler_proxy = FakeSchedulerProxy(
        'test-cluster', scheduler, self._session_key)

    # Work on a private copy so tests cannot mutate the class-level defaults.
    self.init_updater(deepcopy(self.UPDATE_CONFIG))
def setUp(self):
    """Create the job key, the mocked collaborators and the updater under test."""
    # Job identity used by every expectation helper.
    self._role, self._name, self._env = 'mesos', 'jimbob', 'test'
    self._job_key = JobKey(name=self._name, environment=self._env, role=self._role)
    self._lock = 'test_lock'

    # Collaborators: mox mocks plus the fake scheduler mux / proxy.
    self._instance_watcher = MockObject(InstanceWatcher)
    self._job_monitor = MockObject(JobMonitor)
    self._scheduler_mux = FakeSchedulerMux()
    scheduler = MockObject(scheduler_client)
    self._scheduler = scheduler
    self._scheduler_proxy = FakeSchedulerProxy(Cluster(name='test-cluster'), scheduler)
    self._quota_check = MockObject(QuotaCheck)

    # Work on a private copy so tests cannot mutate the class-level defaults.
    self.init_updater(deepcopy(self.UPDATE_CONFIG))

    # Per-task resource amounts, assigned after the updater exists
    # (same order as before).
    self._num_cpus, self._num_ram, self._num_disk = 1.0, 1, 1
def _prepare_clone_mock(self, status):
    """Wire the driver with a mocked suds client whose CloneNasFile yields *status*.

    Also stubs the driver's ``_get_host_ip`` and ``_get_export_path`` and
    records one expected call on each. Returns the mox instance so the
    caller can replay and verify it.
    """
    driver = self._driver
    mocker = self._mox

    volume = FakeVolume()
    volume.provider_location = '127.0.0.1:/nfs'

    driver._client = MockObject(suds.client.Client)
    client = driver._client
    client.factory = MockObject(suds.client.Factory)
    client.service = MockObject(suds.client.ServiceSelector)

    # ServiceSelector generates CloneNasFile at runtime from the XML, so it
    # cannot be mocked; attach a hand-written replacement instead.
    def _fake_clone(*args, **kwargs):
        return FakeResponce(status)

    client.service.CloneNasFile = types.MethodType(
        _fake_clone, suds.client.ServiceSelector)

    for helper_name in ('_get_host_ip', '_get_export_path'):
        mocker.StubOutWithMock(driver, helper_name)
    driver._get_host_ip(IgnoreArg()).AndReturn('127.0.0.1')
    driver._get_export_path(IgnoreArg()).AndReturn('/nfs')

    return mocker
def setUp(self):
    """Create the job key, the mocked collaborators and the updater under test."""
    # Job identity used by every expectation helper.
    self._role, self._name, self._env = 'mesos', 'jimbob', 'test'
    self._job_key = JobKey(name=self._name, environment=self._env, role=self._role)

    self._session_key, self._lock = 'test_session', 'test_lock'

    # Collaborators: mox mocks plus the fake scheduler mux / proxy.
    self._instance_watcher = MockObject(InstanceWatcher)
    self._job_monitor = MockObject(JobMonitor)
    self._scheduler_mux = FakeSchedulerMux()
    scheduler = MockObject(scheduler_client)
    self._scheduler = scheduler
    self._scheduler_proxy = FakeSchedulerProxy(
        Cluster(name='test-cluster'), scheduler, self._session_key)
    self._quota_check = MockObject(QuotaCheck)

    # Work on a private copy so tests cannot mutate the class-level defaults.
    self.init_updater(deepcopy(self.UPDATE_CONFIG))

    # Per-task resource amounts, assigned after the updater exists
    # (same order as before).
    self._num_cpus, self._num_ram, self._num_disk = 1.0, 1, 1
class UpdaterTest(TestCase):
    """Tests for ``Updater`` driven by recorded mox expectations.

    Each test records the scheduler / instance-watcher calls it expects via
    the ``expect_*`` helpers, switches the mocks to replay mode, runs the
    update and finally verifies that every recorded call happened.
    """

    # Baseline update settings; individual tests copy and override these.
    UPDATE_CONFIG = {
        'batch_size': 3,
        'restart_threshold': 50,
        'watch_secs': 50,
        'max_per_shard_failures': 0,
        'max_total_failures': 0,
    }

    def setUp(self):
        """Create the job key, the mocked collaborators and the updater."""
        self._role = 'mesos'
        self._name = 'jimbob'
        self._env = 'test'
        self._job_key = JobKey(name=self._name, environment=self._env, role=self._role)
        self._session_key = 'test_session'
        self._lock = 'test_lock'
        self._instance_watcher = MockObject(InstanceWatcher)
        self._scheduler = MockObject(scheduler_client)
        self._scheduler_proxy = FakeSchedulerProxy(
            'test-cluster', self._scheduler, self._session_key)
        # Deep copy so a test cannot mutate the class-level defaults.
        self.init_updater(deepcopy(self.UPDATE_CONFIG))

    def replay_mocks(self):
        """Switch both mocks from record mode to replay mode."""
        Replay(self._scheduler)
        Replay(self._instance_watcher)

    def verify_mocks(self):
        """Check that every recorded expectation was satisfied."""
        Verify(self._scheduler)
        Verify(self._instance_watcher)

    def init_updater(self, update_config):
        """(Re)build the fake config and the ``Updater`` from *update_config*."""
        self._config = FakeConfig(self._role, self._name, self._env, update_config)
        self._updater = Updater(
            self._config, 3, self._scheduler_proxy, self._instance_watcher)

    def _make_response(self, response_code=None):
        """Return a ``Response`` with *response_code* (``OK`` when ``None``)."""
        code = ResponseCode.OK if response_code is None else response_code
        return Response(responseCode=code, message='test')

    def expect_watch_instances(self, instance_ids, failed_instances=None):
        """Expect a ``watch`` call for *instance_ids* returning the failed set.

        *failed_instances* defaults to no failures. ``None`` is used as the
        sentinel instead of a shared mutable ``[]`` default.
        """
        failed = set() if failed_instances is None else set(failed_instances)
        self._instance_watcher.watch(instance_ids).AndReturn(failed)

    def expect_populate(self, job_config, response_code=None):
        """Expect ``populateJobConfig`` returning a copy of the job's task config."""
        resp = self._make_response(response_code)
        result = set([deepcopy(job_config.taskConfig)])
        resp.result = Result(populateJobResult=PopulateJobResult(populated=result))
        self._scheduler.populateJobConfig(
            job_config, JobConfigValidation.RUN_FILTERS).AndReturn(resp)

    def expect_get_tasks(self, tasks, ignore_ids=None, response_code=None):
        """Expect ``getTasksStatus`` returning *tasks* as scheduled tasks.

        The instance id of each task is its index in *tasks*; indices listed
        in *ignore_ids* are left out of the response.
        """
        response = self._make_response(response_code)
        scheduled = [
            ScheduledTask(assignedTask=AssignedTask(task=task, instanceId=index))
            for index, task in enumerate(tasks)
            if not ignore_ids or index not in ignore_ids
        ]
        response.result = Result(
            scheduleStatusResult=ScheduleStatusResult(tasks=scheduled))
        query = TaskQuery(
            owner=Identity(role=self._job_key.role),
            environment=self._job_key.environment,
            jobName=self._job_key.name,
            statuses=ACTIVE_STATES)
        self._scheduler.getTasksStatus(query).AndReturn(response)

    def expect_cron_replace(self, job_config, response_code=None):
        """Expect ``replaceCronTemplate`` for *job_config* under the test lock."""
        resp = self._make_response(response_code)
        self._scheduler.replaceCronTemplate(
            job_config, self._lock, self._session_key).AndReturn(resp)

    def expect_restart(self, instance_ids, response_code=None):
        """Expect ``restartShards`` for *instance_ids* under the test lock."""
        response = self._make_response(response_code)
        self._scheduler.restartShards(
            self._job_key,
            instance_ids,
            self._lock,
            self._session_key).AndReturn(response)

    def expect_kill(self, instance_ids, response_code=None):
        """Expect ``killTasks`` with a query selecting *instance_ids*."""
        response = self._make_response(response_code)
        query = TaskQuery(
            owner=Identity(role=self._job_key.role),
            environment=self._job_key.environment,
            jobName=self._job_key.name,
            statuses=ACTIVE_STATES,
            instanceIds=frozenset([int(s) for s in instance_ids]))
        self._scheduler.killTasks(
            query, self._lock, self._session_key).AndReturn(response)

    def expect_add(self, instance_ids, task_config, response_code=None):
        """Expect ``addInstances`` creating *instance_ids* with *task_config*."""
        response = self._make_response(response_code)
        add_config = AddInstancesConfig(
            key=self._job_key,
            taskConfig=task_config,
            instanceIds=frozenset([int(s) for s in instance_ids]))
        self._scheduler.addInstances(
            add_config, self._lock, self._session_key).AndReturn(response)

    def expect_start(self, response_code=None):
        """Expect ``acquireLock`` on the job, returning the test lock."""
        response = self._make_response(response_code)
        response.result = Result(acquireLockResult=AcquireLockResult(lock=self._lock))
        self._scheduler.acquireLock(
            LockKey(job=self._job_key), self._session_key).AndReturn(response)

    def expect_finish(self, response_code=None):
        """Expect ``releaseLock`` on the test lock with checked validation."""
        response = self._make_response(response_code)
        self._scheduler.releaseLock(
            self._lock, LockValidation.CHECKED, self._session_key).AndReturn(response)

    def make_task_configs(self, count=1):
        """Return a list holding *count* references to one ``TaskConfig``.

        Every element is the same object (list repetition), so callers that
        need a distinct config deep-copy an element before mutating it.
        """
        return [TaskConfig(
            owner=Identity(role=self._job_key.role),
            environment=self._job_key.environment,
            jobName=self._job_key.name,
            numCpus=6.0,
            ramMb=1024,
            diskMb=2048,
            priority=0,
            maxTaskFailures=1,
            production=True,
            taskLinks={'task': 'link'},
            contactEmail='*****@*****.**',
            executorConfig=ExecutorConfig(name='test', data='test data')
            # Not setting any set()-related properties as that throws off mox verification.
        )] * count

    def make_job_config(self, task_config, instance_count, cron_schedule=None):
        """Build a ``JobConfiguration`` for the test job key."""
        return JobConfiguration(
            key=self._job_key,
            owner=Identity(role=self._job_key.role),
            cronSchedule=cron_schedule,
            taskConfig=task_config,
            instanceCount=instance_count
        )

    def update_and_expect_ok(self, instances=None):
        """Run the update and require an ``OK`` response code."""
        self.update_and_expect_response(ResponseCode.OK, instances)

    def update_and_expect_response(self, expected_code, instances=None):
        """Run the update and require *expected_code* as the response code."""
        resp = self._updater.update(instances)
        assert expected_code == resp.responseCode, (
            'Expected response:%s Actual response:%s' % (expected_code, resp.responseCode))

    def test_grow(self):
        """Adds instances to the existing job."""
        old_configs = self.make_task_configs(3)
        new_config = old_configs[0]
        job_config = self.make_job_config(new_config, 7)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_add([3, 4, 5], new_config)
        self.expect_watch_instances([3, 4, 5])
        self.expect_add([6], new_config)
        self.expect_watch_instances([6])
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_shrink(self):
        """Reduces the number of instances of the job."""
        old_configs = self.make_task_configs(10)
        new_config = old_configs[0]
        job_config = self.make_job_config(new_config, 3)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_kill([3, 4, 5])
        self.expect_kill([6, 7, 8])
        self.expect_kill([9])
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_update_and_grow(self):
        """Updates existing instances and adds new ones."""
        old_configs = self.make_task_configs(3)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 7)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_kill([0, 1, 2])
        self.expect_add([0, 1, 2], new_config)
        self.expect_watch_instances([0, 1, 2])
        self.expect_add([3, 4, 5], new_config)
        self.expect_watch_instances([3, 4, 5])
        self.expect_add([6], new_config)
        self.expect_watch_instances([6])
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_update_and_shrink(self):
        """Updates some existing instances and reduce the instance count."""
        old_configs = self.make_task_configs(10)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 1)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_kill([0, 1, 2])
        self.expect_add([0], new_config)
        self.expect_watch_instances([0])
        self.expect_kill([3, 4, 5])
        self.expect_kill([6, 7, 8])
        self.expect_kill([9])
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_update_instances(self):
        """Update existing instances."""
        old_configs = self.make_task_configs(5)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 5)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_kill([0, 1, 2])
        self.expect_add([0, 1, 2], new_config)
        self.expect_watch_instances([0, 1, 2])
        self.expect_kill([3, 4])
        self.expect_add([3, 4], new_config)
        self.expect_watch_instances([3, 4])
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_grow_with_instance_option(self):
        """Adding instances by providing an optional list of instance IDs."""
        old_configs = self.make_task_configs(3)
        new_config = old_configs[0]
        job_config = self.make_job_config(new_config, 5)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_add([3, 4], new_config)
        self.expect_watch_instances([3, 4])
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok(instances=[3, 4])
        self.verify_mocks()

    def test_shrink_with_instance_option(self):
        """Reducing instance count by providing an optional list of instance IDs."""
        old_configs = self.make_task_configs(10)
        new_config = old_configs[0]
        job_config = self.make_job_config(new_config, 4)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_kill([4, 5, 6])
        self.expect_kill([7, 8, 9])
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok(instances=[4, 5, 6, 7, 8, 9])
        self.verify_mocks()

    def test_update_with_instance_option(self):
        """Updating existing instances by providing an optional list of instance IDs."""
        old_configs = self.make_task_configs(10)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 10)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_kill([2, 3, 4])
        self.expect_add([2, 3, 4], new_config)
        self.expect_watch_instances([2, 3, 4])
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok(instances=[2, 3, 4])
        self.verify_mocks()

    def test_patch_hole_with_instance_option(self):
        """Patching an instance ID gap created by a terminated update."""
        old_configs = self.make_task_configs(8)
        new_config = old_configs[0]
        job_config = self.make_job_config(new_config, 10)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs, [2, 3])
        self.expect_populate(job_config)
        self.expect_add([2, 3], new_config)
        self.expect_watch_instances([2, 3])
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok([2, 3])
        self.verify_mocks()

    def test_noop_update(self):
        """No update calls happen if task configs are in sync."""
        old_configs = self.make_task_configs(5)
        new_config = old_configs[0]
        job_config = self.make_job_config(new_config, 5)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_update_rollback(self):
        """Update process failures exceed total allowable count and update is rolled back."""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(max_total_failures=2, max_per_shard_failures=1)
        self.init_updater(update_config)

        old_configs = self.make_task_configs(10)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 10)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_kill([0, 1, 2])
        self.expect_add([0, 1, 2], new_config)
        self.expect_watch_instances([0, 1, 2], failed_instances=[0, 1, 2])
        self.expect_restart([0, 1, 2])
        self.expect_watch_instances([0, 1, 2], failed_instances=[0, 1, 2])
        # Rollback is expected in reverse instance order with the old config.
        self.expect_kill([2, 1, 0])
        self.expect_add([2, 1, 0], old_configs[0])
        self.expect_watch_instances([2, 1, 0])
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_response(ResponseCode.ERROR)
        self.verify_mocks()

    def test_update_rollback_sorted(self):
        """Rolling back with a batch of 1 should still be correctly sorted in reverse"""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(max_total_failures=0, max_per_shard_failures=1, batch_size=1)
        self.init_updater(update_config)

        old_configs = self.make_task_configs(5)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 5)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_kill([0])
        self.expect_add([0], new_config)
        self.expect_watch_instances([0])
        self.expect_kill([1])
        self.expect_add([1], new_config)
        self.expect_watch_instances([1])
        self.expect_kill([2])
        self.expect_add([2], new_config)
        self.expect_watch_instances([2], failed_instances=[2])
        self.expect_restart([2])
        self.expect_watch_instances([2], failed_instances=[2])
        # Rollback: one instance per batch, highest instance id first.
        self.expect_kill([2])
        self.expect_add([2], old_configs[0])
        self.expect_watch_instances([2])
        self.expect_kill([1])
        self.expect_add([1], old_configs[0])
        self.expect_watch_instances([1])
        self.expect_kill([0])
        self.expect_add([0], old_configs[0])
        self.expect_watch_instances([0])
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_response(ResponseCode.ERROR)
        self.verify_mocks()

    def test_update_after_restart(self):
        """Update succeeds after failed instances are restarted."""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(max_total_failures=2, max_per_shard_failures=1)
        self.init_updater(update_config)

        old_configs = self.make_task_configs(6)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 6)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_kill([0, 1, 2])
        self.expect_add([0, 1, 2], new_config)
        self.expect_watch_instances([0, 1, 2], failed_instances=[0, 1, 2])
        self.expect_restart([0, 1, 2])
        self.expect_watch_instances([0, 1, 2])
        self.expect_kill([3, 4, 5])
        self.expect_add([3, 4, 5], new_config)
        self.expect_watch_instances([3, 4, 5])
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_update_cron_job(self):
        """Updating cron job."""
        new_config = self.make_task_configs(1)[0]
        job_config = self.make_job_config(new_config, 1, cron_schedule='cron')
        self._config.job_config = job_config
        self.expect_start()
        self.expect_cron_replace(job_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_start_invalid_response(self):
        """The acquireLock call fails."""
        self.expect_start(response_code=ResponseCode.INVALID_REQUEST)
        self.replay_mocks()

        self.update_and_expect_response(ResponseCode.INVALID_REQUEST)
        self.verify_mocks()

    def test_finish_invalid_response(self):
        """The releaseLock call fails."""
        new_config = self.make_task_configs(1)[0]
        job_config = self.make_job_config(new_config, 1, cron_schedule='cron')
        self._config.job_config = job_config
        self.expect_start()
        self.expect_cron_replace(job_config)
        self.expect_finish(response_code=ResponseCode.INVALID_REQUEST)
        self.replay_mocks()

        self.update_and_expect_response(ResponseCode.INVALID_REQUEST)
        self.verify_mocks()

    def test_invalid_batch_size(self):
        """Test for out of range error for batch size."""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(batch_size=0)
        with raises(Updater.Error):
            self.init_updater(update_config)

    def test_invalid_restart_threshold(self):
        """Test for out of range error for restart threshold."""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(restart_threshold=0)
        with raises(Updater.Error):
            self.init_updater(update_config)

    def test_invalid_watch_secs(self):
        """Test for out of range error for watch secs."""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(watch_secs=0)
        with raises(Updater.Error):
            self.init_updater(update_config)

    def test_update_invalid_response(self):
        """A response code other than success is returned by a scheduler RPC."""
        old_configs = self.make_task_configs(5)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 5)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_kill([0, 1, 2], response_code=ResponseCode.INVALID_REQUEST)
        self.replay_mocks()

        self.update_and_expect_response(ResponseCode.ERROR)
        self.verify_mocks()

    def test_job_does_not_exist(self):
        """Unable to update a job that does not exist."""
        old_configs = self.make_task_configs(5)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 5)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs, response_code=ResponseCode.INVALID_REQUEST)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_response(ResponseCode.ERROR)
        self.verify_mocks()

    def test_instances_outside_range(self):
        """Provided optional instance IDs are outside of remote | local scope."""
        old_configs = self.make_task_configs(3)
        new_config = old_configs[0]
        job_config = self.make_job_config(new_config, 3)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_response(ResponseCode.ERROR, instances=[3, 4])
        self.verify_mocks()

    def test_update_skips_unretryable(self):
        """Update process skips instances exceeding max_per_shard_failures"""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(max_total_failures=1, max_per_shard_failures=2)
        self.init_updater(update_config)

        old_configs = self.make_task_configs(10)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 10)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_kill([0, 1, 2])
        self.expect_add([0, 1, 2], new_config)
        self.expect_watch_instances([0, 1, 2], failed_instances=[0])
        self.expect_restart([0])
        self.expect_kill([3, 4])
        self.expect_add([3, 4], new_config)
        self.expect_watch_instances([0, 3, 4], failed_instances=[0])
        self.expect_restart([0])
        self.expect_kill([5, 6])
        self.expect_add([5, 6], new_config)
        self.expect_watch_instances([0, 5, 6], failed_instances=[0])
        self.expect_kill([7, 8, 9])
        self.expect_add([7, 8, 9], new_config)
        self.expect_watch_instances([7, 8, 9])
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_failed_unretryable_do_not_cause_rollback(self):
        """Update process still succeeds if failed instances in last batch are within allowed limit."""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(max_total_failures=1, max_per_shard_failures=2)
        self.init_updater(update_config)

        old_configs = self.make_task_configs(5)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 5)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_kill([0, 1, 2])
        self.expect_add([0, 1, 2], new_config)
        self.expect_watch_instances([0, 1, 2], failed_instances=[0])
        self.expect_restart([0])
        self.expect_kill([3, 4])
        self.expect_add([3, 4], new_config)
        self.expect_watch_instances([0, 3, 4], failed_instances=[0])
        self.expect_restart([0])
        self.expect_watch_instances([0], failed_instances=[0])
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()
class UpdaterTest(TestCase): UPDATE_CONFIG = { 'batch_size': 3, 'restart_threshold': 50, 'watch_secs': 50, 'max_per_shard_failures': 0, 'max_total_failures': 0, } def setUp(self): self._role = 'mesos' self._name = 'jimbob' self._env = 'test' self._job_key = JobKey(name=self._name, environment=self._env, role=self._role) self._session_key = 'test_session' self._lock = 'test_lock' self._instance_watcher = MockObject(InstanceWatcher) self._scheduler = MockObject(scheduler_client) self._scheduler_proxy = FakeSchedulerProxy('test-cluster', self._scheduler, self._session_key) self.init_updater(deepcopy(self.UPDATE_CONFIG)) def replay_mocks(self): Replay(self._scheduler) Replay(self._instance_watcher) def verify_mocks(self): Verify(self._scheduler) Verify(self._instance_watcher) def init_updater(self, update_config): self._config = FakeConfig(self._role, self._name, self._env, update_config) self._updater = Updater(self._config, 3, self._scheduler_proxy, self._instance_watcher) def expect_watch_instances(self, instance_ids, failed_instances=[]): self._instance_watcher.watch(instance_ids).AndReturn( set(failed_instances)) def expect_populate(self, job_config, response_code=None): response_code = ResponseCode.OK if response_code is None else response_code resp = Response(responseCode=response_code, message='test') result = set([deepcopy(job_config.taskConfig)]) resp.result = Result(populateJobResult=PopulateJobResult( populated=result)) self._scheduler.populateJobConfig( job_config, JobConfigValidation.RUN_FILTERS).AndReturn(resp) def expect_get_tasks(self, tasks, ignore_ids=None, response_code=None): response_code = ResponseCode.OK if response_code is None else response_code response = Response(responseCode=response_code, message='test') scheduled = [] for index, task in enumerate(tasks): if not ignore_ids or index not in ignore_ids: scheduled.append( ScheduledTask(assignedTask=AssignedTask(task=task, instanceId=index))) response.result = 
Result(scheduleStatusResult=ScheduleStatusResult( tasks=scheduled)) query = TaskQuery(owner=Identity(role=self._job_key.role), environment=self._job_key.environment, jobName=self._job_key.name, statuses=ACTIVE_STATES) self._scheduler.getTasksStatus(query).AndReturn(response) def expect_cron_replace(self, job_config, response_code=None): response_code = ResponseCode.OK if response_code is None else response_code resp = Response(responseCode=response_code, message='test') self._scheduler.replaceCronTemplate(job_config, self._lock, self._session_key).AndReturn(resp) def expect_restart(self, instance_ids, response_code=None): response_code = ResponseCode.OK if response_code is None else response_code response = Response(responseCode=response_code, message='test') self._scheduler.restartShards(self._job_key, instance_ids, self._lock, self._session_key).AndReturn(response) def expect_kill(self, instance_ids, response_code=None): response_code = ResponseCode.OK if response_code is None else response_code response = Response(responseCode=response_code, message='test') query = TaskQuery(owner=Identity(role=self._job_key.role), environment=self._job_key.environment, jobName=self._job_key.name, statuses=ACTIVE_STATES, instanceIds=frozenset([int(s) for s in instance_ids])) self._scheduler.killTasks(query, self._lock, self._session_key).AndReturn(response) def expect_add(self, instance_ids, task_config, response_code=None): response_code = ResponseCode.OK if response_code is None else response_code response = Response(responseCode=response_code, message='test') add_config = AddInstancesConfig(key=self._job_key, taskConfig=task_config, instanceIds=frozenset( [int(s) for s in instance_ids])) self._scheduler.addInstances(add_config, self._lock, self._session_key).AndReturn(response) def expect_start(self, response_code=None): response_code = ResponseCode.OK if response_code is None else response_code response = Response(responseCode=response_code, message='test') response.result = 
Result(acquireLockResult=AcquireLockResult( lock=self._lock)) self._scheduler.acquireLock(LockKey(job=self._job_key), self._session_key).AndReturn(response) def expect_finish(self, response_code=None): response_code = ResponseCode.OK if response_code is None else response_code response = Response(responseCode=response_code, message='test') self._scheduler.releaseLock(self._lock, LockValidation.CHECKED, self._session_key).AndReturn(response) def make_task_configs(self, count=1): return [ TaskConfig( owner=Identity(role=self._job_key.role), environment=self._job_key.environment, jobName=self._job_key.name, numCpus=6.0, ramMb=1024, diskMb=2048, priority=0, maxTaskFailures=1, production=True, taskLinks={'task': 'link'}, contactEmail='*****@*****.**', executorConfig=ExecutorConfig(name='test', data='test data') # Not setting any set()-related properties as that throws off mox verification. ) ] * count def make_job_config(self, task_config, instance_count, cron_schedule=None): return JobConfiguration(key=self._job_key, owner=Identity(role=self._job_key.role), cronSchedule=cron_schedule, taskConfig=task_config, instanceCount=instance_count) def update_and_expect_ok(self, instances=None): self.update_and_expect_response(ResponseCode.OK, instances) def update_and_expect_response(self, expected_code, instances=None): resp = self._updater.update(instances) assert expected_code == resp.responseCode, ( 'Expected response:%s Actual response:%s' % (expected_code, resp.responseCode)) def test_grow(self): """Adds instances to the existing job.""" old_configs = self.make_task_configs(3) new_config = old_configs[0] job_config = self.make_job_config(new_config, 7) self._config.job_config = job_config self.expect_start() self.expect_get_tasks(old_configs) self.expect_populate(job_config) self.expect_add([3, 4, 5], new_config) self.expect_watch_instances([3, 4, 5]) self.expect_add([6], new_config) self.expect_watch_instances([6]) self.expect_finish() self.replay_mocks() 
self.update_and_expect_ok() self.verify_mocks() def test_shrink(self): """Reduces the number of instances of the job.""" old_configs = self.make_task_configs(10) new_config = old_configs[0] job_config = self.make_job_config(new_config, 3) self._config.job_config = job_config self.expect_start() self.expect_get_tasks(old_configs) self.expect_populate(job_config) self.expect_kill([3, 4, 5]) self.expect_kill([6, 7, 8]) self.expect_kill([9]) self.expect_finish() self.replay_mocks() self.update_and_expect_ok() self.verify_mocks() def test_update_and_grow(self): """Updates existing instances and adds new ones.""" old_configs = self.make_task_configs(3) new_config = deepcopy(old_configs[0]) new_config.priority = 5 job_config = self.make_job_config(new_config, 7) self._config.job_config = job_config self.expect_start() self.expect_get_tasks(old_configs) self.expect_populate(job_config) self.expect_kill([0, 1, 2]) self.expect_add([0, 1, 2], new_config) self.expect_watch_instances([0, 1, 2]) self.expect_add([3, 4, 5], new_config) self.expect_watch_instances([3, 4, 5]) self.expect_add([6], new_config) self.expect_watch_instances([6]) self.expect_finish() self.replay_mocks() self.update_and_expect_ok() self.verify_mocks() def test_update_and_shrink(self): """Updates some existing instances and reduce the instance count.""" old_configs = self.make_task_configs(10) new_config = deepcopy(old_configs[0]) new_config.priority = 5 job_config = self.make_job_config(new_config, 1) self._config.job_config = job_config self.expect_start() self.expect_get_tasks(old_configs) self.expect_populate(job_config) self.expect_kill([0, 1, 2]) self.expect_add([0], new_config) self.expect_watch_instances([0]) self.expect_kill([3, 4, 5]) self.expect_kill([6, 7, 8]) self.expect_kill([9]) self.expect_finish() self.replay_mocks() self.update_and_expect_ok() self.verify_mocks() def test_update_instances(self): """Update existing instances.""" old_configs = self.make_task_configs(5) new_config = 
deepcopy(old_configs[0]) new_config.priority = 5 job_config = self.make_job_config(new_config, 5) self._config.job_config = job_config self.expect_start() self.expect_get_tasks(old_configs) self.expect_populate(job_config) self.expect_kill([0, 1, 2]) self.expect_add([0, 1, 2], new_config) self.expect_watch_instances([0, 1, 2]) self.expect_kill([3, 4]) self.expect_add([3, 4], new_config) self.expect_watch_instances([3, 4]) self.expect_finish() self.replay_mocks() self.update_and_expect_ok() self.verify_mocks() def test_grow_with_instance_option(self): """Adding instances by providing an optional list of instance IDs.""" old_configs = self.make_task_configs(3) new_config = old_configs[0] job_config = self.make_job_config(new_config, 5) self._config.job_config = job_config self.expect_start() self.expect_get_tasks(old_configs) self.expect_populate(job_config) self.expect_add([3, 4], new_config) self.expect_watch_instances([3, 4]) self.expect_finish() self.replay_mocks() self.update_and_expect_ok(instances=[3, 4]) self.verify_mocks() def test_shrink_with_instance_option(self): """Reducing instance count by providing an optional list of instance IDs.""" old_configs = self.make_task_configs(10) new_config = old_configs[0] job_config = self.make_job_config(new_config, 4) self._config.job_config = job_config self.expect_start() self.expect_get_tasks(old_configs) self.expect_populate(job_config) self.expect_kill([4, 5, 6]) self.expect_kill([7, 8, 9]) self.expect_finish() self.replay_mocks() self.update_and_expect_ok(instances=[4, 5, 6, 7, 8, 9]) self.verify_mocks() def test_update_with_instance_option(self): """Updating existing instances by providing an optional list of instance IDs.""" old_configs = self.make_task_configs(10) new_config = deepcopy(old_configs[0]) new_config.priority = 5 job_config = self.make_job_config(new_config, 10) self._config.job_config = job_config self.expect_start() self.expect_get_tasks(old_configs) self.expect_populate(job_config) 
self.expect_kill([2, 3, 4]) self.expect_add([2, 3, 4], new_config) self.expect_watch_instances([2, 3, 4]) self.expect_finish() self.replay_mocks() self.update_and_expect_ok(instances=[2, 3, 4]) self.verify_mocks() def test_patch_hole_with_instance_option(self): """Patching an instance ID gap created by a terminated update.""" old_configs = self.make_task_configs(8) new_config = old_configs[0] job_config = self.make_job_config(new_config, 10) self._config.job_config = job_config self.expect_start() self.expect_get_tasks(old_configs, [2, 3]) self.expect_populate(job_config) self.expect_add([2, 3], new_config) self.expect_watch_instances([2, 3]) self.expect_finish() self.replay_mocks() self.update_and_expect_ok([2, 3]) self.verify_mocks() def test_noop_update(self): """No update calls happen if task configs are in sync.""" old_configs = self.make_task_configs(5) new_config = old_configs[0] job_config = self.make_job_config(new_config, 5) self._config.job_config = job_config self.expect_start() self.expect_get_tasks(old_configs) self.expect_populate(job_config) self.expect_finish() self.replay_mocks() self.update_and_expect_ok() self.verify_mocks() def test_update_rollback(self): """Update process failures exceed total allowable count and update is rolled back.""" update_config = self.UPDATE_CONFIG.copy() update_config.update(max_total_failures=2, max_per_shard_failures=1) self.init_updater(update_config) old_configs = self.make_task_configs(10) new_config = deepcopy(old_configs[0]) new_config.priority = 5 job_config = self.make_job_config(new_config, 10) self._config.job_config = job_config self.expect_start() self.expect_get_tasks(old_configs) self.expect_populate(job_config) self.expect_kill([0, 1, 2]) self.expect_add([0, 1, 2], new_config) self.expect_watch_instances([0, 1, 2], failed_instances=[0, 1, 2]) self.expect_restart([0, 1, 2]) self.expect_watch_instances([0, 1, 2], failed_instances=[0, 1, 2]) self.expect_kill([2, 1, 0]) self.expect_add([2, 1, 0], 
old_configs[0]) self.expect_watch_instances([2, 1, 0]) self.expect_finish() self.replay_mocks() self.update_and_expect_response(ResponseCode.ERROR) self.verify_mocks() def test_update_rollback_sorted(self): """Rolling back with a batch of 1 should still be correctly sorted in reverse""" update_config = self.UPDATE_CONFIG.copy() update_config.update(max_total_failures=0, max_per_shard_failures=1, batch_size=1) self.init_updater(update_config) old_configs = self.make_task_configs(5) new_config = deepcopy(old_configs[0]) new_config.priority = 5 job_config = self.make_job_config(new_config, 5) self._config.job_config = job_config self.expect_start() self.expect_get_tasks(old_configs) self.expect_populate(job_config) self.expect_kill([0]) self.expect_add([0], new_config) self.expect_watch_instances([0]) self.expect_kill([1]) self.expect_add([1], new_config) self.expect_watch_instances([1]) self.expect_kill([2]) self.expect_add([2], new_config) self.expect_watch_instances([2], failed_instances=[2]) self.expect_restart([2]) self.expect_watch_instances([2], failed_instances=[2]) self.expect_kill([2]) self.expect_add([2], old_configs[0]) self.expect_watch_instances([2]) self.expect_kill([1]) self.expect_add([1], old_configs[0]) self.expect_watch_instances([1]) self.expect_kill([0]) self.expect_add([0], old_configs[0]) self.expect_watch_instances([0]) self.expect_finish() self.replay_mocks() self.update_and_expect_response(ResponseCode.ERROR) self.verify_mocks() def test_update_after_restart(self): """Update succeeds after failed instances are restarted.""" update_config = self.UPDATE_CONFIG.copy() update_config.update(max_total_failures=2, max_per_shard_failures=1) self.init_updater(update_config) old_configs = self.make_task_configs(6) new_config = deepcopy(old_configs[0]) new_config.priority = 5 job_config = self.make_job_config(new_config, 6) self._config.job_config = job_config self.expect_start() self.expect_get_tasks(old_configs) self.expect_populate(job_config) 
self.expect_kill([0, 1, 2]) self.expect_add([0, 1, 2], new_config) self.expect_watch_instances([0, 1, 2], failed_instances=[0, 1, 2]) self.expect_restart([0, 1, 2]) self.expect_watch_instances([0, 1, 2]) self.expect_kill([3, 4, 5]) self.expect_add([3, 4, 5], new_config) self.expect_watch_instances([3, 4, 5]) self.expect_finish() self.replay_mocks() self.update_and_expect_ok() self.verify_mocks() def test_update_cron_job(self): """Updating cron job.""" new_config = self.make_task_configs(1)[0] job_config = self.make_job_config(new_config, 1, cron_schedule='cron') self._config.job_config = job_config self.expect_start() self.expect_cron_replace(job_config) self.expect_finish() self.replay_mocks() self.update_and_expect_ok() self.verify_mocks() def test_start_invalid_response(self): """The acquireLock call fails.""" self.expect_start(response_code=ResponseCode.INVALID_REQUEST) self.replay_mocks() self.update_and_expect_response(ResponseCode.INVALID_REQUEST) self.verify_mocks() def test_finish_invalid_response(self): """The releaseLock call fails.""" new_config = self.make_task_configs(1)[0] job_config = self.make_job_config(new_config, 1, cron_schedule='cron') self._config.job_config = job_config self.expect_start() self.expect_cron_replace(job_config) self.expect_finish(response_code=ResponseCode.INVALID_REQUEST) self.replay_mocks() self.update_and_expect_response(ResponseCode.INVALID_REQUEST) self.verify_mocks() def test_invalid_batch_size(self): """Test for out of range error for batch size.""" update_config = self.UPDATE_CONFIG.copy() update_config.update(batch_size=0) with raises(Updater.Error): self.init_updater(update_config) def test_invalid_restart_threshold(self): """Test for out of range error for restart threshold.""" update_config = self.UPDATE_CONFIG.copy() update_config.update(restart_threshold=0) with raises(Updater.Error): self.init_updater(update_config) def test_invalid_watch_secs(self): """Test for out of range error for watch secs.""" 
update_config = self.UPDATE_CONFIG.copy() update_config.update(watch_secs=0) with raises(Updater.Error): self.init_updater(update_config) def test_update_invalid_response(self): """A response code other than success is returned by a scheduler RPC.""" old_configs = self.make_task_configs(5) new_config = old_configs[0] job_config = self.make_job_config(new_config, 5) self._config.job_config = job_config self.expect_start() self.expect_get_tasks(old_configs, response_code=ResponseCode.INVALID_REQUEST) self.replay_mocks() self.update_and_expect_response(ResponseCode.ERROR) self.verify_mocks() def test_instances_outside_range(self): """Provided optional instance IDs are outside of remote | local scope.""" old_configs = self.make_task_configs(3) new_config = old_configs[0] job_config = self.make_job_config(new_config, 3) self._config.job_config = job_config self.expect_start() self.expect_get_tasks(old_configs) self.expect_populate(job_config) self.replay_mocks() self.update_and_expect_response(ResponseCode.ERROR, instances=[3, 4]) self.verify_mocks() def test_update_skips_unretryable(self): """Update process skips instances exceeding max_per_shard_failures""" update_config = self.UPDATE_CONFIG.copy() update_config.update(max_total_failures=1, max_per_shard_failures=2) self.init_updater(update_config) old_configs = self.make_task_configs(10) new_config = deepcopy(old_configs[0]) new_config.priority = 5 job_config = self.make_job_config(new_config, 10) self._config.job_config = job_config self.expect_start() self.expect_get_tasks(old_configs) self.expect_populate(job_config) self.expect_kill([0, 1, 2]) self.expect_add([0, 1, 2], new_config) self.expect_watch_instances([0, 1, 2], failed_instances=[0]) self.expect_restart([0]) self.expect_kill([3, 4]) self.expect_add([3, 4], new_config) self.expect_watch_instances([0, 3, 4], failed_instances=[0]) self.expect_restart([0]) self.expect_kill([5, 6]) self.expect_add([5, 6], new_config) self.expect_watch_instances([0, 5, 6], 
failed_instances=[0]) self.expect_kill([7, 8, 9]) self.expect_add([7, 8, 9], new_config) self.expect_watch_instances([7, 8, 9]) self.expect_finish() self.replay_mocks() self.update_and_expect_ok() self.verify_mocks() def test_failed_unretryable_do_not_cause_rollback(self): """Update process still succeeds if failed instances in last batch are within allowed limit.""" update_config = self.UPDATE_CONFIG.copy() update_config.update(max_total_failures=1, max_per_shard_failures=2) self.init_updater(update_config) old_configs = self.make_task_configs(5) new_config = deepcopy(old_configs[0]) new_config.priority = 5 job_config = self.make_job_config(new_config, 5) self._config.job_config = job_config self.expect_start() self.expect_get_tasks(old_configs) self.expect_populate(job_config) self.expect_kill([0, 1, 2]) self.expect_add([0, 1, 2], new_config) self.expect_watch_instances([0, 1, 2], failed_instances=[0]) self.expect_restart([0]) self.expect_kill([3, 4]) self.expect_add([3, 4], new_config) self.expect_watch_instances([0, 3, 4], failed_instances=[0]) self.expect_restart([0]) self.expect_watch_instances([0], failed_instances=[0]) self.expect_finish() self.replay_mocks() self.update_and_expect_ok() self.verify_mocks()
class UpdaterTest(TestCase):
    """Tests for Updater, driven through record/replay mocks.

    Every test follows the same shape: build the job configuration, record the
    calls the updater is expected to make on the scheduler, instance watcher,
    quota check and job monitor (the expect_* helpers), switch the mocks to
    replay mode, run the update, and finally verify the recorded expectations.
    """

    # Baseline update settings; tests that need different limits copy this dict
    # and override individual keys before calling init_updater().
    UPDATE_CONFIG = {
        'batch_size': 1,
        'restart_threshold': 50,
        'watch_secs': 50,
        'max_per_shard_failures': 0,
        'max_total_failures': 0,
        'rollback_on_failure': True,
    }

    def setUp(self):
        """Create the job identity, the mocks and a default-configured updater."""
        self._role = 'mesos'
        self._name = 'jimbob'
        self._env = 'test'
        self._job_key = JobKey(name=self._name, environment=self._env, role=self._role)
        self._session_key = 'test_session'
        self._lock = 'test_lock'
        self._instance_watcher = MockObject(InstanceWatcher)
        self._job_monitor = MockObject(JobMonitor)
        self._scheduler_mux = FakeSchedulerMux()
        self._scheduler = MockObject(scheduler_client)
        self._scheduler_proxy = FakeSchedulerProxy(
            'test-cluster', self._scheduler, self._session_key)
        self._quota_check = MockObject(QuotaCheck)
        self.init_updater(deepcopy(self.UPDATE_CONFIG))
        # Per-task resources; used both for the task configs and the quota math.
        self._num_cpus = 1.0
        self._num_ram = 1
        self._num_disk = 1

    def replay_mocks(self):
        """Switch every mock from record mode to replay mode."""
        Replay(self._scheduler)
        Replay(self._instance_watcher)
        Replay(self._quota_check)
        Replay(self._job_monitor)

    def verify_mocks(self):
        """Verify the recorded expectations on every mock."""
        Verify(self._scheduler)
        Verify(self._instance_watcher)
        Verify(self._quota_check)
        Verify(self._job_monitor)

    def init_updater(self, update_config):
        """(Re)create the fake config and the Updater under test.

        Args:
            update_config: dict of update settings (see UPDATE_CONFIG).
        """
        self._config = FakeConfig(self._role, self._name, self._env, update_config)
        self._updater = Updater(
            self._config,
            3,
            self._scheduler_proxy,
            self._instance_watcher,
            self._quota_check,
            self._job_monitor,
            self._scheduler_mux)

    @staticmethod
    def _make_response(response_code=None):
        """Build a scheduler Response; a response_code of None means ResponseCode.OK."""
        code = ResponseCode.OK if response_code is None else response_code
        return Response(responseCode=code, messageDEPRECATED='test')

    def _capacity_request(self, instance_count):
        """Build a CapacityRequest for instance_count tasks of the per-task resources."""
        return CapacityRequest(ResourceAggregate(
            numCpus=instance_count * self._num_cpus,
            ramMb=instance_count * self._num_ram,
            diskMb=instance_count * self._num_disk))

    def expect_terminate(self):
        """Record terminate() on the job monitor and the instance watcher."""
        self._job_monitor.terminate()
        self._instance_watcher.terminate()

    def expect_watch_instances(self, instance_ids, failed_instances=None):
        """Record one watch(instance_ids) call per instance.

        Args:
            instance_ids: instance IDs passed to each watch() call.
            failed_instances: IDs whose watch() call reports that ID as failed;
                None (the default) means no instance fails.
        """
        # None instead of a shared mutable [] default.
        failed_instances = () if failed_instances is None else failed_instances
        for i in instance_ids:
            failed = [i] if i in failed_instances else []
            self._instance_watcher.watch(instance_ids).AndReturn(set(failed))

    def expect_populate(self, job_config, response_code=None):
        """Record populateJobConfig(job_config) returning a copy of its task config."""
        resp = self._make_response(response_code)
        populated = set([deepcopy(job_config.taskConfig)])
        resp.result = Result(populateJobResult=PopulateJobResult(populated=populated))
        self._scheduler.populateJobConfig(job_config).AndReturn(resp)

    def expect_get_tasks(self, tasks, ignore_ids=None, response_code=None):
        """Record getTasksStatus() returning `tasks` as scheduled tasks.

        Args:
            tasks: task configs; the list index becomes the instance ID.
            ignore_ids: optional instance IDs to leave out of the result.
            response_code: response code to return; None means OK.
        """
        response = self._make_response(response_code)
        scheduled = [
            ScheduledTask(assignedTask=AssignedTask(task=task, instanceId=index))
            for index, task in enumerate(tasks)
            if not ignore_ids or index not in ignore_ids
        ]
        response.result = Result(
            scheduleStatusResult=ScheduleStatusResult(tasks=scheduled))
        query = TaskQuery(
            owner=Identity(role=self._job_key.role),
            environment=self._job_key.environment,
            jobName=self._job_key.name,
            statuses=ACTIVE_STATES)
        self._scheduler.getTasksStatus(query).AndReturn(response)

    def expect_cron_replace(self, job_config, response_code=None):
        """Record replaceCronTemplate() for job_config."""
        resp = self._make_response(response_code)
        self._scheduler.replaceCronTemplate(
            job_config, self._lock, self._session_key).AndReturn(resp)

    def expect_restart(self, instance_ids, response_code=None):
        """Record one restartShards() call per instance ID."""
        for i in instance_ids:
            self._scheduler.restartShards(
                self._job_key, [i], self._lock, self._session_key).AndReturn(
                    self._make_response(response_code))

    def expect_kill(self, instance_ids, response_code=None, monitor_result=True,
                    skip_monitor=False):
        """Record one killTasks() call per instance ID, then the monitor wait.

        Args:
            instance_ids: instance IDs to kill, one killTasks() call each.
            response_code: response code for every kill; None means OK.
            monitor_result: value the job monitor wait returns.
            skip_monitor: when True, no job monitor wait is recorded.
        """
        for i in instance_ids:
            # Resolved inside the loop, as before: the resolved value is what
            # expect_job_monitor() inspects below, and with no instances the
            # code stays None so no monitor wait is recorded.
            response_code = ResponseCode.OK if response_code is None else response_code
            query = TaskQuery(
                owner=Identity(role=self._job_key.role),
                environment=self._job_key.environment,
                jobName=self._job_key.name,
                statuses=ACTIVE_STATES,
                instanceIds=frozenset([int(i)]))
            self._scheduler.killTasks(
                query, self._lock, self._session_key).AndReturn(
                    self._make_response(response_code))

        self.expect_job_monitor(response_code, instance_ids, monitor_result, skip_monitor)

    def expect_job_monitor(self, response_code, instance_ids, monitor_result=True,
                           skip=False):
        """Record the job monitor wait for instance_ids.

        Nothing is recorded when `skip` is set or when response_code is not OK.
        """
        if skip or response_code != ResponseCode.OK:
            return

        self._job_monitor.wait_until(
            JobMonitor.terminal, instance_ids, with_timeout=True).AndReturn(monitor_result)

    def expect_add(self, instance_ids, task_config, response_code=None):
        """Record one addInstances() call per instance ID using task_config."""
        for i in instance_ids:
            add_config = AddInstancesConfig(
                key=self._job_key,
                taskConfig=task_config,
                instanceIds=frozenset([int(i)]))
            self._scheduler.addInstances(
                add_config, self._lock, self._session_key).AndReturn(
                    self._make_response(response_code))

    def expect_update_instances(self, instance_ids, task_config):
        """Record kill, add and a successful watch for each instance, one at a time."""
        for i in instance_ids:
            self.expect_kill([i])
            self.expect_add([i], task_config)
            self.expect_watch_instances([i])

    def expect_add_instances(self, instance_ids, task_config):
        """Record add and a successful watch for each instance, one at a time."""
        for i in instance_ids:
            self.expect_add([i], task_config)
            self.expect_watch_instances([i])

    def expect_kill_instances(self, instance_ids):
        """Record a kill for each instance, one at a time."""
        for i in instance_ids:
            self.expect_kill([i])

    def expect_start(self, response_code=None):
        """Record acquireLock() for the job, returning the test lock."""
        response = self._make_response(response_code)
        response.result = Result(acquireLockResult=AcquireLockResult(lock=self._lock))
        self._scheduler.acquireLock(
            LockKey(job=self._job_key), self._session_key).AndReturn(response)

    def expect_finish(self, response_code=None):
        """Record releaseLock() for the test lock."""
        response = self._make_response(response_code)
        self._scheduler.releaseLock(
            self._lock, LockValidation.CHECKED, self._session_key).AndReturn(response)

    def expect_quota_check(self, num_released, num_acquired, response_code=None,
                           prod=True):
        """Record validate_quota_from_requested() on the quota check mock.

        Args:
            num_released: number of instances whose resources are released.
            num_acquired: number of instances whose resources are acquired.
            response_code: response code to return; None means OK.
            prod: production flag passed to the quota check.
        """
        response = self._make_response(response_code)
        released = self._capacity_request(num_released)
        acquired = self._capacity_request(num_acquired)
        self._quota_check.validate_quota_from_requested(
            self._job_key, prod, released, acquired).AndReturn(response)

    def make_task_configs(self, count=1, prod=True):
        """Return a list holding the same TaskConfig object `count` times.

        The list entries are one shared object, so tests deepcopy an entry
        before changing it.
        """
        return [TaskConfig(
            owner=Identity(role=self._job_key.role),
            environment=self._job_key.environment,
            jobName=self._job_key.name,
            numCpus=self._num_cpus,
            ramMb=self._num_ram,
            diskMb=self._num_disk,
            priority=0,
            maxTaskFailures=1,
            production=prod,
            taskLinks={'task': 'link'},
            contactEmail='*****@*****.**',
            executorConfig=ExecutorConfig(name='test', data='test data')
            # Not setting any set()-related properties as that throws off mox verification.
        )] * count

    def make_job_config(self, task_config, instance_count, cron_schedule=None):
        """Return a JobConfiguration for the test job with a copy of task_config."""
        return JobConfiguration(
            key=self._job_key,
            owner=Identity(role=self._job_key.role),
            cronSchedule=cron_schedule,
            taskConfig=deepcopy(task_config),
            instanceCount=instance_count
        )

    def update_and_expect_ok(self, instances=None):
        """Run the update and assert that it returns ResponseCode.OK."""
        self.update_and_expect_response(ResponseCode.OK, instances)

    def update_and_expect_response(self, expected_code, instances=None):
        """Run the update and assert that it returns expected_code."""
        resp = self._updater.update(instances)
        assert expected_code == resp.responseCode, (
            'Expected response:%s Actual response:%s' % (expected_code, resp.responseCode))

    def test_grow(self):
        """Adds instances to the existing job."""
        old_configs = self.make_task_configs(3)
        new_config = old_configs[0]
        job_config = self.make_job_config(new_config, 7)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(0, 4)
        self.expect_add_instances([3, 4, 5, 6], new_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_grow_fails_quota_check(self):
        """Adding instances to the existing job fails due to not enough quota."""
        old_configs = self.make_task_configs(3)
        new_config = old_configs[0]
        job_config = self.make_job_config(new_config, 7)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(0, 4, response_code=ResponseCode.INVALID_REQUEST)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_response(expected_code=ResponseCode.ERROR)
        self.verify_mocks()

    def test_non_to_prod_fails_quota_check(self):
        """Update with shrinking with non->prod transition fails quota check."""
        old_configs = self.make_task_configs(4, prod=False)
        new_config = deepcopy(old_configs[0])
        new_config.production = True
        job_config = self.make_job_config(new_config, 2)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(0, 2, response_code=ResponseCode.INVALID_REQUEST)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_response(expected_code=ResponseCode.ERROR)
        self.verify_mocks()

    def test_prod_to_non_always_passes_quota_check(self):
        """Update with growth with prod->non transition always passes."""
        old_configs = self.make_task_configs(1, prod=True)
        new_config = deepcopy(old_configs[0])
        new_config.production = False
        job_config = self.make_job_config(new_config, 3)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(1, 0, prod=False)
        self.expect_kill([0])
        self.expect_add_instances([0, 1, 2], new_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_shrink(self):
        """Reduces the number of instances of the job."""
        old_configs = self.make_task_configs(10)
        new_config = old_configs[0]
        job_config = self.make_job_config(new_config, 3)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(7, 0)
        self.expect_kill_instances([3, 4, 5, 6, 7, 8, 9])
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_update_and_grow(self):
        """Updates existing instances and adds new ones."""
        old_configs = self.make_task_configs(3)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 7)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(3, 7)
        self.expect_update_instances([0, 1, 2], new_config)
        self.expect_add_instances([3, 4, 5, 6], new_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_update_and_shrink(self):
        """Updates some existing instances and reduces the instance count."""
        old_configs = self.make_task_configs(10)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 1)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(10, 1)
        self.expect_update_instances([0], new_config)
        self.expect_kill_instances([1, 2, 3, 4, 5, 6, 7, 8, 9])
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_update_instances(self):
        """Update existing instances."""
        old_configs = self.make_task_configs(5)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 5)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(5, 5)
        self.expect_update_instances([0, 1, 2, 3, 4], new_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_grow_with_instance_option(self):
        """Adding instances by providing an optional list of instance IDs."""
        old_configs = self.make_task_configs(3)
        new_config = old_configs[0]
        job_config = self.make_job_config(new_config, 5)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(0, 2)
        self.expect_add_instances([3, 4], new_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok(instances=[3, 4])
        self.verify_mocks()

    def test_shrink_with_instance_option(self):
        """Reducing instance count by providing an optional list of instance IDs."""
        old_configs = self.make_task_configs(10)
        new_config = old_configs[0]
        job_config = self.make_job_config(new_config, 4)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(6, 0)
        self.expect_kill_instances([4, 5, 6, 7, 8, 9])
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok(instances=[4, 5, 6, 7, 8, 9])
        self.verify_mocks()

    def test_update_with_instance_option(self):
        """Updating existing instances by providing an optional list of instance IDs."""
        old_configs = self.make_task_configs(10)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 10)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(3, 3)
        self.expect_update_instances([2, 3, 4], new_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok(instances=[2, 3, 4])
        self.verify_mocks()

    def test_patch_hole_with_instance_option(self):
        """Patching an instance ID gap created by a terminated update."""
        old_configs = self.make_task_configs(8)
        new_config = old_configs[0]
        job_config = self.make_job_config(new_config, 10)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs, [2, 3])
        self.expect_populate(job_config)
        self.expect_quota_check(0, 2)
        self.expect_add_instances([2, 3], new_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok([2, 3])
        self.verify_mocks()

    def test_noop_update(self):
        """No update calls happen if task configs are in sync."""
        old_configs = self.make_task_configs(5)
        new_config = old_configs[0]
        job_config = self.make_job_config(new_config, 5)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(0, 0)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_update_rollback(self):
        """Update process failures exceed total allowable count and update is rolled back."""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(max_per_shard_failures=1)
        self.init_updater(update_config)

        old_configs = self.make_task_configs(10)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 10)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(10, 10)
        self.expect_update_instances([0, 1], new_config)
        self.expect_kill([2])
        self.expect_add([2], new_config)
        self.expect_watch_instances([2], failed_instances=[2])
        self.expect_restart([2])
        self.expect_watch_instances([2], failed_instances=[2])
        # Rollback restores the old config in reverse instance order.
        self.expect_update_instances([2, 1, 0], old_configs[0])
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_response(ResponseCode.ERROR)
        self.verify_mocks()

    def test_update_after_restart(self):
        """Update succeeds after failed instances are restarted."""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(max_total_failures=2, max_per_shard_failures=1)
        self.init_updater(update_config)

        old_configs = self.make_task_configs(6)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 6)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(6, 6)
        self.expect_update_instances([0, 1], new_config)
        self.expect_kill([2])
        self.expect_add([2], new_config)
        self.expect_watch_instances([2], failed_instances=[2])
        self.expect_restart([2])
        self.expect_watch_instances([2])
        self.expect_update_instances([3, 4, 5], new_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_update_cron_job(self):
        """Updating cron job."""
        new_config = self.make_task_configs(1)[0]
        job_config = self.make_job_config(new_config, 1, cron_schedule='cron')
        self._config.job_config = job_config
        self.expect_start()
        self.expect_cron_replace(job_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_start_invalid_response(self):
        """The acquireLock call fails."""
        self.expect_start(response_code=ResponseCode.INVALID_REQUEST)
        self.replay_mocks()

        self.update_and_expect_response(ResponseCode.INVALID_REQUEST)
        self.verify_mocks()

    def test_finish_invalid_response(self):
        """The releaseLock call fails."""
        new_config = self.make_task_configs(1)[0]
        job_config = self.make_job_config(new_config, 1, cron_schedule='cron')
        self._config.job_config = job_config
        self.expect_start()
        self.expect_cron_replace(job_config)
        self.expect_finish(response_code=ResponseCode.INVALID_REQUEST)
        self.replay_mocks()

        self.update_and_expect_response(ResponseCode.INVALID_REQUEST)
        self.verify_mocks()

    def test_invalid_batch_size(self):
        """Test for out of range error for batch size."""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(batch_size=0)
        with raises(Updater.Error):
            self.init_updater(update_config)

    def test_invalid_restart_threshold(self):
        """Test for out of range error for restart threshold."""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(restart_threshold=0)
        with raises(Updater.Error):
            self.init_updater(update_config)

    def test_invalid_watch_secs(self):
        """Test for out of range error for watch secs."""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(watch_secs=0)
        with raises(Updater.Error):
            self.init_updater(update_config)

    def test_update_invalid_response(self):
        """A response code other than success is returned by a scheduler RPC."""
        old_configs = self.make_task_configs(5)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 5)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(5, 5)
        self._scheduler_mux.raise_error()
        self.expect_kill([0], skip_monitor=True)
        self.expect_terminate()
        self.replay_mocks()

        self.update_and_expect_response(ResponseCode.ERROR)
        self.verify_mocks()

    def test_update_kill_timeout(self):
        """Test job monitor timeout while waiting for tasks killed."""
        old_configs = self.make_task_configs(5)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 5)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(5, 5)
        self.expect_kill([0], monitor_result=False)
        self.expect_terminate()
        self.replay_mocks()

        self.update_and_expect_response(ResponseCode.ERROR)
        self.verify_mocks()

    def test_job_does_not_exist(self):
        """Unable to update a job that does not exist."""
        old_configs = self.make_task_configs(5)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 5)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs, response_code=ResponseCode.INVALID_REQUEST)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_response(ResponseCode.ERROR)
        self.verify_mocks()

    def test_instances_outside_range(self):
        """Provided optional instance IDs are outside of remote | local scope."""
        old_configs = self.make_task_configs(3)
        new_config = old_configs[0]
        job_config = self.make_job_config(new_config, 3)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_response(ResponseCode.ERROR, instances=[3, 4])
        self.verify_mocks()

    def test_update_skips_unretryable(self):
        """Update process skips instances exceeding max_per_shard_failures."""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(max_total_failures=1, max_per_shard_failures=2)
        self.init_updater(update_config)

        old_configs = self.make_task_configs(10)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 10)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(10, 10)
        self.expect_update_instances([0, 1], new_config)
        self.expect_kill([2])
        self.expect_add([2], new_config)
        self.expect_watch_instances([2], failed_instances=[2])
        self.expect_restart([2])
        self.expect_watch_instances([2], failed_instances=[2])
        self.expect_restart([2])
        self.expect_watch_instances([2], failed_instances=[2])
        self.expect_update_instances([3, 4, 5, 6, 7, 8, 9], new_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_diff_unordered_configs(self):
        """Diff between two config objects with different repr but identical content works ok."""
        from_config = self.make_task_configs()[0]
        # NOTE(review): the 'value' constraint is given a ValueConstraint directly while
        # the 'limit' constraint wraps its payload in a TaskConstraint -- confirm intended.
        from_config.constraints = set([
            Constraint(name='value', constraint=ValueConstraint(values=set(['1', '2']))),
            Constraint(
                name='limit',
                constraint=TaskConstraint(limit=LimitConstraint(limit=int(10))))])
        from_config.taskLinks = {'task1': 'link1', 'task2': 'link2'}
        from_config.metadata = set([
            Metadata(key='k2', value='v2'),
            Metadata(key='k1', value='v1')])
        from_config.executorConfig = ExecutorConfig(name='test', data='test data')
        from_config.requestedPorts = set(['3424', '142', '45235'])

        # Deepcopy() almost guarantees from_config != to_config due to a different sequence of
        # dict insertions. That in turn generates unequal json objects. The ideal here would be
        # to assert to_config != from_config but that would produce a flaky test as I have
        # observed the opposite on rare occasions as the ordering is not stable between test
        # runs.
        to_config = deepcopy(from_config)

        diff_result = self._updater._diff_configs(from_config, to_config)
        assert diff_result == "", (
            'diff result must be empty but was: %s' % diff_result)

    def test_update_no_rollback(self):
        """Update process failures exceed total allowable count and update is not rolled back."""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(
            max_total_failures=1, max_per_shard_failures=1, rollback_on_failure=False)
        self.init_updater(update_config)

        old_configs = self.make_task_configs(10)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 10)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(10, 10)
        self.expect_kill([0])
        self.expect_add([0], new_config)
        self.expect_watch_instances([0], failed_instances=[0])
        self.expect_restart([0])
        self.expect_watch_instances([0], failed_instances=[0])
        self.expect_kill([1])
        self.expect_add([1], new_config)
        self.expect_watch_instances([1], failed_instances=[1])
        self.expect_restart([1])
        self.expect_watch_instances([1], failed_instances=[1])
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_response(ResponseCode.ERROR)
        self.verify_mocks()
class UpdaterTest(TestCase):
    """Tests for Updater.update() driven through mocked collaborators.

    Each test records the calls it expects on the mocks (the ``expect_*``
    helpers), switches the mocks to replay mode with ``replay_mocks()``, runs
    the update and finally checks the recorded expectations with
    ``verify_mocks()``.
    """

    # Baseline update settings; tests copy this dict and override single keys.
    UPDATE_CONFIG = {
        'batch_size': 1,
        'restart_threshold': 50,
        'watch_secs': 50,
        'max_per_shard_failures': 0,
        'max_total_failures': 0,
        'rollback_on_failure': True,
        'wait_for_batch_completion': False,
    }

    def setUp(self):
        """Create the job key, the mocks and a default Updater for each test."""
        self._role = 'mesos'
        self._name = 'jimbob'
        self._env = 'test'
        self._job_key = JobKey(name=self._name, environment=self._env, role=self._role)
        self._session_key = SessionKey()
        self._lock = 'test_lock'
        self._instance_watcher = MockObject(InstanceWatcher)
        self._job_monitor = MockObject(JobMonitor)
        self._scheduler_mux = FakeSchedulerMux()
        self._scheduler = MockObject(scheduler_client)
        self._scheduler_proxy = FakeSchedulerProxy(
            Cluster(name='test-cluster'),
            self._scheduler,
            self._session_key)
        self._quota_check = MockObject(QuotaCheck)
        self.init_updater(deepcopy(self.UPDATE_CONFIG))
        # Per-instance resource amounts used by make_task_configs() and
        # expect_quota_check().
        self._num_cpus = 1.0
        self._num_ram = 1
        self._num_disk = 1

    def replay_mocks(self):
        """Switch every mock from recording expectations to replaying them."""
        Replay(self._scheduler)
        Replay(self._instance_watcher)
        Replay(self._quota_check)
        Replay(self._job_monitor)

    def verify_mocks(self):
        """Verify the recorded expectations on every mock."""
        Verify(self._scheduler)
        Verify(self._instance_watcher)
        Verify(self._quota_check)
        Verify(self._job_monitor)

    def init_updater(self, update_config):
        """(Re)build self._config and self._updater from the given update settings."""
        self._config = FakeConfig(self._role, self._name, self._env, update_config)
        self._updater = Updater(
            self._config,
            3,
            self._scheduler_proxy,
            self._instance_watcher,
            self._quota_check,
            self._job_monitor,
            self._scheduler_mux)

    def expect_terminate(self):
        """Expect terminate() on the job monitor and on the instance watcher."""
        self._job_monitor.terminate()
        self._instance_watcher.terminate()

    def expect_watch_instances(self, instance_ids, failed_instances=None):
        """Expect one watch() call per id in instance_ids.

        Every recorded call receives the full instance_ids list and returns a
        set holding the current id when it appears in failed_instances, or an
        empty set otherwise.
        """
        # None replaces the former shared mutable [] default.
        failed_instances = [] if failed_instances is None else failed_instances
        for i in instance_ids:
            failed = [i] if i in failed_instances else []
            self._instance_watcher.watch(instance_ids).InAnyOrder().AndReturn(set(failed))

    def expect_populate(self, job_config, response_code=ResponseCode.OK):
        """Expect populateJobConfig(job_config) returning a copy of its task config."""
        resp = make_response(response_code)
        config = deepcopy(job_config.taskConfig)
        resp.result = Result(populateJobResult=PopulateJobResult(taskConfig=config))
        self._scheduler.populateJobConfig(job_config).AndReturn(resp)

    def expect_get_tasks(self, tasks, ignore_ids=None, response_code=ResponseCode.OK):
        """Expect getTasksStatus() for the job's active tasks.

        The response carries one ScheduledTask per entry in tasks, using the
        list index as the instance id; indexes contained in ignore_ids are
        left out.
        """
        scheduled = [
            ScheduledTask(assignedTask=AssignedTask(task=task, instanceId=index))
            for index, task in enumerate(tasks)
            if not ignore_ids or index not in ignore_ids
        ]
        response = make_response(response_code)
        response.result = Result(scheduleStatusResult=ScheduleStatusResult(tasks=scheduled))
        query = TaskQuery(jobKeys=[self._job_key], statuses=ACTIVE_STATES)
        self._scheduler.getTasksStatus(query).AndReturn(response)

    def expect_cron_replace(self, job_config, response_code=ResponseCode.OK):
        """Expect replaceCronTemplate() for job_config under the test lock."""
        resp = make_response(response_code)
        self._scheduler.replaceCronTemplate(
            job_config, self._lock, self._session_key).AndReturn(resp)

    def expect_restart(self, instance_ids, response_code=None):
        """Expect one restartShards() call per id; response_code defaults to OK."""
        # Resolve the default once instead of on every iteration.
        if response_code is None:
            response_code = ResponseCode.OK
        for i in instance_ids:
            response = make_response(response_code)
            self._scheduler.restartShards(
                self._job_key,
                [i],
                self._lock,
                self._session_key).AndReturn(response)

    def expect_kill(self,
                    instance_ids,
                    response_code=ResponseCode.OK,
                    monitor_result=True,
                    skip_monitor=False):
        """Expect one killTasks() call per id, then the job-monitor wait.

        The monitor expectation is delegated to expect_job_monitor(), which
        records nothing when skip_monitor is set or response_code is not OK.
        """
        for i in instance_ids:
            query = TaskQuery(
                jobKeys=[self._job_key],
                statuses=ACTIVE_STATES,
                instanceIds=frozenset([int(i)]))
            self._scheduler.killTasks(
                query,
                self._lock,
                self._session_key).InAnyOrder().AndReturn(make_response(response_code))

        self.expect_job_monitor(response_code, instance_ids, monitor_result, skip_monitor)

    def expect_job_monitor(self, response_code, instance_ids, monitor_result=True, skip=False):
        """Expect wait_until(JobMonitor.terminal, ...) unless skipped or the kill failed."""
        if skip or response_code != ResponseCode.OK:
            return

        self._job_monitor.wait_until(
            JobMonitor.terminal,
            instance_ids,
            with_timeout=True).InAnyOrder().AndReturn(monitor_result)

    def expect_add(self, instance_ids, task_config, response_code=ResponseCode.OK):
        """Expect one addInstances() call per id, each adding a single instance."""
        for i in instance_ids:
            add_config = AddInstancesConfig(
                key=self._job_key,
                taskConfig=task_config,
                instanceIds=frozenset([int(i)]))
            self._scheduler.addInstances(
                add_config,
                self._lock,
                self._session_key).InAnyOrder().AndReturn(make_response(response_code))

    def expect_update_instances(self, instance_ids, task_config):
        """Expect kill, add and a successful watch for each id, in list order."""
        for i in instance_ids:
            self.expect_kill([i])
            self.expect_add([i], task_config)
            self.expect_watch_instances([i])

    def expect_add_instances(self, instance_ids, task_config):
        """Expect add and a successful watch for each id, in list order."""
        for i in instance_ids:
            self.expect_add([i], task_config)
            self.expect_watch_instances([i])

    def expect_kill_instances(self, instance_ids):
        """Expect a kill (with its monitor wait) for each id, in list order."""
        for i in instance_ids:
            self.expect_kill([i])

    def expect_start(self, response_code=ResponseCode.OK):
        """Expect acquireLock() for the job, answering with the test lock."""
        response = make_response(response_code)
        response.result = Result(acquireLockResult=AcquireLockResult(lock=self._lock))
        self._scheduler.acquireLock(
            LockKey(job=self._job_key), self._session_key).AndReturn(response)

    def expect_finish(self, response_code=ResponseCode.OK):
        """Expect releaseLock() of the test lock with LockValidation.CHECKED."""
        self._scheduler.releaseLock(
            self._lock,
            LockValidation.CHECKED,
            self._session_key).AndReturn(make_response(response_code))

    def expect_quota_check(self,
                           num_released,
                           num_acquired,
                           response_code=ResponseCode.OK,
                           prod=True):
        """Expect validate_quota_from_requested() for the given instance counts.

        num_released and num_acquired are multiplied by the per-instance
        resource amounts set in setUp() to build the two capacity requests.
        """
        released = CapacityRequest(ResourceAggregate(
            numCpus=num_released * self._num_cpus,
            ramMb=num_released * self._num_ram,
            diskMb=num_released * self._num_disk))
        acquired = CapacityRequest(ResourceAggregate(
            numCpus=num_acquired * self._num_cpus,
            ramMb=num_acquired * self._num_ram,
            diskMb=num_acquired * self._num_disk))

        self._quota_check.validate_quota_from_requested(
            self._job_key, prod, released, acquired).AndReturn(make_response(response_code))

    def make_task_configs(self, count=1, prod=True):
        """Return a list of length count.

        Every element is the same TaskConfig object, so callers deepcopy an
        entry before modifying it.
        """
        return [TaskConfig(
            owner=Identity(role=self._job_key.role),
            environment=self._job_key.environment,
            jobName=self._job_key.name,
            numCpus=self._num_cpus,
            ramMb=self._num_ram,
            diskMb=self._num_disk,
            priority=0,
            maxTaskFailures=1,
            production=prod,
            taskLinks={'task': 'link'},
            contactEmail='*****@*****.**',
            executorConfig=ExecutorConfig(name='test', data='test data')
            # Not setting any set()-related properties as that throws off mox verification.
        )] * count

    def make_job_config(self, task_config, instance_count, cron_schedule=None):
        """Build a JobConfiguration for the test job around a copy of task_config."""
        return JobConfiguration(
            key=self._job_key,
            owner=Identity(role=self._job_key.role),
            cronSchedule=cron_schedule,
            taskConfig=deepcopy(task_config),
            instanceCount=instance_count)

    def update_and_expect_ok(self, instances=None):
        """Run the update and assert that it answers ResponseCode.OK."""
        self.update_and_expect_response(ResponseCode.OK, instances)

    def update_and_expect_response(self, expected_code, instances=None, message=None):
        """Run the update and assert on its response code.

        When message is given, the response must carry exactly one detail
        entry whose message contains it.
        """
        resp = self._updater.update(instances)
        assert expected_code == resp.responseCode, (
            'Expected response:%s Actual response:%s' % (expected_code, resp.responseCode))

        if message:
            assert len(resp.details) == 1, (
                'Unexpected error count:%s' % len(resp.details))
            assert message in resp.details[0].message, (
                'Expected %s message not found in: %s' % (message, resp.details[0].message))

    def test_pulse_interval_not_supported(self):
        """Building an Updater with pulse_interval_secs set raises Updater.Error."""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(pulse_interval_secs=60)

        with raises(Updater.Error) as e:
            self.init_updater(update_config)

        # Checked after the `with` block so the assertion really executes, and
        # against the raised exception rather than the handle returned by
        # raises(). NOTE(review): assumes `raises` is pytest.raises -- confirm.
        assert ('Pulse interval seconds is not supported in client updater.'
                in str(e.value))

    def test_grow(self):
        """Adds instances to the existing job."""
        old_configs = self.make_task_configs(3)
        new_config = old_configs[0]
        job_config = self.make_job_config(new_config, 7)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(0, 4)
        self.expect_add_instances([3, 4, 5, 6], new_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_grow_fails_quota_check(self):
        """Adds instances to the existing job fails due to not enough quota."""
        old_configs = self.make_task_configs(3)
        new_config = old_configs[0]
        job_config = self.make_job_config(new_config, 7)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(0, 4, response_code=ResponseCode.INVALID_REQUEST)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_response(expected_code=ResponseCode.ERROR)
        self.verify_mocks()

    def test_non_to_prod_fails_quota_check(self):
        """Update with shrinking with non->prod transition fails quota check."""
        old_configs = self.make_task_configs(4, prod=False)
        new_config = deepcopy(old_configs[0])
        new_config.production = True
        job_config = self.make_job_config(new_config, 2)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(0, 2, response_code=ResponseCode.INVALID_REQUEST)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_response(expected_code=ResponseCode.ERROR)
        self.verify_mocks()

    def test_prod_to_non_always_passes_quota_check(self):
        """Update with growth with prod->non transition always passes."""
        old_configs = self.make_task_configs(1, prod=True)
        new_config = deepcopy(old_configs[0])
        new_config.production = False
        job_config = self.make_job_config(new_config, 3)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(1, 0, prod=False)
        self.expect_kill([0])
        self.expect_add_instances([0, 1, 2], new_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_shrink(self):
        """Reduces the number of instances of the job."""
        old_configs = self.make_task_configs(10)
        new_config = old_configs[0]
        job_config = self.make_job_config(new_config, 3)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(7, 0)
        self.expect_kill_instances([3, 4, 5, 6, 7, 8, 9])
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_update_and_grow(self):
        """Updates existing instances and adds new ones."""
        old_configs = self.make_task_configs(3)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 7)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(3, 7)
        self.expect_update_instances([0, 1, 2], new_config)
        self.expect_add_instances([3, 4, 5, 6], new_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_update_and_shrink(self):
        """Updates some existing instances and reduce the instance count."""
        old_configs = self.make_task_configs(10)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 1)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(10, 1)
        self.expect_update_instances([0], new_config)
        self.expect_kill_instances([1, 2, 3, 4, 5, 6, 7, 8, 9])
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_update_instances(self):
        """Update existing instances."""
        old_configs = self.make_task_configs(5)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 5)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(5, 5)
        self.expect_update_instances([0, 1, 2, 3, 4], new_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_grow_with_instance_option(self):
        """Adding instances by providing an optional list of instance IDs."""
        old_configs = self.make_task_configs(3)
        new_config = old_configs[0]
        job_config = self.make_job_config(new_config, 5)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(0, 2)
        self.expect_add_instances([3, 4], new_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok(instances=[3, 4])
        self.verify_mocks()

    def test_shrink_with_instance_option(self):
        """Reducing instance count by providing an optional list of instance IDs."""
        old_configs = self.make_task_configs(10)
        new_config = old_configs[0]
        job_config = self.make_job_config(new_config, 4)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(6, 0)
        self.expect_kill_instances([4, 5, 6, 7, 8, 9])
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok(instances=[4, 5, 6, 7, 8, 9])
        self.verify_mocks()

    def test_update_with_instance_option(self):
        """Updating existing instances by providing an optional list of instance IDs."""
        old_configs = self.make_task_configs(10)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 10)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(3, 3)
        self.expect_update_instances([2, 3, 4], new_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok(instances=[2, 3, 4])
        self.verify_mocks()

    def test_patch_hole_with_instance_option(self):
        """Patching an instance ID gap created by a terminated update."""
        old_configs = self.make_task_configs(8)
        new_config = old_configs[0]
        job_config = self.make_job_config(new_config, 10)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs, [2, 3])
        self.expect_populate(job_config)
        self.expect_quota_check(0, 2)
        self.expect_add_instances([2, 3], new_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok([2, 3])
        self.verify_mocks()

    def test_noop_update(self):
        """No update calls happen if task configs are in sync."""
        old_configs = self.make_task_configs(5)
        new_config = old_configs[0]
        job_config = self.make_job_config(new_config, 5)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(0, 0)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_update_rollback(self):
        """Update process failures exceed total allowable count and update is rolled back."""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(max_per_shard_failures=1)
        self.init_updater(update_config)

        old_configs = self.make_task_configs(10)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 10)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(10, 10)
        self.expect_update_instances([0, 1], new_config)
        self.expect_kill([2])
        self.expect_add([2], new_config)
        self.expect_watch_instances([2], failed_instances=[2])
        self.expect_restart([2])
        self.expect_watch_instances([2], failed_instances=[2])
        # Rollback: the touched instances go back to the old config, newest first.
        self.expect_update_instances([2, 1, 0], old_configs[0])
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_response(ResponseCode.ERROR)
        self.verify_mocks()

    def test_update_after_restart(self):
        """Update succeeds after failed instances are restarted."""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(max_total_failures=2, max_per_shard_failures=1)
        self.init_updater(update_config)

        old_configs = self.make_task_configs(6)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 6)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(6, 6)
        self.expect_update_instances([0, 1], new_config)
        self.expect_kill([2])
        self.expect_add([2], new_config)
        self.expect_watch_instances([2], failed_instances=[2])
        self.expect_restart([2])
        self.expect_watch_instances([2])
        self.expect_update_instances([3, 4, 5], new_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_update_cron_job(self):
        """Updating cron job."""
        new_config = self.make_task_configs(1)[0]
        job_config = self.make_job_config(new_config, 1, cron_schedule='cron')
        self._config.job_config = job_config
        self.expect_start()
        self.expect_cron_replace(job_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_start_invalid_response(self):
        """The acquireLock call fails."""
        self.expect_start(response_code=ResponseCode.INVALID_REQUEST)
        self.replay_mocks()

        self.update_and_expect_response(ResponseCode.INVALID_REQUEST)
        self.verify_mocks()

    def test_finish_invalid_response(self):
        """The releaseLock call fails."""
        new_config = self.make_task_configs(1)[0]
        job_config = self.make_job_config(new_config, 1, cron_schedule='cron')
        self._config.job_config = job_config
        self.expect_start()
        self.expect_cron_replace(job_config)
        self.expect_finish(response_code=ResponseCode.INVALID_REQUEST)
        self.replay_mocks()

        self.update_and_expect_response(ResponseCode.INVALID_REQUEST)
        self.verify_mocks()

    def test_invalid_batch_size(self):
        """Test for out of range error for batch size."""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(batch_size=0)
        with raises(Updater.Error):
            self.init_updater(update_config)

    def test_invalid_restart_threshold(self):
        """Test for out of range error for restart threshold."""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(restart_threshold=0)
        with raises(Updater.Error):
            self.init_updater(update_config)

    def test_invalid_watch_secs(self):
        """Test for out of range error for watch secs."""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(watch_secs=0)
        with raises(Updater.Error):
            self.init_updater(update_config)

    def test_update_invalid_response(self):
        """A response code other than success is returned by a scheduler RPC."""
        old_configs = self.make_task_configs(5)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 5)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(5, 5)
        self._scheduler_mux.raise_error()
        self.expect_kill([0], skip_monitor=True)
        self.expect_terminate()
        self.replay_mocks()

        self.update_and_expect_response(ResponseCode.ERROR)
        self.verify_mocks()

    def test_update_kill_timeout(self):
        """Test job monitor timeout while waiting for tasks killed."""
        old_configs = self.make_task_configs(5)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 5)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(5, 5)
        self.expect_kill([0], monitor_result=False)
        self.expect_terminate()
        self.replay_mocks()

        self.update_and_expect_response(ResponseCode.ERROR)
        self.verify_mocks()

    def test_failed_update_populates_error_details(self):
        """Test failed update populates Response.details."""
        old_configs = self.make_task_configs(5)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 5)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(5, 5)
        self.expect_kill([0], monitor_result=False)
        self.expect_terminate()
        self.replay_mocks()

        self.update_and_expect_response(
            ResponseCode.ERROR,
            message="Aborting update without rollback")
        self.verify_mocks()

    def test_job_does_not_exist(self):
        """Unable to update a job that does not exist."""
        old_configs = self.make_task_configs(5)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 5)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs, response_code=ResponseCode.INVALID_REQUEST)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_response(ResponseCode.ERROR)
        self.verify_mocks()

    def test_instances_outside_range(self):
        """Provided optional instance IDs are outside of remote | local scope."""
        old_configs = self.make_task_configs(3)
        new_config = old_configs[0]
        job_config = self.make_job_config(new_config, 3)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_response(ResponseCode.ERROR, instances=[3, 4])
        self.verify_mocks()

    def test_update_skips_unretryable(self):
        """Update process skips instances exceeding max_per_shard_failures."""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(max_total_failures=1, max_per_shard_failures=2)
        self.init_updater(update_config)

        old_configs = self.make_task_configs(10)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 10)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(10, 10)
        self.expect_update_instances([0, 1], new_config)
        self.expect_kill([2])
        self.expect_add([2], new_config)
        self.expect_watch_instances([2], failed_instances=[2])
        self.expect_restart([2])
        self.expect_watch_instances([2], failed_instances=[2])
        self.expect_restart([2])
        self.expect_watch_instances([2], failed_instances=[2])
        self.expect_update_instances([3, 4, 5, 6, 7, 8, 9], new_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_diff_unordered_configs(self):
        """Diff between two config objects with different repr but identical content works ok."""
        from_config = self.make_task_configs()[0]
        from_config.constraints = set([
            Constraint(name='value', constraint=ValueConstraint(values=set(['1', '2']))),
            Constraint(name='limit', constraint=TaskConstraint(limit=LimitConstraint(
                limit=int(10))))
        ])
        from_config.taskLinks = {'task1': 'link1', 'task2': 'link2'}
        from_config.metadata = set(
            [Metadata(key='k2', value='v2'), Metadata(key='k1', value='v1')])
        from_config.executorConfig = ExecutorConfig(name='test', data='test data')
        from_config.requestedPorts = set(['3424', '142', '45235'])

        # Deepcopy() almost guarantees from_config != to_config due to a different sequence of
        # dict insertions. That in turn generates unequal json objects. The ideal here would be to
        # assert to_config != from_config but that would produce a flaky test as I have observed
        # the opposite on rare occasions as the ordering is not stable between test runs.
        to_config = deepcopy(from_config)

        diff_result = self._updater._diff_configs(from_config, to_config)
        assert diff_result == "", ('diff result must be empty but was: %s' % diff_result)

    def test_update_no_rollback(self):
        """Update process failures exceed total allowable count and update is not rolled back."""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(
            max_total_failures=1,
            max_per_shard_failures=1,
            rollback_on_failure=False)
        self.init_updater(update_config)

        old_configs = self.make_task_configs(10)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 10)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(10, 10)
        self.expect_kill([0])
        self.expect_add([0], new_config)
        self.expect_watch_instances([0], failed_instances=[0])
        self.expect_restart([0])
        self.expect_watch_instances([0], failed_instances=[0])
        self.expect_kill([1])
        self.expect_add([1], new_config)
        self.expect_watch_instances([1], failed_instances=[1])
        self.expect_restart([1])
        self.expect_watch_instances([1], failed_instances=[1])
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_response(ResponseCode.ERROR)
        self.verify_mocks()

    def test_update_instances_wait_for_batch_completion_filled_batch(self):
        """Update existing instances with wait_for_batch_completion flag set."""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(wait_for_batch_completion=True, batch_size=2)
        self.init_updater(update_config)

        old_configs = self.make_task_configs(6)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 6)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(6, 6)
        self.expect_update_instances([0, 1, 2, 3, 4, 5], new_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()

    def test_update_instances_wait_for_batch_completion_partially_filled_batch(self):
        """Update existing instances with wait_for_batch_completion flag set."""
        update_config = self.UPDATE_CONFIG.copy()
        update_config.update(wait_for_batch_completion=True, batch_size=3)
        self.init_updater(update_config)

        old_configs = self.make_task_configs(5)
        new_config = deepcopy(old_configs[0])
        new_config.priority = 5
        job_config = self.make_job_config(new_config, 5)
        self._config.job_config = job_config
        self.expect_start()
        self.expect_get_tasks(old_configs)
        self.expect_populate(job_config)
        self.expect_quota_check(5, 5)
        self.expect_update_instances([0, 1, 2, 3, 4], new_config)
        self.expect_finish()
        self.replay_mocks()

        self.update_and_expect_ok()
        self.verify_mocks()