def assert_correct_status_calls(cls, api):
    """Check the status-related scheduler calls recorded on the mocked ``api``.

    Asserts that ``getTasksWithoutConfigs`` was called at least 4 times and
    that ``getTasksStatus`` was called exactly once with the ACTIVE_STATES
    query for the bozo/test/hello job.
    """
    # getTasksWithoutConfigs is called a variable number of times, so only a
    # lower bound on the call count is checked.
    # NOTE(review): the earlier comment spoke of a minimum of 20 calls (one
    # per batch of restarts), but the threshold asserted here is 4 -- confirm
    # which is intended.
    watcher_mock = api.getTasksWithoutConfigs
    assert watcher_mock.call_count >= 4

    running_query = TaskQuery(
        taskIds=None,
        jobKeys=[JobKey(role='bozo', environment='test', name='hello')],
        statuses=set([ScheduleStatus.RUNNING]))
    for recorded_call in watcher_mock.call_args_list:
        # NOTE(review): the result of this comparison is discarded, so it
        # verifies nothing. It is preserved as-is; turning it into an assert
        # needs confirmation that every recorded query really matches.
        recorded_call[0][0] == running_query

    # getTasksStatus is called only once, to build the update instructions.
    status_mock = api.getTasksStatus
    assert status_mock.call_count == 1
    active_query = TaskQuery(
        taskIds=None,
        jobKeys=[JobKey(role='bozo', environment='test', name='hello')],
        statuses=ACTIVE_STATES)
    status_mock.assert_called_once_with(active_query)
def test_raise_auth_error(self):
    """killTasks raises SchedulerProxy.AuthError when the thrift client raises AuthError."""
    # Record the expected thrift call (including the SESSION argument) and
    # make it raise the transport-level auth error.
    expected_call = self.mock_thrift_client.killTasks(TaskQuery(), None, None, SESSION)
    expected_call.AndRaise(TRequestsTransport.AuthError())
    self.mox.ReplayAll()

    expected_error = scheduler_client.SchedulerProxy.AuthError
    with pytest.raises(expected_error):
        proxy = self.make_scheduler_proxy()
        proxy.killTasks(TaskQuery(), None, None)
def test_raise_auth_error(self):
    """killTasks raises SchedulerProxy.AuthError when the thrift client raises AuthError."""
    # Record the expected thrift call and make it raise the transport error.
    kill_expectation = self.mock_thrift_client.killTasks(TaskQuery(), None, None)
    kill_expectation.AndRaise(TRequestsTransport.AuthError())
    # The scheduler client is also expected to be asked for its
    # failed-auth message.
    message_expectation = self.mock_scheduler_client.get_failed_auth_message()
    message_expectation.AndReturn('failed auth')
    self.mox.ReplayAll()

    with pytest.raises(scheduler_client.SchedulerProxy.AuthError):
        proxy = self.make_scheduler_proxy()
        proxy.killTasks(TaskQuery(), None, None)
def task_query(cls, options):
    """Build a TaskQuery from the role, environment, limit and states on ``options``.

    When ``options.states`` is truthy it is split on commas and each name is
    looked up in ``ScheduleStatus._NAMES_TO_VALUES`` to fill ``statuses``.
    """
    built = TaskQuery(
        role=options.role,
        environment=options.environment,
        limit=options.limit)
    if not options.states:
        return built

    # NOTE(review): the lookup uses .get, so a name missing from the mapping
    # presumably ends up as None in the set rather than raising -- confirm.
    lookup = ScheduleStatus._NAMES_TO_VALUES.get
    built.statuses = set(lookup(name) for name in options.states.split(','))
    return built
def task_query(cls, options):
    """Translate command-line ``options`` into a TaskQuery.

    Role, environment and limit are copied over directly; a comma-separated
    ``options.states`` string, if present, is mapped through
    ``ScheduleStatus._NAMES_TO_VALUES`` into the ``statuses`` set.
    """
    query_fields = dict(
        role=options.role,
        environment=options.environment,
        limit=options.limit)
    result = TaskQuery(**query_fields)

    state_names = options.states
    if state_names:
        name_to_value = ScheduleStatus._NAMES_TO_VALUES
        result.statuses = set([name_to_value.get(name) for name in state_names.split(',')])
    return result
def expect_task_status(self, once=False, instances=None):
    """Assert that getTasksWithoutConfigs was called with this job's query.

    :param once: when true, require exactly one call in total; otherwise only
        the most recent call is checked.
    :param instances: optional iterable of instance ids (converted with
        ``int``) to expect in the query's ``instanceIds``.
    """
    key = self._job_key
    expected = TaskQuery(
        jobKeys=[JobKey(role=key.role, environment=key.env, name=key.name)])
    if instances is not None:
        expected.instanceIds = frozenset(int(s) for s in instances)

    status_mock = self._scheduler.getTasksWithoutConfigs
    verify = status_mock.assert_called_once_with if once else status_mock.assert_called_with
    verify(expected)
def expect_task_status(self, once=False, instances=None):
    """Assert that getTasksWithoutConfigs received the owner/env/name query for this job.

    :param once: when true, the mock must have been called exactly once.
    :param instances: optional iterable of instance ids (converted with
        ``int``) expected in ``instanceIds``.
    """
    job = self._job_key
    expected_query = TaskQuery(
        owner=Identity(role=job.role),
        environment=job.env,
        jobName=job.name)
    if instances is not None:
        expected_query.instanceIds = frozenset([int(item) for item in instances])

    recorded = self._scheduler.getTasksWithoutConfigs
    if once:
        recorded.assert_called_once_with(expected_query)
        return
    recorded.assert_called_with(expected_query)
def expect_task_status(self, once=False, instances=None):
    """Assert that getTasksStatus received the owner/env/name query for this job.

    :param once: when true, the mock must have been called exactly once.
    :param instances: optional iterable of instance ids (converted with
        ``int``) expected in ``instanceIds``.
    """
    job_key = self._job_key
    wanted = TaskQuery(
        owner=Identity(role=job_key.role),
        environment=job_key.env,
        jobName=job_key.name)
    if instances is not None:
        wanted.instanceIds = frozenset(int(value) for value in instances)

    # Pick the strict or the last-call assertion on the same mock.
    assertion_name = 'assert_called_once_with' if once else 'assert_called_with'
    getattr(self._scheduler.getTasksStatus, assertion_name)(wanted)
def test_status_api_failure(self):
    """`job status` returns EXIT_UNKNOWN_ERROR when the status query raises IOError."""
    thrift_mock = create_autospec(spec=AuroraAdmin.Client, instance=True)
    thrift_mock.getTasksWithoutConfigs.side_effect = IOError("Uh-Oh")
    client_mock = create_autospec(spec=SchedulerClient, instance=True)
    client_mock.get_thrift_client.return_value = thrift_mock

    client_patch = patch(
        'apache.aurora.client.api.scheduler_client.SchedulerClient.get',
        return_value=client_mock)
    clusters_patch = patch('apache.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS)
    with contextlib.nested(client_patch, clusters_patch):
        # Executing the command should create a scheduler client and issue a
        # getTasksWithoutConfigs call against the mocked thrift client. That
        # call raises, which must surface as an error exit code.
        exit_code = AuroraCommandLine().execute(['job', 'status', 'west/bozo/test/hello'])
        assert exit_code == EXIT_UNKNOWN_ERROR

        expected_query = TaskQuery(
            jobKeys=[JobKey(role='bozo', environment='test', name='hello')])
        assert thrift_mock.getTasksWithoutConfigs.mock_calls == [call(expected_query)]
def get_expected_task_query(cls, shards=None):
    """Helper to create the query that will be a parameter to job kill.

    :param shards: optional iterable of instance ids; ``None`` leaves
        ``instanceIds`` unset.
    """
    if shards is None:
        instance_ids = None
    else:
        instance_ids = frozenset(shards)
    job_key = JobKey(role=cls.TEST_ROLE, environment=cls.TEST_ENV, name=cls.TEST_JOB)
    return TaskQuery(taskIds=None, instanceIds=instance_ids, jobKeys=[job_key])
def create_query(self, instances=None):
    """Return the owner/env/name TaskQuery for this job.

    :param instances: optional iterable of instance ids; when truthy each
        item is converted with ``int`` and stored as a frozenset, otherwise
        ``instanceIds`` is ``None``.
    """
    instance_ids = None
    if instances:
        instance_ids = frozenset(int(item) for item in instances)

    job = self._job_key
    return TaskQuery(
        owner=Identity(role=job.role),
        environment=job.env,
        jobName=job.name,
        instanceIds=instance_ids)
def test_cron_diff(self):
    """Diffing a cron job config queries tasks once and invokes the diff viewer once."""
    cron_config = self.get_job_config(is_cron=True)
    active_query = TaskQuery(jobKeys=[self.TEST_JOBKEY.to_thrift()], statuses=ACTIVE_STATES)

    api = self._mock_api
    self._fake_context.get_job_config = Mock(return_value=cron_config)
    api.populate_job_config.return_value = self.populate_job_config_result()
    api.query.return_value = self.create_empty_task_result()
    api.build_query.return_value = active_query

    call_patch = patch('subprocess.call', return_value=0)
    loads_patch = patch('json.loads', return_value={})
    with contextlib.nested(call_patch, loads_patch) as (subprocess_patch, _):
        exit_code = self._command.execute(self._fake_context)

        assert exit_code == EXIT_OK
        assert api.populate_job_config.mock_calls == [call(cron_config)]
        assert api.query.mock_calls == [call(active_query)]
        assert subprocess_patch.call_count == 1
        # The command passed to subprocess.call starts with the viewer
        # followed by a space.
        viewer_prefix = os.environ.get('DIFF_VIEWER', 'diff') + ' '
        assert subprocess_patch.call_args[0][0].startswith(viewer_prefix)
def get_expected_task_query(cls, instances=None):
    """Build the jobKeys-based TaskQuery expected for the test job.

    :param instances: optional iterable of instance ids; ``None`` leaves
        ``instanceIds`` unset.
    """
    instance_ids = None if instances is None else frozenset(instances)
    expected_key = JobKey(
        role=cls.TEST_ROLE,
        environment=cls.TEST_ENV,
        name=cls.TEST_JOB)
    return TaskQuery(taskIds=None, instanceIds=instance_ids, jobKeys=[expected_key])
def test_transient_error(_, client):
    """Two ERROR_TRANSIENT responses followed by OK result in three thrift killTasks calls."""
    server_info = DEFAULT_RESPONSE.serverInfo
    canned_responses = [
        Response(
            responseCode=ResponseCode.ERROR_TRANSIENT,
            details=[ResponseDetail(message="message1"), ResponseDetail(message="message2")],
            serverInfo=server_info),
        Response(responseCode=ResponseCode.ERROR_TRANSIENT, serverInfo=server_info),
        Response(responseCode=ResponseCode.OK, serverInfo=server_info),
    ]

    scheduler_mock = mock.create_autospec(
        spec=scheduler_client.SchedulerClient,
        spec_set=False,
        instance=True)
    thrift_mock = mock.create_autospec(spec=AuroraAdmin.Client, instance=True)
    thrift_mock.killTasks.side_effect = canned_responses
    scheduler_mock.get_thrift_client.return_value = thrift_mock
    client.get.return_value = scheduler_mock

    proxy = TestSchedulerProxy(Cluster(name='local'))
    proxy.killTasks(TaskQuery(), None)

    assert thrift_mock.killTasks.call_count == 3
def test_kill_job_with_instances_deep_api(self):
    """Test kill client-side API logic."""
    (mock_api, mock_scheduler_proxy) = self.create_mock_api()

    wait_patch = patch('threading._Event.wait')
    proxy_patch = patch(
        'apache.aurora.client.api.SchedulerProxy',
        return_value=mock_scheduler_proxy)
    clusters_patch = patch('apache.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS)
    with contextlib.nested(wait_patch, proxy_patch, clusters_patch):
        mock_scheduler_proxy.killTasks.return_value = self.get_kill_job_response()
        self.setup_get_tasks_status_calls(mock_scheduler_proxy)

        with temporary_file() as config_file:
            config_file.write(self.get_valid_config())
            config_file.flush()
            kill_args = [
                'job', 'kill',
                '--config=%s' % config_file.name,
                'west/bozo/test/hello/0,2,4-6',
            ]
            AuroraCommandLine().execute(kill_args)

        # Now check that the right API calls got made: a single killTasks
        # call; the "0,2,4-6" spec is expected as instances 0, 2, 4, 5, 6.
        assert mock_scheduler_proxy.killTasks.call_count == 1
        expected_query = TaskQuery(
            jobName='hello',
            environment='test',
            instanceIds=frozenset([0, 2, 4, 5, 6]),
            owner=Identity(role='bozo'))
        mock_scheduler_proxy.killTasks.assert_called_with(expected_query, None)
def test_successful_diff(self):
    """Test the diff command."""
    mock_options = self.setup_mock_options()
    (mock_api, mock_scheduler_proxy) = self.create_mock_api()
    mock_scheduler_proxy.getTasksStatus.return_value = self.create_status_response()
    self.setup_populate_job_config(mock_scheduler_proxy)

    active_patches = contextlib.nested(
        patch('apache.aurora.client.api.SchedulerProxy', return_value=mock_scheduler_proxy),
        patch('apache.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS),
        patch('twitter.common.app.get_options', return_value=mock_options),
        patch('subprocess.call', return_value=0),
        patch('json.loads', return_value=Mock()))
    with active_patches as (
            mock_scheduler_proxy_class,
            mock_clusters,
            options,
            subprocess_patch,
            json_patch):
        with temporary_file() as config_file:
            config_file.write(self.get_valid_config())
            config_file.flush()
            diff(['west/mchucarroll/test/hello', config_file.name])

            # Diff should get the task status, populate a config, and run diff.
            expected_query = TaskQuery(
                jobName='hello',
                environment='test',
                owner=Identity(role='mchucarroll'),
                statuses=ACTIVE_STATES)
            mock_scheduler_proxy.getTasksStatus.assert_called_with(expected_query)

            populate_mock = mock_scheduler_proxy.populateJobConfig
            assert populate_mock.call_count == 1
            assert isinstance(populate_mock.call_args[0][0], JobConfiguration)
            expected_key = JobKey(environment=u'test', role=u'mchucarroll', name=u'hello')
            assert (populate_mock.call_args[0][0].key == expected_key)

            # Subprocess should have been used to invoke diff with two parameters.
            assert subprocess_patch.call_count == 1
            diff_command = subprocess_patch.call_args[0][0]
            assert len(diff_command) == 3
            assert diff_command[0] == os.environ.get('DIFF_VIEWER', 'diff')
def assert_query(cls, fake_api):
    """Assert ``fake_api.query_no_configs`` was called exactly once with the active-states query."""
    active_query = TaskQuery(
        jobKeys=[cls.TEST_JOBKEY.to_thrift()],
        statuses=ACTIVE_STATES)
    expected_calls = [call(active_query)]
    assert fake_api.query_no_configs.mock_calls == expected_calls
def test_service_diff_old_api(self):
    """Diff still succeeds via the task query when get_job_update_diff raises ThriftInternalError."""
    service_config = self.get_job_config()
    active_query = TaskQuery(jobKeys=[self.TEST_JOBKEY.to_thrift()], statuses=ACTIVE_STATES)

    api = self._mock_api
    self._fake_context.get_job_config = Mock(return_value=service_config)
    api.populate_job_config.return_value = self.populate_job_config_result()
    # The diff RPC fails; the command is expected to query tasks instead.
    api.get_job_update_diff.side_effect = SchedulerProxy.ThriftInternalError("Expected")
    api.query.return_value = self.create_empty_task_result()
    api.build_query.return_value = active_query

    call_patch = patch('subprocess.call', return_value=0)
    loads_patch = patch('json.loads', return_value={})
    with contextlib.nested(call_patch, loads_patch) as (subprocess_patch, _):
        exit_code = self._command.execute(self._fake_context)

        assert exit_code == EXIT_OK
        assert api.populate_job_config.mock_calls == [call(service_config)]
        expected_diff_call = call(service_config, self._mock_options.instance_spec.instance)
        assert api.get_job_update_diff.mock_calls == [expected_diff_call]
        assert api.query.mock_calls == [call(active_query)]
        assert subprocess_patch.call_count == 1
        viewer_prefix = os.environ.get('DIFF_VIEWER', 'diff') + ' '
        assert subprocess_patch.call_args[0][0].startswith(viewer_prefix)
def test_diff_server_error(self):
    """Test the diff command when getTasksStatus returns the failed status response."""
    mock_options = self.setup_mock_options()
    (mock_api, mock_scheduler_proxy) = self.create_mock_api()
    mock_scheduler_proxy.getTasksStatus.return_value = self.create_failed_status_response()
    self.setup_populate_job_config(mock_scheduler_proxy)

    active_patches = contextlib.nested(
        patch('apache.aurora.client.api.SchedulerProxy', return_value=mock_scheduler_proxy),
        patch('apache.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS),
        patch('twitter.common.app.get_options', return_value=mock_options),
        patch('subprocess.call', return_value=0),
        patch('json.loads', return_value=Mock()))
    with active_patches as (
            mock_scheduler_proxy_class,
            mock_clusters,
            options,
            subprocess_patch,
            json_patch):
        with temporary_file() as config_file:
            config_file.write(self.get_valid_config())
            config_file.flush()
            command_line = AuroraCommandLine()
            exit_code = command_line.execute(
                ['job', 'diff', 'west/bozo/test/hello', config_file.name])
            assert exit_code == EXIT_INVALID_PARAMETER

            # In this error case, we should have called the server getTasksStatus;
            # but since it fails, we shouldn't call populateJobConfig or subprocess.
            expected_query = TaskQuery(
                jobName='hello',
                environment='test',
                owner=Identity(role='bozo'),
                statuses=ACTIVE_STATES)
            mock_scheduler_proxy.getTasksStatus.assert_called_with(expected_query)
            assert mock_scheduler_proxy.populateJobConfig.call_count == 0
            assert subprocess_patch.call_count == 0
def test_successful_diff(self):
    """Test the diff command.

    Runs `job diff` against a mocked scheduler proxy and checks that the
    active-states task query is issued, populateJobConfig is called once with
    a JobConfiguration for bozo/test/hello, and subprocess.call is invoked
    once with a three-element command starting with the diff viewer.
    """
    (mock_api, mock_scheduler_proxy) = self.create_mock_api()
    # contextlib.nested yields one value per context manager. Three patches
    # are installed here, so exactly three targets are unpacked (the previous
    # four-target tuple raised ValueError on entering the block).
    with contextlib.nested(
            patch('apache.aurora.client.api.SchedulerProxy', return_value=mock_scheduler_proxy),
            patch('subprocess.call', return_value=0),
            patch('json.loads', return_value=Mock())) as (_, subprocess_patch, _):
        mock_scheduler_proxy.getTasksStatus.return_value = self.create_status_response()
        self.setup_populate_job_config(mock_scheduler_proxy)
        with temporary_file() as fp:
            fp.write(self.get_valid_config())
            fp.flush()
            cmd = AuroraCommandLine()
            cmd.execute(['job', 'diff', 'west/bozo/test/hello', fp.name])

            # Diff should get the task status, populate a config, and run diff.
            mock_scheduler_proxy.getTasksStatus.assert_called_with(
                TaskQuery(
                    jobKeys=[JobKey(role='bozo', environment='test', name='hello')],
                    statuses=ACTIVE_STATES))
            assert mock_scheduler_proxy.populateJobConfig.call_count == 1
            assert isinstance(
                mock_scheduler_proxy.populateJobConfig.call_args[0][0],
                JobConfiguration)
            assert (mock_scheduler_proxy.populateJobConfig.call_args[0][0].key ==
                    JobKey(environment=u'test', role=u'bozo', name=u'hello'))

            # Subprocess should have been used to invoke diff with two parameters.
            assert subprocess_patch.call_count == 1
            assert len(subprocess_patch.call_args[0][0]) == 3
            assert subprocess_patch.call_args[0][0][0] == os.environ.get('DIFF_VIEWER', 'diff')
def scheduler_delete_recovery_tasks(cluster, task_ids):
    """usage: scheduler_delete_recovery_tasks cluster task_ids

    Deletes a comma-separated list of task IDs from a staged recovery.
    """
    # De-duplicate the ids before building the query.
    recovery_ids = set(task_ids.split(','))
    admin_client = make_admin_client(cluster)
    response = admin_client.delete_recovery_tasks(TaskQuery(taskIds=recovery_ids))
    check_and_log_response(response)
def test_successful_ssh(self):
    """Test the ssh command.

    Runs `task ssh` for instance 1 of west/bozo/test/hello against a mocked
    scheduler proxy and checks both the status query sent to the scheduler
    and the ssh command line handed to subprocess.call.
    """
    (mock_api, mock_scheduler_proxy) = self.create_mock_api()
    mock_scheduler_proxy.getTasksStatus.return_value = self.create_status_response()
    sandbox_args = {'slave_root': '/slaveroot', 'slave_run_directory': 'slaverun'}
    # contextlib.nested yields one value per context manager. Only three
    # patches are installed, so exactly three names are bound (the previous
    # tuple also listed a stale ``mock_clusters`` target, which made the
    # unpacking raise ValueError).
    with contextlib.nested(
            patch('apache.aurora.client.api.SchedulerProxy', return_value=mock_scheduler_proxy),
            patch('apache.aurora.client.api.command_runner.DistributedCommandRunner.sandbox_args',
                  return_value=sandbox_args),
            patch('subprocess.call', return_value=0)) as (
                mock_scheduler_proxy_class,
                mock_runner_args_patch,
                mock_subprocess):
        cmd = AuroraCommandLine()
        cmd.execute(['task', 'ssh', 'west/bozo/test/hello/1', '--command=ls'])

        # The ssh command sends a getTasksStatus query to the scheduler for
        # the requested instance, restricted to the statuses listed below.
        mock_scheduler_proxy.getTasksStatus.assert_called_with(TaskQuery(
            jobKeys=[JobKey(role='bozo', environment='test', name='hello')],
            instanceIds=set([1]),
            statuses=set([
                ScheduleStatus.RUNNING,
                ScheduleStatus.KILLING,
                ScheduleStatus.RESTARTING,
                ScheduleStatus.PREEMPTING,
                ScheduleStatus.DRAINING])))
        mock_subprocess.assert_called_with([
            'ssh', '-t', 'bozo@slavehost',
            'cd /slaveroot/slaves/*/frameworks/*/executors/thermos-1287391823/runs/'
            'slaverun/sandbox;ls'])
def get_expected_task_query(cls, instances=None):
    """Build the owner/env/name TaskQuery expected for the test job.

    :param instances: optional iterable of instance ids; ``None`` leaves
        ``instanceIds`` unset.
    """
    if instances is not None:
        instance_ids = frozenset(instances)
    else:
        instance_ids = None
    job_owner = Identity(role=cls.TEST_ROLE, user=None)
    return TaskQuery(
        taskIds=None,
        jobName=cls.TEST_JOB,
        environment=cls.TEST_ENV,
        instanceIds=instance_ids,
        owner=job_owner)
def test_get_domain_uptime_vector_with_hosts_no_job_tasks(self):
    """With no tasks returned, the host-scoped vector builds exactly one query for ['h1']."""
    query_patch = patch(
        'apache.aurora.client.api.sla.task_query',
        return_value=TaskQuery())
    with query_patch as mock_query:
        self.mock_get_tasks([])
        self._sla.get_domain_uptime_vector(self._cluster, self._min_count, ['h1'])
        mock_query.assert_called_once_with(hosts=['h1'])
def test_successful_status_deep_null_metadata(self):
    """`job status` issues the job-key query when the response comes from create_status_null_metadata."""
    (mock_api, mock_scheduler_proxy) = self.create_mock_api()
    status_mock = mock_scheduler_proxy.getTasksWithoutConfigs
    status_mock.return_value = self.create_status_null_metadata()

    proxy_patch = patch(
        'apache.aurora.client.api.SchedulerProxy',
        return_value=mock_scheduler_proxy)
    with proxy_patch:
        AuroraCommandLine().execute(['job', 'status', 'west/bozo/test/hello'])
        expected_query = TaskQuery(
            jobKeys=[JobKey(role='bozo', environment='test', name='hello')])
        status_mock.assert_called_with(expected_query)
def expect_task_status_call_job_scoped(self):
    """Assert getTasksWithoutConfigs was called exactly once with the job-scoped LIVE_STATES query."""
    job_scoped_query = TaskQuery(
        owner=Identity(role=self._role),
        environment=self._env,
        jobName=self._name,
        statuses=LIVE_STATES)
    status_mock = self._scheduler.getTasksWithoutConfigs
    status_mock.assert_called_once_with(job_scoped_query)
def generic_test_successful_run(self, cmd_args, instances):
    """Common structure of all successful run tests.

    Params:
      cmd_args: the arguments to pass to the aurora command line to run this test.
      instances: the list of instances that should be passed to a status query.
        (The status query is the only visible difference between a sharded
        run and an all-instances run in the test.)
    """
    (mock_api, mock_scheduler_proxy) = self.create_mock_api()
    status_mock = mock_scheduler_proxy.getTasksStatus
    status_mock.return_value = self.create_status_response()
    sandbox_args = {
        'slave_root': '/slaveroot',
        'slave_run_directory': 'slaverun',
    }

    active_patches = contextlib.nested(
        patch('apache.aurora.client.api.SchedulerProxy', return_value=mock_scheduler_proxy),
        patch('apache.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS),
        patch('apache.aurora.client.cli.task.CLUSTERS', new=self.TEST_CLUSTERS),
        patch(
            'apache.aurora.client.api.command_runner.'
            'InstanceDistributedCommandRunner.sandbox_args',
            return_value=sandbox_args),
        patch('subprocess.Popen', return_value=self.create_mock_process()))
    with active_patches as (
            mock_scheduler_proxy_class,
            mock_clusters,
            mock_clusters_cli,
            mock_runner_args_patch,
            mock_subprocess):
        command_line = AuroraCommandLine()
        assert command_line.execute(cmd_args) == EXIT_OK

        # The command sends a getTasksStatus query to the scheduler. The use
        # of shards should change this query - that's the focus of the
        # instances test.
        expected_statuses = set([
            ScheduleStatus.RUNNING,
            ScheduleStatus.KILLING,
            ScheduleStatus.RESTARTING,
            ScheduleStatus.PREEMPTING,
            ScheduleStatus.DRAINING])
        expected_query = TaskQuery(
            jobKeys=[JobKey(role='bozo', environment='test', name='hello')],
            statuses=expected_statuses,
            instanceIds=instances)
        status_mock.assert_called_with(expected_query)

        # Three commands are expected to have been run.
        # NOTE(review): presumably one per ScheduledTask in the mocked status
        # response -- confirm against create_status_response.
        assert mock_subprocess.call_count == 3
        expected_command = [
            'ssh', '-n', '-q', 'bozo@slavehost',
            'cd /slaveroot/slaves/*/frameworks/*/executors/thermos-1287391823/runs/'
            'slaverun/sandbox;ls',
        ]
        mock_subprocess.assert_called_with(expected_command, stderr=-2, stdout=-1)
def scheduler_delete_recovery_tasks(cluster, task_ids):
    """usage: scheduler_delete_recovery_tasks cluster task_ids

    Deletes a comma-separated list of task IDs from a staged recovery.
    """
    # De-duplicate the ids before building the query.
    recovery_ids = set(task_ids.split(','))
    options = app.get_options()
    api = AuroraClientAPI(CLUSTERS[cluster], options.verbosity)
    response = api.delete_recovery_tasks(TaskQuery(taskIds=recovery_ids))
    check_and_log_response(response)
def assert_correct_killtask_calls(cls, api):
    """Assert killTasks was called 20 times and that the last call targeted instance 19."""
    kill_mock = api.killTasks
    assert kill_mock.call_count == 20

    # Check the last call's parameters.
    last_query = TaskQuery(
        taskIds=None,
        jobName='hello',
        environment='test',
        instanceIds=frozenset([19]),
        owner=Identity(role=u'mchucarroll', user=None),
        statuses=ACTIVE_STATES)
    kill_mock.assert_called_with(last_query, 'foo')
def create_query(self, instances=None):
    """Return the jobKeys-based TaskQuery for this job.

    :param instances: optional iterable of instance ids; when truthy each
        item is converted with ``int`` and stored as a frozenset, otherwise
        ``instanceIds`` is ``None``.
    """
    if instances:
        instance_ids = frozenset(int(item) for item in instances)
    else:
        instance_ids = None

    key = self._job_key
    thrift_key = JobKey(role=key.role, environment=key.env, name=key.name)
    return TaskQuery(jobKeys=[thrift_key], instanceIds=instance_ids)
def expect_get_tasks(self, tasks, ignore_ids=None, response_code=ResponseCode.OK):
    """Record an expected getTasksStatus call that returns ``tasks`` as scheduled tasks.

    :param tasks: task configs; each one's position in the sequence becomes
        its ``instanceId``.
    :param ignore_ids: optional collection of positions to leave out of the
        response.
    :param response_code: response code passed to ``make_response``.
    """
    scheduled = [
        ScheduledTask(assignedTask=AssignedTask(task=task, instanceId=position))
        for position, task in enumerate(tasks)
        if not ignore_ids or position not in ignore_ids
    ]
    response = make_response(response_code)
    response.result = Result(scheduleStatusResult=ScheduleStatusResult(tasks=scheduled))

    active_query = TaskQuery(jobKeys=[self._job_key], statuses=ACTIVE_STATES)
    expectation = self._scheduler.getTasksStatus(active_query)
    expectation.AndReturn(response)
def assert_correct_killtask_calls(cls, api):
    """Assert killTasks was called 20 times and that the last call targeted instance 19 with the lock."""
    kill_mock = api.killTasks
    assert kill_mock.call_count == 20

    # Check the last call's parameters.
    last_query = TaskQuery(
        taskIds=None,
        jobKeys=[JobKey(role='bozo', environment='test', name='hello')],
        instanceIds=frozenset([19]),
        statuses=ACTIVE_STATES)
    expected_lock = Lock(key='foo', token='token')
    kill_mock.assert_called_with(last_query, expected_lock)
def get_tasks_status_query(self, instance_ids):
    """Return a TaskQuery for RUNNING tasks of this job limited to ``instance_ids``.

    :param instance_ids: iterable of instance ids; stored as a ``set``.
    """
    return TaskQuery(
        owner=Identity(role=self._role),
        environment=self._env,
        jobName=self._name,
        statuses=set([ScheduleStatus.RUNNING]),
        instanceIds=set(instance_ids))
def prune_tasks(args, options):
    """Prune tasks on a cluster, optionally filtered by states, role, environment and limit.

    :param args: positional arguments; the first one is the cluster name.
    :param options: parsed options; truthy ``states``, ``role``,
        ``environment`` and ``limit`` values are copied onto the query.
    """
    if not args:
        die('Must specify at least cluster.')
    cluster = args[0]

    query = TaskQuery()
    if options.states:
        # Comma-separated status names are translated through the thrift
        # name-to-value mapping.
        to_value = ScheduleStatus._NAMES_TO_VALUES.get
        query.statuses = set(to_value(name) for name in options.states.split(','))
    if options.role:
        query.role = options.role
    if options.environment:
        query.environment = options.environment
    if options.limit:
        query.limit = options.limit

    admin = make_admin_client_with_options(cluster)
    response = admin.prune_tasks(query)
    if response.responseCode == ResponseCode.OK:
        print("Tasks pruned.")
    else:
        die('Failed to prune tasks: %s' % combine_messages(response))
def _create_query(self, instance_ids):
    """Return a TaskQuery for RUNNING tasks of this job restricted to ``instance_ids``.

    ``instance_ids`` is stored on the query as given, without conversion.
    """
    job = self._job_key
    query_fields = dict(
        owner=Identity(role=job.role),
        environment=job.environment,
        jobName=job.name,
        statuses=set([ScheduleStatus.RUNNING]),
        instanceIds=instance_ids)
    return TaskQuery(**query_fields)
def _get_tasks_by_instance_id(self, instance_ids):
    """Fetch the RUNNING tasks of this job for the given instance ids.

    :param instance_ids: instance ids placed on the query as given.
    :returns: the tasks from the scheduler response when its code is OK;
        an empty list for any other code or when the call raises IOError.
    """
    log.debug('Querying instance statuses.')
    job = self._job_key
    query = TaskQuery(
        owner=Identity(role=job.role),
        environment=job.environment,
        jobName=job.name,
        statuses=set([ScheduleStatus.RUNNING]),
        instanceIds=instance_ids)

    try:
        resp = self._scheduler.getTasksStatus(query)
    except IOError as e:
        # An I/O failure is logged and reported as "no tasks".
        log.error('IO Exception during scheduler call: %s' % e)
        return []

    if resp.responseCode == ResponseCode.OK:
        tasks = resp.result.scheduleStatusResult.tasks
    else:
        tasks = []

    code_name = ResponseCode._VALUES_TO_NAMES[resp.responseCode]
    log.debug('Response from scheduler: %s (message: %s)' % (code_name, resp.messageDEPRECATED))
    return tasks
def create_query(cls, instances):
    """Return a TaskQuery whose only populated field is ``instanceIds``.

    :param instances: iterable of instance ids; stored as a ``set``.
    """
    return TaskQuery(instanceIds=set(instances))
def get_tasks_status_query(self, instance_ids):
    """Return a job-key TaskQuery for RUNNING tasks limited to ``instance_ids``.

    :param instance_ids: iterable of instance ids; stored as a ``set``.
    """
    running_only = set([ScheduleStatus.RUNNING])
    return TaskQuery(
        jobKeys=set([self._job_key]),
        statuses=running_only,
        instanceIds=set(instance_ids))
def _create_query(self, instance_ids):
    """Return a job-key TaskQuery for RUNNING tasks restricted to ``instance_ids``.

    ``instance_ids`` is stored on the query as given, without conversion.
    """
    job_keys = set([self._job_key])
    running_only = set([ScheduleStatus.RUNNING])
    return TaskQuery(jobKeys=job_keys, statuses=running_only, instanceIds=instance_ids)