def assert_correct_status_calls(cls, api):
  # getTasksStatus gets called many times; the exact number isn't fixed, because the
  # restarter loops over the health checks until all of them pass for a configured
  # period of time. The minimum number of calls is 5: once before the tasks are
  # restarted, and then once for each batch of restarts. (Since the batch size is
  # set to 5 and the total number of instances is 20, that's 4 batches.)
  assert api.getTasksStatus.call_count >= 5

  # The first getTasksStatus call uses an expansive query; the rest only query for
  # status RUNNING.
  status_calls = api.getTasksStatus.call_args_list
  assert status_calls[0][0][0] == TaskQuery(
      taskIds=None,
      jobName='hello',
      environment='test',
      owner=Identity(role=u'mchucarroll', user=None),
      statuses=cls.QUERY_STATUSES)
  for status_call in status_calls[1:]:
    assert status_call[0][0] == TaskQuery(
        taskIds=None,
        jobName='hello',
        environment='test',
        owner=Identity(role='mchucarroll', user=None),
        statuses=set([ScheduleStatus.RUNNING]))
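# A minimal, standalone sketch of the call_args_list indexing used above
# (illustrative only; the mock here is hypothetical): each recorded call is an
# (args, kwargs) pair, so call_args_list[0][0][0] is the first positional
# argument of the first call.
import mock

m = mock.Mock()
m('a', key='b')
args, kwargs = m.call_args_list[0]
assert args == ('a',)
assert kwargs == {'key': 'b'}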
def get_expected_task_query(cls, shards=None):
  # Helper to create the query that will be a parameter to job kill.
  instance_ids = frozenset(shards) if shards is not None else None
  return TaskQuery(
      taskIds=None,
      jobName=cls.TEST_JOB,
      environment=cls.TEST_ENV,
      instanceIds=instance_ids,
      owner=Identity(role=cls.TEST_ROLE, user=None))
def __init__(self, client, role, env, jobname):
  self._client = client
  self._query = TaskQuery(owner=Identity(role=role), environment=env, jobName=jobname)
  # Define the attribute before calling iter_query, which may reference it, then
  # snapshot the task IDs present at construction time.
  self._initial_tasks = set()
  self._initial_tasks = set(task.assignedTask.taskId for task in self.iter_query())
def assert_correct_killtask_calls(cls, api):
  assert api.killTasks.call_count == 4
  # Check the last call's parameters.
  api.killTasks.assert_called_with(
      TaskQuery(
          taskIds=None,
          jobName='hello',
          environment='test',
          instanceIds=frozenset([15, 16, 17, 18, 19]),
          owner=Identity(role=u'mchucarroll', user=None),
          statuses=cls.QUERY_STATUSES),
      'foo')
def scheduler_delete_recovery_tasks(cluster, task_ids):
  """usage: scheduler_delete_recovery_tasks cluster task_ids

  Deletes a comma-separated list of task IDs from a staged recovery.
  """
  ids = set(task_ids.split(','))
  options = app.get_options()
  check_and_log_response(
      AuroraClientAPI(CLUSTERS[cluster], options.verbosity).delete_recovery_tasks(
          TaskQuery(taskIds=ids)))
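# For illustration, the handler above turns its comma-separated argument into a
# TaskQuery like this (the task IDs are hypothetical):
example_ids = set('task-id-1,task-id-2'.split(','))
example_query = TaskQuery(taskIds=example_ids)
assert example_query.taskIds == set(['task-id-1', 'task-id-2'])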
def expect_kill(self, instance_ids, response_code=None):
  response_code = ResponseCode.OK if response_code is None else response_code
  response = Response(responseCode=response_code, message='test')
  query = TaskQuery(
      owner=Identity(role=self._job_key.role),
      environment=self._job_key.environment,
      jobName=self._job_key.name,
      statuses=ACTIVE_STATES,
      instanceIds=frozenset([int(s) for s in instance_ids]))
  self._scheduler.killTasks(query, self._lock, self._session_key).AndReturn(response)
def test_successful_run(self):
  """Test the run command."""
  mock_options = self.setup_mock_options()
  (mock_api, mock_scheduler) = self.create_mock_api()
  mock_scheduler.getTasksStatus.return_value = self.create_status_response()
  sandbox_args = {'slave_root': '/slaveroot', 'slave_run_directory': 'slaverun'}
  with contextlib.nested(
      patch('twitter.aurora.client.api.SchedulerProxy', return_value=mock_scheduler),
      patch('twitter.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS),
      patch('twitter.aurora.client.commands.run.CLUSTERS', new=self.TEST_CLUSTERS),
      patch('twitter.common.app.get_options', return_value=mock_options),
      patch('twitter.aurora.client.api.command_runner.DistributedCommandRunner.sandbox_args',
            return_value=sandbox_args),
      patch('subprocess.Popen', return_value=self.create_mock_process())) as (
          mock_scheduler_proxy_class,
          mock_clusters,
          mock_clusters_runpatch,
          options,
          mock_runner_args_patch,
          mock_subprocess):
    run(['west/mchucarroll/test/hello', 'ls'], mock_options)

    # The run command sends a getTasksStatus query to the scheduler to find the
    # hosts on which the job's tasks are running.
    mock_scheduler.getTasksStatus.assert_called_with(TaskQuery(
        jobName='hello',
        environment='test',
        owner=Identity(role='mchucarroll'),
        statuses=set([ScheduleStatus.RUNNING, ScheduleStatus.KILLING,
                      ScheduleStatus.RESTARTING, ScheduleStatus.PREEMPTING])))

    # The mock status call returns three ScheduledTasks, so three commands should
    # have been run.
    assert mock_subprocess.call_count == 3
    mock_subprocess.assert_called_with(
        ['ssh', '-n', '-q', 'mchucarroll@slavehost',
         'cd /slaveroot/slaves/*/frameworks/*/executors/thermos-1287391823/runs/'
         'slaverun/sandbox;ls'],
        stderr=-2,
        stdout=-1)
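# For reference, the magic numbers in the Popen assertion above are the standard
# module-level subprocess constants: stdout=-1 is subprocess.PIPE and stderr=-2
# is subprocess.STDOUT.
import subprocess

assert subprocess.PIPE == -1
assert subprocess.STDOUT == -2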
def test_init(self):
  result = Result(scheduleStatusResult=ScheduleStatusResult(tasks=[]))
  response = Response(responseCode=ResponseCode.OK, message="test", result=result)
  query = TaskQuery(owner=Identity(role=ROLE), environment=ENV, jobName=JOB_NAME)
  self.mock_scheduler.getTasksStatus(query).AndReturn(response)
  self.mox.ReplayAll()
  JobMonitor(self.mock_api, ROLE, ENV, JOB_NAME)
def expect_get_tasks(self, tasks, ignore_ids=None, response_code=None):
  response_code = ResponseCode.OK if response_code is None else response_code
  response = Response(responseCode=response_code, message='test')
  scheduled = []
  for index, task in enumerate(tasks):
    if not ignore_ids or index not in ignore_ids:
      scheduled.append(ScheduledTask(assignedTask=AssignedTask(task=task, instanceId=index)))
  response.result = Result(scheduleStatusResult=ScheduleStatusResult(tasks=scheduled))
  query = TaskQuery(
      owner=Identity(role=self._job_key.role),
      environment=self._job_key.environment,
      jobName=self._job_key.name,
      statuses=ACTIVE_STATES)
  self._scheduler.getTasksStatus(query).AndReturn(response)
def test_unsuccessful_status(self):
  """Test the status command when the user asks for the status of a job that doesn't exist."""
  # Calls api.check_status, which calls scheduler.getTasksStatus.
  mock_options = self.setup_mock_options()
  (mock_api, mock_scheduler) = self.create_mock_api()
  mock_scheduler.getTasksStatus.return_value = self.create_failed_status_response()
  with contextlib.nested(
      patch('twitter.aurora.client.api.SchedulerProxy', return_value=mock_scheduler),
      patch('twitter.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS),
      patch('twitter.common.app.get_options', return_value=mock_options)) as (
          mock_scheduler_proxy_class,
          mock_clusters,
          options):
    self.assertRaises(SystemExit, status, ['west/mchucarroll/test/hello'], mock_options)
    mock_scheduler.getTasksStatus.assert_called_with(TaskQuery(
        jobName='hello', environment='test', owner=Identity(role='mchucarroll')))
def scheduler_print_recovery_tasks(cluster):
  """usage: scheduler_print_recovery_tasks cluster

  Prints all active tasks in a staged recovery.
  """
  options = app.get_options()
  resp = AuroraClientAPI(CLUSTERS[cluster], options.verbosity).query_recovery(
      TaskQuery(statuses=ACTIVE_STATES))
  check_and_log_response(resp)
  log.info('Role\tJob\tShard\tStatus\tTask ID')
  for task in resp.tasks:
    assigned = task.assignedTask
    conf = assigned.task
    log.info('\t'.join((conf.owner.role,
                        conf.jobName,
                        str(assigned.instanceId),
                        ScheduleStatus._VALUES_TO_NAMES[task.status],
                        assigned.taskId)))
def test_successful_status(self):
  """Test the status command."""
  # Calls api.check_status, which calls scheduler.getTasksStatus.
  mock_options = self.setup_mock_options()
  (mock_api, mock_scheduler) = self.create_mock_api()
  mock_scheduler.getTasksStatus.return_value = self.create_status_response()
  with contextlib.nested(
      patch('twitter.aurora.client.api.SchedulerProxy', return_value=mock_scheduler),
      patch('twitter.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS),
      patch('twitter.common.app.get_options', return_value=mock_options)) as (
          mock_scheduler_proxy_class,
          mock_clusters,
          options):
    status(['west/mchucarroll/test/hello'], mock_options)
    # The status command sends a getTasksStatus query to the scheduler,
    # and then prints the result.
    mock_scheduler.getTasksStatus.assert_called_with(TaskQuery(
        jobName='hello', environment='test', owner=Identity(role='mchucarroll')))
def _get_tasks_by_instance_id(self, instance_ids):
  log.debug('Querying instance statuses.')
  query = TaskQuery()
  query.owner = Identity(role=self._job_key.role)
  query.environment = self._job_key.environment
  query.jobName = self._job_key.name
  query.statuses = set([ScheduleStatus.RUNNING])
  query.instanceIds = instance_ids
  try:
    resp = self._scheduler.getTasksStatus(query)
  except IOError as e:
    log.error('IO Exception during scheduler call: %s' % e)
    return []
  tasks = []
  if resp.responseCode == ResponseCode.OK:
    tasks = resp.result.scheduleStatusResult.tasks
  log.debug('Response from scheduler: %s (message: %s)' % (
      ResponseCode._VALUES_TO_NAMES[resp.responseCode], resp.message))
  return tasks
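# The attribute-by-attribute construction above is equivalent to passing keyword
# arguments to the generated thrift constructor, as the other helpers in this
# section do; a minimal sketch (the job key values are hypothetical):
equivalent_query = TaskQuery(
    owner=Identity(role='mchucarroll'),
    environment='test',
    jobName='hello',
    statuses=set([ScheduleStatus.RUNNING]),
    instanceIds=frozenset([0, 1]))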
def create_mock_query(cls):
  return TaskQuery(
      owner=Identity(role=cls.TEST_ROLE), environment=cls.TEST_ENV, jobName=cls.TEST_JOB)
def _create_task_query(self, instanceIds=None):
  return TaskQuery(
      owner=Identity(role=self._job_key.role),
      environment=self._job_key.environment,
      jobName=self._job_key.name,
      statuses=ACTIVE_STATES,
      instanceIds=instanceIds)
def to_thrift_query(self):
  return TaskQuery(owner=Identity(role=self.role), environment=self.env, jobName=self.name)
def build_query(cls, role, job, instances=None, statuses=LIVE_STATES, env=None):
  return TaskQuery(
      owner=Identity(role=role),
      jobName=job,
      statuses=statuses,
      instanceIds=instances,
      environment=env)
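# Example use of build_query (the enclosing class isn't shown above, so
# QueryBuilder is a hypothetical name and the argument values are illustrative):
# restrict the query to two instances of 'hello' in the 'test' environment,
# keeping the LIVE_STATES default for statuses.
query = QueryBuilder.build_query(
    'mchucarroll', 'hello', instances=frozenset([0, 1]), env='test')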
def query_from(cls, role, env, job):
  return TaskQuery(statuses=LIVE_STATES, owner=Identity(role=role), jobName=job, environment=env)