def assert_correct_status_calls(cls, api):
    """Verify the scheduler status queries recorded on the mock API.

    Checks that ``api.getTasksWithoutConfigs`` was called at least four
    times and that every recorded call queried the RUNNING tasks of the
    ``mchucarroll/test/hello`` job, then checks that ``api.getTasksStatus``
    was called exactly once with the active-state query for the same job.

    Args:
        api: mock scheduler API whose recorded calls are inspected.

    Raises:
        AssertionError: if any recorded call does not match.
    """
    # getTasksWithoutConfigs gets called a lot of times. The exact number isn't fixed; it loops
    # over the health checks until all of them pass for a configured period of time.
    # The minimum number of calls is 20: once before the tasks are restarted, and then
    # once for each batch of restarts (Since the batch size is set to 1, and the
    # total number of tasks is 20, that's 20 batches.)
    # NOTE(review): the comment above says 20 but only 4 calls are required - confirm
    # which lower bound is intended.
    assert api.getTasksWithoutConfigs.call_count >= 4

    expected_owner = Identity(role='mchucarroll', user=None)
    expected_statuses = set([ScheduleStatus.RUNNING])
    for status_call in api.getTasksWithoutConfigs.call_args_list:
        # First positional argument of the recorded call is the query.
        query = status_call[0][0]
        # Each named field is asserted individually. Fields that are not named
        # here (e.g. instanceIds) are left unconstrained; presumably they vary
        # per restart batch - TODO confirm.
        assert query.taskIds is None
        assert query.jobName == 'hello'
        assert query.environment == 'test'
        assert query.owner == expected_owner
        assert query.statuses == expected_statuses

    # getTasksStatus is called only once, to generate the update instructions.
    assert api.getTasksStatus.call_count == 1
    api.getTasksStatus.assert_called_once_with(TaskQuery(
        taskIds=None,
        jobName='hello',
        environment='test',
        owner=Identity(role=u'mchucarroll', user=None),
        statuses=ACTIVE_STATES))
def test_kill_job_with_instances_deep_api(self):
    """Exercise the client-side kill logic for an explicit instance list.

    Runs ``job kill`` for instances ``0,2,4-6`` against a mocked scheduler
    proxy and checks that a single killTasks call is made, scoped to
    instances 0, 2, 4, 5 and 6.
    """
    _, scheduler_proxy = self.create_mock_api()
    patches = contextlib.nested(
        patch('threading._Event.wait'),
        patch('apache.aurora.client.api.SchedulerProxy', return_value=scheduler_proxy),
        patch('apache.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS))
    with patches:
        scheduler_proxy.killTasks.return_value = self.get_kill_job_response()
        self.setup_get_tasks_status_calls(scheduler_proxy)
        with temporary_file() as config_file:
            config_file.write(self.get_valid_config())
            config_file.flush()
            argv = [
                'job',
                'kill',
                '--config=%s' % config_file.name,
                'west/bozo/test/hello/0,2,4-6',
            ]
            AuroraCommandLine().execute(argv)

        # Now check that the right API calls got made.
        assert scheduler_proxy.killTasks.call_count == 1
        expected_query = TaskQuery(
            jobName='hello',
            environment='test',
            instanceIds=frozenset([0, 2, 4, 5, 6]),
            owner=Identity(role='bozo'))
        scheduler_proxy.killTasks.assert_called_with(expected_query, None)
def create_scheduled_task(instance, start_time):
    """Build a ScheduledTask fixture for the "flibber" job.

    Args:
        instance: value embedded in the task id and ancestor id; the
            failure count is ``instance + 4``.
        start_time: forwarded to ``create_task_events`` to build the
            task's event list.

    Returns:
        The populated ScheduledTask.
    """
    config = TaskConfig()
    config.owner = Identity(role="nobody")
    config.environment = "prod"
    config.jobName = "flibber"
    config.isService = False
    config.numCpus = 2
    config.ramMb = 2048
    config.diskMb = 4096
    config.priority = 7
    config.maxTaskFailures = 3
    config.production = False
    config.requestedPorts = ["http"]

    assigned = AssignedTask()
    assigned.taskId = "task_%s" % instance
    assigned.slaveId = "random_machine_id"
    assigned.slaveHost = "junk.nothing"
    assigned.task = config
    assigned.assignedPorts = {"http": 1001}
    # The instance id is fixed at 0 regardless of `instance`.
    assigned.instanceId = 0

    scheduled = ScheduledTask()
    scheduled.assignedTask = assigned
    scheduled.status = 2
    scheduled.failureCount = instance + 4
    scheduled.taskEvents = create_task_events(start_time)
    scheduled.ancestorId = "random_task_ancestor%s" % instance
    return scheduled
def test_successful_diff(self):
    """Check that a successful diff queries status, populates a config and shells out.

    The scheduler proxy, cluster table, option parsing, ``subprocess.call``
    and ``json.loads`` are all patched, so only the call pattern is checked.
    """
    options_stub = self.setup_mock_options()
    _, scheduler_proxy = self.create_mock_api()
    scheduler_proxy.getTasksStatus.return_value = self.create_status_response()
    self.setup_populate_job_config(scheduler_proxy)
    patches = contextlib.nested(
        patch('apache.aurora.client.api.SchedulerProxy', return_value=scheduler_proxy),
        patch('apache.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS),
        patch('twitter.common.app.get_options', return_value=options_stub),
        patch('subprocess.call', return_value=0),
        patch('json.loads', return_value=Mock()))
    with patches as (_proxy_cls, _clusters, _options, call_stub, _json_stub):
        with temporary_file() as config_file:
            config_file.write(self.get_valid_config())
            config_file.flush()
            diff(['west/mchucarroll/test/hello', config_file.name])

        # Diff should get the task status, populate a config, and run diff.
        expected_query = TaskQuery(
            jobName='hello',
            environment='test',
            owner=Identity(role='mchucarroll'),
            statuses=ACTIVE_STATES)
        scheduler_proxy.getTasksStatus.assert_called_with(expected_query)

        populate = scheduler_proxy.populateJobConfig
        assert populate.call_count == 1
        assert isinstance(populate.call_args[0][0], JobConfiguration)
        expected_key = JobKey(environment=u'test', role=u'mchucarroll', name=u'hello')
        assert populate.call_args[0][0].key == expected_key

        # Subprocess should have been used to invoke diff with two parameters.
        assert call_stub.call_count == 1
        assert len(call_stub.call_args[0][0]) == 3
        assert call_stub.call_args[0][0][0] == os.environ.get('DIFF_VIEWER', 'diff')
def test_simple_config():
    """Convert HELLO_WORLD with a 'health' port and check the resulting thrift job."""
    job = convert_pystachio_to_thrift(HELLO_WORLD, ports=frozenset(['health']))
    expected_key = JobKey(
        role=HELLO_WORLD.role().get(),
        environment=HELLO_WORLD.environment().get(),
        name=HELLO_WORLD.name().get())

    # Job-level fields.
    assert job.instanceCount == 1
    task_config = job.taskConfig
    assert job.key == expected_key
    assert job.owner == Identity(user=getpass.getuser())
    assert job.cronSchedule is None

    # Task-level fields.
    assert task_config.job == expected_key
    assert task_config.isService is False
    assert task_config.numCpus == 0.1
    assert task_config.ramMb == 64
    assert task_config.diskMb == 64
    assert task_config.requestedPorts == frozenset(['health'])
    assert task_config.production is False
    assert task_config.priority == 0
    assert task_config.maxTaskFailures == 1
    assert task_config.constraints == set()
    assert task_config.metadata == set()
    assert task_config.tier is None

    # Each resource must be present among the task's resources.
    assert Resource(numCpus=0.1) in list(task_config.resources)
    assert Resource(ramMb=64) in list(task_config.resources)
    assert Resource(diskMb=64) in list(task_config.resources)
    assert Resource(namedPort='health') in list(task_config.resources)
def create_mock_scheduled_tasks(cls):
    """Return three Mock scheduled tasks, keyed 'foo', 'bar' and 'baz'.

    Each mock carries a spec'd AssignedTask/TaskConfig/ExecutorConfig
    chain, a RUNNING status and a single RUNNING task event.
    """
    def build(name):
        # Innermost pieces first, then wire them together.
        executor_config = Mock(spec=ExecutorConfig)
        executor_config.data = Mock()

        task_config = Mock(spec=TaskConfig)
        task_config.maxTaskFailures = 1
        task_config.executorConfig = executor_config
        task_config.metadata = []
        task_config.owner = Identity(role='bozo')
        task_config.environment = 'test'
        task_config.jobName = 'woops'
        task_config.numCpus = 2
        task_config.ramMb = 2
        task_config.diskMb = 2

        assigned = Mock(spec=AssignedTask)
        assigned.slaveHost = 'slavehost'
        assigned.task = task_config
        assigned.instanceId = 4237894
        assigned.assignedPorts = None

        event = Mock(spec=TaskEvent)
        event.timestamp = 28234726395
        event.status = ScheduleStatus.RUNNING
        event.message = "Hi there"

        scheduled = Mock()
        scheduled.key = JobKey(role=cls.TEST_ROLE, environment=cls.TEST_ENV, name=name)
        scheduled.failure_count = 0
        scheduled.assignedTask = assigned
        scheduled.status = ScheduleStatus.RUNNING
        scheduled.taskEvents = [event]
        return scheduled

    return [build(name) for name in ['foo', 'bar', 'baz']]
def create_query(self, instances=None):
    """Build a TaskQuery for this object's job key.

    Args:
        instances: optional iterable of instance ids (anything ``int()``
            accepts). A falsy value leaves ``instanceIds`` as None.

    Returns:
        TaskQuery scoped by role, environment and job name, plus the
        instance ids when given.
    """
    if instances:
        instance_ids = frozenset([int(item) for item in instances])
    else:
        instance_ids = None
    key = self._job_key
    return TaskQuery(
        owner=Identity(role=key.role),
        environment=key.env,
        jobName=key.name,
        instanceIds=instance_ids)
def get_expected_task_query(cls, instances=None):
    """Build the TaskQuery expected for the class's test job.

    Args:
        instances: optional iterable of instance ids; None means the
            query carries no instance filter.

    Returns:
        TaskQuery for TEST_ROLE/TEST_ENV/TEST_JOB.
    """
    if instances is None:
        wanted_ids = None
    else:
        wanted_ids = frozenset(instances)
    owner = Identity(role=cls.TEST_ROLE, user=None)
    return TaskQuery(
        taskIds=None,
        jobName=cls.TEST_JOB,
        environment=cls.TEST_ENV,
        instanceIds=wanted_ids,
        owner=owner)
def test_diff_server_error(self):
    """Check ``job diff`` when the getTasksStatus call returns a failed response.

    The command must exit with EXIT_INVALID_PARAMETER and must not go on
    to populate a job config or invoke the diff subprocess.
    """
    options_stub = self.setup_mock_options()
    _, scheduler_proxy = self.create_mock_api()
    scheduler_proxy.getTasksStatus.return_value = self.create_failed_status_response()
    self.setup_populate_job_config(scheduler_proxy)
    patches = contextlib.nested(
        patch('apache.aurora.client.api.SchedulerProxy', return_value=scheduler_proxy),
        patch('apache.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS),
        patch('twitter.common.app.get_options', return_value=options_stub),
        patch('subprocess.call', return_value=0),
        patch('json.loads', return_value=Mock()))
    with patches as (_proxy_cls, _clusters, _options, call_stub, _json_stub):
        with temporary_file() as config_file:
            config_file.write(self.get_valid_config())
            config_file.flush()
            command_line = AuroraCommandLine()
            exit_code = command_line.execute(
                ['job', 'diff', 'west/bozo/test/hello', config_file.name])
            assert exit_code == EXIT_INVALID_PARAMETER

            # In this error case, we should have called the server getTasksStatus;
            # but since it fails, we shouldn't call populateJobConfig or subprocess.
            expected_query = TaskQuery(
                jobName='hello',
                environment='test',
                owner=Identity(role='bozo'),
                statuses=ACTIVE_STATES)
            scheduler_proxy.getTasksStatus.assert_called_with(expected_query)
            assert scheduler_proxy.populateJobConfig.call_count == 0
            assert call_stub.call_count == 0
def create_scheduled_task(instance, start_time):
    """Build a ScheduledTask fixture for the nobody/prod/flibber job.

    Args:
        instance: used as the instance id and embedded in the task id and
            ancestor id; the failure count is ``instance + 4``.
        start_time: forwarded to ``create_task_events`` to build the
            task's event list.

    Returns:
        The populated ScheduledTask.
    """
    job_key = JobKey(role="nobody", environment="prod", name='flibber')
    config = TaskConfig(
        job=job_key,
        owner=Identity(role="nobody"),
        environment="prod",
        jobName="flibber",
        isService=False,
        numCpus=2,
        ramMb=2048,
        diskMb=4096,
        priority=7,
        maxTaskFailures=3,
        production=False,
        requestedPorts=["http"])
    assigned = AssignedTask(
        taskId="task_%s" % instance,
        slaveId="random_machine_id",
        slaveHost="junk.nothing",
        task=config,
        assignedPorts={"http": 1001},
        instanceId=instance)
    return ScheduledTask(
        assignedTask=assigned,
        status=2,
        failureCount=instance + 4,
        taskEvents=create_task_events(start_time),
        ancestorId="random_task_ancestor%s" % instance)
def create_scheduled_tasks(cls):
    """Return three RUNNING ScheduledTask fixtures named 'foo', 'bar' and 'baz'.

    The tasks differ only in job name; task id, instance id, host and
    event data are the same for all three.
    """
    def build(name):
        config = TaskConfig(
            maxTaskFailures=1,
            executorConfig=ExecutorConfig(data='fake data'),
            metadata=[],
            job=JobKey(role=cls.TEST_ROLE, environment=cls.TEST_ENV, name=name),
            owner=Identity(role=cls.TEST_ROLE),
            environment=cls.TEST_ENV,
            jobName=name,
            numCpus=2,
            ramMb=2,
            diskMb=2)
        assigned = AssignedTask(
            taskId=1287391823,
            slaveHost='slavehost',
            task=config,
            instanceId=4237894,
            assignedPorts={})
        running_event = TaskEvent(
            timestamp=28234726395,
            status=ScheduleStatus.RUNNING,
            message="Hi there")
        return ScheduledTask(
            failureCount=0,
            assignedTask=assigned,
            status=ScheduleStatus.RUNNING,
            taskEvents=[running_event])

    return [build(name) for name in ['foo', 'bar', 'baz']]
def make_assigned_task(thermos_config, assigned_ports=None):
    """Wrap a thermos config in an AssignedTask fixture.

    Args:
        thermos_config: config providing role, name, environment and the
            JSON payload stored as the executor data.
        assigned_ports: optional port mapping; a falsy value becomes an
            empty dict.

    Returns:
        AssignedTask with instance id 12345 on host 'test-host'.
    """
    from gen.apache.aurora.api.constants import AURORA_EXECUTOR_NAME
    from gen.apache.aurora.api.ttypes import (
        AssignedTask,
        ExecutorConfig,
        Identity,
        JobKey,
        TaskConfig
    )

    if not assigned_ports:
        assigned_ports = {}

    executor = ExecutorConfig(name=AURORA_EXECUTOR_NAME, data=thermos_config.json_dumps())
    # NOTE(review): the JobKey environment is the literal "prod" while
    # TaskConfig.environment comes from thermos_config - confirm this is intended.
    job_key = JobKey(
        role=thermos_config.role().get(),
        environment="prod",
        name=thermos_config.name().get())
    owner = Identity(
        role=thermos_config.role().get(),
        user=thermos_config.role().get())
    config = TaskConfig(
        job=job_key,
        owner=owner,
        environment=thermos_config.environment().get(),
        jobName=thermos_config.name().get(),
        executorConfig=executor)
    return AssignedTask(
        instanceId=12345,
        task=config,
        assignedPorts=assigned_ports,
        slaveHost='test-host')
def test_simple_config():
    """Convert HELLO_WORLD with no ports and check the resulting thrift job."""
    role = HELLO_WORLD.role().get()
    environment = HELLO_WORLD.environment().get()
    job = convert_pystachio_to_thrift(HELLO_WORLD)
    expected_key = JobKey(
        role=role,
        environment=environment,
        name=HELLO_WORLD.name().get())

    # Job-level fields.
    assert job.instanceCount == 1
    task_config = job.taskConfig
    assert job.key == expected_key
    assert job.owner == Identity(role=role, user=getpass.getuser())
    assert job.cronSchedule is None

    # Task-level fields.
    assert task_config.job == expected_key
    assert task_config.jobName == 'hello_world'
    assert task_config.isService is False
    assert task_config.numCpus == 0.1
    assert task_config.ramMb == 64
    assert task_config.diskMb == 64
    assert task_config.requestedPorts == set()
    assert task_config.production is False
    assert task_config.priority == 0
    assert task_config.maxTaskFailures == 1
    assert task_config.constraints == set()
    assert task_config.metadata == set()
    assert task_config.environment == environment
    assert task_config.tier is None
def test_successful_ssh(self):
    """Check that ``task ssh`` queries instance 1 and runs the expected ssh command."""
    _, scheduler_proxy = self.create_mock_api()
    scheduler_proxy.getTasksStatus.return_value = self.create_status_response()
    sandbox_args = {'slave_root': '/slaveroot', 'slave_run_directory': 'slaverun'}
    patches = contextlib.nested(
        patch('apache.aurora.client.api.SchedulerProxy', return_value=scheduler_proxy),
        patch('apache.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS),
        patch('apache.aurora.client.api.command_runner.DistributedCommandRunner.sandbox_args',
              return_value=sandbox_args),
        patch('subprocess.call', return_value=0))
    with patches as (_proxy_cls, _clusters, _runner_args, call_stub):
        command_line = AuroraCommandLine()
        command_line.execute(['task', 'ssh', 'west/bozo/test/hello/1', '--command=ls'])

        # The status command sends a getTasksStatus query to the scheduler,
        # and then prints the result.
        live_statuses = set([
            ScheduleStatus.RUNNING,
            ScheduleStatus.KILLING,
            ScheduleStatus.RESTARTING,
            ScheduleStatus.PREEMPTING,
            ScheduleStatus.DRAINING,
        ])
        expected_query = TaskQuery(
            jobName='hello',
            environment='test',
            owner=Identity(role='bozo'),
            instanceIds=set([1]),
            statuses=live_statuses)
        scheduler_proxy.getTasksStatus.assert_called_with(expected_query)

        expected_argv = [
            'ssh',
            '-t',
            'bozo@slavehost',
            'cd /slaveroot/slaves/*/frameworks/*/executors/thermos-1287391823/runs/'
            'slaverun/sandbox;ls',
        ]
        call_stub.assert_called_with(expected_argv)
def _create_query(self, instance_ids):
    """Build a TaskQuery for RUNNING tasks of this object's job.

    Args:
        instance_ids: stored unchanged as the query's ``instanceIds``.

    Returns:
        TaskQuery scoped by role, environment, job name and instances.
    """
    key = self._job_key
    return TaskQuery(
        owner=Identity(role=key.role),
        environment=key.environment,
        jobName=key.name,
        statuses=set([ScheduleStatus.RUNNING]),
        instanceIds=instance_ids)
def get_tasks_status_query(self, instance_ids):
    """Build a TaskQuery for RUNNING tasks of this object's job.

    Args:
        instance_ids: iterable of instance ids; copied into a set.

    Returns:
        TaskQuery scoped by role, environment, job name and instances.
    """
    return TaskQuery(
        owner=Identity(role=self._role),
        environment=self._env,
        jobName=self._name,
        statuses=set([ScheduleStatus.RUNNING]),
        instanceIds=set(instance_ids))
def get_expected_task_query(cls, shards=None):
    """Build the TaskQuery expected as the parameter to job kill.

    Args:
        shards: optional iterable of instance ids; None means the query
            carries no instance filter.

    Returns:
        TaskQuery for TEST_ROLE/TEST_ENV/TEST_JOB.
    """
    query_fields = dict(
        taskIds=None,
        jobName=cls.TEST_JOB,
        environment=cls.TEST_ENV,
        instanceIds=None,
        owner=Identity(role=cls.TEST_ROLE, user=None))
    if shards is not None:
        query_fields['instanceIds'] = frozenset(shards)
    return TaskQuery(**query_fields)
def expect_task_status_call_job_scoped(self):
    """Assert getTasksWithoutConfigs was called once with the job-scoped live-state query."""
    expected_query = TaskQuery(
        owner=Identity(role=self._role),
        environment=self._env,
        jobName=self._name,
        statuses=LIVE_STATES)
    self._scheduler.getTasksWithoutConfigs.assert_called_once_with(expected_query)
def create_task(cls):
    """Return a one-element list holding a RUNNING task for test_role/test_job, instance 0."""
    config = TaskConfig(owner=Identity(role='test_role'), jobName='test_job')
    assigned = AssignedTask(instanceId=0, task=config)
    scheduled = ScheduledTask(assignedTask=assigned, status=ScheduleStatus.RUNNING)
    return [scheduled]
def make_job_config(self, task_config, instance_count, cron_schedule=None):
    """Build a JobConfiguration for this object's job key.

    Args:
        task_config: task config; a deep copy is stored so the caller's
            object is not shared with the result.
        instance_count: stored as ``instanceCount``.
        cron_schedule: optional value stored as ``cronSchedule``.

    Returns:
        The assembled JobConfiguration.
    """
    key = self._job_key
    owner = Identity(role=key.role)
    config_copy = deepcopy(task_config)
    return JobConfiguration(
        key=key,
        owner=owner,
        cronSchedule=cron_schedule,
        taskConfig=config_copy,
        instanceCount=instance_count)
def assert_correct_killtask_calls(cls, api):
    """Assert killTasks was called 20 times and that the last call targeted instance 19.

    Args:
        api: mock scheduler API whose recorded killTasks calls are inspected.
    """
    kill_tasks = api.killTasks
    assert kill_tasks.call_count == 20

    # Check the last call's parameters.
    last_query = TaskQuery(
        taskIds=None,
        jobName='hello',
        environment='test',
        instanceIds=frozenset([19]),
        owner=Identity(role=u'mchucarroll', user=None),
        statuses=ACTIVE_STATES)
    kill_tasks.assert_called_with(last_query, 'foo')
def expect_task_status(self, once=False, instances=None):
    """Assert how getTasksWithoutConfigs was called for this object's job.

    Args:
        once: when true, require exactly one call with the expected query;
            otherwise only the most recent call is checked.
        instances: optional iterable of instance ids (anything ``int()``
            accepts) that the query must be scoped to.
    """
    key = self._job_key
    expected = TaskQuery(
        owner=Identity(role=key.role),
        environment=key.env,
        jobName=key.name)
    if instances is not None:
        expected.instanceIds = frozenset([int(item) for item in instances])

    status_mock = self._scheduler.getTasksWithoutConfigs
    if once:
        status_mock.assert_called_once_with(expected)
        return
    status_mock.assert_called_with(expected)
def create_task_config(cls, name):
    """Build a TaskConfig fixture for the given job name.

    Args:
        name: used as both the JobKey name and ``jobName``.

    Returns:
        TaskConfig owned by TEST_ROLE in TEST_ENV with fixed resources.
    """
    job_key = JobKey(role=cls.TEST_ROLE, environment=cls.TEST_ENV, name=name)
    executor = ExecutorConfig(data='fake data')
    return TaskConfig(
        maxTaskFailures=1,
        executorConfig=executor,
        metadata=[],
        job=job_key,
        owner=Identity(role=cls.TEST_ROLE),
        environment=cls.TEST_ENV,
        jobName=name,
        numCpus=2,
        ramMb=2,
        diskMb=2)
def make_task(thermos_config, assigned_ports=None, **kw):
    """Wrap a thermos config in a Mesos TaskInfo carrying a serialized AssignedTask.

    Args:
        thermos_config: config whose task name seeds the task id and whose
            JSON dump becomes the executor data.
        assigned_ports: port mapping for the AssignedTask. Defaults to a
            fresh empty dict per call, so no dict is shared between calls.
        **kw: extra keyword arguments forwarded to AssignedTask.

    Returns:
        mesos_pb2.TaskInfo with ``task_id``, ``name`` and ``data`` populated.
    """
    if assigned_ports is None:
        assigned_ports = {}

    # The current user doubles as both role and user of the task owner.
    role = getpass.getuser()
    task_name = thermos_config.task().name().get()
    task_id = task_name + '-001'

    executor_config = ExecutorConfig(
        name=AURORA_EXECUTOR_NAME,
        data=thermos_config.json_dumps())
    at = AssignedTask(
        taskId=task_id,
        task=TaskConfig(
            executorConfig=executor_config,
            owner=Identity(role=role, user=role)),
        assignedPorts=assigned_ports,
        **kw)

    td = mesos_pb2.TaskInfo()
    td.task_id.value = task_id
    td.name = task_name
    td.data = serialize(at)
    return td
def task_query(job_key=None, hosts=None, job_keys=None):
    """Create a live-state TaskQuery, optionally scoped by job(s) or hosts.

    Arguments:
      job_key -- AuroraJobKey to scope the query by.
      hosts -- list of hostnames to scope the query by.
      job_keys -- list of AuroraJobKeys to scope the query by.
    """
    owner = None
    environment = None
    job_name = None
    if job_key:
        owner = Identity(role=job_key.role)
        environment = job_key.env
        job_name = job_key.name

    slave_hosts = None
    if hosts:
        slave_hosts = set(hosts)

    thrift_keys = None
    if job_keys:
        thrift_keys = set(key.to_thrift() for key in job_keys)

    return TaskQuery(
        owner=owner,
        environment=environment,
        jobName=job_name,
        slaveHosts=slave_hosts,
        jobKeys=thrift_keys,
        statuses=LIVE_STATES)
def create_task(self, duration, id, host=None, name=None, prod=None):
    """Build a RUNNING ScheduledTask whose RUNNING event lies ``duration`` in the past.

    Args:
        duration: subtracted from ``time.time()``; the result is multiplied
            by 1000 to form the event timestamp.
        id: instance id of the task.
        host: optional slave host.
        name: job name; a falsy value falls back to ``self._name``.
        prod: production flag; None is treated as True.

    Returns:
        The assembled ScheduledTask.
    """
    is_production = True if prod is None else prod
    job_name = name or self._name
    config = TaskConfig(
        production=is_production,
        jobName=job_name,
        owner=Identity(role=self._role),
        environment=self._env)
    assigned = AssignedTask(instanceId=id, slaveHost=host, task=config)
    running_event = TaskEvent(
        status=ScheduleStatus.RUNNING,
        timestamp=(time.time() - duration) * 1000)
    return ScheduledTask(
        assignedTask=assigned,
        status=ScheduleStatus.RUNNING,
        taskEvents=[running_event])
def test_successful_status_deep(self):
    """Check that ``job status`` for a fully specified job queries getTasksWithoutConfigs.

    The query must be scoped to the bozo/test/hello job.
    """
    _, scheduler_proxy = self.create_mock_api()
    scheduler_proxy.query.return_value = self.create_status_response()
    patches = contextlib.nested(
        patch('apache.aurora.client.api.SchedulerProxy', return_value=scheduler_proxy),
        patch('apache.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS))
    with patches:
        command_line = AuroraCommandLine()
        command_line.execute(['job', 'status', 'west/bozo/test/hello'])
        expected_query = TaskQuery(
            jobName='hello',
            environment='test',
            owner=Identity(role='bozo'))
        scheduler_proxy.getTasksWithoutConfigs.assert_called_with(expected_query)
def make_task_configs(self, count=1, prod=True):
    """Return a list of ``count`` references to one TaskConfig for this object's job.

    Args:
        count: length of the returned list.
        prod: value of the config's ``production`` flag.

    Returns:
        A list in which every entry is the same TaskConfig object.
    """
    key = self._job_key
    # Not setting any set()-related properties as that throws off mox verification.
    shared_config = TaskConfig(
        owner=Identity(role=key.role),
        environment=key.environment,
        jobName=key.name,
        numCpus=self._num_cpus,
        ramMb=self._num_ram,
        diskMb=self._num_disk,
        priority=0,
        maxTaskFailures=1,
        production=prod,
        taskLinks={'task': 'link'},
        contactEmail='*****@*****.**',
        executorConfig=ExecutorConfig(name='test', data='test data'))
    # List repetition reuses the single object rather than copying it.
    return [shared_config] * count
def generic_test_successful_run(self, cmd_args, instances):
    """Shared body of the successful ``run`` tests.

    Params:
      cmd_args: the arguments to pass to the aurora command line to run this test.
      instances: the list of instances that should be passed to a status query.
        (The status query is the only visible difference between a sharded run,
        and an all-instances run in the test.)
    """
    # Calls api.check_status, which calls scheduler_proxy.getJobs
    _, scheduler_proxy = self.create_mock_api()
    scheduler_proxy.getTasksStatus.return_value = self.create_status_response()
    sandbox_args = {'slave_root': '/slaveroot', 'slave_run_directory': 'slaverun'}
    patches = contextlib.nested(
        patch('apache.aurora.client.cli.task.print_aurora_log', side_effect=mock_log),
        patch('apache.aurora.client.api.SchedulerProxy', return_value=scheduler_proxy),
        patch('apache.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS),
        patch('apache.aurora.client.cli.task.CLUSTERS', new=self.TEST_CLUSTERS),
        patch('apache.aurora.client.api.command_runner.'
              'InstanceDistributedCommandRunner.sandbox_args',
              return_value=sandbox_args),
        patch('subprocess.Popen', return_value=self.create_mock_process()))
    with patches as (_log, _proxy_cls, _clusters, _cli_clusters, _runner_args, popen_stub):
        command_line = AuroraCommandLine()
        command_line.execute(cmd_args)

        # The status command sends a getTasksStatus query to the scheduler,
        # and then prints the result. The use of shards, above, should change
        # this query - that's the focus of the instances test.
        live_statuses = set([
            ScheduleStatus.RUNNING,
            ScheduleStatus.KILLING,
            ScheduleStatus.RESTARTING,
            ScheduleStatus.PREEMPTING,
            ScheduleStatus.DRAINING,
        ])
        expected_query = TaskQuery(
            jobName='hello',
            environment='test',
            owner=Identity(role='bozo'),
            statuses=live_statuses,
            instanceIds=instances)
        scheduler_proxy.getTasksStatus.assert_called_with(expected_query)

        # The mock status call returns three ScheduledTasks, so three commands
        # should have been run.
        assert popen_stub.call_count == 3
        expected_argv = [
            'ssh',
            '-n',
            '-q',
            'bozo@slavehost',
            'cd /slaveroot/slaves/*/frameworks/*/executors/thermos-1287391823/runs/'
            'slaverun/sandbox;ls',
        ]
        popen_stub.assert_called_with(expected_argv, stderr=-2, stdout=-1)

        # Check that logging worked properly:
        assert any("Running command" in entry[1] for entry in MOCK_LOG_CONTENTS)
def expect_get_tasks(self, tasks, ignore_ids=None, response_code=None):
    """Record an expected getTasksStatus call returning the given tasks.

    Args:
        tasks: task configs; each is wrapped in a ScheduledTask whose
            instance id is its position in this sequence.
        ignore_ids: optional collection of positions to leave out of the
            response.
        response_code: response code to return; None means ResponseCode.OK.
    """
    if response_code is None:
        response_code = ResponseCode.OK
    response = Response(responseCode=response_code, messageDEPRECATED='test')

    # Instance ids keep the original positions even when entries are skipped.
    scheduled = [
        ScheduledTask(assignedTask=AssignedTask(task=task, instanceId=index))
        for index, task in enumerate(tasks)
        if not ignore_ids or index not in ignore_ids
    ]
    response.result = Result(scheduleStatusResult=ScheduleStatusResult(tasks=scheduled))

    key = self._job_key
    query = TaskQuery(
        owner=Identity(role=key.role),
        environment=key.environment,
        jobName=key.name,
        statuses=ACTIVE_STATES)
    self._scheduler.getTasksStatus(query).AndReturn(response)