def test__create_batch_job():
    """Create a batch job from ``test_job_no_container.yaml`` and wait on it.

    ``wait_for_state()`` is called with its defaults, so the awaited state is
    whatever the ``Job`` helper uses by default.
    """
    retry_config = IntegrationTestConfig(max_retry_attempts=100)
    batch_job = Job(
        job_file="test_job_no_container.yaml",
        config=retry_config,
    )
    batch_job.create()
    batch_job.wait_for_state()
def test_placement_exclusive_job(exclusive_host, peloton_client):
    """Run a job constrained to the exclusive host label and check placement.

    The job carries a host label constraint on
    ``peloton/exclusive=exclusive-test-label``; after it finishes, every
    task's runtime host name must contain ``"exclusive"``.
    """
    exclusive_label = peloton_pb2.Label(
        key="peloton/exclusive", value="exclusive-test-label"
    )
    host_label_constraint = task_pb2.LabelConstraint(
        kind=2,  # Host
        condition=2,  # Equal
        requirement=1,
        label=exclusive_label,
    )
    excl_constraint = task_pb2.Constraint(
        type=1,  # Label constraint
        labelConstraint=host_label_constraint,
    )

    # Ask for a few more instances than a single exclusive host can run.
    poll_config = IntegrationTestConfig(max_retry_attempts=100, sleep_time_sec=2)
    job_options = [with_constraint(excl_constraint), with_instance_count(6)]
    exclusive_job = Job(
        client=peloton_client,
        job_file="long_running_job.yaml",
        config=poll_config,
        options=job_options,
    )
    exclusive_job.job_config.defaultConfig.command.value = "sleep 10"
    exclusive_job.create()
    exclusive_job.wait_for_state()

    # Each task must have been placed on a host marked as exclusive.
    for _instance_id, info in exclusive_job.list_tasks().value.items():
        assert "exclusive" in info.runtime.host
def test__preemption_task_level(respool_a, respool_b):
    """Check that preemption hits exactly the odd-numbered instances of job A.

    Job A is started in ``respool_a`` and brought fully up; starting job B in
    ``respool_b`` is expected to trigger preemption. The test then requires
    instances 1, 3, ..., 11 of job A to be KILLED and 0, 2, ..., 10 to remain
    RUNNING.
    """
    job_a = Job(
        job_file="test_preemptible_job_preemption_override.yaml",
        pool=respool_a,
        config=IntegrationTestConfig(max_retry_attempts=100, sleep_time_sec=10),
    )
    job_a.create()
    job_a.wait_for_state(goal_state="RUNNING")

    def all_running():
        # State value 8 is the one the original check treats as "running".
        for t in job_a.get_tasks().values():
            if t.state != 8:
                return False
        return True

    # All 12 tasks must be up before preemption is provoked.
    job_a.wait_for_condition(all_running)

    odd_instances = set(range(1, 12, 2))    # expected to be preempted
    even_instances = set(range(0, 12, 2))   # expected to keep running
    killed_ids = set()
    running_ids = set()

    def task_preempted():
        # Start from a clean slate on each poll so the sets reflect only the
        # latest snapshot of job A.
        killed_ids.clear()
        running_ids.clear()
        killed, running = 0, 0
        for t in job_a.get_tasks().values():
            # Preempted tasks show up as KILLED since killOnPreempt is true.
            if t.state == task.KILLED:
                killed += 1
                killed_ids.add(t.instance_id)
            if t.state == task.RUNNING:
                running += 1
                running_ids.add(t.instance_id)
        return running == 6 and killed == 6

    job_b = Job(
        job_file="test_preemptible_job.yaml",
        pool=respool_b,
        config=IntegrationTestConfig(),
    )
    # Creating the second job changes the entitlement calculation, which
    # starts preempting tasks from job A.
    job_b.create()

    # Six tasks (6 CPUs worth) should be taken away from job A.
    job_a.wait_for_condition(task_preempted)

    assert killed_ids == odd_instances
    assert running_ids == even_instances

    # Job B should now be able to run.
    job_b.wait_for_state("RUNNING")

    kill_jobs([job_a, job_b])
def test__host_limit(peloton_client):
    """Verify that a job with a host limit of 1 spreads its running tasks.

    The job from ``test_stateless_job_host_limit_1.yaml`` is started, then
    polled until every RUNNING task sits on a distinct host and exactly one
    task is PENDING. The job is stopped and awaited in KILLED afterwards.
    """
    job = Job(
        client=peloton_client,
        job_file="test_stateless_job_host_limit_1.yaml",
        config=IntegrationTestConfig(max_retry_attempts=100, sleep_time_sec=2),
    )
    job.create()
    job.wait_for_state(goal_state="RUNNING")

    # Resolve the enum values once rather than for every task on every poll.
    running_state = task_pb2.TaskState.Value("RUNNING")
    pending_state = task_pb2.TaskState.Value("PENDING")

    def different_hosts_for_running_tasks():
        hosts = set()
        num_running, num_pending = 0, 0
        # Only the task infos are needed; the instance-id keys are unused
        # (the original bound them to a name shadowing the builtin ``id``).
        for task_info in job.list_tasks().value.values():
            state = task_info.runtime.state
            if state == running_state:
                num_running += 1
                hosts.add(task_info.runtime.host)
            if state == pending_state:
                num_pending += 1

        # The number of running tasks must equal the number of distinct
        # hosts, and exactly one task must still be PENDING.
        return len(hosts) == num_running and num_pending == 1

    job.wait_for_condition(different_hosts_for_running_tasks)

    job.stop()
    job.wait_for_state(goal_state="KILLED")
def test_update_job_increase_instances(peloton_client):
    """Update a running job to a larger instance count and verify the growth.

    Waits until 3 tasks are in state 8 or 9 (reported as running/completed),
    applies ``long_running_job_update_instances.yaml``, then waits until 4
    tasks are in those states.
    """
    job = Job(
        client=peloton_client,
        job_file="long_running_job.yaml",
        config=IntegrationTestConfig(max_retry_attempts=100),
    )
    job.create()
    job.wait_for_state(goal_state="RUNNING")

    # Number of running/completed tasks expected before the update.
    expected_count = 3

    def tasks_count():
        # ``expected_count`` is read at call time, so rebinding it below
        # changes what this condition waits for.
        count = sum(1 for t in job.get_tasks().values() if t.state in (8, 9))
        print("total instances running/completed: %d" % count)
        return count == expected_count

    job.wait_for_condition(tasks_count)

    # Apply the new config; the task count should grow to 4.
    job.update(new_job_file="long_running_job_update_instances.yaml")
    expected_count = 4
    job.wait_for_condition(tasks_count)
    job.wait_for_state(goal_state="RUNNING")

    kill_jobs([job])
def test__create_job(peloton_client):
    """Create a job with the helper's default job file and wait on it.

    Only the client and a retry budget of 100 attempts are supplied; both
    ``Job`` and ``wait_for_state()`` otherwise run with their defaults.
    """
    retry_config = IntegrationTestConfig(max_retry_attempts=100)
    default_job = Job(client=peloton_client, config=retry_config)
    default_job.create()
    default_job.wait_for_state()
def test__create_a_stateless_job_with_3_tasks_on_3_different_hosts():
    """Start a 3-instance stateless job and require 3 distinct hosts.

    The job is labelled ``job.name=peloton_stateless_job`` and carries a
    constraint built from that same label by ``_label_constraint``. Once it
    is RUNNING, the runtime host of every task is collected; the job is
    killed before the final assertion so it is cleaned up either way.
    """
    label_key = "job.name"
    label_value = "peloton_stateless_job"

    job = Job(
        job_file="test_stateless_job.yaml",
        config=IntegrationTestConfig(
            max_retry_attempts=100,
            pool_file='test_stateless_respool.yaml',
        ),
        options=[
            with_labels({label_key: label_value}),
            with_constraint(_label_constraint(label_key, label_value)),
            with_instance_count(3),
        ],
    )

    job.create()
    job.wait_for_state(goal_state="RUNNING")

    # values() works on both Python 2 and 3 (iteritems() is Python 2 only),
    # and the instance-id keys were never used.
    hosts = {t.get_info().runtime.host for t in job.get_tasks().values()}

    kill_jobs([job])

    # The three tasks must have landed on three different hosts.
    assert len(hosts) == 3
def test__run_failing_job():
    """Run ``test_job_fail.yaml`` and expect it to end up FAILED.

    ``SUCCEEDED`` is passed as ``failed_state`` (presumably making a
    successful job the failing outcome for the wait). Afterwards
    ``get_task_runs(0)`` must report exactly 4 entries.
    """
    retry_config = IntegrationTestConfig(max_retry_attempts=100)
    failing_job = Job(job_file='test_job_fail.yaml', config=retry_config)
    failing_job.create()
    failing_job.wait_for_state(goal_state='FAILED', failed_state='SUCCEEDED')

    runs = failing_job.get_task_runs(0)
    assert len(runs) == 4
def test_job_succeeds_if_controller_task_succeeds():
    """Expect the job to reach SUCCEEDED when only its controller task succeeds.

    Per the job spec's intent, the other tasks fail, but the controller task
    alone should decide the job's terminal state.
    """
    controller_job = Job(job_file='test_job_succecced_controller_task.yaml')
    controller_job.create()
    controller_job.wait_for_state(goal_state='SUCCEEDED')

    started_jobs = [controller_job]
    kill_jobs(started_jobs)
def test_large_job():
    """Load-test a cluster with a 10000-instance job.

    Not suitable for a local minicluster.
    """
    generous_config = IntegrationTestConfig(max_retry_attempts=1000)
    big_job = Job(job_file='test_job_no_container.yaml', config=generous_config)
    # Scale the job up before it is submitted.
    big_job.job_config.instanceCount = 10000

    big_job.create()
    big_job.wait_for_state()
def test__create_job_without_default_config():
    """Create a job whose config lives only in per-instance entries.

    The job's ``defaultConfig`` is cleared and its previous content is copied
    into ``instanceConfig[i]`` for every instance index before the job is
    created and awaited.
    """
    job = Job(config=IntegrationTestConfig(max_retry_attempts=100))

    # Keep a handle on the default config before removing it from the job.
    default_config = job.job_config.defaultConfig
    job.job_config.ClearField('defaultConfig')

    instance_id = 0
    while instance_id < job.job_config.instanceCount:
        job.job_config.instanceConfig[instance_id].CopyFrom(default_config)
        instance_id += 1

    job.create()
    job.wait_for_state()
def test__run_failing_job(peloton_client):
    """Run ``test_job_fail.yaml`` via the given client and expect FAILED.

    ``SUCCEEDED`` is passed as ``failed_state`` (presumably making a
    successful job the failing outcome for the wait). Afterwards
    ``get_task_runs(0)`` must report exactly 4 entries.
    """
    retry_config = IntegrationTestConfig(max_retry_attempts=100)
    failing_job = Job(
        client=peloton_client,
        job_file="test_job_fail.yaml",
        config=retry_config,
    )
    failing_job.create()
    failing_job.wait_for_state(goal_state="FAILED", failed_state="SUCCEEDED")

    runs = failing_job.get_task_runs(0)
    assert len(runs) == 4
def test__create_a_batch_job_and_restart_jobmgr_completes_jobs(jobmgr):
    """Restart the job manager right after job creation; the job must still finish.

    The restart is issued immediately so that some tasks are left unallocated
    and others merely initialized when the job manager goes down.
    """
    retry_config = IntegrationTestConfig(max_retry_attempts=100)
    batch_job = Job(job_file='test_job_no_container.yaml', config=retry_config)
    batch_job.create()

    # No wait here on purpose: the restart should interrupt job setup.
    jobmgr.restart()

    batch_job.wait_for_state()

    kill_jobs([batch_job])
def test_job_succeeds_if_controller_task_succeeds(peloton_client):
    """Expect the job to reach SUCCEEDED when only its controller task succeeds.

    Per the job spec's intent, the other tasks fail, but the controller task
    alone should decide the job's terminal state.
    """
    controller_job = Job(
        client=peloton_client,
        job_file="test_job_succecced_controller_task.yaml",
    )
    controller_job.create()
    controller_job.wait_for_state(goal_state="SUCCEEDED")

    started_jobs = [controller_job]
    kill_jobs(started_jobs)
def test_placement_strategy_spread():
    """Run 3 instances with the SPREAD_JOB strategy; no host may repeat."""
    spread_job = Job(job_file="test_task.yaml", options=[with_instance_count(3)])
    spread_job.job_config.placementStrategy = (
        job_pb2.PLACEMENT_STRATEGY_SPREAD_JOB
    )
    spread_job.create()
    spread_job.wait_for_state()

    # Fail as soon as any host shows up a second time.
    seen_hosts = set()
    for _instance_id, info in spread_job.list_tasks().value.items():
        host = info.runtime.host
        assert host not in seen_hosts
        seen_hosts.add(host)
def test_controller_task_limit():
    """Exercise the controller limit of a resource pool.

    Once a controller task has used up the limit, further controller tasks
    cannot be admitted:

    1. Start a first controller job that consumes the whole limit.
    2. Start a second controller job and check it stays PENDING.
    3. Stop the first job and check the second one starts RUNNING.
    """
    controller_job_file = 'test_controller_job.yaml'
    limited_pool_file = 'test_respool_controller_limit.yaml'

    # The first job takes the entire controller limit.
    first_job = Job(
        job_file=controller_job_file,
        config=IntegrationTestConfig(pool_file=limited_pool_file),
    )
    first_job.create()
    first_job.wait_for_state(goal_state='RUNNING')

    # The second job has nothing left to be admitted against.
    second_job = Job(
        job_file=controller_job_file,
        config=IntegrationTestConfig(pool_file=limited_pool_file),
    )
    second_job.create()

    # Give the second job 5 seconds in which it could (wrongly) start.
    time.sleep(5)
    second_job.wait_for_state(goal_state='PENDING')

    # Free the limit by stopping the first job.
    first_job.stop()
    first_job.wait_for_state(goal_state='KILLED')

    # Now the second job should be admitted.
    second_job.wait_for_state(goal_state='RUNNING')

    kill_jobs([second_job])
def test__simple_revocable_batch_and_stateless_colocate():
    """Run revocable and non-revocable stateless and batch jobs side by side.

    Two stateless jobs are brought up with all pods running, then two batch
    jobs are started in ``test_stateless_respool.yaml``. Both batch jobs must
    reach RUNNING and afterwards the default ``wait_for_state()`` target.
    """

    def start_stateless(spec_file):
        # Create the stateless job and block until every pod is running.
        stateless_job = StatelessJob(job_file=spec_file)
        stateless_job.create()
        stateless_job.wait_for_state(goal_state="RUNNING")
        stateless_job.wait_for_all_pods_running()
        return stateless_job

    def start_batch(spec_file):
        # Create the batch job in the stateless respool and wait for RUNNING.
        batch_job = Job(
            job_file=spec_file,
            config=IntegrationTestConfig(
                pool_file='test_stateless_respool.yaml'
            ),
        )
        batch_job.create()
        batch_job.wait_for_state(goal_state="RUNNING")
        return batch_job

    # The stateless handles are not needed once the jobs are up.
    start_stateless("test_stateless_job_revocable_spec.yaml")
    start_stateless("test_stateless_job_spec.yaml")

    revocable_batch_job = start_batch("test_job_revocable.yaml")
    non_revocable_batch_job = start_batch("test_job.yaml")

    # Both batch jobs must also reach the default awaited state.
    revocable_batch_job.wait_for_state()
    non_revocable_batch_job.wait_for_state()
def test_placement_non_exclusive_job(exclusive_host):
    """Run an unconstrained job and check that it avoids the exclusive host.

    After the job finishes, no task's runtime host name may contain
    ``"exclusive"``.
    """
    # Use a few more instances than 2 (non-exclusive) hosts can run.
    poll_config = IntegrationTestConfig(max_retry_attempts=100, sleep_time_sec=2)
    plain_job = Job(
        job_file='long_running_job.yaml',
        config=poll_config,
        options=[with_instance_count(12)],
    )
    plain_job.job_config.defaultConfig.command.value = "sleep 10"
    plain_job.create()
    plain_job.wait_for_state()

    # None of the tasks may have been placed on the exclusive host.
    for _instance_id, info in plain_job.list_tasks().value.items():
        assert "exclusive" not in info.runtime.host
def test_placement_strategy_pack():
    """Run 5 instances with the PACK_HOST strategy; all must share one host."""
    pack_job = Job(job_file="test_task.yaml", options=[with_instance_count(5)])
    pack_job.job_config.placementStrategy = job_pb2.PLACEMENT_STRATEGY_PACK_HOST
    pack_job.create()
    pack_job.wait_for_state()

    # Compare each task's host with the previous one; an empty previous host
    # means there is nothing to compare against yet.
    previous_host = ""
    for _instance_id, info in pack_job.list_tasks().value.items():
        current_host = info.runtime.host
        assert not previous_host or current_host == previous_host
        previous_host = current_host
def test_placement_strategy_pack():
    """Run 5 instances of ``test_task.yaml`` and require a single shared host.

    NOTE: the PACK_HOST placement strategy is not actually set yet (see the
    TODO below), so the job runs with whatever strategy its config already
    carries.
    """
    job = Job(job_file="test_task.yaml", options=[with_instance_count(5)])
    # TODO: after the peloton-client changes land, set
    #   job.job_config.placementStrategy = "PLACEMENT_STRATEGY_PACK_HOST"
    # (This note was previously a no-op string literal in the function body.)
    job.create()
    job.wait_for_state()

    # Check that all tasks ran on the same host by comparing each host with
    # the previous one; the instance-id keys are not needed.
    the_host = ""
    for task_info in job.list_tasks().value.values():
        if the_host:
            assert task_info.runtime.host == the_host
        the_host = task_info.runtime.host
def test__preemption_spark_goalstate(respool_a, respool_b):
    """Check the runtime state of tasks preempted under a preemption policy.

    Job A (``test_preemptible_job_preemption_policy.yaml``) is brought fully
    up in ``respool_a``; creating job B in ``respool_b`` is expected to
    preempt 6 of A's tasks. Each preempted task must be KILLED with a goal
    state of PREEMPTING.
    """
    p_job_a = Job(
        job_file="test_preemptible_job_preemption_policy.yaml",
        pool=respool_a,
        config=IntegrationTestConfig(
            max_retry_attempts=100, sleep_time_sec=10
        ),
    )

    p_job_a.create()
    p_job_a.wait_for_state(goal_state="RUNNING")

    # We should have all 12 tasks in running state (state value 8).
    def all_running():
        return all(t.state == 8 for t in p_job_a.get_tasks().values())

    p_job_a.wait_for_condition(all_running)

    # Tasks found KILLED in the most recent poll of job A.
    preempted_tasks = []

    # 6 tasks (6 CPUs worth) of p_job_a should be preempted.
    def task_preempted():
        # Rebuild the list on every poll so that the final assertions only
        # look at the snapshot that satisfied the condition, not at entries
        # left over from earlier polls.
        del preempted_tasks[:]
        for t in p_job_a.get_tasks().values():
            # Tasks should be KILLED since killOnPreempt is set to true.
            if t.state == task.KILLED:
                preempted_tasks.append(t)
        return len(preempted_tasks) == 6

    p_job_b = Job(
        job_file="test_preemptible_job.yaml",
        pool=respool_b,
        config=IntegrationTestConfig(),
    )
    # Starting the second job should change the entitlement calculation.
    p_job_b.create()

    # 6 tasks should be preempted from job A to make space for job B.
    p_job_a.wait_for_condition(task_preempted)

    # Check the runtime info of every preempted task.
    for t in preempted_tasks:
        assert t.state == task.KILLED
        assert t.goal_state == task.PREEMPTING

    kill_jobs([p_job_a, p_job_b])
def test__preemption_tasks_reschedules_task(respool_a, respool_b):
    """Check that preempted tasks are re-enqueued and become PENDING.

    Job A is brought fully up in ``respool_a``. Creating job B in
    ``respool_b`` should preempt 6 of A's tasks, which are expected to show
    up as PENDING again. Job B must then reach SUCCEEDED.
    """
    job_a = Job(
        job_file="test_preemptible_job.yaml",
        pool=respool_a,
        config=IntegrationTestConfig(max_retry_attempts=100, sleep_time_sec=5),
    )
    job_a.create()
    job_a.wait_for_state(goal_state="RUNNING")

    def all_running():
        # State value 8 is the one the original check treats as "running".
        for t in job_a.get_tasks().values():
            if t.state != 8:
                return False
        return True

    # All 12 tasks must be up before preemption is provoked.
    job_a.wait_for_condition(all_running)

    def task_preempted():
        # The job manager should enqueue preempted tasks again; once
        # enqueued they transition to PENDING.
        pending = sum(
            1 for t in job_a.get_tasks().values() if t.state == task.PENDING
        )
        return pending == 6

    job_b = Job(
        job_file="test_preemptible_job.yaml",
        pool=respool_b,
        config=IntegrationTestConfig(
            max_retry_attempts=100, sleep_time_sec=10
        ),
    )
    # Creating the second job changes the entitlement calculation.
    job_b.create()

    # Six tasks (6 CPUs worth) should be taken from job A for job B.
    job_a.wait_for_condition(task_preempted)

    # Job B should run to completion.
    job_b.wait_for_state(goal_state="SUCCEEDED")

    kill_jobs([job_a, job_b])
def test_non_preemptible_job(respool_a):
    """Check admission of non-preemptible jobs against the pool reservation.

    A first non-preemptible job consumes the whole CPU reservation, so a
    second one must stay PENDING while a preemptible job can still run.
    After the first job is stopped, the second must reach RUNNING.
    """

    def retry_config(**extra):
        # Every job here shares the same retry budget.
        return IntegrationTestConfig(max_retry_attempts=100, **extra)

    # First non-preemptible job: uses up all of the CPU reservation.
    first_np_job = Job(
        job_file="test_non_preemptible_job.yaml",
        pool=respool_a,
        config=retry_config(),
    )
    first_np_job.create()
    first_np_job.wait_for_state(goal_state="RUNNING")

    # The pool's CPU allocation should now equal its reservation.
    pool = first_np_job.pool
    assert pool.get_reservation("cpu") == pool.get_allocation("cpu")

    # Second non-preemptible job: cannot be admitted, since the pool's CPU
    # reservation is exhausted.
    second_np_job = Job(
        job_file="test_non_preemptible_job.yaml",
        pool=respool_a,
        config=retry_config(sleep_time_sec=5),
    )
    second_np_job.create()
    second_np_job.wait_for_state(goal_state="PENDING")

    # A preemptible job should still be able to start.
    preemptible_job = Job(
        job_file="test_job.yaml",
        pool=respool_a,
        config=retry_config(),
    )
    preemptible_job.create()
    preemptible_job.wait_for_state(goal_state="RUNNING")

    # Stopping the first non-preemptible job frees the reservation.
    first_np_job.stop()
    first_np_job.wait_for_state(goal_state="KILLED")

    # The second non-preemptible job should now get to run.
    second_np_job.wait_for_state(goal_state="RUNNING")

    kill_jobs([second_np_job, preemptible_job])
def test_task_killed_in_ready_succeeds_when_re_enqueued(placement_engines):
    """Kill a task while it is READY, re-enqueue it, and expect it to run.

    The placement engines are stopped so that tasks stay in READY in the
    resource manager. Task 0 is stopped and started again; once the
    placement engines are back, it must reach RUNNING.
    """
    # With the placement engines down, tasks are held in READY.
    placement_engines.stop()

    # The private API stubs are added onto a fresh client.
    private_client = with_private_stubs(Client())

    # A long running job with 2 instances.
    long_running_job = Job(
        job_file='long_running_job.yaml',
        options=[with_instance_count(2)],
        client=private_client,
    )
    long_running_job.create()
    long_running_job.wait_for_state(goal_state='PENDING')

    first_task = long_running_job.get_task(0)
    first_task.wait_for_pending_state(goal_state='READY')

    # Kill the task, then enqueue it again.
    first_task.stop()
    first_task.start()

    # Bring the placement engines back so the task can be placed.
    placement_engines.start()

    def instance_zero_running():
        current_state = long_running_job.get_task(0).state_str
        return current_state == 'RUNNING'

    long_running_job.wait_for_condition(instance_zero_running)
def test_controller_task_limit_executor_can_run(peloton_client):
    """Check that the controller limit does not block non-controller jobs.

    1. Start a first controller job that consumes the whole controller limit.
    2. Start a second controller job and check it stays PENDING.
    3. Start a non-controller job and check it reaches SUCCEEDED.
    """
    controller_job_file = "test_controller_job.yaml"
    limited_pool_file = "test_respool_controller_limit.yaml"

    # The first controller job takes the entire controller limit.
    first_controller = Job(
        client=peloton_client,
        job_file=controller_job_file,
        config=IntegrationTestConfig(pool_file=limited_pool_file),
    )
    first_controller.create()
    first_controller.wait_for_state(goal_state="RUNNING")

    # The second controller job has nothing left to be admitted against.
    second_controller = Job(
        client=peloton_client,
        job_file=controller_job_file,
        config=IntegrationTestConfig(pool_file=limited_pool_file),
    )
    second_controller.create()

    # Give the second job 5 seconds in which it could (wrongly) start.
    time.sleep(5)
    second_controller.wait_for_state(goal_state="PENDING")

    # A normal executor job in the same pool must still run and finish.
    executor_job = Job(
        client=peloton_client,
        job_file="test_job.yaml",
        config=IntegrationTestConfig(pool_file=limited_pool_file),
    )
    executor_job.create()
    executor_job.wait_for_state(goal_state="SUCCEEDED")

    kill_jobs([first_controller, second_controller])
def test__create_2_stateless_jobs_with_task_to_task_anti_affinity_between_jobs(
):  # noqa
    """Start two single-instance stateless jobs and require two distinct hosts.

    Each job is labelled ``job.name=peloton_stateless_job<i>`` and carries an
    AND constraint made of two label constraints: one on its own job-name
    label and one on the other job's label. After both jobs are RUNNING, the
    runtime hosts of all tasks are collected and must number exactly 2.
    """
    label_key = "job.name"

    def job_name_constraint(job_label_value):
        # Label constraint on `job.name` with the kind/condition/requirement
        # values shared by both halves of the AND constraint.
        return task_pb2.Constraint(
            type=1,
            labelConstraint=task_pb2.LabelConstraint(
                kind=1,
                condition=2,
                requirement=0,
                label=peloton_pb2.Label(
                    key=label_key,
                    value=job_label_value,
                ),
            ),
        )

    jobs = []
    for i in range(2):
        own_label = "peloton_stateless_job%s" % i
        other_label = "peloton_stateless_job%s" % ((i + 1) % 2)

        job = Job(
            job_file="test_stateless_job.yaml",
            config=IntegrationTestConfig(
                max_retry_attempts=100,
                pool_file='test_stateless_respool.yaml',
            ),
            options=[
                with_labels({label_key: own_label}),
                with_job_name("TestPelotonDockerJob_Stateless" + repr(i)),
                with_instance_count(1),
            ],
        )
        job.job_config.defaultConfig.constraint.CopyFrom(
            task_pb2.Constraint(
                type=2,
                andConstraint=task_pb2.AndConstraint(constraints=[
                    # Tasks of my own job
                    job_name_constraint(own_label),
                    # Avoid tasks of the other job
                    job_name_constraint(other_label),
                ]),
            ))
        jobs.append(job)

    for job in jobs:
        job.create()
        time.sleep(1)

    # Determine whether the tasks run on different hosts. values() works on
    # both Python 2 and 3 (iteritems() is Python 2 only).
    hosts = set()
    for job in jobs:
        job.wait_for_state(goal_state="RUNNING")
        for job_task in job.get_tasks().values():
            hosts.add(job_task.get_info().runtime.host)

    kill_jobs(jobs)

    # Ensure that the tasks run on 2 different hosts.
    assert len(hosts) == 2
def test__create_job():
    """Create a job with the helper's default job file and wait on it.

    Only a retry budget of 100 attempts is supplied; both ``Job`` and
    ``wait_for_state()`` otherwise run with their defaults.
    """
    retry_config = IntegrationTestConfig(max_retry_attempts=100)
    default_job = Job(config=retry_config)
    default_job.create()
    default_job.wait_for_state()
def test__dynamic_partition_pool_restrictions(peloton_client):
    """Check that each job type only runs on hosts of its matching host pool.

    Three host pools (batch-reserved, shared, stateless) are set up with one
    host each. A non-preemptible batch job, a stateless job and a preemptible
    batch job are then started in turn; for each, every non-PENDING task must
    sit on a host of the expected pool and the number of PENDING tasks must
    match the pool's capacity.
    """
    # We start with shared=1, batch_reserved=2; delete batch_reserved so
    # that its hosts go to "default".
    delete_host_pool(util.HOSTPOOL_BATCH_RESERVED)

    # Set up 3 host pools with 1 host each.
    ensure_host_pool(util.HOSTPOOL_BATCH_RESERVED, 1)
    ensure_host_pool(util.HOSTPOOL_SHARED, 1)
    ensure_host_pool(util.HOSTPOOL_STATELESS, 1)

    # Map every host name to the name of the pool it belongs to.
    host_to_pool = {}
    for pool in list_host_pools().pools:
        for host in pool.hosts:
            host_to_pool[host] = pool.name

    def count_pending_and_check_pool(job, expected_pool):
        # Return the number of PENDING tasks of `job`, asserting that every
        # other task runs on a host belonging to `expected_pool`.
        pending = 0
        for instance_id in job.get_tasks():
            job_task = job.get_task(instance_id)
            if job_task.state_str == "PENDING":
                pending += 1
            else:
                hostname = job_task.get_runtime().host
                assert host_to_pool[hostname] == expected_pool
        return pending

    # Job has two instances with 3 cpus each; only one instance will run.
    npjob = Job(
        client=peloton_client,
        job_file="test_non_preemptible_job.yaml",
        config=IntegrationTestConfig(max_retry_attempts=100),
    )
    npjob.create()
    npjob.wait_for_state(goal_state='RUNNING')
    assert count_pending_and_check_pool(
        npjob, util.HOSTPOOL_BATCH_RESERVED) == 1

    # Stateless job has 4 instances with host limit 1, so only one instance
    # will run.
    sjob = Job(
        client=peloton_client,
        job_file="test_stateless_job_host_limit_1.yaml",
        config=IntegrationTestConfig(max_retry_attempts=100, sleep_time_sec=2),
    )
    sjob.create()
    sjob.wait_for_state(goal_state="RUNNING")
    assert count_pending_and_check_pool(sjob, util.HOSTPOOL_STATELESS) == 3

    # Preemptible batch job has 12 instances with 1 CPU each, so 4 instances
    # will run.
    pjob = Job(
        client=peloton_client,
        job_file="test_preemptible_job.yaml",
        config=IntegrationTestConfig(max_retry_attempts=100, sleep_time_sec=2),
    )
    pjob.create()
    pjob.wait_for_state(goal_state="RUNNING")
    assert count_pending_and_check_pool(pjob, util.HOSTPOOL_SHARED) == 8

    # Stop all jobs.
    npjob.stop()
    sjob.stop()
    pjob.stop()
def test__preemption_non_preemptible_task(respool_a, respool_b):
    """Check that only the preemptible job in a pool is preempted.

    Pool A hosts one preemptible and one non-preemptible job of 6 instances
    each. Starting a 6-instance job in pool B should send the preemptible
    job in A back to PENDING while the non-preemptible one keeps RUNNING.
    Once B's tasks are running, both pools' CPU allocation must equal their
    reservation; after B succeeds, A's preemptible job must run again.
    """
    # Create 2 jobs in respool A: 1 preemptible and 1 non-preemptible.
    p_job_a = Job(
        job_file="test_preemptible_job.yaml",
        pool=respool_a,
        config=IntegrationTestConfig(max_retry_attempts=100, sleep_time_sec=10),
    )
    p_job_a.update_instance_count(6)

    np_job_a = Job(
        job_file="test_preemptible_job.yaml",
        pool=respool_a,
        config=IntegrationTestConfig(),
    )
    # Same spec as the preemptible job, flipped to non-preemptible.
    np_job_a.job_config.sla.preemptible = False
    np_job_a.update_instance_count(6)

    # The preemptible job takes 6 CPUs.
    p_job_a.create()

    # The non-preemptible job takes 6 reserved CPUs.
    np_job_a.create()

    p_job_a.wait_for_state("RUNNING")
    np_job_a.wait_for_state("RUNNING")

    # Pool allocation is more than the reservation.
    pool_a = np_job_a.pool
    assert pool_a.get_reservation("cpu") < pool_a.get_allocation("cpu")

    # Create another job in respool B.
    p_job_b = Job(
        job_file="test_preemptible_job.yaml",
        pool=respool_b,
        config=IntegrationTestConfig(max_retry_attempts=100, sleep_time_sec=10),
    )
    p_job_b.update_instance_count(6)
    p_job_b.create()

    # p_job_b should remain PENDING since all resources are used by p_job_a.
    p_job_b.wait_for_state("PENDING")

    # p_job_a should be preempted and go back to PENDING.
    p_job_a.wait_for_state(goal_state="PENDING")

    # np_job_a should keep RUNNING.
    np_job_a.wait_for_state("RUNNING")

    def all_tasks_running():
        # Inspect p_job_b's tasks: this condition is what gates the pool B
        # allocation check below. It previously counted np_job_a's tasks,
        # which were already RUNNING, so the wait was satisfied trivially.
        running = sum(
            1 for t in p_job_b.get_tasks().values() if t.state == task.RUNNING
        )
        return running == 6

    # p_job_b should start running.
    p_job_b.wait_for_condition(all_tasks_running)

    # Pool A allocation is equal to its reservation.
    assert pool_a.get_reservation("cpu") == pool_a.get_allocation("cpu")

    # Pool B allocation is equal to its reservation.
    pool_b = p_job_b.pool
    assert pool_b.get_reservation("cpu") == pool_b.get_allocation("cpu")

    # Wait for p_job_b to finish.
    p_job_b.wait_for_state("SUCCEEDED")

    # Make sure p_job_a starts running again.
    p_job_a.wait_for_state("RUNNING")

    kill_jobs([p_job_a, np_job_a, p_job_b])
def test__tasks_reserve_execution(hostreservepool, peloton_client):
    """Check that a large task gets a host reserved and then runs on it.

    A "median" job is brought fully up first. A single-instance "large" job
    is then created; its task must go through RESERVED, a host must be
    reported with status 'reserved', and the task must end up RUNNING on
    exactly that host. After the large job finishes, no host may remain in
    'reserved' status.
    """
    p_job_median = Job(
        client=peloton_client,
        job_file='test_hostreservation_job_median.yaml',
        pool=hostreservepool,
        config=IntegrationTestConfig(
            max_retry_attempts=100,
            sleep_time_sec=1),
    )

    p_job_median.create()
    p_job_median.wait_for_state(goal_state='RUNNING')

    # We should have all 3 tasks in running state.
    def all_running():
        return all(t.state == task.RUNNING
                   for t in p_job_median.get_tasks().values())

    p_job_median.wait_for_condition(all_running)

    # Decorate the client to add peloton private API stubs.
    client = with_private_stubs(peloton_client)

    p_job_large = Job(
        job_file='test_hostreservation_job_large.yaml',
        pool=hostreservepool,
        config=IntegrationTestConfig(
            sleep_time_sec=1,
            max_retry_attempts=300),
        options=[with_instance_count(1)],
        client=client,
    )
    p_job_large.create()
    p_job_large.wait_for_state(goal_state='PENDING')

    request = hostmgr.GetHostsByQueryRequest()

    def query_hosts():
        # Single place for the host manager query used by both checks below.
        response = client.hostmgr_svc.GetHostsByQuery(
            request,
            metadata=p_job_large.client.hostmgr_metadata,
            timeout=p_job_large.config.rpc_timeout_sec,
        )
        return response.hosts

    def get_reserved_host():
        # Hostname of the first host reported as 'reserved', or '' if none.
        for h in query_hosts():
            if h.status == 'reserved':
                return h.hostname
        return ''

    # The task should get into RESERVED state and then RUNNING state.
    t1 = p_job_large.get_task(0)
    t1.wait_for_pending_state(goal_state="RESERVED")

    # Hosts observed as reserved by the wait condition. A list is used so the
    # closure can record results without `nonlocal`.
    observed_reserved = []

    def is_reserved():
        host = get_reserved_host()
        if host != '':
            observed_reserved.append(host)
            return True
        return False

    p_job_large.wait_for_condition(is_reserved)
    # Use the host seen by the successful poll. Querying again here could
    # return '' if the reservation has already been consumed by the task.
    reserved_host = observed_reserved[-1] if observed_reserved else ''

    # The task must run on the reserved host.
    t1.wait_for_pending_state(goal_state="RUNNING")
    assert reserved_host == t1.get_info().runtime.host

    # p_job_large should succeed.
    p_job_large.wait_for_state()

    # No host may be left in reserved state.
    for host in query_hosts():
        assert host.status != 'reserved'

    kill_jobs([p_job_median, p_job_large])