def test_controller_task_limit():
    """Check that a resource pool's controller limit gates job admission.

    Scenario:
      1. Start a controller job that uses up the whole controller limit.
      2. Start a second controller job and verify that it stays PENDING.
      3. Stop the first job and verify that the second one goes RUNNING.
    """
    controller_job_file = "test_controller_job.yaml"
    limited_pool_file = "test_respool_controller_limit.yaml"

    # The first job takes all of the controller limit of the pool.
    first_job = Job(
        job_file=controller_job_file,
        config=IntegrationTestConfig(pool_file=limited_pool_file),
    )
    first_job.create()
    first_job.wait_for_state(goal_state="RUNNING")

    # The second job is submitted while the limit is still exhausted.
    second_job = Job(
        job_file=controller_job_file,
        config=IntegrationTestConfig(pool_file=limited_pool_file),
    )
    second_job.create()

    # Give the second job 5 seconds in which it could (wrongly) be admitted,
    # then confirm that it is still waiting.
    time.sleep(5)
    second_job.wait_for_state(goal_state="PENDING")

    # Stopping the first job should let the second one start running.
    first_job.stop()
    first_job.wait_for_state(goal_state="KILLED")
    second_job.wait_for_state(goal_state="RUNNING")

    kill_jobs([second_job])
def test__host_limit(peloton_client):
    """Check that running tasks of a host-limit-1 stateless job use distinct hosts.

    The job is created and awaited until RUNNING; the test then polls until
    every RUNNING task reports a different host and exactly one task is
    PENDING, and finally stops the job and waits for it to be KILLED.
    """
    job = Job(
        client=peloton_client,
        job_file="test_stateless_job_host_limit_1.yaml",
        config=IntegrationTestConfig(max_retry_attempts=100, sleep_time_sec=2),
    )
    job.create()
    job.wait_for_state(goal_state="RUNNING")

    def different_hosts_for_running_tasks():
        """Return True once running tasks are on distinct hosts and one is pending."""
        # Resolve the enum values once per poll instead of once per task.
        running_state = task_pb2.TaskState.Value("RUNNING")
        pending_state = task_pb2.TaskState.Value("PENDING")

        hosts = set()
        num_running = 0
        num_pending = 0
        # The key is unused; it is named with a leading underscore so the
        # builtin `id` is no longer shadowed.
        for _task_id, task in job.list_tasks().value.items():
            state = task.runtime.state
            if state == running_state:
                num_running += 1
                hosts.add(task.runtime.host)
            elif state == pending_state:
                num_pending += 1

        # The number of running tasks must equal the number of distinct
        # hosts, and exactly one task must be left PENDING.
        return len(hosts) == num_running and num_pending == 1

    job.wait_for_condition(different_hosts_for_running_tasks)

    job.stop()
    job.wait_for_state(goal_state="KILLED")
def test__start_stop_task_without_job_id():
    """Starting or stopping a Job that has no job id must report notFound."""

    def _assert_not_found(response):
        # The response must carry an error, and that error must be notFound.
        assert response.HasField("error")
        assert response.error.HasField("notFound")

    orphan_job = Job()
    _assert_not_found(orphan_job.start())
    _assert_not_found(orphan_job.stop())
def test__start_stop_task_without_job_id(peloton_client):
    """Starting or stopping a Job that has no job id must report notFound."""

    def _expect_not_found(response):
        # Both the generic error field and its notFound variant must be set.
        assert response.HasField("error")
        assert response.error.HasField("notFound")

    id_less_job = Job(client=peloton_client)
    _expect_not_found(id_less_job.start())
    _expect_not_found(id_less_job.stop())
def test__start_stop_task_with_nonexistent_job_id():
    """Starting or stopping a Job whose id does not exist must report notFound."""
    ghost_job = Job()
    ghost_job.job_id = "nonexistent-job-id"

    # Start first, then stop; each call must come back with a notFound error.
    for action in (ghost_job.start, ghost_job.stop):
        response = action()
        assert response.HasField("error")
        assert response.error.HasField("notFound")
def test__start_stop_task_with_nonexistent_job_id(peloton_client):
    """Starting or stopping a Job whose id does not exist must report notFound."""
    unknown_job = Job(client=peloton_client)
    unknown_job.job_id = "nonexistent-job-id"

    # Exercise start and then stop; both must answer with a notFound error.
    for operation in (unknown_job.start, unknown_job.stop):
        result = operation()
        assert result.HasField("error")
        assert result.error.HasField("notFound")
def test_non_preemptible_job(respool_a):
    """Check admission of non-preemptible jobs against a pool's CPU reservation.

    A first non-preemptible job takes the whole CPU reservation, so a second
    one must stay PENDING while a preemptible job can still run. Once the
    first job is stopped, the second non-preemptible job must reach RUNNING.
    """
    non_preemptible_file = "test_non_preemptible_job.yaml"

    # First non-preemptible job: uses all of the CPU reservation.
    first_np_job = Job(
        job_file=non_preemptible_file,
        pool=respool_a,
        config=IntegrationTestConfig(max_retry_attempts=100),
    )
    first_np_job.create()
    first_np_job.wait_for_state(goal_state="RUNNING")

    # The pool's CPU allocation must now match its CPU reservation.
    reserved_cpu = first_np_job.pool.get_reservation("cpu")
    allocated_cpu = first_np_job.pool.get_allocation("cpu")
    assert reserved_cpu == allocated_cpu

    # Second non-preemptible job: must not be admitted, because the CPU
    # reservation of the resource pool is already used up.
    second_np_job = Job(
        job_file=non_preemptible_file,
        pool=respool_a,
        config=IntegrationTestConfig(max_retry_attempts=100, sleep_time_sec=5),
    )
    second_np_job.create()
    second_np_job.wait_for_state(goal_state="PENDING")

    # A preemptible job in the same pool should still start running.
    preemptible_job = Job(
        job_file="test_job.yaml",
        pool=respool_a,
        config=IntegrationTestConfig(max_retry_attempts=100),
    )
    preemptible_job.create()
    preemptible_job.wait_for_state(goal_state="RUNNING")

    # Stop the first non-preemptible job ...
    first_np_job.stop()
    first_np_job.wait_for_state(goal_state="KILLED")

    # ... after which the second one is expected to reach RUNNING.
    second_np_job.wait_for_state(goal_state="RUNNING")

    kill_jobs([second_np_job, preemptible_job])
def _count_pending_tasks(job, host_to_pool, expected_pool):
    """Count PENDING tasks of `job`; assert all other tasks sit in `expected_pool`.

    Args:
        job: job object exposing `get_tasks()` and `get_task(key)`.
        host_to_pool: mapping from hostname to host-pool name.
        expected_pool: pool name every non-PENDING task's host must map to.

    Returns:
        The number of tasks whose `state_str` is "PENDING".
    """
    pending = 0
    for task_key in job.get_tasks():
        task = job.get_task(task_key)
        if task.state_str == "PENDING":
            pending += 1
        else:
            hostname = task.get_runtime().host
            assert host_to_pool[hostname] == expected_pool
    return pending


def test__dynamic_partition_pool_restrictions(peloton_client):
    """Check that each job type only lands on hosts of its own host pool.

    Three host pools with one host each are set up (batch-reserved, shared,
    stateless). A non-preemptible batch job, a stateless job and a preemptible
    batch job are then started in turn; for each one the test verifies the
    expected number of PENDING tasks and that every other task is placed on a
    host belonging to the expected pool.
    """
    # We start with shared=1, batch_reserved=2.
    # Delete batch_reserved so that its hosts go to "default".
    delete_host_pool(util.HOSTPOOL_BATCH_RESERVED)

    # Set up 3 host pools with 1 host each.
    ensure_host_pool(util.HOSTPOOL_BATCH_RESERVED, 1)
    ensure_host_pool(util.HOSTPOOL_SHARED, 1)
    ensure_host_pool(util.HOSTPOOL_STATELESS, 1)

    # Build the hostname -> pool-name lookup used by the placement checks.
    host_to_pool = {}
    for pool in list_host_pools().pools:
        for host in pool.hosts:
            host_to_pool[host] = pool.name

    # Job has two instances with 3 cpus each.
    # Only one instance will run.
    npjob = Job(
        client=peloton_client,
        job_file="test_non_preemptible_job.yaml",
        config=IntegrationTestConfig(max_retry_attempts=100),
    )
    npjob.create()
    npjob.wait_for_state(goal_state="RUNNING")
    assert _count_pending_tasks(
        npjob, host_to_pool, util.HOSTPOOL_BATCH_RESERVED) == 1

    # Stateless job has 4 instances with host limit 1,
    # so only one instance will run.
    sjob = Job(
        client=peloton_client,
        job_file="test_stateless_job_host_limit_1.yaml",
        config=IntegrationTestConfig(max_retry_attempts=100, sleep_time_sec=2),
    )
    sjob.create()
    sjob.wait_for_state(goal_state="RUNNING")
    assert _count_pending_tasks(
        sjob, host_to_pool, util.HOSTPOOL_STATELESS) == 3

    # Preemptible batch job has 12 instances with 1 CPU each,
    # so 4 instances will run.
    pjob = Job(
        client=peloton_client,
        job_file="test_preemptible_job.yaml",
        config=IntegrationTestConfig(max_retry_attempts=100, sleep_time_sec=2),
    )
    pjob.create()
    pjob.wait_for_state(goal_state="RUNNING")
    assert _count_pending_tasks(
        pjob, host_to_pool, util.HOSTPOOL_SHARED) == 8

    # Stop all jobs.
    npjob.stop()
    sjob.stop()
    pjob.stop()