def test_placement_group_wait(ray_start_cluster):
    """Check that waiting on a satisfiable placement group reports it ready.

    Two nodes with 2 CPUs each are added, and a SPREAD placement group with
    two 2-CPU bundles is requested. The test expects ``ray.wait`` to return
    the group's ready reference, the placement group table to show the
    ``CREATED`` state, and ``ray.get`` on the ready reference to hand back a
    placement group with the same bundle specs and id.

    Args:
        ray_start_cluster: Fixture providing the cluster object that nodes
            are added to.
    """
    cluster = ray_start_cluster
    # Plain loop: add_node is called only for its side effect.
    for _ in range(2):
        cluster.add_node(num_cpus=2)
    ray.init(address=cluster.address)
    cluster.wait_for_nodes()

    # Wait on a placement group that is expected to be created: the bundles
    # ask for 4 CPUs in total, which is what the two nodes were given.
    placement_group = ray.util.placement_group(
        name="name",
        strategy="SPREAD",
        bundles=[
            {
                "CPU": 2
            },
            {
                "CPU": 2
            },
        ])
    # No timeout here, so this blocks until the ready reference resolves.
    ready, unready = ray.wait([placement_group.ready()])
    assert len(unready) == 0
    assert len(ready) == 1
    table = ray.util.placement_group_table(placement_group)
    assert table["state"] == "CREATED"

    # The ready reference resolves to an equivalent placement group handle.
    pg = ray.get(placement_group.ready())
    assert pg.bundle_specs == placement_group.bundle_specs
    assert pg.id.binary() == placement_group.id.binary()
def test_pending_placement_group_wait(ray_start_cluster):
    """Check that waiting on an unsatisfiable placement group times out.

    A single node configured with 2 CPUs is added, while the requested SPREAD
    bundles ask for 4 CPUs plus 2 GPUs in total. The test expects the ready
    reference to stay unresolved within the timeout, the placement group
    table to show the ``PENDING`` state, and ``ray.get`` with a timeout to
    raise ``GetTimeoutError``.

    Args:
        ray_start_cluster: Fixture providing the cluster object that nodes
            are added to.
    """
    cluster = ray_start_cluster
    # Only one node is needed; call add_node directly instead of building a
    # throwaway list for its side effect.
    cluster.add_node(num_cpus=2)
    ray.init(address=cluster.address)
    cluster.wait_for_nodes()

    # Wait on placement group that cannot be created.
    placement_group = ray.util.placement_group(
        name="name",
        strategy="SPREAD",
        bundles=[
            {
                "CPU": 2
            },
            {
                "CPU": 2
            },
            {
                "GPU": 2
            },
        ])
    # A short timeout keeps the test fast while the group stays pending.
    ready, unready = ray.wait([placement_group.ready()], timeout=0.1)
    assert len(unready) == 1
    assert len(ready) == 0
    table = ray.util.placement_group_table(placement_group)
    assert table["state"] == "PENDING"
    with pytest.raises(ray.exceptions.GetTimeoutError):
        ray.get(placement_group.ready(), timeout=0.1)
def test_placement_group_strict_spread(ray_start_cluster):
    """Check that STRICT_SPREAD puts each bundle's actor on its own node.

    Three 4-CPU nodes are added and a STRICT_SPREAD placement group with
    three 2-CPU bundles is created. One 2-CPU actor is started per bundle,
    and the test asserts that the three actors report pairwise different
    node ids.

    Args:
        ray_start_cluster: Fixture providing the cluster object that nodes
            are added to.
    """

    @ray.remote(num_cpus=2)
    class Actor(object):
        def __init__(self):
            self.n = 0

        def value(self):
            return self.n

    cluster = ray_start_cluster
    node_count = 3
    for _ in range(node_count):
        cluster.add_node(num_cpus=4)
    ray.init(address=cluster.address)

    bundles = [{"CPU": 2}, {"CPU": 2}, {"CPU": 2}]
    placement_group = ray.util.placement_group(
        name="name", strategy="STRICT_SPREAD", bundles=bundles)
    ray.get(placement_group.ready())

    # One actor per bundle, created in bundle-index order.
    actors = [
        Actor.options(
            placement_group=placement_group,
            placement_group_bundle_index=bundle_index).remote()
        for bundle_index in range(3)
    ]

    # Calling a method on each actor and waiting for the result ensures the
    # actors are up before their placement is inspected.
    for actor in actors:
        ray.get(actor.value.remote())

    # Look up every actor in the global actor table.
    actor_table = ray.actors()
    infos = [actor_table.get(actor._actor_id.hex()) for actor in actors]
    assert all(infos)

    # Every pair of actors must live on different nodes.
    node_ids = [info["Address"]["NodeID"] for info in infos]
    for position, node_id in enumerate(node_ids):
        for other_node_id in node_ids[position + 1:]:
            assert node_id != other_node_id
def test_placement_group_pack(ray_start_cluster):
    """Check that PACK places both bundles' actors on the same node.

    Two 4-CPU nodes are added and a PACK placement group with two 2-CPU
    bundles is created; the first bundle also carries a zero-valued GPU
    entry. One 2-CPU actor is started per bundle, and the test asserts that
    both actors report the same node id.

    Args:
        ray_start_cluster: Fixture providing the cluster object that nodes
            are added to.
    """

    @ray.remote(num_cpus=2)
    class Actor(object):
        def __init__(self):
            self.n = 0

        def value(self):
            return self.n

    cluster = ray_start_cluster
    node_count = 2
    for _ in range(node_count):
        cluster.add_node(num_cpus=4)
    ray.init(address=cluster.address)

    bundles = [
        {
            "CPU": 2,
            "GPU": 0  # A zero-valued resource entry must not break the test.
        },
        {
            "CPU": 2
        },
    ]
    placement_group = ray.util.placement_group(
        name="name", strategy="PACK", bundles=bundles)
    ray.get(placement_group.ready())

    # One actor per bundle, created in bundle-index order.
    actors = [
        Actor.options(
            placement_group=placement_group,
            placement_group_bundle_index=bundle_index).remote()
        for bundle_index in range(2)
    ]

    # Calling a method on each actor and waiting for the result ensures the
    # actors are up before their placement is inspected.
    for actor in actors:
        ray.get(actor.value.remote())

    # Look up both actors in the global actor table.
    actor_table = ray.actors()
    infos = [actor_table.get(actor._actor_id.hex()) for actor in actors]
    assert all(infos)

    # Both actors must be collocated on a single node.
    first_node, second_node = [info["Address"]["NodeID"] for info in infos]
    assert first_node == second_node