Example #1
def test_actor_holding_serialized_reference(one_worker_100MiB, use_ray_put,
                                            failure):
    @ray.remote
    class GreedyActor(object):
        def __init__(self):
            pass

        def set_ref1(self, ref):
            self.ref1 = ref

        def add_ref2(self, new_ref):
            self.ref2 = new_ref

        def delete_ref1(self):
            self.ref1 = None

        def delete_ref2(self):
            self.ref2 = None

    # Test that the reference held by the actor isn't evicted.
    array_oid = put_object(
        np.zeros(20 * 1024 * 1024, dtype=np.uint8), use_ray_put)
    actor = GreedyActor.remote()
    actor.set_ref1.remote([array_oid])

    # Test that giving the same actor a duplicate reference works.
    ray.get(actor.add_ref2.remote([array_oid]))

    # Remove the local reference.
    array_oid_bytes = array_oid.binary()
    del array_oid

    # Test that the remote references still pin the object.
    _fill_object_store_and_get(array_oid_bytes)

    # Test that removing only the first reference doesn't unpin the object.
    ray.get(actor.delete_ref1.remote())
    _fill_object_store_and_get(array_oid_bytes)

    if failure:
        # Test that the actor exiting stops the reference from being pinned.
        # Kill the actor and wait for the actor to exit.
        kill_actor_and_wait_for_failure(actor)
        with pytest.raises(ray.exceptions.RayActorError):
            ray.get(actor.delete_ref1.remote())
    else:
        # Test that deleting the second reference stops it from being pinned.
        ray.get(actor.delete_ref2.remote())
    _fill_object_store_and_get(array_oid_bytes, succeed=False)
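
The test above relies on helpers from Ray's reference-counting test module (put_object, _fill_object_store_and_get) plus the one_worker_100MiB fixture. They are not public API; below is a minimal, hypothetical sketch of what such helpers could look like, assuming a small object store and that ray.ObjectRef can be rebuilt from the bytes returned by .binary() (names, defaults, and exception types here are illustrative, not Ray's actual test utilities).

import numpy as np
import pytest
import ray

@ray.remote
def _identity(obj):
    return obj

def put_object(obj, use_ray_put):
    # Pin the object either via ray.put or via a task return value, so both
    # reference-counting code paths get exercised.
    return ray.put(obj) if use_ray_put else _identity.remote(obj)

def _fill_object_store_and_get(ref_or_bytes, succeed=True, num_objects=5):
    # Flood the (small) object store so anything no longer pinned is evicted.
    for _ in range(num_objects):
        ray.put(np.zeros(20 * 1024 * 1024, dtype=np.uint8))
    ref = (ray.ObjectRef(ref_or_bytes)
           if isinstance(ref_or_bytes, bytes) else ref_or_bytes)
    if succeed:
        # Still pinned: the object must be retrievable.
        ray.get(ref, timeout=10)
    else:
        # Unpinned: the get should fail (the exact error type depends on the
        # Ray version, so catch broadly in this sketch).
        with pytest.raises(Exception):
            ray.get(ref, timeout=1)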
Example #2
async def test_asyncio_get(ray_start_regular_shared, event_loop):
    loop = event_loop
    asyncio.set_event_loop(loop)
    loop.set_debug(True)

    # Test Async Plasma
    @ray.remote
    def task():
        return 1

    assert await task.remote().as_future() == 1

    @ray.remote
    def task_throws():
        1 / 0

    with pytest.raises(ray.exceptions.RayTaskError):
        await task_throws.remote().as_future()

    # Test actor calls.
    str_len = 200 * 1024

    @ray.remote
    class Actor:
        def echo(self, i):
            return i

        def big_object(self):
            # 200KB exceeds the ~100KB limit for inline (direct call)
            # returns, so the result is promoted to the plasma store.
            return "a" * (str_len)

        def throw_error(self):
            1 / 0

    actor = Actor.remote()

    actor_call_future = actor.echo.remote(2).as_future()
    assert await actor_call_future == 2

    promoted_to_plasma_future = actor.big_object.remote().as_future()
    assert await promoted_to_plasma_future == "a" * str_len

    with pytest.raises(ray.exceptions.RayTaskError):
        await actor.throw_error.remote().as_future()

    kill_actor_and_wait_for_failure(actor)
    with pytest.raises(ray.exceptions.RayActorError):
        await actor.echo.remote(1)
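
The .as_future() calls above produce ordinary asyncio futures, so they compose with standard asyncio tooling. A minimal standalone sketch under that assumption (a local ray.init() and the same .as_future() API the test exercises):

import asyncio

import ray

ray.init(num_cpus=1)

@ray.remote
def square(x):
    return x * x

async def main():
    # Convert each ObjectRef into an asyncio.Future and resolve them together.
    futures = [square.remote(i).as_future() for i in range(4)]
    results = await asyncio.gather(*futures)
    assert results == [0, 1, 4, 9]

asyncio.run(main())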
Example #3
def test_kill(ray_start_regular_shared):
    @ray.remote
    class Actor:
        def hang(self):
            while True:
                time.sleep(1)

    actor = Actor.remote()
    result = actor.hang.remote()
    ready, _ = ray.wait([result], timeout=0.5)
    assert len(ready) == 0
    kill_actor_and_wait_for_failure(actor)

    with pytest.raises(ray.exceptions.RayActorError):
        ray.get(result)

    with pytest.raises(ValueError):
        ray.kill("not_an_actor_handle")
Example #4
def test_capture_child_actors(ray_start_cluster):
    cluster = ray_start_cluster
    total_num_actors = 4
    for _ in range(2):
        cluster.add_node(num_cpus=total_num_actors)
    ray.init(address=cluster.address)

    pg = ray.util.placement_group(
        [{"CPU": 2}, {"CPU": 2}], strategy="STRICT_PACK")
    ray.get(pg.ready())

    # If get_current_placement_group is used when the current worker/driver
    # doesn't belong to any placement group, it should return None.
    assert get_current_placement_group() is None

    # Test actors first.
    @ray.remote(num_cpus=1)
    class NestedActor:
        def ready(self):
            return True

    @ray.remote(num_cpus=1)
    class Actor:
        def __init__(self):
            self.actors = []

        def ready(self):
            return True

        def schedule_nested_actor(self):
            # Make sure we can capture the current placement group.
            assert get_current_placement_group() is not None
            # Actors should be implicitly captured.
            actor = NestedActor.remote()
            ray.get(actor.ready.remote())
            self.actors.append(actor)

        def schedule_nested_actor_outside_pg(self):
            # Don't use placement group.
            actor = NestedActor.options(placement_group=None).remote()
            ray.get(actor.ready.remote())
            self.actors.append(actor)

    a = Actor.options(placement_group=pg).remote()
    ray.get(a.ready.remote())
    # 1 top level actor + 3 children.
    for _ in range(total_num_actors - 1):
        ray.get(a.schedule_nested_actor.remote())
    # Make sure all the actors are scheduled on the same node,
    # because the placement group uses the STRICT_PACK strategy.
    node_id_set = set()
    for actor_info in ray.actors().values():
        node_id = actor_info["Address"]["NodeID"]
        node_id_set.add(node_id)

    # Since all the node ids should be identical, the set should have size 1.
    assert len(node_id_set) == 1

    # Kill an actor and wait until it is killed.
    kill_actor_and_wait_for_failure(a)
    with pytest.raises(ray.exceptions.RayActorError):
        ray.get(a.ready.remote())

    # Now create an actor in the placement group, but do not capture its
    # child tasks/actors.
    a = Actor.options(
        placement_group=pg,
        placement_group_capture_child_tasks=False).remote()
    ray.get(a.ready.remote())
    # 1 top level actor + 3 children.
    for _ in range(total_num_actors - 1):
        ray.get(a.schedule_nested_actor.remote())
    # Make sure the actors are not all scheduled on the same node,
    # because capture is disabled, so the child actors are not placed
    # in the parent's placement group.
    node_id_set = set()
    for actor_info in ray.actors().values():
        node_id = actor_info["Address"]["NodeID"]
        node_id_set.add(node_id)

    assert len(node_id_set) == 2

    # Kill an actor and wait until it is killed.
    kill_actor_and_wait_for_failure(a)
    with pytest.raises(ray.exceptions.RayActorError):
        ray.get(a.ready.remote())

    # Lastly, make sure that when placement_group=None is specified for the
    # children, they are not scheduled in the placement group.
    a = Actor.options(placement_group=pg).remote()
    ray.get(a.ready.remote())
    # 1 top level actor + 3 children.
    for _ in range(total_num_actors - 1):
        ray.get(a.schedule_nested_actor_outside_pg.remote())
    # Make sure the actors are not all scheduled on the same node,
    # because the children are created with placement_group=None and so
    # land outside the placement group.
    node_id_set = set()
    for actor_info in ray.actors().values():
        node_id = actor_info["Address"]["NodeID"]
        node_id_set.add(node_id)

    assert len(node_id_set) == 2
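
The implicit capture exercised above for nested actors also applies to nested tasks. A minimal, hypothetical single-node sketch of that behavior (assumes ray.init() with 3 CPUs so a non-captured child still has a free CPU outside the group, and that child tasks are captured the same way child actors are in the test above):

import ray
from ray.util.placement_group import get_current_placement_group

ray.init(num_cpus=3)

pg = ray.util.placement_group([{"CPU": 1}, {"CPU": 1}], strategy="PACK")
ray.get(pg.ready())

@ray.remote(num_cpus=1)
def child():
    # True iff this task was captured into its parent's placement group.
    return get_current_placement_group() is not None

@ray.remote(num_cpus=1)
def parent():
    return ray.get(child.remote())

# With capture enabled, the child lands in the same placement group.
assert ray.get(
    parent.options(
        placement_group=pg,
        placement_group_capture_child_tasks=True).remote())

# With capture disabled, the child is scheduled outside the placement group.
assert not ray.get(
    parent.options(
        placement_group=pg,
        placement_group_capture_child_tasks=False).remote())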
Example #5
def test_detached_placement_group(ray_start_cluster):
    cluster = ray_start_cluster
    for _ in range(2):
        cluster.add_node(num_cpus=3)
    cluster.wait_for_nodes()
    info = ray.init(address=cluster.address)

    # Make sure a detached placement group stays alive after the job that
    # created it is dead.
    driver_code = f"""
import ray

ray.init(address="{info["redis_address"]}")

pg = ray.util.placement_group(
        [{{"CPU": 1}} for _ in range(2)],
        strategy="STRICT_SPREAD", lifetime="detached")
ray.get(pg.ready())

@ray.remote(num_cpus=1)
class Actor:
    def ready(self):
        return True

for bundle_index in range(2):
    actor = Actor.options(lifetime="detached", placement_group=pg,
                placement_group_bundle_index=bundle_index).remote()
    ray.get(actor.ready.remote())

ray.shutdown()
    """

    run_string_as_driver(driver_code)

    # Wait until the driver is reported as dead by GCS.
    def is_job_done():
        jobs = ray.jobs()
        for job in jobs:
            if "StopTime" in job:
                return True
        return False

    def assert_alive_num_pg(expected_num_pg):
        alive_num_pg = 0
        pg_table = ray.util.placement_group_table()
        for _, placement_group_info in pg_table.items():
            if placement_group_info["state"] == "CREATED":
                alive_num_pg += 1
        return alive_num_pg == expected_num_pg

    def assert_alive_num_actor(expected_num_actor):
        alive_num_actor = 0
        for actor_info in ray.actors().values():
            if actor_info["State"] == ray.gcs_utils.ActorTableData.ALIVE:
                alive_num_actor += 1
        return alive_num_actor == expected_num_actor

    wait_for_condition(is_job_done)

    assert assert_alive_num_pg(1)
    assert assert_alive_num_actor(2)

    # Make sure a detached placement group stays alive when its creator,
    # a detached actor, dies.
    # Test actors first.
    @ray.remote(num_cpus=1)
    class NestedActor:
        def ready(self):
            return True

    @ray.remote(num_cpus=1)
    class Actor:
        def __init__(self):
            self.actors = []

        def ready(self):
            return True

        def schedule_nested_actor_with_detached_pg(self):
            # Create placement group which is detached.
            pg = ray.util.placement_group(
                [{"CPU": 1} for _ in range(2)],
                strategy="STRICT_SPREAD",
                lifetime="detached",
                name="detached_pg")
            ray.get(pg.ready())
            # Schedule nested actor with the placement group.
            for bundle_index in range(2):
                actor = NestedActor.options(
                    placement_group=pg,
                    placement_group_bundle_index=bundle_index,
                    lifetime="detached").remote()
                ray.get(actor.ready.remote())
                self.actors.append(actor)

    a = Actor.options(lifetime="detached").remote()
    ray.get(a.ready.remote())
    # 1 parent actor and 2 child actors.
    ray.get(a.schedule_nested_actor_with_detached_pg.remote())

    # Kill an actor and wait until it is killed.
    kill_actor_and_wait_for_failure(a)
    with pytest.raises(ray.exceptions.RayActorError):
        ray.get(a.ready.remote())

    # We should have 2 alive pgs and 4 alive actors.
    assert assert_alive_num_pg(2)
    assert assert_alive_num_actor(4)
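
The driver-level helpers used above (run_string_as_driver, wait_for_condition) also come from Ray's test utilities. A hypothetical sketch of the behavior this test assumes (the real helpers differ in details such as error handling and output capture):

import subprocess
import sys
import time

def wait_for_condition(condition_predictor, timeout=10, retry_interval_ms=100):
    # Poll a zero-argument predicate until it returns True or the timeout
    # expires; used above to wait until the GCS marks the driver's job done.
    deadline = time.time() + timeout
    while time.time() < deadline:
        if condition_predictor():
            return
        time.sleep(retry_interval_ms / 1000.0)
    raise RuntimeError("The condition was not met before the timeout expired.")

def run_string_as_driver(driver_script):
    # Run the given source as a separate Ray driver process and return its
    # stdout, raising if the driver exits with a non-zero status.
    result = subprocess.run(
        [sys.executable, "-c", driver_script],
        capture_output=True,
        text=True,
        check=True)
    return result.stdout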