Пример #1
0
def test_kill_cancel_metadata(ray_start_regular):
    """
    Check that ray.kill and ray.cancel hand the client's metadata to the
    Terminate call.

    The ``Terminate`` attribute of the client's data_client is replaced by a
    fake that raises a dedicated exception whose argument is taken from the
    object's ``_metadata``. Both ray.kill and ray.cancel are then expected to
    raise that exception with a message matching "key", the key of the
    metadata pair supplied when the client server is started.
    """
    with ray_start_client_server(metadata=[("key", "value")]) as ray:

        class MetadataIsCorrectlyPassedException(Exception):
            pass

        def fake_terminate(self, term):
            # NOTE(review): index 1 is expected to hold the ("key", "value")
            # pair given above; presumably another entry sits at index 0 --
            # confirm against the client implementation.
            metadata_key = self._metadata[1][0]
            raise MetadataIsCorrectlyPassedException(metadata_key)

        @ray.remote
        class A:
            pass

        @ray.remote
        def f():
            time.sleep(1000)

        # Bind the fake to the data client so that it receives it as `self`.
        data_client = ray.get_context().api.worker.data_client
        data_client.Terminate = fake_terminate.__get__(data_client)

        # ray.kill must surface the fake's exception, carrying "key".
        handle = A.remote()
        with pytest.raises(MetadataIsCorrectlyPassedException, match="key"):
            ray.kill(handle)

        # ray.cancel must surface the very same exception.
        pending = f.remote()
        with pytest.raises(MetadataIsCorrectlyPassedException, match="key"):
            ray.cancel(pending)
Пример #2
0
def test_get_ray_metadata(ray_start_regular_shared):
    """Exercise is_initialized, nodes, cluster_resources and
    available_resources through the client.
    """
    with ray_start_client_server() as ray:
        node_ip = ray_start_regular_shared["node_ip_address"]
        # Resource key under which the node itself is expected to be listed.
        node_resource_key = "node:" + node_ip

        assert ray.is_initialized()

        node_list = ray.nodes()
        assert len(node_list) == 1, node_list
        assert node_list[0]["NodeManagerAddress"] == node_ip

        total = ray.cluster_resources()
        free = ray.available_resources()

        assert total["CPU"] == 1.0
        for resources in (total, free):
            assert node_resource_key in resources
Пример #3
0
def main(results=None):
    """Run every ``benchmark_*`` member of this module against a client server.

    Each matching member is called as ``obj(ray, results)`` inside its own
    ``ray_start_client_server`` context.

    Args:
        results: Optional list passed to every benchmark as its second
            argument. A new list is created when it is omitted.

    Returns:
        The results list. When the caller supplied a list, that same object
        is returned.
    """
    # Test for None explicitly: `results or []` would swap a caller-supplied
    # empty list for a fresh one, so the caller's list would never be filled.
    if results is None:
        results = []

    ray_config = {"logging_level": logging.WARNING}

    def ray_connect_handler(job_config=None, **ray_init_kwargs):
        """Initialize the real ray once; the arguments are accepted but unused."""
        from ray._private.client_mode_hook import disable_client_hook

        with disable_client_hook():
            import ray as real_ray

            if not real_ray.is_initialized():
                real_ray.init(**ray_config)

    for name, obj in inspect.getmembers(sys.modules[__name__]):
        if not name.startswith("benchmark_"):
            continue
        with ray_start_client_server(ray_connect_handler=ray_connect_handler) as ray:
            obj(ray, results)

    return results
Пример #4
0
def test_client_mode_hook_thread_safe(ray_start_regular_shared):
    """Check that disable_client_hook in one thread does not affect another.

    A worker thread records client_mode_should_convert() while inside
    disable_client_hook and again after leaving it; the main thread checks
    its own value while the worker is still parked inside the context.
    """
    with ray_start_client_server():
        with enable_client_mode():
            assert client_mode_should_convert(auto_init=True)
            # The lock is taken up front so the worker thread blocks on its
            # own acquire() until the main thread releases it.
            lock = threading.Lock()
            lock.acquire()
            # Carries the two values observed by the worker thread, in order.
            q = queue.Queue()

            def disable():
                with disable_client_hook():
                    # First value: observed while the hook is disabled here.
                    q.put(client_mode_should_convert(auto_init=True))
                    # Stay inside the context until the main thread releases.
                    lock.acquire()
                # Second value: observed after leaving disable_client_hook.
                q.put(client_mode_should_convert(auto_init=True))

            t = threading.Thread(target=disable)
            t.start()
            # The main thread must still see the hook as enabled.
            assert client_mode_should_convert(auto_init=True)
            lock.release()
            t.join()
            assert q.get() is False, "Threaded disable_client_hook failed  to disable"
            assert q.get() is True, "Threaded disable_client_hook failed to re-enable"
Пример #5
0
def test_delete_actor(ray_start_regular):
    """Create two actors, delete one handle, and wait for the
    server_actor_ref_count condition to go from 2 to 1.
    """
    with ray_start_client_server() as ray:

        @ray.remote
        class Accumulator:
            def __init__(self):
                self.acc = 0

            def inc(self):
                self.acc += 1

        first = Accumulator.remote()
        first.inc.remote()
        second = Accumulator.remote()
        second.inc.remote()

        # Both handles are alive, so the two-actor condition must hold now.
        two_actors_referenced = server_actor_ref_count(2)
        assert two_actors_referenced()

        del first

        # The one-actor condition has up to five seconds to become true.
        wait_for_condition(server_actor_ref_count(1), timeout=5)
Пример #6
0
def main():
    """Run every ``benchmark_*`` member of this module against a client server.

    Each matching member is called as ``benchmark(ray)`` inside its own
    ``ray_start_client_server`` context.
    """
    system_config = {"put_small_object_in_memory_store": True}
    ray_config = {
        "_system_config": system_config,
        "logging_level": logging.WARNING,
    }

    def ray_connect_handler(job_config=None):
        """Initialize the real ray once; ``job_config`` is accepted but unused."""
        from ray._private.client_mode_hook import disable_client_hook
        with disable_client_hook():
            import ray as real_ray
            if real_ray.is_initialized():
                return
            real_ray.init(**ray_config)

    this_module = sys.modules[__name__]
    benchmarks = [
        member for member_name, member in inspect.getmembers(this_module)
        if member_name.startswith("benchmark_")
    ]
    for benchmark in benchmarks:
        with ray_start_client_server(
                ray_connect_handler=ray_connect_handler) as ray:
            benchmark(ray)
Пример #7
0
def test_large_remote_call(ray_start_regular_shared):
    """
    Remote functions and actor methods accept an argument large enough to
    need several transfer chunks.
    """
    with ray_start_client_server() as ray:

        @ray.remote
        def f(large_obj):
            return large_obj.shape

        @ray.remote
        def f2(*args):
            assert args[0] == 123
            return args[1].shape

        @ray.remote
        def f3(*args, **kwargs):
            assert args[0] == "a"
            assert args[1] == "b"
            return kwargs["large_obj"].shape

        # 1024 * 1024 * 16 float64 values come to about 128 MiB; the check
        # below ensures the chunk size is smaller than that, so the argument
        # cannot fit into a single chunk.
        assert OBJECT_TRANSFER_CHUNK_SIZE < 2**20 * 128
        shape = (1024, 1024, 16)
        large_obj = np.random.random(shape)

        # Positional, varargs and keyword passing of the large argument.
        assert ray.get(f.remote(large_obj)) == shape
        assert ray.get(f2.remote(123, large_obj)) == shape
        assert ray.get(f3.remote("a", "b", large_obj=large_obj)) == shape

        @ray.remote
        class SomeActor:
            def __init__(self, large_obj):
                self.inner = large_obj

            def some_method(self, large_obj):
                return large_obj.shape == self.inner.shape

        # The same array goes to both the constructor and a method call.
        holder = SomeActor.remote(large_obj)
        shapes_match = ray.get(holder.some_method.remote(large_obj))
        assert shapes_match
Пример #8
0
def test_cancel_chain(ray_start_regular, use_force):
    """Cancel a task inside a chain of dependent tasks.

    Every task from the cancelled one onwards must raise one of
    ``valid_exceptions(use_force)`` on get, while earlier tasks in the chain
    stay pending until their signal actor is sent.
    """
    with ray_start_client_server() as ray:
        SignalActor = create_remote_signal_actor(ray)
        signaler = SignalActor.remote()

        @ray.remote
        def wait_for(t):
            return ray.get(t[0])

        def build_chain(signal_actor):
            # Four tasks: the first waits on the signal actor and each later
            # one waits on its predecessor.
            refs = [wait_for.remote([signal_actor.wait.remote()])]
            for _ in range(3):
                refs.append(wait_for.remote([refs[-1]]))
            return refs

        # Cancel the head of the chain: all four tasks must fail.
        chain = build_chain(signaler)
        assert len(ray.wait([chain[0]], timeout=.1)[0]) == 0
        ray.cancel(chain[0], force=use_force)
        for ref in chain:
            with pytest.raises(valid_exceptions(use_force)):
                ray.get(ref)

        # Cancel the third task: only it and its successor must fail.
        signaler2 = SignalActor.remote()
        chain = build_chain(signaler2)
        assert len(ray.wait([chain[2]], timeout=.1)[0]) == 0
        ray.cancel(chain[2], force=use_force)
        for ref in chain[2:]:
            with pytest.raises(valid_exceptions(use_force)):
                ray.get(ref)

        # The first two tasks are still blocked on the signal actor.
        for ref in chain[:2]:
            with pytest.raises(GetTimeoutError):
                ray.get(ref, timeout=.1)

        # Sending the signal lets the head of the chain complete.
        signaler2.send.remote()
        ray.get(chain[0])
Пример #9
0
def test_basic_named_actor(ray_start_regular_shared):
    """Look up named actors, with default and detached lifetime, through
    ray.get_actor() and check the state they have accumulated.
    """
    with ray_start_client_server() as ray:

        @ray.remote
        class Accumulator:
            def __init__(self):
                self.x = 0

            def inc(self):
                self.x += 1

            def get(self):
                return self.x

        # Named actor, incremented twice through the creating handle.
        named = Accumulator.options(name="test_acc").remote()
        for _ in range(2):
            named.inc.remote()

        # A handle obtained by name sees those increments plus its own.
        looked_up = ray.get_actor("test_acc")
        looked_up.inc.remote()
        assert ray.get(looked_up.get.remote()) == 3

        del named

        # Detached actor: increment once, then drop the creating handle.
        detached_options = Accumulator.options(name="test_acc2",
                                               lifetime="detached")
        creator_handle = detached_options.remote()
        creator_handle.inc.remote()
        del creator_handle

        # One earlier increment plus five more through the looked-up handle.
        detached_handle = ray.get_actor("test_acc2")
        for _ in range(5):
            detached_handle.inc.remote()

        assert ray.get(detached_handle.get.remote()) == 6
Пример #10
0
def test_stdout_log_stream(ray_start_regular_shared):
    """A remote task's stdout and stderr prints both reach the client's
    log_client.stdstream callback.
    """
    with ray_start_client_server() as ray:
        captured = []

        def record(level, msg):
            # Only the message text is kept; the level is ignored.
            captured.append(msg)

        ray.worker.log_client.stdstream = record

        @ray.remote
        def print_on_stderr_and_stdout(s):
            print(s)
            print(s, file=sys.stderr)

        # Pause before and after submitting the task.
        # NOTE(review): presumably this gives the log stream time to start
        # and then to deliver the output -- confirm.
        time.sleep(1)
        print_on_stderr_and_stdout.remote("Hello world")
        time.sleep(1)

        # One message per stream is expected.
        matching = [msg for msg in captured if "Hello world" in msg]
        assert len(matching) == 2, f"Invalid logs: {captured}"
Пример #11
0
def test_create_remote_before_start(ray_start_regular_shared):
    """Declare a remote class and function before the client server is
    started (as a library might at import time), then use them afterwards.
    """
    from ray.util.client import ray

    @ray.remote
    def f(x):
        return x + 20

    @ray.remote
    class Returner:
        def doit(self):
            return "foo"

    # Shown only when the tests run verbosely.
    print("Created remote functions")

    with ray_start_client_server() as ray:
        total = ray.get(f.remote(3))
        assert total == 23

        returner = Returner.remote()
        reply = ray.get(returner.doit.remote())
        assert reply == "foo"
Пример #12
0
def test_rllib_integration(ray_start_regular_shared):
    """Sample from and train a SimpleQ trainer for two iterations with the
    client mode hook enabled.
    """
    with ray_start_client_server():
        import ray.rllib.agents.dqn as dqn

        # Entering ray_start_client_server alone leaves the hook switched off.
        assert not client_mode_should_convert()

        # The hook has to be enabled explicitly for client APIs to be used.
        with enable_client_mode():
            assert client_mode_should_convert()

            config = dqn.SIMPLE_Q_DEFAULT_CONFIG.copy()
            # Run locally (no extra workers) and compress observations.
            config["num_workers"] = 0
            config["compress_observations"] = True

            trainer = dqn.SimpleQTrainer(config=config, env="CartPole-v1")
            local_worker = trainer.workers.local_worker()

            num_iterations = 2
            for _ in range(num_iterations):
                batch = local_worker.sample()
                assert batch.count == config["rollout_fragment_length"]
                trainer.train()
Пример #13
0
def test_serializing_exceptions(ray_start_regular_shared):
    """Looking up an unknown actor name raises a ValueError whose message
    names the actor.
    """
    expected_message = "Failed to look up actor with name 'abc'"
    with ray_start_client_server() as ray:
        with pytest.raises(ValueError, match=expected_message):
            ray.get_actor("abc")
Пример #14
0
def test_pass_handles(ray_start_regular_shared):
    """Client handles to remote functions and actors can be passed as
    arguments, directly or nested inside a dict, to other remote functions
    and actor methods, and can be invoked from there.
    """
    with ray_start_client_server() as ray:

        @ray.remote
        class ExecActor:
            def exec(self, f, x):
                return ray.get(f.remote(x))

            def exec_exec(self, actor, f, x):
                return ray.get(actor.exec.remote(f, x))

        @ray.remote
        def fact(x):
            out = 1
            while x > 0:
                out *= x
                x -= 1
            return out

        @ray.remote
        def func_exec(f, x):
            return ray.get(f.remote(x))

        @ray.remote
        def func_actor_exec(actor, f, x):
            return ray.get(actor.exec.remote(f, x))

        @ray.remote
        def sneaky_func_exec(obj, x):
            return ray.get(obj["f"].remote(x))

        @ray.remote
        def sneaky_actor_exec(obj, x):
            return ray.get(obj["actor"].exec.remote(obj["f"], x))

        def local_fact(x):
            # Local reference implementation the remote results are compared to.
            product = 1
            while x > 0:
                product *= x
                x -= 1
            return product

        # Plain remote call, then the function handle passed to a function.
        assert ray.get(fact.remote(7)) == local_fact(7)
        assert ray.get(func_exec.remote(fact, 8)) == local_fact(8)

        # Function handle hidden inside a dict.
        wrapped_func = {"f": fact}
        assert ray.get(
            sneaky_func_exec.remote(wrapped_func, 5)) == local_fact(5)

        # Function handle passed to an actor, and the actor to a function.
        first_actor = ExecActor.remote()
        assert ray.get(first_actor.exec.remote(fact, 7)) == local_fact(7)
        assert ray.get(
            func_actor_exec.remote(first_actor, fact, 10)) == local_fact(10)

        # Actor handle passed to another actor.
        second_actor = ExecActor.remote()
        assert ray.get(
            first_actor.exec_exec.remote(second_actor, fact,
                                         9)) == local_fact(9)

        # Actor and function handles hidden inside a dict.
        wrapped_actor = {"actor": second_actor, "f": fact}
        assert ray.get(
            sneaky_actor_exec.remote(wrapped_actor, 4)) == local_fact(4)
Пример #15
0
def start_client_server_4_cpus():
    """Initialize ray with four CPUs, then yield the object produced by
    ``ray_start_client_server`` while that context stays open.
    """
    ray.init(num_cpus=4)
    with ray_start_client_server() as ray_client:
        yield ray_client
Пример #16
0
def start_client_server():
    """Yield the object produced by ``ray_start_client_server`` while that
    context stays open.
    """
    with ray_start_client_server() as ray_client:
        yield ray_client
Пример #17
0
 def test_connection(self):
     """The client reports connected inside the client-server context and
     not connected once the context has exited.
     """
     with ray_start_client_server():
         connected_inside = ray.util.client.ray.is_connected()
         assert connected_inside
     connected_after = ray.util.client.ray.is_connected()
     assert connected_after is False
Пример #18
0
def test_ray_init_valid_keyword_with_client(shutdown_only):
    """ray.init accepts the ``logging_level`` keyword when given a
    ``ray://`` address.
    """
    address = "ray://localhost:50051"
    with ray_start_client_server() as initial_connection:
        # Drop the connection made by the helper before connecting again.
        initial_connection.disconnect()
        # logging_level is meant to be forwarded to the server.
        with ray.init(address, logging_level=logging.INFO):
            pass
Пример #19
0
def test_serializing_exceptions(ray_start_regular_shared):
    """Looking up an unknown actor name through the client raises ValueError."""
    unknown_name = "abc"
    with ray_start_client_server() as ray:
        with pytest.raises(ValueError):
            ray.get_actor(unknown_name)
Пример #20
0
def test_client_gpu_ids(call_ray_stop_only):
    """get_gpu_ids() called through the client returns an empty list after
    ray was initialized with CPUs only.
    """
    import ray

    ray.init(num_cpus=2)

    # `ray` is rebound here to the object yielded by the client-server helper.
    with ray_start_client_server() as ray:
        gpu_ids = ray.get_gpu_ids()
        assert gpu_ids == []