示例#1
0
def test_multiple_waits_and_gets(shutdown_only):
    """Check that several wait/get requests on one object ref all return.

    A slow producer task creates an object ref; two consumer tasks then
    either ``ray.wait`` or ``ray.get`` on that same ref, and the driver
    blocks until both consumers have finished.
    """
    # Three CPUs are required so that the three tasks launched in each
    # round (one producer, two consumers) can run at the same time.
    ray.init(num_cpus=3)

    @ray.remote
    def f(delay):
        time.sleep(delay)
        return 1

    @ray.remote
    def g(input_list):
        # input_list is expected to be a list holding one object ref.
        ray.wait([input_list[0]])

    @ray.remote
    def h(input_list):
        # input_list is expected to be a list holding one object ref.
        ray.get(input_list[0])

    # Two concurrent wait requests on the same object ref must both return.
    x = f.remote(1)
    waiters = [g.remote([x]) for _ in range(2)]
    ray.get(waiters)

    # Two concurrent get requests on the same object ref must both return.
    x = f.remote(1)
    getters = [h.remote([x]) for _ in range(2)]
    ray.get(getters)
示例#2
0
def test_wait_makes_object_local(ray_start_cluster):
    """Check that both ``ray.get`` and ``ray.wait`` make an object local.

    The cluster has one node with zero CPUs and one node with two CPUs.
    The object is produced by an actor and is asserted to be absent from
    the driver's core worker before the call and present afterwards.
    """
    cluster = ray_start_cluster
    cluster.add_node(num_cpus=0)
    cluster.add_node(num_cpus=2)
    ray.init(address=cluster.address)

    @ray.remote
    class Foo:
        def method(self):
            return np.zeros(1024 * 1024)

    def is_local(ref):
        # Ask the driver's core worker whether it currently has the object.
        return ray.worker.global_worker.core_worker.object_exists(ref)

    a = Foo.remote()

    # ray.get must make the object local.
    x_id = a.method.remote()
    assert not is_local(x_id)
    ray.get(x_id)
    assert is_local(x_id)

    # ray.wait must make the object local as well.
    x_id = a.method.remote()
    assert not is_local(x_id)
    ready, _ = ray.wait([x_id])
    assert len(ready) == 1
    assert is_local(x_id)
示例#3
0
def test_actor_pass_by_ref_order_optimization(shutdown_only):
    """Check that an actor call with a ready argument is not stuck behind
    an earlier call whose argument is still being computed.

    A first runner submits an actor call depending on a 30 second task; a
    second runner submits one depending on a task that returns at once.
    The second runner must finish in under 10 seconds.
    """
    ray.init(num_cpus=4)

    @ray.remote
    class Actor:
        def __init__(self):
            pass

        def f(self, x):
            pass

    a = Actor.remote()

    @ray.remote
    def fast_value():
        print("fast value")

    @ray.remote
    def slow_value():
        print("start sleep")
        time.sleep(30)

    @ray.remote
    def runner(f):
        # Submit f, then pass its (possibly unresolved) result to the actor.
        print("runner", a, f)
        return ray.get(a.f.remote(f.remote()))

    # Queue the slow call first and give it a head start.
    runner.remote(slow_value)
    time.sleep(1)

    fast_ref = runner.remote(fast_value)
    began = time.time()
    ray.get(fast_ref)
    elapsed = time.time() - began
    assert elapsed < 10, "did not skip slow value"
示例#4
0
文件: test_advanced.py 项目: rlan/ray
def test_actor_distribution_balance(ray_start_cluster, args):
    """Check how actors are spread over the nodes of a cluster.

    Args:
        ray_start_cluster: cluster fixture to which nodes are added.
        args: indexable where ``args[0]`` is the number of nodes and
            ``args[1]`` is the number of actors to create.
    """
    cluster = ray_start_cluster
    node_count = args[0]
    actor_count = args[1]

    for index in range(node_count):
        # Only the first node is given the GCS actor scheduling flag.
        if index == 0:
            system_config = {"gcs_actor_scheduling_enabled": True}
        else:
            system_config = {}
        cluster.add_node(memory=1024**3, _system_config=system_config)
    ray.init(address=cluster.address)
    cluster.wait_for_nodes()

    @ray.remote(memory=100 * 1024**2, num_cpus=0.01)
    class Foo:
        def method(self):
            return ray.worker.global_worker.node.unique_id

    # Group the actors by the id of the node that each one reports.
    actors_by_node = {}
    for actor in [Foo.remote() for _ in range(actor_count)]:
        node_id = ray.get(actor.method.remote())
        actors_by_node.setdefault(node_id, []).append(actor)

    if node_count >= actor_count:
        # Enough nodes for everyone: each actor sits alone on a node.
        assert len(actors_by_node) == actor_count
        for placed in actors_by_node.values():
            assert len(placed) == 1
    else:
        # More actors than nodes: every node is used and none is overloaded.
        assert len(actors_by_node) == node_count
        for placed in actors_by_node.values():
            assert len(placed) <= int(actor_count / node_count)
示例#5
0
def test_task_arguments_inline_bytes_limit(ray_start_cluster):
    """Submit a task with three array arguments under a small inline limit.

    The head node is configured with ``task_rpc_inlined_bytes_limit`` below
    ``max_grpc_message_size``. A task pinned to the head node passes three
    1024-element random arrays by value to a task pinned to the worker node.
    """
    cluster = ray_start_cluster
    head_system_config = {
        "max_direct_call_object_size": 100 * 1024,
        # This test fails if task_rpc_inlined_bytes_limit is greater than
        # max_grpc_message_size.
        "task_rpc_inlined_bytes_limit": 18 * 1024,
        "max_grpc_message_size": 20 * 1024,
        "put_small_object_in_memory_store": True,
    }
    cluster.add_node(
        num_cpus=1,
        resources={"pin_head": 1},
        _system_config=head_system_config,
    )
    cluster.add_node(num_cpus=1, resources={"pin_worker": 1})
    ray.init(address=cluster.address)

    @ray.remote(resources={"pin_worker": 1})
    def foo(ref1, ref2, ref3):
        return ref1 == ref2 + ref3

    @ray.remote(resources={"pin_head": 1})
    def bar():
        # The arrays are passed by value on purpose; per the original
        # author's note, the test fails if the refs are inlined.
        arrays = [np.random.rand(1024) for _ in range(3)]  # 8k each
        return ray.get(foo.remote(*arrays))

    ray.get(bar.remote())
示例#6
0
def test_future_resolution_skip_plasma(ray_start_cluster):
    """Check that a borrowed small object is found in the in-memory store.

    The driver puts the value 1 and passes the ref inside a list to ``g`` on
    the worker node. ``g`` forwards the borrowed ref to ``f`` on the head
    node and asserts the ref exists in the in-memory store.
    """
    cluster = ray_start_cluster
    # Worker caching is disabled so worker leases are not reused; the
    # inlining size threshold is set and small objects are stored in the
    # in-memory object store so that the borrowed ref is inlined.
    head_system_config = {
        "worker_lease_timeout_milliseconds": 0,
        "max_direct_call_object_size": 100 * 1024,
        "put_small_object_in_memory_store": True,
    }
    cluster.add_node(
        num_cpus=1,
        resources={"pin_head": 1},
        _system_config=head_system_config,
    )
    cluster.add_node(num_cpus=1, resources={"pin_worker": 1})
    ray.init(address=cluster.address)

    @ray.remote(resources={"pin_head": 1})
    def f(x):
        return x + 1

    @ray.remote(resources={"pin_worker": 1})
    def g(x):
        borrowed_ref = x[0]
        f_ref = f.remote(borrowed_ref)
        # On future resolution the borrowed ref should have been inlined,
        # so it must be in the memory store rather than in Plasma.
        core_worker = ray.worker.global_worker.core_worker
        assert core_worker.object_exists(borrowed_ref, memory_store_only=True)
        return ray.get(f_ref) * 2

    one_ref = ray.put(1)
    result_ref = g.remote([one_ref])
    assert ray.get(result_ref) == 4
示例#7
0
def test_internal_free(shutdown_only):
    """Check that ``ray.internal.free`` makes a later ``ray.get`` raise.

    Covers a small list result and a large numpy array result, both
    produced by an actor.
    """
    ray.init(num_cpus=1)

    @ray.remote
    class Sampler:
        def sample(self):
            return [1, 2, 3, 4, 5]

        def sample_big(self):
            return np.zeros(1024 * 1024)

    def assert_get_fails(ref):
        # Fetching a freed object is expected to raise.
        with pytest.raises(Exception):
            ray.get(ref)

    sampler = Sampler.remote()

    # Freeing removes the small object from the in-memory store.
    small_ref = sampler.sample.remote()
    ray.get(small_ref)
    ray.internal.free(small_ref)
    assert_get_fails(small_ref)

    # Freeing removes the big object from the plasma store.
    big_ref = sampler.sample_big.remote()
    ray.get(big_ref)
    ray.internal.free(big_ref)
    time.sleep(1)  # wait for delete RPC to propagate
    assert_get_fails(big_ref)
示例#8
0
def test_redefining_remote_functions(shutdown_only):
    """Check that remote functions can be redefined under the same name.

    Three scenarios are covered, in order: redefining ``f`` with a different
    body, redefining ``g`` with an identical body after the helper it calls
    becomes available, and defining a remote function inside another task
    repeatedly with a different closed-over value.
    """
    ray.init(num_cpus=1)

    # Test that we can define a remote function in the shell.
    @ray.remote
    def f(x):
        return x + 1

    assert ray.get(f.remote(0)) == 1

    # Test that we can redefine the remote function.
    @ray.remote
    def f(x):
        return x + 10

    # Either definition is accepted at first; keep calling until the new
    # definition (which returns 10) is the one being used.
    # NOTE(review): this loop has no timeout, so it would spin forever if
    # the new definition never took effect.
    while True:
        val = ray.get(f.remote(0))
        assert val in [1, 10]
        if val == 10:
            break
        else:
            logger.info("Still using old definition of f, trying again.")

    # Check that we can redefine functions even when the remote function source
    # doesn't change (see https://github.com/ray-project/ray/issues/6130).
    @ray.remote
    def g():
        return nonexistent()

    # `nonexistent` is not defined yet, so the task must fail and the error
    # must mention the missing name.
    with pytest.raises(RayTaskError, match="nonexistent"):
        ray.get(g.remote())

    def nonexistent():
        return 1

    # Redefine the function and make sure it succeeds.
    @ray.remote
    def g():
        return nonexistent()

    assert ray.get(g.remote()) == 1

    # Check the same thing but when the redefined function is inside of another
    # task.
    @ray.remote
    def h(i):
        @ray.remote
        def j():
            return i

        return j.remote()

    # h returns an object ref to j's result, hence the nested ray.get.
    for i in range(20):
        assert ray.get(ray.get(h.remote(i))) == i
示例#9
0
def test_object_transfer_dump(ray_start_cluster_enabled):
    """Check the object transfer timeline after broadcasting objects.

    One object is created per node, each object is then passed to a task on
    every node, and the resulting transfer timeline is checked for JSON
    serializability, minimum size, and the number of distinct pids seen in
    receive and send events.
    """
    cluster = ray_start_cluster_enabled

    num_nodes = 3
    for node_index in range(num_nodes):
        cluster.add_node(
            resources={str(node_index): 1}, object_store_memory=10 ** 9
        )
    ray.init(address=cluster.address)

    @ray.remote
    def f(x):
        return

    def on_every_node(argument):
        # Launch f once per node, pinned through that node's own resource.
        return [
            f._remote(args=[argument], resources={str(node_index): 1})
            for node_index in range(num_nodes)
        ]

    # These objects will live on different nodes.
    object_refs = on_every_node(1)

    # Broadcast each object from each machine to each other machine.
    for object_ref in object_refs:
        ray.get(on_every_node(object_ref))

    # The profiling information only flushes once every second.
    time.sleep(1.1)

    transfer_dump = ray.state.object_transfer_timeline()

    def pids_with_event(event_name):
        # Distinct pids that reported at least one event of this name.
        return {
            event["pid"]
            for event in transfer_dump
            if event["name"] == event_name
        }

    # Make sure the transfer dump can be serialized with JSON.
    json.loads(json.dumps(transfer_dump))
    assert len(transfer_dump) >= num_nodes ** 2
    assert len(pids_with_event("transfer_receive")) == num_nodes
    assert len(pids_with_event("transfer_send")) == num_nodes
示例#10
0
def test_use_dynamic_function_and_class():
    """Check that dynamically defined functions and classes are exported.

    Dynamically defined remote functions and actor classes are used for
    tasks and actors while a ``code_search_path`` is configured, and the
    corresponding keys are then looked up in the GCS internal KV store.
    See https://github.com/ray-project/ray/issues/12834.
    """
    ray.shutdown()
    current_path = os.path.dirname(__file__)
    ray.init(job_config=ray.job_config.JobConfig(code_search_path=[current_path]))

    def foo1():
        @ray.remote
        def foo2():
            return "OK"

        return foo2

    @ray.remote
    class Foo:
        @ray.method(num_returns=1)
        def foo(self):
            return "OK"

    def export_key(prefix, descriptor):
        # The key format should be kept the same as in
        # `FunctionActorManager.export` / `export_actor_class`.
        job_id_hex = ray._private.worker.global_worker.current_job_id.hex()
        return prefix + job_id_hex.encode() + b":" + descriptor.function_id.binary()

    f = foo1()
    assert ray.get(f.remote()) == "OK"
    # Check whether the dynamic function is exported to GCS.
    key_func = export_key(b"RemoteFunction:", f._function_descriptor)
    assert ray._private.worker.global_worker.gcs_client.internal_kv_exists(
        key_func, KV_NAMESPACE_FUNCTION_TABLE
    )

    foo_actor = Foo.remote()
    assert ray.get(foo_actor.foo.remote()) == "OK"
    # Check whether the dynamic class is exported to GCS.
    key_cls = export_key(
        b"ActorClass:", foo_actor._ray_actor_creation_function_descriptor
    )
    assert ray._private.worker.global_worker.gcs_client.internal_kv_exists(
        key_cls, namespace=KV_NAMESPACE_FUNCTION_TABLE
    )
示例#11
0
def test_defining_remote_functions(shutdown_only):
    """Check that remote functions can close over data, modules and tasks.

    Covers closing over plain data, closing over imported modules
    (``np`` and ``time``), and remote functions that call other remote
    functions up to two levels deep.
    """
    ray.init(num_cpus=3)

    # Test that we can close over plain old data.
    data = [
        np.zeros([3, 5]),
        (1, 2, "a"),
        [0.0, 1.0, 1 << 62],
        1 << 60,
        {"a": np.zeros(3)},
    ]

    @ray.remote
    def g():
        return data

    ray.get(g.remote())

    # Test that we can close over modules.
    @ray.remote
    def h():
        return np.zeros([3, 5])

    # np.alltrue was deprecated and then removed in NumPy 2.0;
    # np.array_equal compares both shape and contents.
    assert np.array_equal(ray.get(h.remote()), np.zeros([3, 5]))

    @ray.remote
    def j():
        return time.time()

    ray.get(j.remote())

    # Test that we can define remote functions that call other remote
    # functions.
    @ray.remote
    def k(x):
        return x + 1

    @ray.remote
    def k2(x):
        return ray.get(k.remote(x))

    @ray.remote
    def m(x):
        return ray.get(k2.remote(x))

    assert ray.get(k.remote(1)) == 2
    assert ray.get(k2.remote(1)) == 2
    assert ray.get(m.remote(1)) == 2
示例#12
0
def client_mode_should_convert(*, auto_init: bool):
    """Decide whether a call should be routed through client mode.

    When ``auto_init`` is true, Ray is initialized first, unless the
    ``RAY_ENABLE_AUTO_CONNECT`` environment variable is ``"0"`` or Ray is
    already initialized.

    NOTE: the auto-initialization has to run before branching into regular
    Ray or client code, because initialization may result in either mode.

    Args:
        auto_init: whether Ray may be auto-initialized by this call.

    Returns:
        A truthy value when client mode is enabled (explicitly or by
        default) and the client hook is active on the current thread.
    """
    if auto_init:
        import ray

        auto_connect_allowed = (
            os.environ.get("RAY_ENABLE_AUTO_CONNECT", "") != "0"
        )
        if auto_connect_allowed and not ray.is_initialized():
            ray.init()

    # `is_client_mode_enabled_by_default` is used for testing with
    # `RAY_CLIENT_MODE=1`, which makes all tests run with client mode.
    client_mode_on = is_client_mode_enabled or is_client_mode_enabled_by_default
    return client_mode_on and _get_client_hook_status_on_thread()
示例#13
0
def test_system_config_when_connecting(ray_start_cluster):
    """Check ``_system_config`` handling when connecting to a cluster.

    Passing ``_system_config`` to ``ray.init`` together with an address
    must raise ``ValueError``; a plain connection must then succeed and the
    first object put must still be retrievable after further large puts.
    """
    config = {"object_timeout_milliseconds": 200}
    # NOTE(review): a fresh Cluster is built here and the
    # `ray_start_cluster` fixture argument is never used in the body;
    # presumably it is requested only for setup/teardown - confirm.
    cluster = Cluster()
    cluster.add_node(_system_config=config, object_store_memory=100 * 1024 * 1024)
    cluster.wait_for_nodes()

    # Specifying _system_config when connecting to a cluster is disallowed.
    with pytest.raises(ValueError):
        ray.init(address=cluster.address, _system_config=config)

    # Check that the config was picked up (object pinning is disabled).
    ray.init(address=cluster.address)
    obj_ref = ray.put(np.zeros(40 * 1024 * 1024, dtype=np.uint8))

    # Put five more 40 MiB arrays into the 100 MiB store; each iteration
    # rebinds put_ref, so only the latest ref is still held.
    for _ in range(5):
        put_ref = ray.put(np.zeros(40 * 1024 * 1024, dtype=np.uint8))
    del put_ref

    # The first object must still be retrievable.
    ray.get(obj_ref)
示例#14
0
def test_variable_number_of_args(shutdown_only):
    """Check remote functions that take ``*args`` or no arguments at all.

    Covers pure varargs, positional parameters followed by varargs, the
    errors raised when required positional parameters are missing, and a
    remote function with an empty parameter list.
    """
    ray.init(num_cpus=1)

    @ray.remote
    def varargs_fct1(*a):
        return " ".join(map(str, a))

    @ray.remote
    def varargs_fct2(a, *b):
        return " ".join(map(str, b))

    x = varargs_fct1.remote(0, 1, 2)
    assert ray.get(x) == "0 1 2"
    x = varargs_fct2.remote(0, 1, 2)
    assert ray.get(x) == "1 2"

    @ray.remote
    def f1(*args):
        return args

    @ray.remote
    def f2(x, y, *args):
        return x, y, args

    assert ray.get(f1.remote()) == ()
    assert ray.get(f1.remote(1)) == (1, )
    assert ray.get(f1.remote(1, 2, 3)) == (1, 2, 3)
    # f2 requires two positional arguments; submitting with fewer must fail
    # at call time, before anything is fetched.
    with pytest.raises(Exception):
        f2.remote()
    with pytest.raises(Exception):
        f2.remote(1)
    assert ray.get(f2.remote(1, 2)) == (1, 2, ())
    assert ray.get(f2.remote(1, 2, 3)) == (1, 2, (3, ))
    assert ray.get(f2.remote(1, 2, 3, 4)) == (1, 2, (3, 4))

    # The no-argument case used to sit in a nested `testNoArgs(self)`
    # function that was never called, so it never ran. Run it directly;
    # Ray is already initialized above.
    @ray.remote
    def no_op():
        pass

    assert ray.get(no_op.remote()) is None
示例#15
0
def test_caching_functions_to_run(shutdown_only):
    """Check that functions registered before ``ray.init`` run on workers.

    Four functions are registered with ``run_function_on_all_workers``
    before Ray is initialized, each appending a distinct integer to
    ``sys.path``. Tasks then verify that the last four ``sys.path`` entries
    are 1, 2, 3, 4 in that order.
    """
    # Test that we export functions to run on all workers before the driver
    # is connected.
    def f(worker_info):
        sys.path.append(1)

    ray.worker.global_worker.run_function_on_all_workers(f)

    # The name `f` is deliberately reused for a different function.
    def f(worker_info):
        sys.path.append(2)

    ray.worker.global_worker.run_function_on_all_workers(f)

    def g(worker_info):
        sys.path.append(3)

    ray.worker.global_worker.run_function_on_all_workers(g)

    def f(worker_info):
        sys.path.append(4)

    ray.worker.global_worker.run_function_on_all_workers(f)

    ray.init(num_cpus=1)

    @ray.remote
    def get_state():
        time.sleep(1)
        # Report the four most recently appended sys.path entries.
        return sys.path[-4], sys.path[-3], sys.path[-2], sys.path[-1]

    res1 = get_state.remote()
    res2 = get_state.remote()
    assert ray.get(res1) == (1, 2, 3, 4)
    assert ray.get(res2) == (1, 2, 3, 4)

    # Clean up the path on the workers.
    def f(worker_info):
        # One pop per integer appended above.
        sys.path.pop()
        sys.path.pop()
        sys.path.pop()
        sys.path.pop()

    ray.worker.global_worker.run_function_on_all_workers(f)
示例#16
0
def test_schedule_actor_and_normal_task(ray_start_cluster):
    """Check that an actor waits while a normal task holds the memory.

    The single node has 1 GiB of memory; the task and the actor each
    request 600 MiB. While the task is blocked the actor's method must not
    complete within 2 seconds; once the task is released both finish.
    """
    cluster = ray_start_cluster
    cluster.add_node(
        memory=1024 ** 3,
        _system_config={"gcs_actor_scheduling_enabled": True},
    )
    ray.init(address=cluster.address)
    cluster.wait_for_nodes()

    @ray.remote(memory=600 * 1024 ** 2, num_cpus=0.01)
    class Foo:
        def method(self):
            return 2

    @ray.remote(memory=600 * 1024 ** 2, num_cpus=0.01)
    def fun(release_signal, started_signal):
        # Announce that the task is running, then block until released.
        started_signal.send.remote()
        ray.get(release_signal.wait.remote())
        return 1

    release_signal = SignalActor.remote()
    started_signal = SignalActor.remote()

    task_ref = fun.remote(release_signal, started_signal)
    # Make sure the normal task is executing.
    ray.get(started_signal.wait.remote())

    # The normal task is blocked now. Try to create the actor and make
    # sure it is not created for the time being.
    foo = Foo.remote()
    actor_ref = foo.method.remote()
    ready_list, remaining_list = ray.wait([actor_ref], timeout=2)
    assert len(ready_list) == 0 and len(remaining_list) == 1

    # Send a signal to unblock the normal task execution.
    ray.get(release_signal.send.remote())

    # Check the result of the normal task.
    assert ray.get(task_ref) == 1

    # Make sure the actor is created.
    assert ray.get(actor_ref) == 2
示例#17
0
def test_actor_call_order(shutdown_only):
    """Check that actor calls run in submission order.

    Every call depends on a task that sleeps for a random short time. The
    actor asserts that the counter passed in matches its own running count.
    """
    ray.init(num_cpus=4)

    @ray.remote
    def small_value():
        # Sleep between 0 and 90 ms so dependencies resolve out of order.
        time.sleep(0.01 * np.random.randint(0, 10))
        return 0

    @ray.remote
    class Actor:
        def __init__(self):
            self.count = 0

        def inc(self, count, dependency):
            assert count == self.count
            self.count += 1
            return count

    a = Actor.remote()
    call_refs = [a.inc.remote(i, small_value.remote()) for i in range(100)]
    assert ray.get(call_refs) == list(range(100))
示例#18
0
def test_task_output_inline_bytes_limit(ray_start_cluster):
    """Check that only part of a task's outputs is inlined under a limit.

    ``f`` returns five small values with ``task_output_inlined_bytes_limit``
    set to 20. The consumer asserts that the first two refs are found in the
    in-memory store and the remaining three are not.
    """
    cluster = ray_start_cluster
    # Disable worker caching so worker leases are not reused; set object
    # inlining size threshold and enable storing of small objects in in-memory
    # object store so the borrowed ref is inlined.
    # set task_output_inlined_bytes_limit which only allows inline 20 bytes.
    cluster.add_node(
        num_cpus=1,
        resources={"pin_head": 1},
        _system_config={
            "worker_lease_timeout_milliseconds": 0,
            "max_direct_call_object_size": 100 * 1024,
            "task_output_inlined_bytes_limit": 20,
            "put_small_object_in_memory_store": True,
        },
    )
    cluster.add_node(num_cpus=1, resources={"pin_worker": 1})
    ray.init(address=cluster.address)

    @ray.remote(num_returns=5, resources={"pin_head": 1})
    def f():
        return list(range(5))

    # Named sum_refs rather than `sum` so the builtin is not shadowed.
    @ray.remote(resources={"pin_worker": 1})
    def sum_refs(numbers):
        result = 0
        for i, ref in enumerate(numbers):
            result += ray.get(ref)
            inlined = ray.worker.global_worker.core_worker.object_exists(
                ref, memory_store_only=True)
            # Only the first two outputs are expected to be inlined.
            if i < 2:
                assert inlined
            else:
                assert not inlined
        return result

    results = f.remote()
    total_ref = sum_refs.remote(results)
    assert ray.get(total_ref) == 10
示例#19
0
def test_worker_lease_reply_with_resources(ray_start_cluster_enabled):
    """Check that an actor lands on the second node while a task runs.

    A long-running normal task requesting 1500 MiB is started first, then an
    actor requesting 800 MiB is created. The actor's method must return
    within 10 seconds and report the second node's id.
    """
    cluster = ray_start_cluster_enabled
    first_node_config = {
        "gcs_resource_report_poll_period_ms": 1000000,
        "gcs_actor_scheduling_enabled": True,
    }
    cluster.add_node(
        memory=2000 * 1024**2,
        num_cpus=1,
        _system_config=first_node_config,
    )
    node2 = cluster.add_node(memory=1000 * 1024**2, num_cpus=1)
    ray.init(address=cluster.address)
    cluster.wait_for_nodes()

    @ray.remote(memory=1500 * 1024**2, num_cpus=0.01)
    def fun(signal):
        signal.send.remote()
        time.sleep(30)
        return 0

    started = SignalActor.remote()
    fun.remote(started)
    # Make sure that `fun` is running.
    ray.get(started.wait.remote())

    @ray.remote(memory=800 * 1024**2, num_cpus=0.01)
    class Foo:
        def method(self):
            return ray.worker.global_worker.node.unique_id

    foo1 = Foo.remote()
    node_id_ref = foo1.method.remote()
    ready_list, remaining_list = ray.wait([node_id_ref], timeout=10)
    # If RequestWorkerLeaseReply carries normal task resources, GCS will
    # then schedule foo1 to node2. Otherwise, GCS would keep trying to
    # schedule foo1 to node1 and getting rejected.
    assert len(ready_list) == 1 and len(remaining_list) == 0
    assert ray.get(node_id_ref) == node2.unique_id
示例#20
0
def test_schedule_many_actors_and_normal_tasks(ray_start_cluster):
    """Run many actors and normal tasks together on a ten-node cluster.

    1000 normal tasks and 50 actors (50 method calls each) are submitted,
    each requesting 100 MiB of memory, and every result is checked.
    """
    cluster = ray_start_cluster

    node_count = 10
    actor_count = 50
    each_actor_task_count = 50
    normal_task_count = 1000
    node_memory = 2 * 1024**3

    for i in range(node_count):
        # Only the first node is given the GCS actor scheduling flag.
        cluster.add_node(
            memory=node_memory,
            _system_config={"gcs_actor_scheduling_enabled": True}
            if i == 0 else {},
        )
    ray.init(address=cluster.address)
    cluster.wait_for_nodes()

    @ray.remote(memory=100 * 1024**2, num_cpus=0.01)
    class Foo:
        def method(self):
            return 2

    @ray.remote(memory=100 * 1024**2, num_cpus=0.01)
    def fun():
        return 1

    normal_task_object_list = [fun.remote() for _ in range(normal_task_count)]
    actor_list = [Foo.remote() for _ in range(actor_count)]
    # Calls are issued round by round: one call per actor in each round.
    actor_object_list = [
        actor.method.remote() for _ in range(each_actor_task_count)
        for actor in actor_list
    ]
    # The loop variables are named `result` so the builtin `object` is not
    # shadowed.
    for result in ray.get(actor_object_list):
        assert result == 2

    for result in ray.get(normal_task_object_list):
        assert result == 1
示例#21
0
def test_wait_cluster(ray_start_cluster):
    """Check that ``ray.wait`` with a zero timeout sees finished tasks.

    A first batch of tasks is timed; a second batch is then given twice
    that long to finish before ``ray.wait`` is called with ``timeout=0``.
    """
    cluster = ray_start_cluster
    for _ in range(2):
        cluster.add_node(num_cpus=1, resources={"RemoteResource": 1})
    ray.init(address=cluster.address)

    @ray.remote(resources={"RemoteResource": 1})
    def f():
        return

    # Make sure we have enough workers on the remote nodes to execute some
    # tasks, and measure how long one batch takes.
    tasks = [f.remote() for _ in range(10)]
    began = time.time()
    ray.get(tasks)
    batch_seconds = time.time() - began

    # Submit some more tasks that can only be executed on the remote nodes.
    tasks = [f.remote() for _ in range(10)]
    # Sleep for a bit to let the tasks finish.
    time.sleep(batch_seconds * 2)
    _, unready = ray.wait(tasks, num_returns=len(tasks), timeout=0)
    # All remote tasks should have finished.
    assert len(unready) == 0
示例#22
0
def test_call_matrix(shutdown_only):
    """Pass objects between tasks and actors in every combination.

    The matrix covers: producer is a task or an actor, consumer is a task
    or an actor, the value is small (an int) or large (a numpy array), and
    the ref is passed directly or wrapped in a list.
    """
    ray.init(object_store_memory=1000 * 1024 * 1024)

    @ray.remote
    class Actor:
        def small_value(self):
            return 0

        def large_value(self):
            return np.zeros(10 * 1024 * 1024)

        def echo(self, x):
            if isinstance(x, list):
                x = ray.get(x[0])
            return x

    @ray.remote
    def small_value():
        return 0

    @ray.remote
    def large_value():
        return np.zeros(10 * 1024 * 1024)

    @ray.remote
    def echo(x):
        if isinstance(x, list):
            x = ray.get(x[0])
        return x

    def check(source_actor, dest_actor, is_large, out_of_band):
        source_label = "actor" if source_actor else "task"
        dest_label = "actor" if dest_actor else "task"
        size_label = "large_object" if is_large else "small_object"
        band_label = "out_of_band" if out_of_band else "in_band"
        print("CHECKING", source_label, "to", dest_label, size_label,
              band_label)

        # Produce the value from either an actor method or a plain task.
        if source_actor:
            producer = Actor.remote()
            make_value = (producer.large_value
                          if is_large else producer.small_value)
        else:
            make_value = large_value if is_large else small_value
        x_id = make_value.remote()

        # Out-of-band means the ref travels inside a list.
        if out_of_band:
            x_id = [x_id]

        # Echo the value back through either an actor method or a task.
        if dest_actor:
            consumer = Actor.remote()
            x = ray.get(consumer.echo.remote(x_id))
        else:
            x = ray.get(echo.remote(x_id))

        expected_type = np.ndarray if is_large else int
        assert isinstance(x, expected_type)

    flags = (False, True)
    combinations = [(is_large, source_actor, dest_actor, out_of_band)
                    for is_large in flags
                    for source_actor in flags
                    for dest_actor in flags
                    for out_of_band in flags]
    for is_large, source_actor, dest_actor, out_of_band in combinations:
        check(source_actor, dest_actor, is_large, out_of_band)