def test_actor_recursive(ray_start_regular_shared):
    """Check that actor calls can be forwarded through a chain of actors.

    Three actors are chained (outer -> middle -> inner). A call on the outer
    actor must yield the value computed by the innermost one, both when the
    refs are fetched directly and when they are first passed through
    ``ray.wait``.
    """

    @ray.remote
    class Actor:
        def __init__(self, delegate=None):
            self.delegate = delegate

        def f(self, x):
            # Compute locally when there is no delegate; otherwise forward
            # the call and block on the delegate's answer.
            if not self.delegate:
                return x * 2
            return ray.get(self.delegate.f.remote(x))

    inner = Actor.remote()
    middle = Actor.remote(inner)
    outer = Actor.remote(middle)
    expected = [x * 2 for x in range(100)]

    fetched = ray.get([outer.f.remote(i) for i in range(100)])
    assert fetched == expected

    pending = [outer.f.remote(i) for i in range(100)]
    ready_refs, _ = ray.wait(pending, num_returns=100)
    fetched = ray.get(ready_refs)
    assert fetched == expected
def __dir__(self) -> List[str]:
    """Return the attribute names for this object, caching a remote lookup.

    If ``self._dir`` is already populated it is returned as-is. Otherwise,
    when Ray is connected, a zero-CPU remote task evaluates ``dir()`` on this
    object and the result is stored in ``self._dir``. When Ray is not
    connected, the default ``__dir__`` of the parent class is used and
    nothing is cached.
    """
    if self._dir is not None:
        return self._dir

    if not ray.is_connected():
        return super().__dir__()

    @ray.remote(num_cpus=0)
    def get_dir(x):
        return dir(x)

    dir_ref = get_dir.remote(self)
    self._dir = ray.get(dir_ref)
    return self._dir
def sum():
    """Add up the values behind the refs returned by ``f.remote()``.

    While summing, asserts that the first two refs are present in the
    in-memory store and that the remaining ones are not.
    """
    numbers = f.remote()
    total = 0
    for index, ref in enumerate(numbers):
        total += ray.get(ref)
        in_memory_store = ray.worker.global_worker.core_worker.object_exists(
            ref, memory_store_only=True)
        # Only the first two returns are expected to have been inlined.
        assert in_memory_store if index < 2 else not in_memory_store
    return total
def bar():
    """Call ``foo`` remotely with three by-value arrays and return its result.

    Each argument is a fresh ``np.random.rand(1024)`` array (about 8k each).
    """
    # if the refs are inlined, the test fails.
    # refs = [ray.put(np.random.rand(1024) for _ in range(3))]
    # return ray.get(
    #     foo.remote(refs[0], refs[1], refs[2]))
    first = np.random.rand(1024)  # 8k
    second = np.random.rand(1024)  # 8k
    third = np.random.rand(1024)  # 8k
    result_ref = foo.remote(first, second, third)
    return ray.get(result_ref)
def test_call_actors_indirect_through_tasks(ray_start_regular_shared):
    """Check that actor handles passed into tasks can be called from there.

    The handle is passed both directly as a task argument and nested inside
    a list; each task invokes ``increase`` on the actor and waits for it.
    """

    @ray.remote
    class Counter:
        def __init__(self, value):
            self.value = int(value)

        def increase(self, delta):
            self.value += int(delta)
            return self.value

    @ray.remote
    def foo(handle):
        return ray.get(handle.increase.remote(1))

    @ray.remote
    def bar(handle):
        return ray.get(handle.increase.remote(1))

    @ray.remote
    def zoo(handles):
        # The handle arrives wrapped in a list.
        return ray.get(handles[0].increase.remote(1))

    counter = Counter.remote(0)
    for _ in range(100):
        ray.get(foo.remote(counter))
        ray.get(bar.remote(counter))
        ray.get(zoo.remote([counter]))
def test_create_remote_before_start(ray_start_regular_shared):
    """Define remote objects before the client is started, then use them.

    Mirrors a library that declares remote functions and actors at import
    time, before any client connection exists.
    """
    from ray.util.client import ray

    @ray.remote
    class Returner:
        def doit(self):
            return "foo"

    @ray.remote
    def f(x):
        return x + 20

    # Prints in verbose tests
    print("Created remote functions")

    with ray_start_client_server() as ray:
        task_result = ray.get(f.remote(3))
        assert task_result == 23

        returner = Returner.remote()
        actor_result = ray.get(returner.doit.remote())
        assert actor_result == "foo"
def test_redefining_remote_functions(shutdown_only): ray.init(num_cpus=1) # Test that we can define a remote function in the shell. @ray.remote def f(x): return x + 1 assert ray.get(f.remote(0)) == 1 # Test that we can redefine the remote function. @ray.remote def f(x): return x + 10 while True: val = ray.get(f.remote(0)) assert val in [1, 10] if val == 10: break else: logger.info("Still using old definition of f, trying again.") # Check that we can redefine functions even when the remote function source # doesn't change (see https://github.com/ray-project/ray/issues/6130). @ray.remote def g(): return nonexistent() with pytest.raises(RayTaskError, match="nonexistent"): ray.get(g.remote()) def nonexistent(): return 1 # Redefine the function and make sure it succeeds. @ray.remote def g(): return nonexistent() assert ray.get(g.remote()) == 1 # Check the same thing but when the redefined function is inside of another # task. @ray.remote def h(i): @ray.remote def j(): return i return j.remote() for i in range(20): assert ray.get(ray.get(h.remote(i))) == i
def _init_class_info(self):
    """Populate method return counts and signatures from a remote lookup.

    A zero-CPU remote task reads ``_ray_method_num_returns`` and
    ``_ray_method_signatures`` from this object. The first is stored in
    ``self._method_num_returns``; the second is turned into a mapping of
    method name to ``inspect.Signature`` in ``self._method_signatures``.
    """

    # TODO: fetch Ray method decorators
    @ray.remote(num_cpus=0)
    def get_class_info(x):
        return x._ray_method_num_returns, x._ray_method_signatures

    info_ref = get_class_info.remote(self)
    self._method_num_returns, method_parameters = ray.get(info_ref)
    self._method_signatures = {
        method: inspect.Signature(parameters=parameters)
        for method, parameters in method_parameters.items()
    }
def test_object_transfer_dump(ray_start_cluster):
    """Check the object transfer timeline after broadcasting between nodes.

    Starts three nodes, each tagged with a custom resource named after its
    index, creates one object per node, and then has every node consume
    every object. The resulting transfer dump must be JSON-serializable and
    contain send and receive events from each node.
    """
    cluster = ray_start_cluster
    num_nodes = 3
    for node_index in range(num_nodes):
        cluster.add_node(
            resources={str(node_index): 1}, object_store_memory=10**9)
    ray.init(address=cluster.address)

    @ray.remote
    def f(x):
        return

    def submit_to_each_node(argument):
        # One task per node, pinned through that node's custom resource.
        return [
            f._remote(args=[argument], resources={str(node_index): 1})
            for node_index in range(num_nodes)
        ]

    # These objects will live on different nodes.
    object_refs = submit_to_each_node(1)

    # Broadcast each object from each machine to each other machine.
    for object_ref in object_refs:
        ray.get(submit_to_each_node(object_ref))

    # The profiling information only flushes once every second.
    time.sleep(1.1)

    transfer_dump = ray.state.object_transfer_timeline()
    # Make sure the transfer dump can be serialized with JSON.
    json.loads(json.dumps(transfer_dump))
    assert len(transfer_dump) >= num_nodes**2

    def pids_with_event(event_name):
        return {
            event["pid"]
            for event in transfer_dump if event["name"] == event_name
        }

    assert len(pids_with_event("transfer_receive")) == num_nodes
    assert len(pids_with_event("transfer_send")) == num_nodes
def test_basic_named_actor(ray_start_regular_shared):
    """Test that ray.get_actor() returns named and detached actors.

    A named actor is looked up while its original handle is alive; a
    detached actor is looked up after its original handle was deleted.
    """
    with ray_start_client_server() as ray:

        @ray.remote
        class Accumulator:
            def __init__(self):
                self.x = 0

            def inc(self):
                self.x += 1

            def get(self):
                return self.x

        # Create the actor
        named_actor = Accumulator.options(name="test_acc").remote()
        for _ in range(2):
            named_actor.inc.remote()

        # Make sure the get_actor call works
        looked_up = ray.get_actor("test_acc")
        looked_up.inc.remote()
        assert ray.get(looked_up.get.remote()) == 3

        del named_actor

        creator_handle = Accumulator.options(
            name="test_acc2", lifetime="detached").remote()
        creator_handle.inc.remote()
        del creator_handle

        # The creating handle is gone; fetch the actor again by name.
        detached_actor = ray.get_actor("test_acc2")
        for _ in range(5):
            detached_actor.inc.remote()

        assert ray.get(detached_actor.get.remote()) == 6
def test_worker_lease_reply_with_resources(ray_start_cluster_enabled):
    """Check that an actor lands on the second node when the first is busy.

    A long-running normal task reserves 1500 MiB on the 2000 MiB node, after
    which an 800 MiB actor no longer fits there and is expected to report
    the second node's id.
    """
    mib = 1024**2
    cluster = ray_start_cluster_enabled
    system_config = {
        "gcs_resource_report_poll_period_ms": 1000000,
        "gcs_actor_scheduling_enabled": True,
    }
    cluster.add_node(
        memory=2000 * mib, num_cpus=1, _system_config=system_config)
    node2 = cluster.add_node(memory=1000 * mib, num_cpus=1)
    ray.init(address=cluster.address)
    cluster.wait_for_nodes()

    @ray.remote(memory=1500 * mib, num_cpus=0.01)
    def fun(signal):
        signal.send.remote()
        time.sleep(30)
        return 0

    signal = SignalActor.remote()
    fun.remote(signal)
    # Make sure that the `fun` is running.
    ray.get(signal.wait.remote())

    @ray.remote(memory=800 * mib, num_cpus=0.01)
    class Foo:
        def method(self):
            return ray.worker.global_worker.node.unique_id

    actor = Foo.remote()
    node_id_ref = actor.method.remote()
    ready_list, remaining_list = ray.wait([node_id_ref], timeout=10)
    # If RequestWorkerLeaseReply carries normal task resources,
    # GCS will then schedule foo1 to node2. Otherwise,
    # GCS would keep trying to schedule foo1 to
    # node1 and getting rejected.
    assert len(ready_list) == 1 and len(remaining_list) == 0
    assert ray.get(node_id_ref) == node2.unique_id
def test_system_config_when_connecting(ray_start_cluster):
    """Check system-config handling when connecting to an existing cluster.

    Passing ``_system_config`` to ``ray.init`` while connecting must raise
    ``ValueError``, and the config given when the node was started must be
    in effect afterwards (an early object is expected to be lost once more
    data has been put into the 100 MiB store).
    """
    # NOTE(review): the `ray_start_cluster` fixture value is not used here; a
    # separate Cluster is created instead -- confirm it is torn down elsewhere.
    config = {"object_pinning_enabled": 0, "object_timeout_milliseconds": 200}
    cluster = ray.cluster_utils.Cluster()
    cluster.add_node(
        _system_config=config, object_store_memory=100 * 1024 * 1024)
    cluster.wait_for_nodes()

    # Specifying _system_config when connecting to a cluster is disallowed.
    with pytest.raises(ValueError):
        ray.init(address=cluster.address, _system_config=config)

    # Check that the config was picked up (object pinning is disabled).
    ray.init(address=cluster.address)
    obj_ref = ray.put(np.zeros(40 * 1024 * 1024, dtype=np.uint8))

    # Put five more 40 MiB objects into the 100 MiB store.
    for _ in range(5):
        put_ref = ray.put(np.zeros(40 * 1024 * 1024, dtype=np.uint8))
    del put_ref

    # This would not raise an exception if object pinning was enabled.
    with pytest.raises(ray.exceptions.ObjectLostError):
        ray.get(obj_ref)
def test_schedule_many_actors_and_normal_tasks(ray_start_cluster):
    """Schedule many actors and normal tasks together on a ten-node cluster.

    Submits 1000 normal tasks, creates 50 actors with 50 method calls each,
    and checks every returned value.
    """
    cluster = ray_start_cluster

    node_count = 10
    actor_count = 50
    each_actor_task_count = 50
    normal_task_count = 1000
    node_memory = 2 * 1024**3
    task_memory = 100 * 1024**2

    for node_index in range(node_count):
        # Only the first node carries the system config.
        if node_index == 0:
            system_config = {"gcs_actor_scheduling_enabled": True}
        else:
            system_config = {}
        cluster.add_node(memory=node_memory, _system_config=system_config)
    ray.init(address=cluster.address)
    cluster.wait_for_nodes()

    @ray.remote(memory=task_memory, num_cpus=0.01)
    class Foo:
        def method(self):
            return 2

    @ray.remote(memory=task_memory, num_cpus=0.01)
    def fun():
        return 1

    normal_task_refs = [fun.remote() for _ in range(normal_task_count)]
    actors = [Foo.remote() for _ in range(actor_count)]

    # Submit one round of calls across all actors at a time.
    actor_task_refs = []
    for _ in range(each_actor_task_count):
        actor_task_refs.extend(actor.method.remote() for actor in actors)

    for value in ray.get(actor_task_refs):
        assert value == 2

    for value in ray.get(normal_task_refs):
        assert value == 1
def test_defining_remote_functions(shutdown_only):
    """Check that remote functions can close over data, modules and tasks.

    Covers closures over plain data, use of imported modules inside a
    remote function, and remote functions that call other remote functions.
    """
    ray.init(num_cpus=3)

    # Test that we can close over plain old data.
    data = [
        np.zeros([3, 5]),
        (1, 2, "a"),
        [0.0, 1.0, 1 << 62],
        1 << 60,
        {"a": np.zeros(3)},
    ]

    @ray.remote
    def g():
        return data

    ray.get(g.remote())

    # Test that we can close over modules.
    @ray.remote
    def h():
        return np.zeros([3, 5])

    # np.alltrue was removed in NumPy 2.0; array_equal compares both shape
    # and contents.
    assert np.array_equal(ray.get(h.remote()), np.zeros([3, 5]))

    @ray.remote
    def j():
        return time.time()

    ray.get(j.remote())

    # Test that we can define remote functions that call other remote
    # functions.
    @ray.remote
    def k(x):
        return x + 1

    @ray.remote
    def k2(x):
        return ray.get(k.remote(x))

    @ray.remote
    def m(x):
        return ray.get(k2.remote(x))

    assert ray.get(k.remote(1)) == 2
    assert ray.get(k2.remote(1)) == 2
    assert ray.get(m.remote(1)) == 2
def test_skip_plasma(ray_start_regular_shared):
    """Check that a small actor return value is not reported by
    ``object_exists`` and can still be fetched with ``ray.get``.
    """

    @ray.remote
    class Actor:
        def __init__(self):
            pass

        def f(self, x):
            return x * 2

    actor = Actor.remote()
    obj_ref = actor.f.remote(1)

    # it is not stored in plasma
    core_worker = ray.worker.global_worker.core_worker
    in_plasma = core_worker.object_exists(obj_ref)
    assert not in_plasma

    assert ray.get(obj_ref) == 2
def background_thread(self, wait_objects):
    """Exercise wait, remote calls and put/get, then record the outcome.

    Appends ``"ok"`` to ``self.thread_results`` when every check passes, or
    the raised exception otherwise. The append happens under ``self.lock``.
    """
    outcome = "ok"
    try:
        # Test wait
        expected_ready = len(wait_objects)
        ready, _ = ray.wait(
            wait_objects, num_returns=expected_ready, timeout=1000.0)
        assert len(ready) == len(wait_objects)

        num = 10
        expected = list(range(num))
        for _ in range(20):
            # Test remote call
            results = [echo.remote(i) for i in range(num)]
            assert ray.get(results) == expected
            # Test put and get
            objects = [ray.put(i) for i in range(num)]
            assert ray.get(objects) == expected
            # Pause for a random 0-10 ms between rounds.
            time.sleep(random.randint(0, 10) / 1000.0)
    except Exception as error:
        outcome = error

    with self.lock:
        self.thread_results.append(outcome)
def test_wait_cluster(ray_start_cluster):
    """Check that ``ray.wait`` with a zero timeout sees finished remote tasks.

    A first batch is timed to estimate how long the tasks take; a second
    batch is then given twice that long before being polled.
    """
    cluster = ray_start_cluster
    for _ in range(2):
        cluster.add_node(num_cpus=1, resources={"RemoteResource": 1})
    ray.init(address=cluster.address)

    @ray.remote(resources={"RemoteResource": 1})
    def f():
        return

    # Make sure we have enough workers on the remote nodes to execute some
    # tasks.
    warmup = [f.remote() for _ in range(10)]
    start = time.time()
    ray.get(warmup)
    elapsed = time.time() - start

    # Submit some more tasks that can only be executed on the remote nodes.
    tasks = [f.remote() for _ in range(10)]

    # Sleep for a bit to let the tasks finish.
    time.sleep(elapsed * 2)
    _, unready = ray.wait(tasks, num_returns=len(tasks), timeout=0)

    # All remote tasks should have finished.
    assert len(unready) == 0
def test_get_with_timeout(ray_start_regular_shared):
    """Check the ``timeout`` argument of ``ray.get``.

    A ready object must come back before the timeout, a pending object must
    raise ``GetTimeoutError`` (which is also a ``TimeoutError``), and once
    the object becomes ready a later get must return early again.
    """
    SignalActor = create_remote_signal_actor(ray)
    signal = SignalActor.remote()

    # Check that get() returns early if object is ready.
    began = time.time()
    ray.get(signal.wait.remote(should_wait=False), timeout=30)
    elapsed = time.time() - began
    assert elapsed < 30

    # Check that get() raises a TimeoutError after the timeout if the object
    # is not ready yet.
    pending_ref = signal.wait.remote()
    with pytest.raises(GetTimeoutError):
        ray.get(pending_ref, timeout=0.1)

    assert issubclass(GetTimeoutError, TimeoutError)
    with pytest.raises(TimeoutError):
        ray.get(pending_ref, timeout=0.1)

    # Check that a subsequent get() returns early.
    ray.get(signal.send.remote())
    began = time.time()
    ray.get(pending_ref, timeout=30)
    elapsed = time.time() - began
    assert elapsed < 30
def test_actor_concurrent(ray_start_regular_shared):
    """Check that an actor with ``max_concurrency=3`` runs calls together.

    Each ``add`` call blocks on an event until the batch holds three items,
    so the calls can only complete if all three are running at once.
    """

    @ray.remote
    class Batcher:
        def __init__(self):
            self.batch = []
            self.event = threading.Event()

        def add(self, x):
            self.batch.append(x)
            # The call that completes the batch releases the waiting ones.
            if len(self.batch) < 3:
                self.event.wait()
            else:
                self.event.set()
            return sorted(self.batch)

    batcher = Batcher.options(max_concurrency=3).remote()
    refs = [batcher.add.remote(value) for value in (1, 2, 3)]
    first, second, third = [ray.get(ref) for ref in refs]

    assert first == [1, 2, 3]
    assert first == second == third
def test_task_arguments_inline_bytes_limit(ray_start_cluster):
    """Pass three ~8k by-value arrays between nodes under an 18k inline limit.

    ``bar`` runs on the head node and calls ``foo`` on the worker node; the
    test passes if the nested call completes.
    """
    kib = 1024
    cluster = ray_start_cluster
    head_config = {
        "max_direct_call_object_size": 100 * kib,
        # if task_rpc_inlined_bytes_limit is greater than
        # max_grpc_message_size, this test fails.
        "task_rpc_inlined_bytes_limit": 18 * kib,
        "max_grpc_message_size": 20 * kib,
    }
    cluster.add_node(
        num_cpus=1, resources={"pin_head": 1}, _system_config=head_config)
    cluster.add_node(num_cpus=1, resources={"pin_worker": 1})
    ray.init(address=cluster.address)

    @ray.remote(resources={"pin_worker": 1})
    def foo(ref1, ref2, ref3):
        return ref1 == ref2 + ref3

    @ray.remote(resources={"pin_head": 1})
    def bar():
        # if the refs are inlined, the test fails.
        # refs = [ray.put(np.random.rand(1024) for _ in range(3))]
        # return ray.get(
        #     foo.remote(refs[0], refs[1], refs[2]))
        first = np.random.rand(1024)  # 8k
        second = np.random.rand(1024)  # 8k
        third = np.random.rand(1024)  # 8k
        return ray.get(foo.remote(first, second, third))

    ray.get(bar.remote())
def test_actor_large_objects(ray_start_regular_shared):
    """Check object visibility for a large actor return value.

    ``object_exists`` must be false right after the call is submitted and
    true once ``ray.wait`` reports the object as ready.
    """

    @ray.remote
    class Actor:
        def __init__(self):
            pass

        def f(self):
            time.sleep(1)
            return np.zeros(10000000)

    actor = Actor.remote()
    obj_ref = actor.f.remote()
    core_worker = ray.worker.global_worker.core_worker

    assert not core_worker.object_exists(obj_ref)

    done, _ = ray.wait([obj_ref])
    assert len(done) == 1
    assert core_worker.object_exists(obj_ref)

    fetched = ray.get(obj_ref)
    assert isinstance(fetched, np.ndarray)
def test_variable_number_of_args(shutdown_only):
    """Check remote functions that take ``*args`` or no arguments at all.

    Covers functions with only varargs, with leading positional parameters
    plus varargs (including the error raised when required positionals are
    missing), and a function without any parameters.
    """
    ray.init(num_cpus=1)

    @ray.remote
    def varargs_fct1(*a):
        return " ".join(map(str, a))

    @ray.remote
    def varargs_fct2(a, *b):
        return " ".join(map(str, b))

    x = varargs_fct1.remote(0, 1, 2)
    assert ray.get(x) == "0 1 2"
    x = varargs_fct2.remote(0, 1, 2)
    assert ray.get(x) == "1 2"

    @ray.remote
    def f1(*args):
        return args

    @ray.remote
    def f2(x, y, *args):
        return x, y, args

    assert ray.get(f1.remote()) == ()
    assert ray.get(f1.remote(1)) == (1, )
    assert ray.get(f1.remote(1, 2, 3)) == (1, 2, 3)

    # Missing required positional arguments must be rejected at submission.
    with pytest.raises(Exception):
        f2.remote()
    with pytest.raises(Exception):
        f2.remote(1)

    assert ray.get(f2.remote(1, 2)) == (1, 2, ())
    assert ray.get(f2.remote(1, 2, 3)) == (1, 2, (3, ))
    assert ray.get(f2.remote(1, 2, 3, 4)) == (1, 2, (3, 4))

    # The no-argument case used to live in a nested, never-called helper
    # (`testNoArgs`), so it was never exercised. Run it inline instead; Ray
    # is already initialized above.
    @ray.remote
    def no_op():
        pass

    assert ray.get(no_op.remote()) is None
def test_actor_call_order(shutdown_only):
    """Check that actor calls execute in submission order.

    Every call depends on a task that sleeps for a random short time, and
    the actor itself asserts that the calls arrive with consecutive counts.
    """
    ray.init(num_cpus=4)

    @ray.remote
    def small_value():
        # Sleep between 0 and 90 ms so dependencies resolve out of order.
        time.sleep(0.01 * np.random.randint(0, 10))
        return 0

    @ray.remote
    class Actor:
        def __init__(self):
            self.count = 0

        def inc(self, count, dependency):
            assert count == self.count
            self.count += 1
            return count

    actor = Actor.remote()
    call_refs = [
        actor.inc.remote(i, small_value.remote()) for i in range(100)
    ]
    observed = ray.get(call_refs)
    assert observed == list(range(100))
def test_task_output_inline_bytes_limit(ray_start_cluster):
    """Check that only part of a task's outputs is inlined under a 20-byte cap.

    A five-return task runs on the head node; a consumer on the worker node
    sums the values and asserts that only the first two refs are found in
    its in-memory store.
    """
    cluster = ray_start_cluster
    # Disable worker caching so worker leases are not reused; set object
    # inlining size threshold and enable storing of small objects in in-memory
    # object store so the borrowed ref is inlined.
    # set task_output_inlined_bytes_limit which only allows inline 20 bytes.
    head_config = {
        "worker_lease_timeout_milliseconds": 0,
        "max_direct_call_object_size": 100 * 1024,
        "task_output_inlined_bytes_limit": 20,
        "put_small_object_in_memory_store": True,
    }
    cluster.add_node(
        num_cpus=1, resources={"pin_head": 1}, _system_config=head_config)
    cluster.add_node(num_cpus=1, resources={"pin_worker": 1})
    ray.init(address=cluster.address)

    @ray.remote(num_returns=5, resources={"pin_head": 1})
    def f():
        return list(range(5))

    @ray.remote(resources={"pin_worker": 1})
    def sum_refs(numbers):
        total = 0
        for index, ref in enumerate(numbers):
            total += ray.get(ref)
            core_worker = ray.worker.global_worker.core_worker
            in_memory_store = core_worker.object_exists(
                ref, memory_store_only=True)
            # Only the first two returns are expected to have been inlined.
            assert in_memory_store if index < 2 else not in_memory_store
        return total

    results = f.remote()
    total_ref = sum_refs.remote(results)
    assert ray.get(total_ref) == 10
def test_actor_distribution_balance(ray_start_cluster_enabled, args):
    """Check that SPREAD-scheduled actors are balanced across the nodes.

    Args:
        ray_start_cluster_enabled: Cluster fixture to which nodes are added.
        args: Sequence whose first item is the node count and whose second
            item is the actor count.

    With at least as many nodes as actors, every actor must sit alone on
    its node. Otherwise every node must host at least one actor and no node
    may host more than ``ceil(actor_count / node_count)`` of them.
    """
    cluster = ray_start_cluster_enabled
    node_count = args[0]
    actor_count = args[1]

    for i in range(node_count):
        cluster.add_node(
            memory=1024**3,
            # Only the first node carries the system config.
            _system_config={"gcs_actor_scheduling_enabled": True}
            if i == 0 else {},
        )
    ray.init(address=cluster.address)
    cluster.wait_for_nodes()

    @ray.remote(
        memory=100 * 1024**2, num_cpus=0.01, scheduling_strategy="SPREAD")
    class Foo:
        def method(self):
            return ray.worker.global_worker.node.unique_id

    # Group the actors by the id of the node each one reports.
    actor_distribution = {}
    actor_list = [Foo.remote() for _ in range(actor_count)]
    for actor in actor_list:
        node_id = ray.get(actor.method.remote())
        actor_distribution.setdefault(node_id, []).append(actor)

    if node_count >= actor_count:
        assert len(actor_distribution) == actor_count
        for actors in actor_distribution.values():
            assert len(actors) == 1
    else:
        assert len(actor_distribution) == node_count
        # Integer ceiling: when the counts do not divide evenly, at least one
        # node must hold more than floor(actor_count / node_count) actors, so
        # a floor bound could never be satisfied.
        max_actors_per_node = -(-actor_count // node_count)
        for actors in actor_distribution.values():
            assert len(actors) <= max_actors_per_node
def test_duplicate_args(ray_start_regular_shared):
    """Check that the same argument can be passed more than once per call.

    The same value is supplied as several positional and keyword arguments,
    first by value, then by object ref to a task, then by object ref to an
    actor method.
    """

    def submit(remote_callable, first, second):
        # Pass `first` in four slots and `second` in two.
        return remote_callable.remote(
            first,
            second,
            first,
            kwarg1=first,
            kwarg2=second,
            kwarg1_duplicate=first,
        )

    @ray.remote
    def f(arg1,
          arg2,
          arg1_duplicate,
          kwarg1=None,
          kwarg2=None,
          kwarg1_duplicate=None):
        assert arg1 == kwarg1
        assert arg1 != arg2
        assert arg1 == arg1_duplicate
        assert kwarg1 != kwarg2
        assert kwarg1 == kwarg1_duplicate

    # Test by-value arguments.
    ray.get(submit(f, [1], [2]))

    # Test by-reference arguments.
    arg1 = ray.put([1])
    arg2 = ray.put([2])
    ray.get(submit(f, arg1, arg2))

    # Test by-reference arguments on an actor task.
    @ray.remote
    class Actor:
        def f(
            self,
            arg1,
            arg2,
            arg1_duplicate,
            kwarg1=None,
            kwarg2=None,
            kwarg1_duplicate=None,
        ):
            assert arg1 == kwarg1
            assert arg1 != arg2
            assert arg1 == arg1_duplicate
            assert kwarg1 != kwarg2
            assert kwarg1 == kwarg1_duplicate

    actor = Actor.remote()
    ray.get(submit(actor.f, arg1, arg2))
def test_schedule_actor_and_normal_task(ray_start_cluster):
    """Check that an actor waits while a normal task holds the node's memory.

    The node has 1 GiB of memory and both the task and the actor request
    600 MiB, so the actor's first call must not complete until the blocked
    task has been released.
    """
    cluster = ray_start_cluster
    cluster.add_node(
        memory=1024 ** 3,
        _system_config={"gcs_actor_scheduling_enabled": True},
    )
    ray.init(address=cluster.address)
    cluster.wait_for_nodes()

    requested_memory = 600 * 1024 ** 2

    @ray.remote(memory=requested_memory, num_cpus=0.01)
    class Foo:
        def method(self):
            return 2

    @ray.remote(memory=requested_memory, num_cpus=0.01)
    def fun(release_signal, started_signal):
        # Announce that the task is running, then block until released.
        started_signal.send.remote()
        ray.get(release_signal.wait.remote())
        return 1

    release_signal = SignalActor.remote()
    started_signal = SignalActor.remote()

    task_ref = fun.remote(release_signal, started_signal)

    # Make sure the normal task is executing.
    ray.get(started_signal.wait.remote())

    # The normal task is blocked now.
    # Try to create actor and make sure this actor is not created for the time
    # being.
    foo = Foo.remote()
    actor_ref = foo.method.remote()
    ready_list, remaining_list = ray.wait([actor_ref], timeout=2)
    assert len(ready_list) == 0 and len(remaining_list) == 1

    # Send a signal to unblock the normal task execution.
    ray.get(release_signal.send.remote())

    # Check the result of normal task.
    assert ray.get(task_ref) == 1

    # Make sure the actor is created.
    assert ray.get(actor_ref) == 2
def f(self, x):
    """Return ``x * 2``, computed by the delegate when one is set.

    With a truthy ``self.delegate`` the call is forwarded to the delegate's
    remote ``f`` and its result is awaited; otherwise the value is computed
    locally.
    """
    if not self.delegate:
        return x * 2
    forwarded_ref = self.delegate.f.remote(x)
    return ray.get(forwarded_ref)
def zoo(object):
    """Call ``increase(1)`` on the first element of ``object`` and return
    the fetched result.
    """
    handle = object[0]
    result_ref = handle.increase.remote(1)
    return ray.get(result_ref)
def bar(object):
    """Call ``increase(1)`` remotely on ``object`` and return the fetched
    result.
    """
    result_ref = object.increase.remote(1)
    return ray.get(result_ref)