def test_global_state_api(shutdown_only):
    """Exercise the global state API before and after ``ray.init``.

    Before initialization, each state accessor is expected to raise with a
    fixed message. After initialization the test checks the reported cluster
    resources, that the object table is empty, the single node entry, the
    table entry of one newly created actor, and the single job entry.
    """
    error_message = ("The ray global state API cannot be used "
                     "before ray.init has been called.")

    # Every accessor must refuse to work until ray.init has been called.
    for state_api in (ray.objects, ray.actors, ray.nodes, ray.jobs):
        with pytest.raises(Exception, match=error_message):
            state_api()

    ray.init(num_cpus=5, num_gpus=3, resources={"CustomResource": 1})

    # The resources passed to ray.init must be reported back unchanged.
    expected_resources = (("CPU", 5), ("GPU", 3), ("CustomResource", 1))
    for resource_name, quantity in expected_resources:
        assert ray.cluster_resources()[resource_name] == quantity

    assert ray.objects() == {}

    worker = ray.worker.global_worker
    job_id = ray.utils.compute_job_id_from_driver(
        ray.WorkerID(worker.worker_id))

    # Exactly one node is expected, reachable at this worker's IP address.
    nodes = ray.nodes()
    node_ip_address = ray.worker.global_worker.node_ip_address
    assert len(nodes) == 1
    assert nodes[0]["NodeManagerAddress"] == node_ip_address

    @ray.remote
    class Actor:
        def __init__(self):
            pass

    # The handle is bound to a name even though it is never used again;
    # presumably this keeps the actor alive for the checks below -- confirm.
    _ = Actor.remote()  # noqa: F841
    # Wait for actor to be created
    wait_for_num_actors(1)

    actors = ray.actors()
    assert len(actors) == 1

    (actor_info,) = actors.values()
    assert actor_info["JobID"] == job_id.hex()
    assert "IPAddress" in actor_info["Address"]
    assert "IPAddress" in actor_info["OwnerAddress"]
    actor_port = actor_info["Address"]["Port"]
    owner_port = actor_info["OwnerAddress"]["Port"]
    assert actor_port != owner_port

    jobs = ray.jobs()
    assert len(jobs) == 1
    assert jobs[0]["JobID"] == job_id.hex()
    assert jobs[0]["NodeManagerAddress"] == node_ip_address
def test_global_state_api(shutdown_only):
    """Exercise the global state API after ``ray.init``.

    Checks the reported cluster resources, that ``ray.objects()`` can be
    called, the single node entry, the table entry of one named actor, and
    the single job entry (including its ``DriverIPAddress``).
    """
    ray.init(num_cpus=5, num_gpus=3, resources={"CustomResource": 1})

    # The resources passed to ray.init must be reported back unchanged.
    expected_resources = (("CPU", 5), ("GPU", 3), ("CustomResource", 1))
    for resource_name, quantity in expected_resources:
        assert ray.cluster_resources()[resource_name] == quantity

    # A driver/worker creates a temporary object while starting up. It is
    # freed right away, but the Raylet removes the object ref from GCS
    # asynchronously, so on rare occasions the ref can still be visible here.
    # Since the test has no control over when workers create such temporary
    # objects, it cannot require `ray.objects()` to be empty; it only makes
    # sure the call itself succeeds.
    assert len(ray.objects()) >= 0

    worker = ray.worker.global_worker
    job_id = ray.utils.compute_job_id_from_driver(
        ray.WorkerID(worker.worker_id))

    # Exactly one node is expected, reachable at this worker's IP address.
    nodes = ray.nodes()
    node_ip_address = ray.worker.global_worker.node_ip_address
    assert len(nodes) == 1
    assert nodes[0]["NodeManagerAddress"] == node_ip_address

    @ray.remote
    class Actor:
        def __init__(self):
            pass

    # The handle is bound to a name even though it is never used again;
    # presumably this keeps the actor alive for the checks below -- confirm.
    _ = Actor.options(name="test_actor").remote()  # noqa: F841
    # Wait for actor to be created
    wait_for_num_actors(1)

    actors = ray.actors()
    assert len(actors) == 1

    (actor_info,) = actors.values()
    assert actor_info["JobID"] == job_id.hex()
    assert actor_info["Name"] == "test_actor"
    assert "IPAddress" in actor_info["Address"]
    assert "IPAddress" in actor_info["OwnerAddress"]
    actor_port = actor_info["Address"]["Port"]
    owner_port = actor_info["OwnerAddress"]["Port"]
    assert actor_port != owner_port

    jobs = ray.jobs()
    assert len(jobs) == 1
    assert jobs[0]["JobID"] == job_id.hex()
    assert jobs[0]["DriverIPAddress"] == node_ip_address
def test_job_gc(call_ray_start):
    """Check that an actor goes away once the driver that made it is killed.

    A second driver is started from a script string and creates one actor.
    After that driver process is killed, the test waits until
    ``ray.actors()`` reports no actors at all.
    """
    address = call_ray_start
    ray.init(address=address)

    # Script run by the secondary driver: connect to the same cluster and
    # create a single (non-detached) actor.
    driver_script = """
import ray

ray.init(address="{}")

@ray.remote
class Actor:
    def __init__(self):
        pass

_ = Actor.remote()
""".format(address)

    driver_proc = run_string_as_driver_nonblocking(driver_script)
    # Wait for actor to be created
    wait_for_num_actors(1)

    assert len(ray.actors()) == 1

    # NOTE(review): the original comment on this assertion was just "dash";
    # its meaning is not clear from this file -- confirm why two jobs are
    # expected (presumably this driver plus the spawned one).
    assert len(ray.jobs()) == 2

    # Kill the driver process.
    driver_proc.kill()
    driver_proc.wait()

    def no_actors_left():
        # True once the actor table is empty.
        return len(ray.actors()) == 0

    wait_for_condition(no_actors_left)
def test_job_gc_with_detached_actor(call_ray_start):
    """Check that a detached actor outlives the driver that created it.

    A second driver is started from a script string, creates an actor with
    ``lifetime="detached"`` and the name "DetachedActor", and calls it once.
    After that driver process is killed, the actor must still be retrievable
    by name and answer a method call.
    """
    address = call_ray_start
    ray.init(address=address)

    # Script run by the secondary driver.
    driver_script = """
import ray

ray.init(address="{}")

@ray.remote
class Actor:
    def __init__(self):
        pass

    def value(self):
        return 1

_ = Actor.options(lifetime="detached", name="DetachedActor").remote()
# Make sure the actor is created before the driver exits.
ray.get(_.value.remote())
""".format(address)

    driver_proc = run_string_as_driver_nonblocking(driver_script)
    # Wait for actor to be created (the ALIVE constant is passed to the
    # helper; presumably it restricts the count to live actors -- confirm).
    wait_for_num_actors(1, ray.gcs_utils.ActorTableData.ALIVE)

    assert len(ray.actors()) == 1

    # NOTE(review): the original comment on this assertion was just "dash";
    # its meaning is not clear from this file -- confirm why two jobs are
    # expected (presumably this driver plus the spawned one).
    assert len(ray.jobs()) == 2

    # Kill the driver process.
    driver_proc.kill()
    driver_proc.wait()

    # The detached actor must still be reachable by name and respond.
    surviving_actor = ray.get_actor("DetachedActor")
    result = ray.get(surviving_actor.value.remote())
    assert result == 1
def test_job_gc_with_detached_actor(call_ray_start):
    """Check that a named actor is still usable after its driver is killed.

    A second driver is started from a script string and creates an actor
    named "DetachedActor". After that driver process is killed, the actor
    must still be retrievable by name and answer a method call.
    """
    address = call_ray_start
    ray.init(address=address)

    # Script run by the secondary driver.
    driver_script = """
import ray

ray.init(address="{}")

@ray.remote
class Actor:
    def __init__(self):
        pass

    def value(self):
        return 1

_ = Actor.options(name="DetachedActor").remote()
""".format(address)

    driver_proc = run_string_as_driver_nonblocking(driver_script)
    # Wait for actor to be created
    wait_for_num_actors(1)

    assert len(ray.actors()) == 1

    # Two job entries are expected at this point (presumably this driver
    # plus the spawned one -- confirm).
    assert len(ray.jobs()) == 2

    # Kill the driver process.
    driver_proc.kill()
    driver_proc.wait()

    # The named actor must still be reachable and respond.
    surviving_actor = ray.get_actor("DetachedActor")
    result = ray.get(surviving_actor.value.remote())
    assert result == 1
def is_job_done():
    """Report whether any job from ``ray.jobs()`` has a "StopTime" entry.

    Returns:
        True as soon as one job entry contains "StopTime"; False otherwise,
        including when no jobs are returned.
    """
    return any("StopTime" in job for job in ray.jobs())
def test_global_state_api(shutdown_only):
    """Exercise the global state API before and after ``ray.init``.

    Before initialization, each state accessor is expected to raise with a
    fixed message. After initialization the test checks the reported cluster
    resources, that the object table is empty, the driver's entry in the
    task table, the single node entry, the table entry of one newly created
    actor, and the single job entry.
    """
    error_message = ("The ray global state API cannot be used "
                     "before ray.init has been called.")

    # Every accessor must refuse to work until ray.init has been called.
    uninitialized_apis = (ray.objects, ray.actors, ray.tasks, ray.nodes,
                          ray.jobs)
    for state_api in uninitialized_apis:
        with pytest.raises(Exception, match=error_message):
            state_api()

    ray.init(num_cpus=5, num_gpus=3, resources={"CustomResource": 1})

    # The resources passed to ray.init must be reported back unchanged.
    expected_resources = (("CPU", 5), ("GPU", 3), ("CustomResource", 1))
    for resource_name, quantity in expected_resources:
        assert ray.cluster_resources()[resource_name] == quantity

    assert ray.objects() == {}

    worker = ray.worker.global_worker
    job_id = ray.utils.compute_job_id_from_driver(
        ray.WorkerID(worker.worker_id))
    driver_task_id = ray.worker.global_worker.current_task_id.hex()

    # One task is put in the task table which corresponds to this driver.
    wait_for_num_tasks(1)
    tasks = ray.tasks()
    assert len(tasks) == 1
    assert next(iter(tasks)) == driver_task_id
    driver_spec = tasks[driver_task_id]["TaskSpec"]
    nil_unique_id_hex = ray.UniqueID.nil().hex()
    nil_actor_id_hex = ray.ActorID.nil().hex()

    # The driver task has nil actor/function IDs and no args or returns.
    assert driver_spec["TaskID"] == driver_task_id
    assert driver_spec["ActorID"] == nil_actor_id_hex
    assert driver_spec["Args"] == []
    assert driver_spec["JobID"] == job_id.hex()
    assert driver_spec["FunctionID"] == nil_unique_id_hex
    assert driver_spec["ReturnObjectIDs"] == []

    # Exactly one node is expected, reachable at this worker's IP address.
    nodes = ray.nodes()
    node_ip_address = ray.worker.global_worker.node_ip_address
    assert len(nodes) == 1
    assert nodes[0]["NodeManagerAddress"] == node_ip_address

    @ray.remote
    class Actor:
        def __init__(self):
            pass

    # The handle is bound to a name even though it is never used again;
    # presumably this keeps the actor alive for the checks below -- confirm.
    _ = Actor.remote()
    # Wait for actor to be created
    wait_for_num_actors(1)

    actors = ray.actors()
    assert len(actors) == 1

    (actor_info,) = actors.values()
    assert actor_info["JobID"] == job_id.hex()
    assert "IPAddress" in actor_info["Address"]
    assert "IPAddress" in actor_info["OwnerAddress"]
    actor_port = actor_info["Address"]["Port"]
    owner_port = actor_info["OwnerAddress"]["Port"]
    assert actor_port != owner_port

    jobs = ray.jobs()
    assert len(jobs) == 1
    assert jobs[0]["JobID"] == job_id.hex()
    assert jobs[0]["NodeManagerAddress"] == node_ip_address
def test_global_state_api(shutdown_only):
    """Exercise the global state API before and after ``ray.init``.

    Before initialization, each state accessor is expected to raise with a
    fixed message. After initialization the test checks the reported cluster
    resources, that the object table is empty, the driver's entry in the
    task table, the single node entry, the task-table entry of one submitted
    remote function, the two resulting object-table entries, and the single
    job entry.
    """
    error_message = ("The ray global state API cannot be used "
                     "before ray.init has been called.")

    # Every accessor must refuse to work until ray.init has been called.
    for state_api in (ray.objects, ray.tasks, ray.nodes, ray.jobs):
        with pytest.raises(Exception, match=error_message):
            state_api()

    ray.init(num_cpus=5, num_gpus=3, resources={"CustomResource": 1})

    assert ray.cluster_resources()["CPU"] == 5
    assert ray.cluster_resources()["GPU"] == 3
    assert ray.cluster_resources()["CustomResource"] == 1

    assert ray.objects() == {}

    job_id = ray.utils.compute_job_id_from_driver(
        ray.WorkerID(ray.worker.global_worker.worker_id))
    driver_task_id = ray.worker.global_worker.current_task_id.hex()

    # One task is put in the task table which corresponds to this driver.
    wait_for_num_tasks(1)
    task_table = ray.tasks()
    assert len(task_table) == 1
    assert driver_task_id == list(task_table.keys())[0]
    task_spec = task_table[driver_task_id]["TaskSpec"]
    nil_unique_id_hex = ray.UniqueID.nil().hex()
    nil_actor_id_hex = ray.ActorID.nil().hex()

    # The driver task has nil actor/function IDs and no args or returns.
    assert task_spec["TaskID"] == driver_task_id
    assert task_spec["ActorID"] == nil_actor_id_hex
    assert task_spec["Args"] == []
    assert task_spec["JobID"] == job_id.hex()
    assert task_spec["FunctionID"] == nil_unique_id_hex
    assert task_spec["ReturnObjectIDs"] == []

    # Exactly one node is expected, reachable at this worker's IP address.
    client_table = ray.nodes()
    node_ip_address = ray.worker.global_worker.node_ip_address

    assert len(client_table) == 1
    assert client_table[0]["NodeManagerAddress"] == node_ip_address

    @ray.remote
    def f(*xs):
        return 1

    x_id = ray.put(1)
    result_id = f.remote(1, "hi", x_id)

    # Wait for one additional task to complete.
    wait_for_num_tasks(1 + 1)
    task_table = ray.tasks()
    assert len(task_table) == 1 + 1

    # Whatever is left after dropping the driver's entry is the task for f.
    task_id_set = set(task_table.keys())
    task_id_set.remove(driver_task_id)
    task_id = list(task_id_set)[0]

    task_spec = task_table[task_id]["TaskSpec"]
    assert task_spec["ActorID"] == nil_actor_id_hex
    assert task_spec["Args"] == [
        signature.DUMMY_TYPE, 1, signature.DUMMY_TYPE, "hi",
        signature.DUMMY_TYPE, x_id
    ]
    assert task_spec["JobID"] == job_id.hex()
    assert task_spec["ReturnObjectIDs"] == [result_id]

    # Looking a task up by ID must match its entry in the full table.
    assert task_table[task_id] == ray.tasks(task_id)

    # Wait for two objects, one for the x_id and one for result_id.
    # (A nested `wait_for_object_table` helper that polled the "ManagerIDs"
    # fields used to be defined here but was never called; it was dead code
    # and has been removed.)
    wait_for_num_objects(2)

    object_table = ray.objects()
    assert len(object_table) == 2

    # Per-object lookups must match the entries in the full table.
    assert object_table[x_id] == ray.objects(x_id)
    object_table_entry = ray.objects(result_id)
    assert object_table[result_id] == object_table_entry

    job_table = ray.jobs()
    assert len(job_table) == 1
    assert job_table[0]["JobID"] == job_id.hex()
    assert job_table[0]["NodeManagerAddress"] == node_ip_address