示例#1
0
def test_ray_start_non_head(call_ray_stop_only, monkeypatch):

    # Test that we can call ray start to connect to an existing cluster.

    # Test starting Ray with a port specified.
    check_call_ray(["start", "--head", "--port", "7298", "--resources", '{"res_0": 1}'])

    # Test starting node connecting to the above cluster.
    check_call_ray(
        ["start", "--address", "127.0.0.1:7298", "--resources", '{"res_1": 1}']
    )

    # Test starting Ray with address `auto`.
    check_call_ray(["start", "--address", "auto", "--resources", '{"res_2": 1}'])

    # Run tasks to verify nodes with custom resources are available.
    driver_script = """
import ray
ray.init()

@ray.remote
def f():
    return 1

assert ray.get(f.remote()) == 1
assert ray.get(f.options(resources={"res_0": 1}).remote()) == 1
assert ray.get(f.options(resources={"res_1": 1}).remote()) == 1
assert ray.get(f.options(resources={"res_2": 1}).remote()) == 1
print("success")
"""
    monkeypatch.setenv("RAY_ADDRESS", "auto")
    out = run_string_as_driver(driver_script)
    # Make sure the driver succeeded.
    assert "success" in out

    check_call_ray(["stop"])
示例#2
0
def test_ray_stop_should_not_kill_external_redis(redis_proc):
    check_call_ray(["start", "--head"])
    subprocess.check_call(["ray", "stop"])
    assert redis_proc.poll() is None
示例#3
0
def test_calling_start_ray_head(call_ray_stop_only):

    # Test that we can call ray start with various command line
    # parameters.

    # Test starting Ray with a redis port specified.
    check_call_ray(["start", "--head", "--port", "0"])
    check_call_ray(["stop"])

    # Test starting Ray with a node IP address specified.
    check_call_ray(
        ["start", "--head", "--node-ip-address", "127.0.0.1", "--port", "0"])
    check_call_ray(["stop"])

    # Test starting Ray with a system config parameter set.
    check_call_ray([
        "start",
        "--head",
        "--system-config",
        '{"metrics_report_interval_ms":100}',
        "--port",
        "0",
    ])
    check_call_ray(["stop"])

    # Test starting Ray with the object manager and node manager ports
    # specified.
    check_call_ray([
        "start",
        "--head",
        "--object-manager-port",
        "22345",
        "--node-manager-port",
        "54321",
        "--port",
        "0",
    ])
    check_call_ray(["stop"])

    # Test starting Ray with the worker port range specified.
    check_call_ray([
        "start",
        "--head",
        "--min-worker-port",
        "51000",
        "--max-worker-port",
        "51050",
        "--port",
        "0",
    ])
    check_call_ray(["stop"])

    # Test starting Ray with a worker port list.
    check_call_ray(["start", "--head", "--worker-port-list", "10002,10003"])
    check_call_ray(["stop"])

    # Test starting Ray with a non-int in the worker port list.
    with pytest.raises(subprocess.CalledProcessError):
        check_call_ray(["start", "--head", "--worker-port-list", "10002,a"])
    check_call_ray(["stop"])

    # Test starting Ray with an invalid port in the worker port list.
    with pytest.raises(subprocess.CalledProcessError):
        check_call_ray(["start", "--head", "--worker-port-list", "100"])
    check_call_ray(["stop"])

    # Test starting Ray with the number of CPUs specified.
    check_call_ray(["start", "--head", "--num-cpus", "2", "--port", "0"])
    check_call_ray(["stop"])

    # Test starting Ray with the number of GPUs specified.
    check_call_ray(["start", "--head", "--num-gpus", "100", "--port", "0"])
    check_call_ray(["stop"])

    # Test starting Ray with redis shard ports specified.
    check_call_ray([
        "start", "--head", "--redis-shard-ports", "6380,6381,6382", "--port",
        "0"
    ])
    check_call_ray(["stop"])

    # Test starting Ray with all arguments specified.
    check_call_ray([
        "start",
        "--head",
        "--redis-shard-ports",
        "6380,6381,6382",
        "--object-manager-port",
        "22345",
        "--num-cpus",
        "2",
        "--num-gpus",
        "0",
        "--resources",
        '{"Custom": 1}',
        "--port",
        "0",
    ])
    check_call_ray(["stop"])

    temp_dir = ray._private.utils.get_ray_temp_dir()

    # Test starting Ray with RAY_REDIS_ADDRESS env.
    _, proc = _start_redis_instance(
        REDIS_EXECUTABLE,
        temp_dir,
        8888,
        password=ray_constants.REDIS_DEFAULT_PASSWORD)
    os.environ["RAY_REDIS_ADDRESS"] = "127.0.0.1:8888"
    check_call_ray(["start", "--head"])
    check_call_ray(["stop"])
    proc.process.terminate()
    del os.environ["RAY_REDIS_ADDRESS"]

    # Test --block. Killing a child process should cause the command to exit.
    blocked = subprocess.Popen(
        ["ray", "start", "--head", "--block", "--port", "0"])

    blocked.poll()
    assert blocked.returncode is None
    # Make sure ray cluster is up
    run_string_as_driver("""
import ray
from time import sleep
for i in range(0, 5):
    try:
        ray.init(address='auto')
        break
    except:
        sleep(1)
""")

    # Make sure ray cluster is up
    run_string_as_driver("""
import ray
from time import sleep
for i in range(0, 5):
    try:
        ray.init(address='auto')
        break
    except:
        sleep(1)
""")

    kill_process_by_name("raylet", SIGKILL=True)
    wait_for_children_of_pid_to_exit(blocked.pid, timeout=30)
    blocked.wait()
    assert blocked.returncode != 0, "ray start shouldn't return 0 on bad exit"

    # Test --block. Killing the command should clean up all child processes.
    blocked = subprocess.Popen(
        ["ray", "start", "--head", "--block", "--port", "0"])
    blocked.poll()
    assert blocked.returncode is None

    # Include GCS, autoscaler monitor, client server, dashboard, raylet and
    # log_monitor.py
    num_children = 6
    if not detect_fate_sharing_support():
        # Account for ray_process_reaper.py
        num_children += 1
    # Check a set of child process commands & scripts instead?
    wait_for_children_of_pid(blocked.pid,
                             num_children=num_children,
                             timeout=30)

    blocked.terminate()
    wait_for_children_of_pid_to_exit(blocked.pid, timeout=30)
    blocked.wait()
    assert blocked.returncode != 0, "ray start shouldn't return 0 on bad exit"
示例#4
0
def test_calling_start_ray_head(call_ray_stop_only):

    # Test that we can call ray start with various command line
    # parameters. TODO(rkn): This test only tests the --head code path. We
    # should also test the non-head node code path.

    # Test starting Ray with a redis port specified.
    check_call_ray(["start", "--head", "--port", "0"])
    check_call_ray(["stop"])

    # Test starting Ray with a node IP address specified.
    check_call_ray(
        ["start", "--head", "--node-ip-address", "127.0.0.1", "--port", "0"])
    check_call_ray(["stop"])

    # Test starting Ray with a system config parameter set.
    check_call_ray([
        "start", "--head", "--system-config",
        "{\"metrics_report_interval_ms\":100}", "--port", "0"
    ])
    check_call_ray(["stop"])

    # Test starting Ray with the object manager and node manager ports
    # specified.
    check_call_ray([
        "start", "--head", "--object-manager-port", "12345",
        "--node-manager-port", "54321", "--port", "0"
    ])
    check_call_ray(["stop"])

    # Test starting Ray with the worker port range specified.
    check_call_ray([
        "start", "--head", "--min-worker-port", "51000", "--max-worker-port",
        "51050", "--port", "0"
    ])
    check_call_ray(["stop"])

    # Test starting Ray with a worker port list.
    check_call_ray(["start", "--head", "--worker-port-list", "10002,10003"])
    check_call_ray(["stop"])

    # Test starting Ray with a non-int in the worker port list.
    with pytest.raises(subprocess.CalledProcessError):
        check_call_ray(["start", "--head", "--worker-port-list", "10002,a"])
    check_call_ray(["stop"])

    # Test starting Ray with an invalid port in the worker port list.
    with pytest.raises(subprocess.CalledProcessError):
        check_call_ray(["start", "--head", "--worker-port-list", "100"])
    check_call_ray(["stop"])

    # Test starting Ray with the number of CPUs specified.
    check_call_ray(["start", "--head", "--num-cpus", "2", "--port", "0"])
    check_call_ray(["stop"])

    # Test starting Ray with the number of GPUs specified.
    check_call_ray(["start", "--head", "--num-gpus", "100", "--port", "0"])
    check_call_ray(["stop"])

    # Test starting Ray with redis shard ports specified.
    check_call_ray([
        "start", "--head", "--redis-shard-ports", "6380,6381,6382", "--port",
        "0"
    ])
    check_call_ray(["stop"])

    # Test starting Ray with all arguments specified.
    check_call_ray([
        "start", "--head", "--redis-shard-ports", "6380,6381,6382",
        "--object-manager-port", "12345", "--num-cpus", "2", "--num-gpus", "0",
        "--resources", "{\"Custom\": 1}", "--port", "0"
    ])
    check_call_ray(["stop"])

    # Test starting Ray with invalid external address.
    # It will fall back to creating a new one.
    check_call_ray(
        ["start", "--head", "--address", "127.0.0.1:6379", "--port", "0"])
    check_call_ray(["stop"])

    # Test --block. Killing a child process should cause the command to exit.
    blocked = subprocess.Popen(
        ["ray", "start", "--head", "--block", "--port", "0"])

    wait_for_children_names_of_pid(blocked.pid, ["raylet"], timeout=30)

    blocked.poll()
    assert blocked.returncode is None

    kill_process_by_name("raylet")
    wait_for_children_of_pid_to_exit(blocked.pid, timeout=30)
    blocked.wait()
    assert blocked.returncode != 0, "ray start shouldn't return 0 on bad exit"

    # Test --block. Killing the command should clean up all child processes.
    blocked = subprocess.Popen(
        ["ray", "start", "--head", "--block", "--port", "0"])
    blocked.poll()
    assert blocked.returncode is None

    wait_for_children_of_pid(blocked.pid, num_children=7, timeout=30)

    blocked.terminate()
    wait_for_children_of_pid_to_exit(blocked.pid, timeout=30)
    blocked.wait()
    assert blocked.returncode != 0, "ray start shouldn't return 0 on bad exit"
示例#5
0
def test_calling_start_ray_head(call_ray_stop_only):

    # Test that we can call ray start with various command line
    # parameters.

    # Test starting Ray with a redis port specified.
    check_call_ray(["start", "--head", "--port", "0"])
    check_call_ray(["stop"])

    # Test starting Ray with a node IP address specified.
    check_call_ray(
        ["start", "--head", "--node-ip-address", "127.0.0.1", "--port", "0"])
    check_call_ray(["stop"])

    # Test starting Ray with a system config parameter set.
    check_call_ray([
        "start", "--head", "--system-config",
        "{\"metrics_report_interval_ms\":100}", "--port", "0"
    ])
    check_call_ray(["stop"])

    # Test starting Ray with the object manager and node manager ports
    # specified.
    check_call_ray([
        "start", "--head", "--object-manager-port", "22345",
        "--node-manager-port", "54321", "--port", "0"
    ])
    check_call_ray(["stop"])

    # Test starting Ray with the worker port range specified.
    check_call_ray([
        "start", "--head", "--min-worker-port", "51000", "--max-worker-port",
        "51050", "--port", "0"
    ])
    check_call_ray(["stop"])

    # Test starting Ray with a worker port list.
    check_call_ray(["start", "--head", "--worker-port-list", "10002,10003"])
    check_call_ray(["stop"])

    # Test starting Ray with a non-int in the worker port list.
    with pytest.raises(subprocess.CalledProcessError):
        check_call_ray(["start", "--head", "--worker-port-list", "10002,a"])
    check_call_ray(["stop"])

    # Test starting Ray with an invalid port in the worker port list.
    with pytest.raises(subprocess.CalledProcessError):
        check_call_ray(["start", "--head", "--worker-port-list", "100"])
    check_call_ray(["stop"])

    # Test starting Ray with the number of CPUs specified.
    check_call_ray(["start", "--head", "--num-cpus", "2", "--port", "0"])
    check_call_ray(["stop"])

    # Test starting Ray with the number of GPUs specified.
    check_call_ray(["start", "--head", "--num-gpus", "100", "--port", "0"])
    check_call_ray(["stop"])

    # Test starting Ray with redis shard ports specified.
    check_call_ray([
        "start", "--head", "--redis-shard-ports", "6380,6381,6382", "--port",
        "0"
    ])
    check_call_ray(["stop"])

    # Test starting Ray with all arguments specified.
    check_call_ray([
        "start", "--head", "--redis-shard-ports", "6380,6381,6382",
        "--object-manager-port", "22345", "--num-cpus", "2", "--num-gpus", "0",
        "--resources", "{\"Custom\": 1}", "--port", "0"
    ])
    check_call_ray(["stop"])

    # Test starting Ray with invalid external address.
    # It will fall back to creating a new one.
    check_call_ray(
        ["start", "--head", "--address", "127.0.0.1:6379", "--port", "0"])
    check_call_ray(["stop"])

    # Test starting Ray with RAY_REDIS_ADDRESS env.
    os.environ["RAY_REDIS_ADDRESS"] = "127.0.0.1:6379"
    check_call_ray(["start", "--head", "--port", "0"])
    check_call_ray(["stop"])
    del os.environ["RAY_REDIS_ADDRESS"]

    # Test --block. Killing a child process should cause the command to exit.
    blocked = subprocess.Popen(
        ["ray", "start", "--head", "--block", "--port", "0"])

    blocked.poll()
    assert blocked.returncode is None
    # Make sure ray cluster is up
    run_string_as_driver("""
import ray
from time import sleep
for i in range(0, 5):
    try:
        ray.init(address='auto')
        break
    except:
        sleep(1)
""")

    # Make sure ray cluster is up
    run_string_as_driver("""
import ray
from time import sleep
for i in range(0, 5):
    try:
        ray.init(address='auto')
        break
    except:
        sleep(1)
""")

    kill_process_by_name("raylet", SIGKILL=True)
    wait_for_children_of_pid_to_exit(blocked.pid, timeout=30)
    blocked.wait()
    assert blocked.returncode != 0, "ray start shouldn't return 0 on bad exit"

    # Test --block. Killing the command should clean up all child processes.
    blocked = subprocess.Popen(
        ["ray", "start", "--head", "--block", "--port", "0"])
    blocked.poll()
    assert blocked.returncode is None

    wait_for_children_of_pid(blocked.pid, num_children=7, timeout=30)

    blocked.terminate()
    wait_for_children_of_pid_to_exit(blocked.pid, timeout=30)
    blocked.wait()
    assert blocked.returncode != 0, "ray start shouldn't return 0 on bad exit"