Exemplo n.º 1
0
def ray_start_head_local():
    """Start a local Ray head node, yield, then shut everything down.

    The head node is started through the ``ray start`` CLI bound to
    ``localhost`` with Redis on port 6379. ``None`` is yielded to the
    consumer; once the generator is resumed, this process disconnects from
    Ray and ``ray stop`` is run to completion.
    """
    start_command = [
        "ray",
        "start",
        "--head",
        "--node-ip-address=localhost",
        "--redis-port=6379",
    ]
    # Bring up the Ray processes on this machine.
    run_and_get_output(start_command)

    yield None

    # Teardown: disconnect this process first, then kill the cluster and
    # block until the stop command has exited.
    ray.shutdown()
    stop_process = subprocess.Popen(["ray", "stop"])
    stop_process.wait()
Exemplo n.º 2
0
def ray_start_head_local():
    """Run a Ray head node on ``localhost`` around the consumer of this generator.

    Setup invokes ``ray start --head`` with the node IP address set to
    ``localhost`` and the Redis port set to 6379. After yielding ``None``,
    teardown calls ``ray.shutdown()`` and then waits for ``ray stop``.
    """
    head_arguments = ["--head", "--node-ip-address=localhost",
                      "--redis-port=6379"]
    # Start the Ray processes on this machine.
    run_and_get_output(["ray", "start"] + head_arguments)

    yield None

    # Disconnect from the Ray cluster before killing it.
    ray.shutdown()
    cluster_stop = subprocess.Popen(["ray", "stop"])
    cluster_stop.wait()
Exemplo n.º 3
0
 def setUp(self):
     """Start a Ray head node and remember the Redis address it reports.

     The address is taken from the text that follows ``redis_address="`` in
     the output of ``ray start --head``, up to the next double quote, and is
     stored on ``self.redis_address``.
     """
     start_output = run_and_get_output(["ray", "start", "--head"])
     marker = "redis_address=\""
     # Skip past the marker; what follows begins with the address itself.
     address_start = start_output.find(marker) + len(marker)
     after_marker = start_output[address_start:]
     # Keep only the part before the closing quote.
     self.redis_address, _, _ = after_marker.partition("\"")
Exemplo n.º 4
0
    def testUsingHostnames(self):
        """Check that Ray works when the node is addressed as ``localhost``.

        Starts a head node bound to ``localhost``, connects to it by hostname,
        runs one trivial remote function and checks its result, then stops
        the cluster.
        """
        head_command = [
            "ray",
            "start",
            "--head",
            "--node-ip-address=localhost",
            "--redis-port=6379",
        ]
        # Start the Ray processes on this machine.
        run_and_get_output(head_command)

        ray.init(node_ip_address="localhost", redis_address="localhost:6379")

        @ray.remote
        def f():
            return 1

        remote_result = ray.get(f.remote())
        self.assertEqual(remote_result, 1)

        # Kill the Ray cluster and wait for the stop command to exit.
        stop_process = subprocess.Popen(["ray", "stop"])
        stop_process.wait()
Exemplo n.º 5
0
 def setUp(self):
     """Launch a Ray head node and record its Redis address on ``self``.

     ``ray start --head`` prints a ``redis_address="..."`` fragment; the text
     between that opening marker and the following double quote is saved as
     ``self.redis_address``.
     """
     prefix = "redis_address=\""
     head_output = run_and_get_output(["ray", "start", "--head"])
     # Offset of the first character after the marker.
     value_offset = head_output.find(prefix) + len(prefix)
     # partition() yields the text before the first closing quote.
     quoted_tail = head_output[value_offset:]
     self.redis_address = quoted_tail.partition("\"")[0]
Exemplo n.º 6
0
    def testUsingHostnames(self):
        """Run a remote function on a cluster reached via the ``localhost`` name.

        The head node is started with ``--node-ip-address=localhost`` and
        Redis on port 6379; the driver then connects using the same hostname.
        """
        # Start the Ray processes on this machine.
        start_arguments = ["--head", "--node-ip-address=localhost",
                           "--redis-port=6379"]
        run_and_get_output(["ray", "start"] + start_arguments)

        ray.init(node_ip_address="localhost", redis_address="localhost:6379")

        @ray.remote
        def f():
            return 1

        object_id = f.remote()
        self.assertEqual(ray.get(object_id), 1)

        # Kill the Ray cluster.
        cluster_stop = subprocess.Popen(["ray", "stop"])
        cluster_stop.wait()
Exemplo n.º 7
0
def ray_start_head_with_resources():
    """Start a head node with one CPU and one GPU and yield its Redis address.

    The address is read from the ``redis_address="..."`` fragment in the
    output of ``ray start``. After the generator is resumed, ``ray stop`` is
    run and waited on.

    Yields:
        The text between ``redis_address="`` and the next double quote.
    """
    start_output = run_and_get_output(
        ["ray", "start", "--head", "--num-cpus=1", "--num-gpus=1"])
    marker = "redis_address=\""
    # Skip past the marker, then cut at the closing quote.
    address_start = start_output.find(marker) + len(marker)
    address, _, _ = start_output[address_start:].partition("\"")

    yield address

    # Kill the Ray cluster.
    stop_process = subprocess.Popen(["ray", "stop"])
    stop_process.wait()
Exemplo n.º 8
0
def ray_start_head_with_resources():
    """Yield the Redis address of a head node started with 1 CPU and 1 GPU.

    Setup runs ``ray start --head --num-cpus=1 --num-gpus=1`` and extracts the
    quoted value that follows ``redis_address=`` in its output. Teardown runs
    ``ray stop`` and blocks until it exits.
    """
    resource_flags = ["--num-cpus=1", "--num-gpus=1"]
    head_output = run_and_get_output(["ray", "start", "--head"] + resource_flags)

    # Get the redis address from the output.
    prefix = "redis_address=\""
    value_offset = head_output.find(prefix) + len(prefix)
    quoted_tail = head_output[value_offset:]
    redis_address = quoted_tail.partition("\"")[0]

    yield redis_address

    # Kill the Ray cluster.
    cluster_stop = subprocess.Popen(["ray", "stop"])
    cluster_stop.wait()
Exemplo n.º 9
0
def ray_start():
    """Start a Ray head node with a gateway, connect to it, yield, tear down.

    Setup runs ``ray start --head --with-gateway`` with Redis on port 21216
    and raylet enabled, prints the command output, pauses briefly, and then
    connects this process with ``ray.init``. ``None`` is yielded to the
    consumer. Teardown disconnects and runs ``ray stop`` to completion.
    """
    # Start the Ray processes.
    command = [
        "ray", "start", "--head", "--with-gateway", "--redis-port=21216",
        "--use-raylet"
    ]
    out = run_and_get_output(command)
    print(out)
    # Fixed pause before connecting; presumably gives the freshly started
    # processes time to come up -- TODO confirm whether it is still needed.
    time.sleep(2)

    # Initialize Ray
    ray.init(redis_address="127.0.0.1:21216",
             gateway_socat_port=5001,
             gateway_data_port=5002,
             use_raylet=True)
    yield None

    # The code after the yield will run as teardown code.
    ray.shutdown()
    # Wait for `ray stop` to exit. Without the wait, teardown could return
    # while the cluster is still shutting down, letting the next `ray start`
    # race with it (and leaving the child process unreaped).
    subprocess.Popen(["ray", "stop"]).wait()
Exemplo n.º 10
0
    def testCallingStartRayHead(self):
        """Exercise ``ray start --head`` with various command line parameters.

        Each valid argument set is started and then stopped with ``ray stop``.
        The final invocation passes ``--redis-address`` together with
        ``--head`` and is expected to raise.

        TODO(rkn): This test only tests the --head code path. We should also
        test the non-head node code path.
        """
        head_command = ["ray", "start", "--head"]
        valid_argument_sets = [
            # No arguments.
            [],
            # A number of workers.
            ["--num-workers", "20"],
            # A redis port.
            ["--redis-port", "6379"],
            # Redis shard ports.
            ["--redis-shard-ports", "6380,6381,6382"],
            # A node IP address.
            ["--node-ip-address", "127.0.0.1"],
            # An object manager port.
            ["--object-manager-port", "12345"],
            # The number of CPUs.
            ["--num-cpus", "100"],
            # The number of GPUs.
            ["--num-gpus", "100"],
            # The max redis clients.
            ["--redis-max-clients", "100"],
            # All arguments at once.
            [
                "--num-workers", "20", "--redis-port", "6379",
                "--redis-shard-ports", "6380,6381,6382",
                "--object-manager-port", "12345", "--num-cpus", "100",
                "--num-gpus", "0", "--redis-max-clients", "100",
                "--resources", "{\"Custom\": 1}"
            ],
        ]
        for extra_arguments in valid_argument_sets:
            run_and_get_output(head_command + extra_arguments)
            subprocess.Popen(["ray", "stop"]).wait()

        # Test starting Ray with invalid arguments.
        invalid_arguments = ["--redis-address", "127.0.0.1:6379"]
        with self.assertRaises(Exception):
            run_and_get_output(head_command + invalid_arguments)
        subprocess.Popen(["ray", "stop"]).wait()
Exemplo n.º 11
0
    def _testCleanupOnDriverExit(self, num_redis_shards):
        """Check that a driver's objects and tasks are cleaned up once it exits.

        Starts a head node with the requested number of Redis shards, runs a
        driver in a child process that creates objects and runs one task, and
        then reconnects from this process to verify that the object table is
        empty and the task table holds a single entry.

        The global state tables can lag behind the operations that change
        them, so every expected table size after the initial one is polled
        for (up to ``max_attempts_before_failing`` times, 0.1 s apart) instead
        of being checked exactly once.

        Args:
            num_redis_shards: Value passed to ``--num-redis-shards``.
        """
        stdout = run_and_get_output([
            "ray",
            "start",
            "--head",
            "--num-redis-shards",
            str(num_redis_shards),
        ])
        lines = [m.strip() for m in stdout.split("\n")]
        init_cmd = [m for m in lines if m.startswith("ray.init")]
        self.assertEqual(1, len(init_cmd))
        # Take the text after `redis_address="` and drop its last two
        # characters (presumably the closing `")` of the printed call).
        redis_address = init_cmd[0].split("redis_address=\"")[-1][:-2]
        max_attempts_before_failing = 100

        def StateSummary():
            obj_tbl_len = len(ray.global_state.object_table())
            task_tbl_len = len(ray.global_state.task_table())
            func_tbl_len = len(ray.global_state.function_table())
            return obj_tbl_len, task_tbl_len, func_tbl_len

        def WaitForSummary(expected):
            # Poll until the leading entries of StateSummary() equal
            # `expected`; return False if that never happens in time.
            for _ in range(max_attempts_before_failing):
                if expected == StateSummary()[:len(expected)]:
                    return True
                time.sleep(0.1)
            return False

        def Driver(success):
            success.value = True
            # Start driver.
            ray.init(redis_address=redis_address)
            summary_start = StateSummary()
            if (0, 1) != summary_start[:2]:
                success.value = False

            # Two new objects.
            ray.get(ray.put(1111))
            ray.get(ray.put(1111))
            if not WaitForSummary((2, 1, summary_start[2])):
                success.value = False

            @ray.remote
            def f():
                ray.put(1111)  # Yet another object.
                return 1111  # A returned object as well.

            # 1 new function.
            if not WaitForSummary((2, 1, summary_start[2] + 1)):
                success.value = False

            ray.get(f.remote())
            if not WaitForSummary((4, 2, summary_start[2] + 1)):
                success.value = False

            ray.shutdown()

        success = multiprocessing.Value('b', False)
        driver = multiprocessing.Process(target=Driver, args=(success, ))
        driver.start()
        # Wait for client to exit.
        driver.join()
        time.sleep(5)

        # Just make sure Driver() is run and succeeded.
        self.assertTrue(success.value)
        # Check that objects, tasks, and functions are cleaned up. The monitor
        # may need some time to clean up the global state, so poll first and
        # let the assertion report the final value.
        ray.init(redis_address=redis_address)
        WaitForSummary((0, 1))
        self.assertEqual((0, 1), StateSummary()[:2])

        ray.shutdown()
        subprocess.Popen(["ray", "stop"]).wait()
Exemplo n.º 12
0
def test_calling_start_ray_head():
    """Exercise ``ray start --head`` with a variety of command line options.

    Every valid invocation is followed by ``ray stop``. The cases that pass
    ``--redis-shard-ports`` only run when ``RAY_USE_NEW_GCS`` is absent from
    the environment. The last invocation is expected to raise.

    TODO(rkn): This test only tests the --head code path. We should also test
    the non-head node code path.
    """
    head_command = ["ray", "start", "--head"]

    def start_then_stop(extra_arguments):
        # One start/stop cycle with the given extra CLI arguments.
        run_and_get_output(head_command + extra_arguments)
        subprocess.Popen(["ray", "stop"]).wait()

    always_tested = [
        # No arguments.
        [],
        # A redis port.
        ["--redis-port", "6379"],
        # A node IP address.
        ["--node-ip-address", "127.0.0.1"],
        # The object manager and node manager ports.
        ["--object-manager-port", "12345", "--node-manager-port", "54321"],
        # The number of CPUs.
        ["--num-cpus", "2"],
        # The number of GPUs.
        ["--num-gpus", "100"],
        # The max redis clients.
        ["--redis-max-clients", "100"],
    ]
    for extra_arguments in always_tested:
        start_then_stop(extra_arguments)

    if "RAY_USE_NEW_GCS" not in os.environ:
        # Redis shard ports on their own.
        start_then_stop(["--redis-shard-ports", "6380,6381,6382"])

        # All arguments specified.
        start_then_stop([
            "--redis-port", "6379", "--redis-shard-ports", "6380,6381,6382",
            "--object-manager-port", "12345", "--num-cpus", "2",
            "--num-gpus", "0", "--redis-max-clients", "100", "--resources",
            "{\"Custom\": 1}"
        ])

    # Test starting Ray with invalid arguments.
    with pytest.raises(Exception):
        run_and_get_output(
            head_command + ["--redis-address", "127.0.0.1:6379"])
    subprocess.Popen(["ray", "stop"]).wait()
Exemplo n.º 13
0
def test_calling_start_ray_head():
    """Start and stop a Ray head node under several command line settings.

    The argument sets are run in order, each followed by ``ray stop``. Two
    additional sets involving ``--redis-shard-ports`` run unless
    ``RAY_USE_NEW_GCS`` is set in the environment. Finally, combining
    ``--head`` with ``--redis-address`` must raise.

    TODO(rkn): This test only tests the --head code path. We should also test
    the non-head node code path.
    """
    base = ["ray", "start", "--head"]
    stop = ["ray", "stop"]

    common_cases = (
        [],  # no arguments
        ["--redis-port", "6379"],  # redis port
        ["--node-ip-address", "127.0.0.1"],  # node IP address
        # object manager and node manager ports
        ["--object-manager-port", "12345", "--node-manager-port", "54321"],
        ["--num-cpus", "2"],  # number of CPUs
        ["--num-gpus", "100"],  # number of GPUs
        ["--redis-max-clients", "100"],  # max redis clients
    )
    for options in common_cases:
        run_and_get_output(base + options)
        subprocess.Popen(stop).wait()

    if "RAY_USE_NEW_GCS" not in os.environ:
        shard_cases = (
            # redis shard ports only
            ["--redis-shard-ports", "6380,6381,6382"],
            # all arguments specified
            [
                "--redis-port", "6379", "--redis-shard-ports",
                "6380,6381,6382", "--object-manager-port", "12345",
                "--num-cpus", "2", "--num-gpus", "0", "--redis-max-clients",
                "100", "--resources", "{\"Custom\": 1}"
            ],
        )
        for options in shard_cases:
            run_and_get_output(base + options)
            subprocess.Popen(stop).wait()

    # Test starting Ray with invalid arguments.
    invalid_options = ["--redis-address", "127.0.0.1:6379"]
    with pytest.raises(Exception):
        run_and_get_output(base + invalid_options)
    subprocess.Popen(stop).wait()
Exemplo n.º 14
0
def _test_cleanup_on_driver_exit(num_redis_shards):
    """Check that a driver's objects and tasks are cleaned up after it exits.

    A head node is started with ``num_redis_shards`` Redis shards. A driver
    running in a child process creates objects and runs one task, polling the
    global state tables for the expected sizes. After the driver exits, this
    process reconnects and polls until the object table is empty and the task
    table holds one entry.

    Args:
        num_redis_shards: Value passed to ``--num-redis-shards``.
    """
    start_command = [
        "ray", "start", "--head", "--num-redis-shards",
        str(num_redis_shards)
    ]
    start_output = run_and_get_output(start_command)
    init_lines = [
        line.strip() for line in start_output.split("\n")
        if line.strip().startswith("ray.init")
    ]
    assert 1 == len(init_lines)
    # Text after `redis_address="`, minus its last two characters.
    redis_address = init_lines[0].split("redis_address=\"")[-1][:-2]
    max_attempts_before_failing = 100
    # Wait for monitor.py to start working.
    time.sleep(2)

    def state_summary():
        state = ray.global_state
        object_count = len(state.object_table())
        task_count = len(state.task_table())
        function_count = len(state.function_table())
        return object_count, task_count, function_count

    def wait_for_summary(expected):
        # Poll until the leading entries of state_summary() equal `expected`.
        # Returns False when the attempt budget runs out first.
        for _ in range(max_attempts_before_failing):
            if expected == state_summary()[:len(expected)]:
                return True
            time.sleep(0.1)
        return False

    def run_driver(success):
        success.value = True
        # Start driver.
        ray.init(redis_address=redis_address)
        initial = state_summary()
        if initial[:2] != (0, 1):
            success.value = False
        initial_functions = initial[2]

        # Two new objects.
        ray.get(ray.put(1111))
        ray.get(ray.put(1111))
        if not wait_for_summary((2, 1, initial_functions)):
            success.value = False

        @ray.remote
        def f():
            ray.put(1111)  # Yet another object.
            return 1111  # A returned object as well.

        # 1 new function.
        if not wait_for_summary((2, 1, initial_functions + 1)):
            success.value = False

        ray.get(f.remote())
        if not wait_for_summary((4, 2, initial_functions + 1)):
            success.value = False

        ray.shutdown()

    success = multiprocessing.Value('b', False)
    driver = multiprocessing.Process(target=run_driver, args=(success, ))
    driver.start()
    # Wait for client to exit.
    driver.join()

    # Just make sure the driver ran and succeeded.
    assert success.value
    # Check that objects, tasks, and functions are cleaned up.
    ray.init(redis_address=redis_address)
    wait_for_summary((0, 1))
    assert (0, 1) == state_summary()[:2]

    ray.shutdown()
    stop_process = subprocess.Popen(["ray", "stop"])
    stop_process.wait()
Exemplo n.º 15
0
def _test_cleanup_on_driver_exit(num_redis_shards):
    """Check that a driver's objects and tasks are cleaned up after it exits.

    Starts a head node with the requested number of Redis shards, runs a
    driver in a child process that creates objects and runs one task, and then
    reconnects from this process to verify that the object table is empty and
    the task table holds a single entry.

    The global state tables can lag behind the operations that change them,
    so expected table sizes are polled for (up to
    ``max_attempts_before_failing`` times, 0.1 s apart). This now includes the
    final cleanup check, which used to be a single-shot assertion.

    Args:
        num_redis_shards: Value passed to ``--num-redis-shards``.
    """
    stdout = run_and_get_output([
        "ray",
        "start",
        "--head",
        "--num-redis-shards",
        str(num_redis_shards),
    ])
    lines = [m.strip() for m in stdout.split("\n")]
    init_cmd = [m for m in lines if m.startswith("ray.init")]
    assert 1 == len(init_cmd)
    # Take the text after `redis_address="` and drop its last two characters
    # (presumably the closing `")` of the printed call).
    redis_address = init_cmd[0].split("redis_address=\"")[-1][:-2]
    max_attempts_before_failing = 100

    def StateSummary():
        obj_tbl_len = len(ray.global_state.object_table())
        task_tbl_len = len(ray.global_state.task_table())
        func_tbl_len = len(ray.global_state.function_table())
        return obj_tbl_len, task_tbl_len, func_tbl_len

    def WaitForSummary(expected):
        # Poll until the leading entries of StateSummary() equal `expected`;
        # return False if that never happens within the attempt budget.
        for _ in range(max_attempts_before_failing):
            if expected == StateSummary()[:len(expected)]:
                return True
            time.sleep(0.1)
        return False

    def Driver(success):
        success.value = True
        # Start driver.
        ray.init(redis_address=redis_address)
        summary_start = StateSummary()
        if (0, 1) != summary_start[:2]:
            success.value = False

        # Two new objects.
        ray.get(ray.put(1111))
        ray.get(ray.put(1111))
        if not WaitForSummary((2, 1, summary_start[2])):
            success.value = False

        @ray.remote
        def f():
            ray.put(1111)  # Yet another object.
            return 1111  # A returned object as well.

        # 1 new function.
        if not WaitForSummary((2, 1, summary_start[2] + 1)):
            success.value = False

        ray.get(f.remote())
        if not WaitForSummary((4, 2, summary_start[2] + 1)):
            success.value = False

        ray.shutdown()

    success = multiprocessing.Value('b', False)
    driver = multiprocessing.Process(target=Driver, args=(success, ))
    driver.start()
    # Wait for client to exit.
    driver.join()
    time.sleep(3)

    # Just make sure Driver() is run and succeeded.
    assert success.value
    # Check that objects, tasks, and functions are cleaned up. The monitor may
    # be slow to clean up the global state, so poll before asserting; the
    # assertion then reports the final observed value on failure.
    ray.init(redis_address=redis_address)
    WaitForSummary((0, 1))
    assert (0, 1) == StateSummary()[:2]

    ray.shutdown()
    subprocess.Popen(["ray", "stop"]).wait()
Exemplo n.º 16
0
    def testCallingStartRayHead(self):
        """Start and stop a Ray head node under several command line settings.

        The argument sets below are run in order, each followed by
        ``ray stop``. Finally, combining ``--head`` with ``--redis-address``
        must raise.

        TODO(rkn): This test only tests the --head code path. We should also
        test the non-head node code path.
        """
        base = ["ray", "start", "--head"]
        stop = ["ray", "stop"]

        cases = (
            [],  # no arguments
            ["--num-workers", "20"],  # number of workers
            ["--redis-port", "6379"],  # redis port
            ["--redis-shard-ports", "6380,6381,6382"],  # redis shard ports
            ["--node-ip-address", "127.0.0.1"],  # node IP address
            ["--object-manager-port", "12345"],  # object manager port
            ["--num-cpus", "100"],  # number of CPUs
            ["--num-gpus", "100"],  # number of GPUs
            ["--redis-max-clients", "100"],  # max redis clients
            # all arguments specified
            ["--num-workers", "20",
             "--redis-port", "6379",
             "--redis-shard-ports", "6380,6381,6382",
             "--object-manager-port", "12345",
             "--num-cpus", "100",
             "--num-gpus", "0",
             "--redis-max-clients", "100",
             "--resources", "{\"Custom\": 1}"],
        )
        for options in cases:
            run_and_get_output(base + options)
            subprocess.Popen(stop).wait()

        # Test starting Ray with invalid arguments.
        invalid_options = ["--redis-address", "127.0.0.1:6379"]
        with self.assertRaises(Exception):
            run_and_get_output(base + invalid_options)
        subprocess.Popen(stop).wait()