# Example #1
# 0
def test_docker_shm_override(run_option_type):
    """Check that a user-supplied ``--shm-size`` run option is honored.

    ``run_option_type`` is the docker-config key under which the run options
    list is stored. The test initializes a ``DockerCommandRunner`` against
    mocked infrastructure and inspects the commands that were issued.
    """
    runner = MockProcessRunner()
    node_provider = MockProvider()
    node_provider.create_node({}, {}, 1)

    command_runner = DockerCommandRunner(
        log_prefix="prefix",
        node_id=0,
        provider=node_provider,
        auth_config=auth_config,
        cluster_name="cluster",
        process_runner=runner,
        use_internal_ip=False,
        docker_config={
            "container_name": "container",
            "image": "rayproject/ray:latest",
            run_option_type: ["--shm-size=80g"],
        },
    )

    # Canned replies ("[]" twice) for commands matching this pattern.
    runner.respond_to_call("json .Config.Env", ["[]", "[]"])
    command_runner.run_init(as_head=True, file_mounts={}, sync_run_yet=True)

    # The explicit shared-memory size must show up in an issued command ...
    runner.assert_has_call("1.2.3.4", pattern="--shm-size=80g")

    # ... and no command may read /proc/meminfo, i.e. SHM auto-detection
    # was skipped.
    runner.assert_not_has_call("1.2.3.4", pattern="/proc/meminfo")
# Example #2
# 0
def test_rsync_without_exclude_and_filter():
    """Check that rsync-up adds no exclude/filter flags when none are given.

    Runs ``SSHCommandRunner.run_rsync_up`` against mocked infrastructure with
    only ``docker_mount_if_possible`` set, then verifies that neither an
    ``--exclude`` nor a ``--filter`` argument was issued.
    """
    runner = MockProcessRunner()
    node_provider = MockProvider()
    node_provider.create_node({}, {}, 1)

    command_runner = SSHCommandRunner(
        log_prefix="prefix",
        node_id=0,
        provider=node_provider,
        auth_config=auth_config,
        cluster_name="cluster",
        process_runner=runner,
        use_internal_ip=False,
    )

    source_dir = "/home/ubuntu/base/mount/"
    target_dir = "/root/protected_mount/"

    # Canned reply for commands matching "docker inspect -f".
    runner.respond_to_call("docker inspect -f", ["true"])
    rsync_options = {"docker_mount_if_possible": True}
    command_runner.run_rsync_up(source_dir, target_dir, options=rsync_options)

    # No exclude or filter option was supplied, so none may be emitted.
    for unexpected in ("--exclude test", "--filter dir-merge,- .ignore"):
        runner.assert_not_has_call("1.2.3.4", pattern=unexpected)
 def testCommandPassing(self):
     """Verify per-node-type command overrides reach the right workers.

     The p2.8xlarge type gets its own ``worker_setup_commands`` and the
     p2.xlarge type its own ``initialization_commands``. After scaling up
     one node of each type, the test checks that the overriding commands
     were run on the matching node and the default ones were not.
     """
     # Local import keeps this block self-contained.
     import copy

     t = "custom"
     # Deep copy: the nested "available_node_types" dicts are mutated below,
     # and a shallow ``.copy()`` would write those changes into the shared
     # MULTI_WORKER_CLUSTER fixture, leaking state into other tests.
     config = copy.deepcopy(MULTI_WORKER_CLUSTER)
     config["available_node_types"]["p2.8xlarge"][
         "worker_setup_commands"] = ["new_worker_setup_command"]
     config["available_node_types"]["p2.xlarge"][
         "initialization_commands"] = ["new_worker_initialization_cmd"]
     config["available_node_types"]["p2.xlarge"]["resources"][t] = 1
     # Commenting out this line causes the test case to fail?!?!
     config["min_workers"] = 0
     config["max_workers"] = 10
     config_path = self.write_config(config)
     self.provider = MockProvider()
     runner = MockProcessRunner()
     autoscaler = StandardAutoscaler(
         config_path,
         LoadMetrics(),
         max_failures=0,
         process_runner=runner,
         update_interval_s=0)
     assert len(self.provider.non_terminated_nodes({})) == 0
     autoscaler.update()
     self.waitForNodes(0)

     # One CPU request -> first node is an m4.large.
     autoscaler.request_resources([{"CPU": 1}])
     autoscaler.update()
     self.waitForNodes(1)
     assert self.provider.mock_nodes[0].node_type == "m4.large"

     # An 8-GPU request -> second node is a p2.8xlarge.
     autoscaler.request_resources([{"GPU": 8}])
     autoscaler.update()
     self.waitForNodes(2)
     assert self.provider.mock_nodes[1].node_type == "p2.8xlarge"

     # Nine single-GPU requests -> third node is a p2.xlarge.
     autoscaler.request_resources([{"GPU": 1}] * 9)
     autoscaler.update()
     self.waitForNodes(3)
     assert self.provider.mock_nodes[2].node_type == "p2.xlarge"

     autoscaler.update()
     # Short pause before inspecting the recorded commands.
     sleep(0.1)

     # p2.8xlarge: only the overriding worker setup command was run.
     runner.assert_has_call(self.provider.mock_nodes[1].internal_ip,
                            "new_worker_setup_command")
     runner.assert_not_has_call(self.provider.mock_nodes[1].internal_ip,
                                "setup_cmd")
     runner.assert_not_has_call(self.provider.mock_nodes[1].internal_ip,
                                "worker_setup_cmd")

     # p2.xlarge: only the overriding initialization command was run.
     runner.assert_has_call(self.provider.mock_nodes[2].internal_ip,
                            "new_worker_initialization_cmd")
     runner.assert_not_has_call(self.provider.mock_nodes[2].internal_ip,
                                "init_cmd")
# Example #4
# 0
def test_docker_rsync():
    """Exercise rsync up/down through ``DockerCommandRunner``.

    Four scenarios are checked against the recorded commands:

    1. Directory up with ``docker_mount_if_possible=True``: rsync targets the
       host-side mount location and no ``docker cp`` is issued.
    2. File up with ``docker_mount_if_possible=False``: rsync targets the
       host-side location and a ``docker cp`` is issued.
    3. Directory down with ``docker_mount_if_possible=True``: rsync reads
       from the host-side mount location, no ``docker cp``.
    4. File down with ``docker_mount_if_possible=False``: ``docker cp`` is
       issued and rsync reads from the host-side location.

    In every scenario rsync must never address the raw in-container path.
    """
    process_runner = MockProcessRunner()
    provider = MockProvider()
    provider.create_node({}, {}, 1)
    cluster_name = "cluster"
    docker_config = {"container_name": "container"}
    args = {
        "log_prefix": "prefix",
        "node_id": 0,
        "provider": provider,
        "auth_config": auth_config,
        "cluster_name": cluster_name,
        "process_runner": process_runner,
        "use_internal_ip": False,
        "docker_config": docker_config,
    }
    cmd_runner = DockerCommandRunner(**args)

    # rsync addresses the node as "<user>@<ip>". The patterns previously
    # contained the literal placeholder "[email protected]" (an e-mail
    # obfuscation artifact), which can never occur in a real command: the
    # positive assertions could not pass and the negative ones were vacuous.
    # NOTE(review): assumes auth_config carries the login name under
    # "ssh_user" -- confirm against the module-level fixture.
    ssh_target = f"{auth_config['ssh_user']}@1.2.3.4"

    local_mount = "/home/ubuntu/base/mount/"
    remote_mount = "/root/protected_mount/"
    docker_mount_prefix = get_docker_host_mount_location(cluster_name)
    remote_host_mount = f"{docker_mount_prefix}{remote_mount}"

    local_file = "/home/ubuntu/base-file"
    remote_file = "/root/protected-file"
    remote_host_file = f"{docker_mount_prefix}{remote_file}"

    # --- 1. Directory up, docker mount allowed -------------------------
    process_runner.respond_to_call("docker inspect -f", ["true"])
    cmd_runner.run_rsync_up(local_mount,
                            remote_mount,
                            options={"docker_mount_if_possible": True})

    # Make sure we do not copy directly to raw destination
    process_runner.assert_not_has_call(
        "1.2.3.4", pattern=f"-avz {local_mount} {ssh_target}:{remote_mount}")
    process_runner.assert_not_has_call("1.2.3.4",
                                       pattern=f"mkdir -p {remote_mount}")
    # No docker cp for file_mounts
    process_runner.assert_not_has_call("1.2.3.4", pattern="docker cp")
    process_runner.assert_has_call(
        "1.2.3.4",
        pattern=f"-avz {local_mount} {ssh_target}:{remote_host_mount}")
    process_runner.clear_history()

    # --- 2. File up, docker mount not allowed --------------------------
    process_runner.respond_to_call("docker inspect -f", ["true"])
    cmd_runner.run_rsync_up(local_file,
                            remote_file,
                            options={"docker_mount_if_possible": False})

    # Make sure we do not copy directly to raw destination
    process_runner.assert_not_has_call(
        "1.2.3.4", pattern=f"-avz {local_file} {ssh_target}:{remote_file}")
    process_runner.assert_not_has_call("1.2.3.4",
                                       pattern=f"mkdir -p {remote_file}")

    process_runner.assert_has_call("1.2.3.4", pattern="docker cp")
    process_runner.assert_has_call(
        "1.2.3.4",
        pattern=f"-avz {local_file} {ssh_target}:{remote_host_file}")
    process_runner.clear_history()

    # --- 3. Directory down, docker mount allowed -----------------------
    cmd_runner.run_rsync_down(remote_mount,
                              local_mount,
                              options={"docker_mount_if_possible": True})

    process_runner.assert_not_has_call("1.2.3.4", pattern="docker cp")
    process_runner.assert_not_has_call(
        "1.2.3.4", pattern=f"-avz {ssh_target}:{remote_mount} {local_mount}")
    process_runner.assert_has_call(
        "1.2.3.4",
        pattern=f"-avz {ssh_target}:{remote_host_mount} {local_mount}")

    process_runner.clear_history()

    # --- 4. File down, docker mount not allowed ------------------------
    cmd_runner.run_rsync_down(remote_file,
                              local_file,
                              options={"docker_mount_if_possible": False})

    process_runner.assert_has_call("1.2.3.4", pattern="docker cp")
    process_runner.assert_not_has_call(
        "1.2.3.4", pattern=f"-avz {ssh_target}:{remote_file} {local_file}")
    process_runner.assert_has_call(
        "1.2.3.4",
        pattern=f"-avz {ssh_target}:{remote_host_file} {local_file}")
    def testDockerWorkers(self):
        """Verify per-node-type docker settings override the cluster defaults.

        The p2.8xlarge type overrides both the worker image and the run
        options, the p2.xlarge type overrides only the worker image, and the
        remaining types use the cluster-wide docker section. After scaling
        up one node per relevant type, the recorded commands are checked for
        the expected image and run-option strings.
        """
        # Local import keeps this block self-contained.
        import copy

        # Deep copy: the nested "available_node_types" and "docker" dicts are
        # mutated below, and a shallow ``.copy()`` would write those changes
        # into the shared MULTI_WORKER_CLUSTER fixture and leak into other
        # tests.
        config = copy.deepcopy(MULTI_WORKER_CLUSTER)
        config["available_node_types"]["p2.8xlarge"]["docker"] = {
            "worker_image": "p2.8x_image:latest",
            "worker_run_options": ["p2.8x-run-options"]
        }
        config["available_node_types"]["p2.xlarge"]["docker"] = {
            "worker_image": "p2x_image:nightly"
        }
        config["docker"]["worker_run_options"] = ["standard-run-options"]
        config["docker"]["image"] = "default-image:nightly"
        config["docker"]["worker_image"] = "default-image:nightly"
        # Commenting out this line causes the test case to fail?!?!
        config["min_workers"] = 0
        config["max_workers"] = 10
        config_path = self.write_config(config)
        self.provider = MockProvider()
        runner = MockProcessRunner()
        autoscaler = StandardAutoscaler(config_path,
                                        LoadMetrics(),
                                        max_failures=0,
                                        process_runner=runner,
                                        update_interval_s=0)
        assert len(self.provider.non_terminated_nodes({})) == 0
        autoscaler.update()
        self.waitForNodes(0)

        # One CPU request -> first node is an m4.large.
        autoscaler.request_resources([{"CPU": 1}])
        autoscaler.update()
        self.waitForNodes(1)
        assert self.provider.mock_nodes[0].node_type == "m4.large"

        # An 8-GPU request -> second node is a p2.8xlarge.
        autoscaler.request_resources([{"GPU": 8}])
        autoscaler.update()
        self.waitForNodes(2)
        assert self.provider.mock_nodes[1].node_type == "p2.8xlarge"

        # Nine single-GPU requests -> third node is a p2.xlarge.
        autoscaler.request_resources([{"GPU": 1}] * 9)
        autoscaler.update()
        self.waitForNodes(3)
        assert self.provider.mock_nodes[2].node_type == "p2.xlarge"
        autoscaler.update()

        # Fill up m4, p2.8, p2 and request 2 more CPUs
        autoscaler.request_resources([{
            "CPU": 2
        }, {
            "CPU": 16
        }, {
            "CPU": 32
        }, {
            "CPU": 2
        }])
        autoscaler.update()
        self.waitForNodes(4)
        assert self.provider.mock_nodes[3].node_type == "m4.16xlarge"
        autoscaler.update()
        # Short pause before inspecting the recorded commands.
        sleep(0.1)

        # p2.8xlarge: its own image and run options, none of the defaults.
        runner.assert_has_call(self.provider.mock_nodes[1].internal_ip,
                               "p2.8x-run-options")
        runner.assert_has_call(self.provider.mock_nodes[1].internal_ip,
                               "p2.8x_image:latest")
        runner.assert_not_has_call(self.provider.mock_nodes[1].internal_ip,
                                   "default-image:nightly")
        runner.assert_not_has_call(self.provider.mock_nodes[1].internal_ip,
                                   "standard-run-options")

        # p2.xlarge: its own image, but the cluster-wide run options.
        runner.assert_has_call(self.provider.mock_nodes[2].internal_ip,
                               "p2x_image:nightly")
        runner.assert_has_call(self.provider.mock_nodes[2].internal_ip,
                               "standard-run-options")
        runner.assert_not_has_call(self.provider.mock_nodes[2].internal_ip,
                                   "p2.8x-run-options")

        # m4.16xlarge: cluster-wide image and run options only.
        runner.assert_has_call(self.provider.mock_nodes[3].internal_ip,
                               "default-image:nightly")
        runner.assert_has_call(self.provider.mock_nodes[3].internal_ip,
                               "standard-run-options")
        runner.assert_not_has_call(self.provider.mock_nodes[3].internal_ip,
                                   "p2.8x-run-options")
        runner.assert_not_has_call(self.provider.mock_nodes[3].internal_ip,
                                   "p2x_image:nightly")