Пример #1
0
    def __init__(self, provider_config, cluster_name):
        """Initialise the provider and select its state backend.

        Args:
            provider_config: Provider configuration mapping. It is handed to
                ``ClusterState`` as-is when a cluster name is given; otherwise
                its ``"list_of_node_ips"`` entry is read.
            cluster_name: Name of the cluster. A falsy value switches the
                provider to coordinator mode.
        """
        NodeProvider.__init__(self, provider_config, cluster_name)

        if not cluster_name:
            # LocalNodeProvider with a coordinator server: state lives in
            # fixed files under /tmp and is seeded from the configured IPs.
            node_ips = provider_config["list_of_node_ips"]
            self.state = OnPremCoordinatorState(
                "/tmp/coordinator.lock", "/tmp/coordinator.state", node_ips)
            self.use_coordinator = True
            return

        # Named cluster: lock and state file locations derive from the name.
        lock_file = get_lock_path(cluster_name)
        state_file = get_state_path(cluster_name)
        self.state = ClusterState(lock_file, state_file, provider_config)
        self.use_coordinator = False
Пример #2
0
    def testClusterStateInit(self):
        """Check ClusterState __init__ func generates correct state file.

        Covers, in order:
          * the state file written for a fresh local cluster config,
          * removing a worker IP and re-creating the provider,
          * adding the worker IP back and re-creating the provider,
          * ``record_local_head_state_if_needed``, including that a second
            call leaves the head node's tags unchanged.
        """

        def load_state(path):
            # Read via a context manager so the file handle is closed
            # deterministically rather than left for the garbage collector.
            with open(path) as state_file:
                return json.load(state_file)

        self._monkeypatch.setenv("RAY_TMPDIR", self._tmpdir)
        # ensure that a new cluster can start up if RAY_TMPDIR doesn't exist yet
        assert not os.path.exists(get_ray_temp_dir())
        # Use a random head_ip so that the state file is regenerated each time
        # this test is run. (Otherwise the test will fail spuriously when run a
        # second time.)
        head_ip = ".".join(str(random.randint(0, 255)) for _ in range(4))
        cluster_config = {
            "cluster_name": "random_name",
            "min_workers": 0,
            "max_workers": 0,
            "provider": {
                "type": "local",
                "head_ip": head_ip,
                "worker_ips": ["0.0.0.0:1"],
                "external_head_ip": "0.0.0.0.3",
            },
        }
        provider_config = cluster_config["provider"]
        node_provider = _get_node_provider(
            provider_config, cluster_config["cluster_name"], use_cache=False
        )
        assert os.path.exists(get_ray_temp_dir())
        assert node_provider.external_ip(head_ip) == "0.0.0.0.3"
        assert isinstance(node_provider, LocalNodeProvider)
        expected_workers = {}
        expected_workers[provider_config["head_ip"]] = {
            "tags": {TAG_RAY_NODE_KIND: NODE_KIND_HEAD},
            "state": "terminated",
            "external_ip": "0.0.0.0.3",
        }
        expected_workers[provider_config["worker_ips"][0]] = {
            "tags": {TAG_RAY_NODE_KIND: NODE_KIND_WORKER},
            "state": "terminated",
        }

        state_save_path = local_config.get_state_path(cluster_config["cluster_name"])

        assert os.path.exists(state_save_path)
        assert load_state(state_save_path) == expected_workers

        # Test removing workers updates the cluster state.
        del expected_workers[provider_config["worker_ips"][0]]
        removed_ip = provider_config["worker_ips"].pop()
        # The provider is re-created only for its effect on the state file.
        node_provider = _get_node_provider(
            provider_config, cluster_config["cluster_name"], use_cache=False
        )
        assert load_state(state_save_path) == expected_workers

        # Test adding back workers updates the cluster state.
        expected_workers[removed_ip] = {
            "tags": {TAG_RAY_NODE_KIND: NODE_KIND_WORKER},
            "state": "terminated",
        }
        provider_config["worker_ips"].append(removed_ip)
        node_provider = _get_node_provider(
            provider_config, cluster_config["cluster_name"], use_cache=False
        )
        assert load_state(state_save_path) == expected_workers

        # Test record_local_head_state_if_needed
        head_ip = cluster_config["provider"]["head_ip"]
        cluster_name = cluster_config["cluster_name"]
        node_provider = _get_node_provider(
            provider_config, cluster_config["cluster_name"], use_cache=False
        )
        assert head_ip not in node_provider.non_terminated_nodes({})
        record_local_head_state_if_needed(node_provider)
        assert head_ip in node_provider.non_terminated_nodes({})
        expected_head_tags = {
            TAG_RAY_NODE_KIND: NODE_KIND_HEAD,
            TAG_RAY_USER_NODE_TYPE: local_config.LOCAL_CLUSTER_NODE_TYPE,
            TAG_RAY_NODE_NAME: "ray-{}-head".format(cluster_name),
            TAG_RAY_NODE_STATUS: STATUS_UP_TO_DATE,
        }
        assert node_provider.node_tags(head_ip) == expected_head_tags
        # Repeat and verify nothing has changed.
        record_local_head_state_if_needed(node_provider)
        assert head_ip in node_provider.non_terminated_nodes({})
        assert node_provider.node_tags(head_ip) == expected_head_tags
Пример #3
0
    def testClusterStateInit(self):
        """Check ClusterState __init__ func generates correct state file.

        Covers, in order:
          * the state file written for a fresh local cluster config,
          * removing a worker IP and re-creating the provider,
          * adding the worker IP back and re-creating the provider.
        """

        def load_state(path):
            # Read via a context manager so the file handle is closed
            # deterministically rather than left for the garbage collector.
            with open(path) as state_file:
                return json.load(state_file)

        cluster_config = {
            "cluster_name": "random_name",
            "min_workers": 0,
            "max_workers": 0,
            "provider": {
                "type": "local",
                "head_ip": "0.0.0.0:2",
                "worker_ips": ["0.0.0.0:1"],
                "external_head_ip": "0.0.0.0.3"
            },
        }
        provider_config = cluster_config["provider"]
        node_provider = _get_node_provider(
            provider_config, cluster_config["cluster_name"], use_cache=False)
        assert node_provider.external_ip("0.0.0.0:2") == "0.0.0.0.3"
        assert isinstance(node_provider, LocalNodeProvider)
        expected_workers = {}
        expected_workers[provider_config["head_ip"]] = {
            "tags": {
                TAG_RAY_NODE_KIND: NODE_KIND_HEAD
            },
            "state": "terminated",
            "external_ip": "0.0.0.0.3"
        }
        expected_workers[provider_config["worker_ips"][0]] = {
            "tags": {
                TAG_RAY_NODE_KIND: NODE_KIND_WORKER
            },
            "state": "terminated",
        }

        state_save_path = local_config.get_state_path(
            cluster_config["cluster_name"])

        assert os.path.exists(state_save_path)
        assert load_state(state_save_path) == expected_workers

        # Test removing workers updates the cluster state.
        del expected_workers[provider_config["worker_ips"][0]]
        removed_ip = provider_config["worker_ips"].pop()
        # The provider is re-created only for its effect on the state file.
        node_provider = _get_node_provider(
            provider_config, cluster_config["cluster_name"], use_cache=False)
        assert load_state(state_save_path) == expected_workers

        # Test adding back workers updates the cluster state.
        expected_workers[removed_ip] = {
            "tags": {
                TAG_RAY_NODE_KIND: NODE_KIND_WORKER
            },
            "state": "terminated",
        }
        provider_config["worker_ips"].append(removed_ip)
        node_provider = _get_node_provider(
            provider_config, cluster_config["cluster_name"], use_cache=False)
        assert load_state(state_save_path) == expected_workers