示例#1
0
    def _get_target_nodes(self) -> List[Tuple[str, str]]:
        """Return the list of (id, resource_key) to deploy HTTP servers on.

        The selection is driven by ``self._config.location``:

        * ``DeploymentMode.NoServer``: an empty list.
        * ``DeploymentMode.HeadOnly``: at most one node, the first whose
          resource key equals ``get_current_node_resource_key()``.
        * ``DeploymentMode.FixedNumber``: a seeded sample of
          ``self._config.fixed_number_replicas`` nodes, capped at the number
          of nodes returned by ``get_all_node_ids()``.
        * Any other value: every node returned by ``get_all_node_ids()``.

        Returns:
            A list of ``(node_id, node_resource)`` pairs.
        """
        location = self._config.location
        target_nodes = get_all_node_ids()

        if location == DeploymentMode.NoServer:
            return []

        if location == DeploymentMode.HeadOnly:
            head_node_resource_key = get_current_node_resource_key()
            return [(node_id, node_resource)
                    for node_id, node_resource in target_nodes
                    if node_resource == head_node_resource_key][:1]

        if location == DeploymentMode.FixedNumber:
            num_replicas = self._config.fixed_number_replicas
            if num_replicas > len(target_nodes):
                logger.warning(
                    "You specified fixed_number_replicas="
                    f"{num_replicas} but there are only "
                    f"{len(target_nodes)} total nodes. Serve will start one "
                    "HTTP proxy per node.")
                num_replicas = len(target_nodes)

            # Sample from a private generator seeded with the configured
            # value so the same set of nodes is always picked, without
            # reseeding the process-wide ``random`` state that other code
            # in this process may depend on.
            rng = random.Random(self._config.fixed_number_selection_seed)
            # Sorting first makes the sample independent of the order in
            # which the nodes were reported.
            return rng.sample(sorted(target_nodes), k=num_replicas)

        return target_nodes
示例#2
0
 def get_proxy_names():
     """Build the HTTP proxy actor name for every node.

     One name is produced per entry of ``get_all_node_ids()``, formatted
     from ``SERVE_PROXY_NAME``, the client's controller name and the node id.
     """
     return [
         format_actor_name(SERVE_PROXY_NAME, client._controller_name, node_id)
         for node_id, _ in get_all_node_ids()
     ]
示例#3
0
    def _start_routers_if_needed(self, http_host: str, http_port: str,
                                 http_middlewares: List[Any]) -> None:
        """Make sure ``self.routers_cache`` holds a router for every node.

        Nodes already present in the cache are left untouched. For each
        remaining node the router actor is looked up by name; if that lookup
        raises ``ValueError`` a new ``HTTPProxyActor`` is created instead.

        Args:
            http_host: Host forwarded to newly created routers.
            http_port: Port forwarded to newly created routers.
            http_middlewares: Middlewares forwarded to newly created routers.
        """
        for node_id, node_resource in get_all_node_ids():
            if node_id not in self.routers_cache:
                actor_name = format_actor_name(
                    SERVE_PROXY_NAME, self.controller_name, node_id)
                try:
                    handle = ray.get_actor(actor_name)
                except ValueError:
                    # Lookup by name failed, so start a new router actor.
                    message = ("Starting router with name '{}' on node '{}' "
                               "listening on '{}:{}'")
                    logger.info(
                        message.format(actor_name, node_id, http_host,
                                       http_port))
                    actor_options = {
                        "name": actor_name,
                        "lifetime": "detached" if self.detached else None,
                        "max_concurrency": ASYNC_CONCURRENCY,
                        "max_restarts": -1,
                        "max_task_retries": -1,
                        # Request a small share of the node's resource key.
                        "resources": {node_resource: 0.01},
                    }
                    handle = HTTPProxyActor.options(**actor_options).remote(
                        http_host,
                        http_port,
                        controller_name=self.controller_name,
                        http_middlewares=http_middlewares)

                self.routers_cache[node_id] = handle
示例#4
0
    def _start_routers_if_needed(self):
        """Make sure ``self.routers`` holds a router for every node.

        Nodes already present in ``self.routers`` are left untouched. For
        each remaining node the router actor is looked up by name; if that
        lookup raises ``ValueError`` a new ``HTTPProxyActor`` is created.
        """
        for node_id, node_resource in get_all_node_ids():
            if node_id not in self.routers:
                actor_name = format_actor_name(
                    SERVE_PROXY_NAME, self.instance_name, node_id)
                try:
                    handle = ray.get_actor(actor_name)
                except ValueError:
                    # Lookup by name failed, so start a new router actor.
                    message = ("Starting router with name '{}' on node '{}' "
                               "listening on '{}:{}'")
                    logger.info(
                        message.format(actor_name, node_id, self.http_host,
                                       self.http_port))
                    actor_options = {
                        "name": actor_name,
                        "max_concurrency": ASYNC_CONCURRENCY,
                        "max_restarts": -1,
                        "max_task_retries": -1,
                        # Request a small share of the node's resource key.
                        "resources": {node_resource: 0.01},
                    }
                    handle = HTTPProxyActor.options(**actor_options).remote(
                        node_id,
                        self.http_host,
                        self.http_port,
                        instance_name=self.instance_name,
                        _http_middlewares=self._http_middlewares)

                self.routers[node_id] = handle
示例#5
0
    def _start_proxies_if_needed(self) -> None:
        """Make sure ``self._proxy_actors`` holds a proxy for every node.

        Does nothing when the configured host is ``None``. Otherwise, nodes
        already present in ``self._proxy_actors`` are left untouched; for each
        remaining node the proxy actor is looked up by name and, if that
        lookup raises ``ValueError``, a new ``HTTPProxyActor`` is created.
        """
        config = self._config
        if config.host is None:
            return

        for node_id, node_resource in get_all_node_ids():
            if node_id not in self._proxy_actors:
                actor_name = format_actor_name(
                    SERVE_PROXY_NAME, self._controller_name, node_id)
                try:
                    handle = ray.get_actor(actor_name)
                except ValueError:
                    # Lookup by name failed, so start a new proxy actor.
                    message = (
                        "Starting HTTP proxy with name '{}' on node '{}' "
                        "listening on '{}:{}'")
                    logger.info(
                        message.format(actor_name, node_id, config.host,
                                       config.port))
                    actor_options = {
                        "name": actor_name,
                        "lifetime": "detached" if self._detached else None,
                        "max_concurrency": ASYNC_CONCURRENCY,
                        "max_restarts": -1,
                        "max_task_retries": -1,
                        # Request a small share of the node's resource key.
                        "resources": {node_resource: 0.01},
                    }
                    handle = HTTPProxyActor.options(**actor_options).remote(
                        config.host,
                        config.port,
                        controller_name=self._controller_name,
                        http_middlewares=config.middlewares)

                self._proxy_actors[node_id] = handle
示例#6
0
def test_shutdown(ray_shutdown):
    """Check that ``serve.shutdown()`` removes the controller and proxy actors.

    Starts Serve, deploys a trivial deployment, waits until both named actors
    can be looked up, shuts Serve down, and then waits until neither can be
    looked up any more.
    """
    ray.init(num_cpus=16)
    serve.start(http_options=dict(port=8003))

    @serve.deployment
    def f():
        pass

    f.deploy()

    serve_controller_name = serve.context._global_client._controller_name
    proxy_name = format_actor_name(
        SERVE_PROXY_NAME,
        serve.context._global_client._controller_name,
        get_all_node_ids()[0][0],
    )
    actor_names = [serve_controller_name, proxy_name]

    def actor_exists(actor_name):
        """Return True if the named actor can be looked up, else False."""
        try:
            if actor_name == serve_controller_name:
                # The controller is looked up with an explicit namespace.
                ray.get_actor(
                    actor_name,
                    namespace=ray.get_runtime_context().namespace)
            else:
                ray.get_actor(actor_name)
        except ValueError:
            return False
        return True

    def check_alive():
        # A list (not a generator) so that every actor is looked up even
        # after one of them is found to be missing.
        return all([actor_exists(name) for name in actor_names])

    wait_for_condition(check_alive)

    serve.shutdown()
    with pytest.raises(RayServeException):
        serve.list_deployments()

    def check_dead():
        # Stops at the first actor that can still be looked up.
        return not any(actor_exists(name) for name in actor_names)

    wait_for_condition(check_dead)
示例#7
0
文件: http_state.py 项目: parasj/ray
    def _stop_proxies_if_needed(self) -> bool:
        """Removes proxy actors from any nodes that no longer exist.

        Every node id tracked in ``self._proxy_actors`` that is absent from
        ``get_all_node_ids()`` has its proxy dropped from
        ``self._proxy_actors`` and ``self._proxy_actor_names`` and killed
        without restart.

        Returns:
            True if at least one proxy actor was removed, False otherwise.
        """
        all_node_ids = {node_id for node_id, _ in get_all_node_ids()}

        # Collect first: the dict must not be mutated while iterating it.
        to_stop = []
        for node_id in self._proxy_actors:
            if node_id not in all_node_ids:
                logger.info("Removing HTTP proxy on removed node '{}'.".format(node_id))
                to_stop.append(node_id)

        for node_id in to_stop:
            proxy = self._proxy_actors.pop(node_id)
            del self._proxy_actor_names[node_id]
            ray.kill(proxy, no_restart=True)

        # Honour the declared ``bool`` return type instead of implicitly
        # returning None.
        return bool(to_stop)
示例#8
0
    def _get_target_nodes(self) -> List[Tuple[str, str]]:
        """Pick the (id, resource_key) pairs that should host HTTP servers.

        ``DeploymentMode.NoServer`` yields no nodes and
        ``DeploymentMode.HeadOnly`` yields at most one node, the first whose
        resource key equals ``get_current_node_resource_key()``. Any other
        location yields every node from ``get_all_node_ids()``.
        """
        location = self._config.location
        all_nodes = get_all_node_ids()

        if location == DeploymentMode.NoServer:
            return []

        if location == DeploymentMode.HeadOnly:
            head_key = get_current_node_resource_key()
            matching = []
            for node_id, node_resource in all_nodes:
                if node_resource == head_key:
                    matching.append((node_id, node_resource))
            # Keep at most one entry.
            return matching[:1]

        return all_nodes
示例#9
0
def test_shutdown(ray_shutdown):
    """Check that ``serve.shutdown()`` removes the controller and proxy actors.

    Starts Serve, deploys a trivial deployment, waits until both named actors
    can be looked up, shuts Serve down, and then waits until neither can be
    looked up any more.
    """
    ray.init(num_cpus=16)
    serve.start(http_port=8003)

    @serve.deployment
    def f():
        pass

    f.deploy()

    controller_name = serve.api._global_client._controller_name
    proxy_name = format_actor_name(SERVE_PROXY_NAME,
                                   serve.api._global_client._controller_name,
                                   get_all_node_ids()[0][0])
    actor_names = [controller_name, proxy_name]

    def actor_exists(actor_name):
        """Return True if the named actor can be looked up, else False."""
        try:
            ray.get_actor(actor_name)
        except ValueError:
            return False
        return True

    def check_alive():
        # A list (not a generator) so that every actor is looked up even
        # after one of them is found to be missing.
        return all([actor_exists(name) for name in actor_names])

    wait_for_condition(check_alive)

    serve.shutdown()
    with pytest.raises(RayServeException):
        serve.list_backends()

    def check_dead():
        # Stops at the first actor that can still be looked up.
        return not any(actor_exists(name) for name in actor_names)

    wait_for_condition(check_dead)
示例#10
0
    def _stop_routers_if_needed(self) -> bool:
        """Kill cached routers whose node is no longer reported.

        A cached node id counts as stale when it is missing from
        ``get_all_node_ids()``. Each stale router is removed from
        ``self.routers_cache`` and killed without restart.

        Returns:
            True if at least one router was removed (a checkpoint should be
            taken), False otherwise.
        """
        live_node_ids = {node_id for node_id, _ in get_all_node_ids()}

        # Snapshot the stale ids so the cache is not mutated mid-iteration.
        stale_node_ids = [
            node_id for node_id in self.routers_cache
            if node_id not in live_node_ids
        ]
        for node_id in stale_node_ids:
            logger.info(
                "Removing router on removed node '{}'.".format(node_id))

        for node_id in stale_node_ids:
            ray.kill(self.routers_cache.pop(node_id), no_restart=True)

        return len(stale_node_ids) > 0
示例#11
0
文件: http_state.py 项目: parasj/ray
    def _get_target_nodes(self) -> List[Tuple[str, str]]:
        """Return the list of (node_id, ip_address) to deploy HTTP servers on.

        The selection is driven by ``self._config.location``:

        * ``DeploymentMode.NoServer``: an empty list.
        * ``DeploymentMode.HeadOnly``: the single node whose id equals
          ``self._head_node_id``.
        * ``DeploymentMode.FixedNumber``: a seeded sample of
          ``self._config.fixed_number_replicas`` nodes, capped at the number
          of nodes returned by ``get_all_node_ids()``.
        * Any other value: every node returned by ``get_all_node_ids()``.

        Returns:
            A list of ``(node_id, ip_address)`` pairs.

        Raises:
            AssertionError: In ``HeadOnly`` mode, if the number of nodes
                matching ``self._head_node_id`` is not exactly one.
        """
        location = self._config.location
        target_nodes = get_all_node_ids()

        if location == DeploymentMode.NoServer:
            return []

        if location == DeploymentMode.HeadOnly:
            nodes = [
                (node_id, ip_address)
                for node_id, ip_address in target_nodes
                if node_id == self._head_node_id
            ]
            assert len(nodes) == 1, (
                f"Head node not found! Head node id: {self._head_node_id}, "
                f"all nodes: {target_nodes}."
            )
            return nodes

        if location == DeploymentMode.FixedNumber:
            num_replicas = self._config.fixed_number_replicas
            if num_replicas > len(target_nodes):
                logger.warning(
                    "You specified fixed_number_replicas="
                    f"{num_replicas} but there are only "
                    f"{len(target_nodes)} total nodes. Serve will start one "
                    "HTTP proxy per node."
                )
                num_replicas = len(target_nodes)

            # Sample from a private generator seeded with the configured
            # value so the same set of nodes is always picked, without
            # reseeding the process-wide ``random`` state that other code
            # in this process may depend on.
            rng = random.Random(self._config.fixed_number_selection_seed)
            # Sorting first makes the sample independent of the order in
            # which the nodes were reported.
            return rng.sample(sorted(target_nodes), k=num_replicas)

        return target_nodes