Example #1
def test_refresh_controller_after_death(shutdown_ray, detached):
    """Check if serve.start() refreshes the controller handle if it's dead."""

    ray.init(namespace="ray_namespace")
    serve.shutdown()  # Ensure serve isn't running before beginning the test
    serve.start(detached=detached)

    old_handle = get_global_client()._controller
    ray.kill(old_handle, no_restart=True)

    def controller_died(handle):
        try:
            ray.get(handle.check_alive.remote())
            return False
        except RayActorError:
            return True

    wait_for_condition(controller_died, handle=old_handle, timeout=15)

    # Call start again to refresh handle
    serve.start(detached=detached)

    new_handle = get_global_client()._controller
    assert new_handle is not old_handle

    # Health check should not error
    ray.get(new_handle.check_alive.remote())

    serve.shutdown()
    ray.shutdown()
Example #2
    async def put_all_deployments(self, req: Request) -> Response:
        from ray.serve.context import get_global_client
        from ray.serve.schema import ServeApplicationSchema

        config = ServeApplicationSchema.parse_obj(await req.json())
        get_global_client().deploy_app(config)

        return Response()
Example #3
def construct_wide_fanout_graph_with_pure_handle(
        fanout_degree,
        sync_handle: bool,
        init_delay_secs=0,
        compute_delay_secs=0) -> RayServeSyncHandle:
    nodes = []
    for id in range(fanout_degree):
        Node.options(name=str(id)).deploy(id, init_delay_secs=init_delay_secs)
        nodes.append(get_global_client().get_handle(str(id), sync=sync_handle))
    CombineNode.options(name="combine").deploy(nodes,
                                               compute_delay_secs,
                                               sync_handle=sync_handle)
    return get_global_client().get_handle("combine", sync=sync_handle)
Example #4
    def deploy(self, *init_args, _blocking=True, **init_kwargs):
        """Deploy or update this deployment.

        Args:
            init_args: args to pass to the class __init__
                method. Not valid if this deployment wraps a function.
            init_kwargs: kwargs to pass to the class __init__
                method. Not valid if this deployment wraps a function.
        """
        if len(init_args) == 0 and self._init_args is not None:
            init_args = self._init_args
        if len(init_kwargs) == 0 and self._init_kwargs is not None:
            init_kwargs = self._init_kwargs

        return get_global_client().deploy(
            self._name,
            self._func_or_class,
            init_args,
            init_kwargs,
            ray_actor_options=self._ray_actor_options,
            config=self._config,
            version=self._version,
            prev_version=self._prev_version,
            route_prefix=self.route_prefix,
            url=self.url,
            _blocking=_blocking,
        )
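A minimal usage sketch for the deploy() method above, assuming the pre-2.0 decorator API; the deployment name "Counter" and its arguments are illustrative, not taken from the examples:

from ray import serve

serve.start()

@serve.deployment(name="Counter", num_replicas=2)
class Counter:
    def __init__(self, initial_value: int = 0):
        self.value = initial_value

    def __call__(self, request):
        self.value += 1
        return {"count": self.value}

# The positional arg is forwarded to Counter.__init__ as init_args; if it
# were omitted, any previously configured init_args would be reused instead
# (see the len(init_args) == 0 check above).
Counter.deploy(10)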
Example #5
    def url(self) -> Optional[str]:
        """Full HTTP url for this deployment."""
        if self._route_prefix is None:
            # this deployment is not exposed over HTTP
            return None

        return get_global_client().root_url + self.route_prefix
Example #6
    async def get_all_deployments(self, req: Request) -> Response:
        from ray.serve.context import get_global_client

        client = get_global_client()

        return Response(
            text=json.dumps(client.get_app_config()),
            content_type="application/json",
        )
Example #7
    async def put_all_deployments(self, req: Request) -> Response:
        from ray import serve
        from ray.serve.context import get_global_client
        from ray.serve.application import Application

        app = Application.from_dict(await req.json())
        serve.run(app, _blocking=False)

        new_names = set()
        for deployment in app.deployments.values():
            new_names.add(deployment.name)

        all_deployments = serve.list_deployments()
        all_names = set(all_deployments.keys())
        names_to_delete = all_names.difference(new_names)
        get_global_client().delete_deployments(names_to_delete)

        return Response()
Example #8
def construct_long_chain_graph_with_pure_handle(
    chain_length, sync_handle: bool, init_delay_secs=0, compute_delay_secs=0
):
    prev_handle = None
    for id in range(chain_length):
        Node.options(name=str(id)).deploy(
            id, prev_handle, init_delay_secs, compute_delay_secs, sync_handle
        )
        prev_handle = get_global_client().get_handle(str(id), sync=sync_handle)
    return prev_handle
Example #9
    async def get_all_deployment_statuses(self, req: Request) -> Response:
        from ray.serve.context import get_global_client
        from ray.serve.schema import serve_status_to_schema

        client = get_global_client()

        serve_status_schema = serve_status_to_schema(client.get_serve_status())
        return Response(
            text=serve_status_schema.json(),
            content_type="application/json",
        )
Example #10
def test_handle_cache_out_of_scope(serve_instance):
    # https://github.com/ray-project/ray/issues/18980
    initial_num_cached = len(get_global_client().handle_cache)

    @serve.deployment(name="f")
    def f():
        return "hi"

    handle = serve.run(f.bind())

    handle_cache = get_global_client().handle_cache
    assert len(handle_cache) == initial_num_cached + 1

    def sender_where_handle_goes_out_of_scope():
        f = serve.get_deployment("f").get_handle()
        assert f is handle
        assert ray.get(f.remote()) == "hi"

    [sender_where_handle_goes_out_of_scope() for _ in range(30)]
    assert len(handle_cache) == initial_num_cached + 1
Example #11
def test_override_namespace(shutdown_ray, detached):
    """Test the _override_controller_namespace flag in serve.start()."""

    ray_namespace = "ray_namespace"
    controller_namespace = "controller_namespace"

    ray.init(namespace=ray_namespace)
    serve.start(detached=detached, _override_controller_namespace=controller_namespace)

    controller_name = get_global_client()._controller_name
    ray.get_actor(controller_name, namespace=controller_namespace)

    serve.shutdown()
Example #12
    def get_handle(
        self, sync: Optional[bool] = True
    ) -> Union[RayServeHandle, RayServeSyncHandle]:
        """Get a ServeHandle to this deployment to invoke it from Python.

        Args:
            sync: If true, then Serve will return a ServeHandle that
                works everywhere. Otherwise, Serve will return an
                asyncio-optimized ServeHandle that's only usable in an asyncio
                loop.

        Returns:
            ServeHandle
        """

        return get_global_client().get_handle(self._name, missing_ok=True, sync=sync)
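A minimal sketch of calling get_handle() from synchronous Python; the deployment name "echo" is hypothetical:

import ray
from ray import serve

serve.start()

@serve.deployment(name="echo")
def echo(*args):
    return "hello"

echo.deploy()

# sync=True returns a RayServeSyncHandle usable outside an asyncio event
# loop; the call still returns an ObjectRef to resolve with ray.get().
handle = serve.get_deployment("echo").get_handle(sync=True)
print(ray.get(handle.remote()))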
Example #13
File: api.py Project: ray-project/ray
def shutdown() -> None:
    """Completely shut down the connected Serve instance.

    Shuts down all processes and deletes all state associated with the
    instance.
    """

    try:
        client = get_global_client()
    except RayServeException:
        logger.info("Nothing to shut down. There's no Serve application "
                    "running on this Ray cluster.")
        return

    client.shutdown()
    set_global_client(None)
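For context, a short sketch of the corresponding call site. Because the RayServeException path above just logs and returns, calling serve.shutdown() when nothing is running is a no-op:

from ray import serve

serve.start(detached=True)
# ... deploy and serve traffic ...
serve.shutdown()  # tears down the controller, proxies, and all replicas
serve.shutdown()  # second call logs "Nothing to shut down." and returns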
Example #14
    async def put_all_deployments(self, req: Request) -> Response:
        from ray import serve
        from ray.serve.context import get_global_client
        from ray.serve.schema import ServeApplicationSchema
        from ray.serve.application import Application

        config = ServeApplicationSchema.parse_obj(await req.json())

        if config.import_path is not None:
            client = get_global_client(_override_controller_namespace="serve")
            client.deploy_app(config)
        else:
            # TODO (shrekris-anyscale): Remove this conditional path
            app = Application.from_dict(await req.json())
            serve.run(app, _blocking=False)

        return Response()
Example #15
def get_deployment_statuses() -> Dict[str, DeploymentStatusInfo]:
    """Returns a dictionary of deployment statuses.

    A deployment's status is one of {UPDATING, UNHEALTHY, HEALTHY}.

    Example:
    >>> from ray.serve.api import get_deployment_statuses
    >>> statuses = get_deployment_statuses() # doctest: +SKIP
    >>> status_info = statuses["deployment_name"] # doctest: +SKIP
    >>> status = status_info.status # doctest: +SKIP
    >>> message = status_info.message # doctest: +SKIP

    Returns:
        Dict[str, DeploymentStatusInfo]: This dictionary maps the running
            deployment's name to a DeploymentStatusInfo object containing its
            status and a message explaining the status.
    """

    return get_global_client().get_deployment_statuses()
Example #16
def test_get_serve_status(shutdown_ray):

    ray.init()

    @serve.deployment
    def f(*args):
        return "Hello world"

    serve.run(f.bind())

    client = get_global_client()
    status_info_1 = client.get_serve_status()
    assert status_info_1.app_status.status == "RUNNING"
    assert status_info_1.deployment_statuses[0].name == "f"
    assert status_info_1.deployment_statuses[0].status in {
        "UPDATING", "HEALTHY"
    }

    serve.shutdown()
    ray.shutdown()
Example #17
def test_fixed_number_proxies(ray_cluster):
    cluster = ray_cluster
    head_node = cluster.add_node(num_cpus=4)
    cluster.add_node(num_cpus=4)
    cluster.add_node(num_cpus=4)

    ray.init(head_node.address)
    node_ids = ray._private.state.node_ids()
    assert len(node_ids) == 3

    with pytest.raises(
        pydantic.ValidationError,
        match="you must specify the `fixed_number_replicas` parameter.",
    ):
        serve.start(
            http_options={
                "location": "FixedNumber",
            }
        )

    serve.start(
        http_options={
            "port": new_port(),
            "location": "FixedNumber",
            "fixed_number_replicas": 2,
        }
    )

    # Only the controller and two HTTP proxies should be started.
    controller_handle = get_global_client()._controller
    node_to_http_actors = ray.get(controller_handle.get_http_proxies.remote())
    assert len(node_to_http_actors) == 2

    proxy_names_bytes = ray.get(controller_handle.get_http_proxy_names.remote())
    proxy_names = ActorNameList.FromString(proxy_names_bytes)
    assert len(proxy_names.names) == 2

    serve.shutdown()
    ray.shutdown()
    cluster.shutdown()
Example #18
File: api.py Project: ray-project/ray
def list_deployments() -> Dict[str, Deployment]:
    """Returns a dictionary of all active deployments.

    Dictionary maps deployment name to Deployment objects.
    """
    infos = get_global_client().list_deployments()

    deployments = {}
    for name, (deployment_info, route_prefix) in infos.items():
        deployments[name] = Deployment(
            deployment_info.replica_config.deployment_def,
            name,
            deployment_info.deployment_config,
            version=deployment_info.version,
            init_args=deployment_info.replica_config.init_args,
            init_kwargs=deployment_info.replica_config.init_kwargs,
            route_prefix=route_prefix,
            ray_actor_options=deployment_info.replica_config.ray_actor_options,
            _internal=True,
        )

    return deployments
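A small sketch of consuming list_deployments(); the attributes accessed are standard Deployment properties, and the printed fields are only illustrative:

from ray import serve

# Maps each active deployment's name to its Deployment object.
for name, deployment in serve.list_deployments().items():
    print(name, deployment.version, deployment.route_prefix)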
Example #19
def get_deployment(name: str) -> Deployment:
    """Dynamically fetch a handle to a Deployment object.

    This can be used to update and redeploy a deployment without access to
    the original definition.

    Example:
    >>> from ray import serve
    >>> MyDeployment = serve.get_deployment("name")  # doctest: +SKIP
    >>> MyDeployment.options(num_replicas=10).deploy()  # doctest: +SKIP

    Args:
        name(str): name of the deployment. This must have already been
            deployed.

    Returns:
        Deployment
    """
    try:
        (
            deployment_info,
            route_prefix,
        ) = get_global_client().get_deployment_info(name)
    except KeyError:
        raise KeyError(
            f"Deployment {name} was not found. Did you call Deployment.deploy()?"
        )
    return Deployment(
        cloudpickle.loads(
            deployment_info.replica_config.serialized_deployment_def),
        name,
        deployment_info.deployment_config,
        version=deployment_info.version,
        init_args=deployment_info.replica_config.init_args,
        init_kwargs=deployment_info.replica_config.init_kwargs,
        route_prefix=route_prefix,
        ray_actor_options=deployment_info.replica_config.ray_actor_options,
        _internal=True,
    )
Example #20
File: api.py Project: ray-project/ray
def start(
    detached: bool = False,
    http_options: Optional[Union[dict, HTTPOptions]] = None,
    dedicated_cpu: bool = False,
    _checkpoint_path: str = DEFAULT_CHECKPOINT_PATH,
    **kwargs,
) -> ServeControllerClient:
    """Initialize a serve instance.

    By default, the instance will be scoped to the lifetime of the returned
    Client object (or when the script exits). If detached is set to True, the
    instance will instead persist until serve.shutdown() is called. This is
    only relevant if connecting to a long-running Ray cluster (e.g., with
    ray.init(address="auto") or ray.init("ray://<remote_addr>")).

    Args:
        detached: Whether or not the instance should be detached from this
          script. If set, the instance will live on the Ray cluster until it is
          explicitly stopped with serve.shutdown().
        http_options (Optional[Dict, serve.HTTPOptions]): Configuration options
          for HTTP proxy. You can pass in a dictionary or HTTPOptions object
          with fields:

            - host(str, None): Host for HTTP servers to listen on. Defaults to
              "127.0.0.1". To expose Serve publicly, you probably want to set
              this to "0.0.0.0".
            - port(int): Port for HTTP server. Defaults to 8000.
            - root_path(str): Root path to mount the serve application
              (for example, "/serve"). All deployment routes will be prefixed
              with this path. Defaults to "".
            - middlewares(list): A list of Starlette middlewares that will be
              applied to the HTTP servers in the cluster. Defaults to [].
            - location(str, serve.config.DeploymentMode): The deployment
              location of HTTP servers:

                - "HeadOnly": start one HTTP server on the head node. Serve
                  assumes the head node is the node you executed serve.start
                  on. This is the default.
                - "EveryNode": start one HTTP server per node.
                - "NoServer" or None: disable HTTP server.
            - num_cpus (int): The number of CPU cores to reserve for each
              internal Serve HTTP proxy actor.  Defaults to 0.
        dedicated_cpu: Whether to reserve a CPU core for the internal
          Serve controller actor.  Defaults to False.
    """
    usage_lib.record_library_usage("serve")

    http_deprecated_args = ["http_host", "http_port", "http_middlewares"]
    for key in http_deprecated_args:
        if key in kwargs:
            raise ValueError(
                f"{key} is deprecated, please use serve.start(http_options="
                f'{{"{key}": {kwargs[key]}}}) instead.')
    # Initialize ray if needed.
    ray._private.worker.global_worker.filter_logs_by_job = False
    if not ray.is_initialized():
        ray.init(namespace=SERVE_NAMESPACE)

    try:
        client = get_global_client(_health_check_controller=True)
        logger.info(
            f'Connecting to existing Serve app in namespace "{SERVE_NAMESPACE}".'
        )

        _check_http_and_checkpoint_options(client, http_options,
                                           _checkpoint_path)
        return client
    except RayServeException:
        pass

    if detached:
        controller_name = SERVE_CONTROLLER_NAME
    else:
        controller_name = format_actor_name(get_random_letters(),
                                            SERVE_CONTROLLER_NAME)

    if isinstance(http_options, dict):
        http_options = HTTPOptions.parse_obj(http_options)
    if http_options is None:
        http_options = HTTPOptions()

    controller = ServeController.options(
        num_cpus=1 if dedicated_cpu else 0,
        name=controller_name,
        lifetime="detached" if detached else None,
        max_restarts=-1,
        max_task_retries=-1,
        # Pin Serve controller on the head node.
        resources={
            get_current_node_resource_key(): 0.01
        },
        namespace=SERVE_NAMESPACE,
        max_concurrency=CONTROLLER_MAX_CONCURRENCY,
    ).remote(
        controller_name,
        http_options,
        _checkpoint_path,
        detached=detached,
    )

    proxy_handles = ray.get(controller.get_http_proxies.remote())
    if len(proxy_handles) > 0:
        try:
            ray.get(
                [handle.ready.remote() for handle in proxy_handles.values()],
                timeout=HTTP_PROXY_TIMEOUT,
            )
        except ray.exceptions.GetTimeoutError:
            raise TimeoutError(
                f"HTTP proxies not available after {HTTP_PROXY_TIMEOUT}s.")

    client = ServeControllerClient(
        controller,
        controller_name,
        detached=detached,
    )
    set_global_client(client)
    logger.info(f"Started{' detached ' if detached else ' '}Serve instance in "
                f'namespace "{SERVE_NAMESPACE}".')
    return client
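A minimal usage sketch of the http_options described in the docstring above; the values are illustrative, not defaults:

from ray import serve

# Start a detached Serve instance that listens on all interfaces and runs
# one HTTP proxy per node; it persists until serve.shutdown() is called.
serve.start(
    detached=True,
    http_options={
        "host": "0.0.0.0",
        "port": 8000,
        "location": "EveryNode",
    },
)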
Example #21
    def delete(self):
        """Delete this deployment."""

        return get_global_client().delete_deployments([self._name])
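A brief sketch of calling delete() on a previously deployed deployment; the name "Counter" is hypothetical:

from ray import serve

# Fetch the deployment by name and tear down all of its replicas.
serve.get_deployment("Counter").delete()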