示例#1
0
async def test_graceful_shutdown(serve_instance, router,
                                 mock_controller_with_name):
    """Check that draining a replica waits for its in-flight queries.

    Six requests are parked on ``asyncio.Event`` objects inside the
    servable. While they are pending, ``drain_pending_queries`` is expected
    not to finish within the 2 second timeout. After the user config is
    updated with ``{"release": True}`` the events are set, and both the
    requests and the drain call are expected to complete.
    """

    class KeepInflight:
        """Servable whose calls stay pending until released via config."""

        def __init__(self):
            # One event per call that is currently blocked in __call__.
            self.events = []

        def reconfigure(self, config):
            if not config["release"]:
                return
            for pending in self.events:
                pending.set()

        async def __call__(self, _):
            gate = asyncio.Event()
            self.events.append(gate)
            await gate.wait()

    blocking_config = BackendConfig(
        num_replicas=1,
        internal_metadata=BackendMetadata(is_blocking=False),
        user_config={"release": False})
    backend_worker = await add_servable_to_router(
        KeepInflight,
        router,
        mock_controller_with_name[0],
        backend_config=blocking_config)

    request = make_request_param()

    pending_refs = []
    for _ in range(6):
        pending_refs.append(await router.assign_request.remote(request))

    drain_ref = backend_worker.drain_pending_queries.remote()

    # The drain must still be blocked on the in-flight queries, so fetching
    # its result is expected to time out.
    with pytest.raises(ray.exceptions.GetTimeoutError):
        ray.get(drain_ref, timeout=2)

    release_config = BackendConfig()
    release_config.user_config = {"release": True}
    await mock_controller_with_name[1].update_backend.remote(
        "backend", release_config)

    # Every parked query should now finish without error ...
    ray.get(pending_refs)
    # ... which in turn lets the drain call return.
    ray.get(drain_ref)
示例#2
0
async def test_user_config_update(serve_instance, mock_controller_with_name):
    """Check that updating ``user_config`` changes what replicas return.

    The servable is deployed with ``return_val`` set to ``"original"`` and
    ten requests are expected to return that value. The backend is then
    updated with ``return_val`` set to ``"new_val"``, and the test polls
    until a request returns the new value.
    """

    class Customizable:
        def __init__(self):
            # Must be the same attribute __call__ reads; otherwise a request
            # served before reconfigure() would raise AttributeError.
            self.retval = ""

        def __call__(self, starlette_request):
            return self.retval

        def reconfigure(self, config):
            self.retval = config["return_val"]

    config = BackendConfig(num_replicas=2,
                           user_config={
                               "return_val": "original",
                               "b": 2
                           })
    # Only the router is used below; the worker handle is not needed.
    _worker, router = await add_servable_to_router(Customizable,
                                                   *mock_controller_with_name,
                                                   backend_config=config)

    query_param = make_request_param()

    done = [(await router.assign_request(query_param)) for _ in range(10)]
    for i in done:
        assert await i == "original"

    config = BackendConfig()
    config.user_config = {"return_val": "new_val"}
    await mock_controller_with_name[1].update_backend.remote("backend", config)

    async def new_val_returned():
        result = await (await router.assign_request(query_param))
        assert "new_val" == result

    for _ in range(10):
        try:
            await new_val_returned()
            # The new config is visible; stop polling.
            break
        except AssertionError:
            # Wait for config to propagate
            await asyncio.sleep(0.5)
    else:
        # Every retry failed. Make one last awaited attempt so that the
        # AssertionError actually fails the test instead of being dropped
        # in a coroutine that is never run.
        await new_val_returned()
示例#3
0
async def test_user_config_update(serve_instance, router,
                                  mock_controller_with_name):
    """Check that updating ``user_config`` changes what replicas return.

    The servable is deployed with ``return_val`` set to ``"original"`` and
    ten requests are expected to return that value. The backend is then
    updated with ``return_val`` set to ``"new_val"`` and ten further
    requests are expected to return the new value.
    """

    class Customizable:
        def __init__(self):
            # Must be the same attribute __call__ reads; otherwise a request
            # served before reconfigure() would raise AttributeError.
            self.retval = ""

        def __call__(self, starlette_request):
            return self.retval

        def reconfigure(self, config):
            self.retval = config["return_val"]

    config = BackendConfig(
        num_replicas=2, user_config={
            "return_val": "original",
            "b": 2
        })
    await add_servable_to_router(
        Customizable,
        router,
        mock_controller_with_name[0],
        backend_config=config)

    query_param = make_request_param()

    done = [(await router.assign_request.remote(query_param))
            for _ in range(10)]
    for i in done:
        assert await i == "original"

    config = BackendConfig()
    config.user_config = {"return_val": "new_val"}
    # NOTE(review): the assertions below assume the update has reached every
    # replica by the time this await returns -- confirm against the mock
    # controller's update_backend implementation.
    await mock_controller_with_name[1].update_backend.remote("backend", config)

    done = [(await router.assign_request.remote(query_param))
            for _ in range(10)]

    for i in done:
        assert await i == "new_val"
示例#4
0
def test_backend_user_config(serve_instance):
    """Check that ``user_config`` is applied across scaling and updates.

    A counter deployment reports its configured count together with the
    process id serving the request. The test waits until every sampled
    response carries the expected count and the number of distinct process
    ids equals the expected replica count: first with 2 replicas, then
    after scaling to 3, then after changing the configured count.
    """
    config = BackendConfig(num_replicas=2, user_config={"count": 123, "b": 2})

    @serve.deployment("counter", config=config)
    class Counter:
        def __init__(self):
            self.count = 10

        def __call__(self, starlette_request):
            return self.count, os.getpid()

        def reconfigure(self, config):
            self.count = config["count"]

    Counter.deploy()
    handle = Counter.get_handle()

    def check(val, num_replicas):
        # Sample 100 responses; any response with a different count fails
        # the check immediately.
        replica_pids = set()
        for _ in range(100):
            reply = ray.get(handle.remote())
            if str(reply[0]) == val:
                replica_pids.add(reply[1])
            else:
                return False
        return len(replica_pids) == num_replicas

    def expect(val, num_replicas):
        # Block until check() reports the wanted count and replica total.
        wait_for_condition(lambda: check(val, num_replicas))

    expect("123", 2)

    # Scale up while keeping the same user config.
    config.num_replicas = 3
    Counter = Counter.options(config=config)
    Counter.deploy()
    expect("123", 3)

    # Change only the user config; the replica count stays at 3.
    config.user_config = {"count": 456}
    Counter = Counter.options(config=config)
    Counter.deploy()
    expect("456", 3)
示例#5
0
def deployment(
    _func_or_class: Optional[Callable] = None,
    name: Optional[str] = None,
    version: Optional[str] = None,
    prev_version: Optional[str] = None,
    num_replicas: Optional[int] = None,
    init_args: Optional[Tuple[Any]] = None,
    route_prefix: Optional[str] = None,
    ray_actor_options: Optional[Dict] = None,
    user_config: Optional[Any] = None,
    max_concurrent_queries: Optional[int] = None,
    _autoscaling_config: Optional[Union[Dict, AutoscalingConfig]] = None,
) -> Callable[[Callable], Deployment]:
    """Define a Serve deployment.

    Usable both as a bare decorator (``@serve.deployment``) and as a
    parametrized one (``@serve.deployment(name=...)``).

    Args:
        _func_or_class (Optional[Callable]): The function or class being
            decorated when the decorator is applied without arguments.
            When this is callable it is wrapped immediately; otherwise a
            decorator is returned.
        name (Optional[str]): Globally-unique name identifying this
            deployment. If not provided, the name of the class or function
            will be used.
        version (Optional[str]): Version of the deployment. This indicates
            a code change for the deployment; when it is re-deployed with a
            version change, a rolling update of the replicas will be
            performed. If not provided, every deployment will be treated as
            a new version.
        prev_version (Optional[str]): Version of the existing deployment,
            used as a precondition for the next deployment. If prev_version
            does not match the existing deployment's version, the
            deployment will fail. If not provided, the existing
            deployment's version is not checked.
        num_replicas (Optional[int]): The number of processes to start up
            that will handle requests to this deployment. Defaults to 1.
        init_args (Optional[Tuple]): Arguments to be passed to the class
            constructor when starting up deployment replicas. These can
            also be passed when you call `.deploy()` on the returned
            Deployment.
        route_prefix (Optional[str]): Requests to paths under this HTTP
            path prefix will be routed to this deployment. Defaults to
            '/{name}'. Routing is done based on longest-prefix match, so if
            you have deployment A with a prefix of '/a' and deployment B
            with a prefix of '/a/b', requests to '/a', '/a/', and '/a/c' go
            to A and requests to '/a/b', '/a/b/', and '/a/b/c' go to B.
            Routes must not end with a '/' unless they're the root (just
            '/'), which acts as a catch-all.
        ray_actor_options (dict): Options to be passed to the Ray actor
            constructor such as resource requirements.
        user_config (Optional[Any]): [experimental] Config to pass to the
            reconfigure method of the deployment. This can be updated
            dynamically without changing the version of the deployment and
            restarting its replicas. The user_config needs to be hashable
            to keep track of updates, so it must only contain hashable
            types, or hashable types nested in lists and dictionaries.
        max_concurrent_queries (Optional[int]): The maximum number of
            queries that will be sent to a replica of this deployment
            without receiving a response. Defaults to 100.
        _autoscaling_config (Optional[Union[Dict, AutoscalingConfig]]):
            Stored on the backend config as ``autoscaling_config`` when
            provided.

    Example:

    >>> @serve.deployment(name="deployment1", version="v1")
    ... class MyDeployment:
    ...     pass

    >>> MyDeployment.deploy(*init_args)
    >>> MyDeployment.options(num_replicas=2, init_args=init_args).deploy()

    Returns:
        Deployment
    """

    # (config attribute, supplied value) pairs; only values that were
    # actually supplied override the BackendConfig defaults.
    overrides = (
        ("num_replicas", num_replicas),
        ("user_config", user_config),
        ("max_concurrent_queries", max_concurrent_queries),
        ("autoscaling_config", _autoscaling_config),
    )

    config = BackendConfig()
    for attr, value in overrides:
        if value is not None:
            setattr(config, attr, value)

    def _decorate(target):
        # Fall back to the decorated object's own name when none was given.
        deployment_name = target.__name__ if name is None else name
        return Deployment(
            target,
            deployment_name,
            config,
            version=version,
            prev_version=prev_version,
            init_args=init_args,
            route_prefix=route_prefix,
            ray_actor_options=ray_actor_options,
            _internal=True,
        )

    # This handles both parametrized and non-parametrized usage of the
    # decorator. See the @serve.batch code for more details.
    if callable(_func_or_class):
        return _decorate(_func_or_class)
    return _decorate