async def test_graceful_shutdown(serve_instance, router,
                                 mock_controller_with_name):
    """Draining a replica must block until all in-flight queries complete.

    Uses a servable whose __call__ blocks on an asyncio.Event until a
    user_config update with release=True flips every pending event.
    """

    class KeepInflight:
        def __init__(self):
            # One event per in-flight request; set on release.
            self.events = []

        def reconfigure(self, config):
            if config["release"]:
                # Plain loop: setting events is a side effect, not a
                # value-producing comprehension.
                for event in self.events:
                    event.set()

        async def __call__(self, _):
            e = asyncio.Event()
            self.events.append(e)
            await e.wait()

    backend_worker = await add_servable_to_router(
        KeepInflight,
        router,
        mock_controller_with_name[0],
        backend_config=BackendConfig(
            num_replicas=1,
            internal_metadata=BackendMetadata(is_blocking=False),
            user_config={"release": False}))
    query_param = make_request_param()
    refs = [(await router.assign_request.remote(query_param))
            for _ in range(6)]

    shutdown_ref = backend_worker.drain_pending_queries.remote()
    with pytest.raises(ray.exceptions.GetTimeoutError):
        # Shutdown should block because there are still inflight queries.
        ray.get(shutdown_ref, timeout=2)

    config = BackendConfig()
    config.user_config = {"release": True}
    await mock_controller_with_name[1].update_backend.remote("backend", config)

    # All queries should complete successfully
    ray.get(refs)
    # The draining operation should be completed.
    ray.get(shutdown_ref)
async def test_user_config_update(serve_instance, mock_controller_with_name):
    """A user_config update should change the value replicas return.

    Polls until the new config has propagated to the replicas.
    """

    class Customizable:
        def __init__(self):
            # Bug fix: was `self.reval`, but __call__ reads `self.retval`,
            # so any request before reconfigure() raised AttributeError.
            self.retval = ""

        def __call__(self, starlette_request):
            return self.retval

        def reconfigure(self, config):
            self.retval = config["return_val"]

    config = BackendConfig(
        num_replicas=2, user_config={
            "return_val": "original",
            "b": 2
        })
    worker, router = await add_servable_to_router(
        Customizable, *mock_controller_with_name, backend_config=config)
    query_param = make_request_param()
    done = [(await router.assign_request(query_param)) for _ in range(10)]
    for i in done:
        assert await i == "original"

    config = BackendConfig()
    config.user_config = {"return_val": "new_val"}
    await mock_controller_with_name[1].update_backend.remote("backend", config)

    async def new_val_returned():
        result = await (await router.assign_request(query_param))
        assert "new_val" == result

    for _ in range(10):
        try:
            await new_val_returned()
        except AssertionError:
            # Wait for config to propagate
            await asyncio.sleep(0.5)
    # Bug fix: the final check must be awaited — a bare call creates a
    # coroutine that never runs, silently skipping the assertion.
    await new_val_returned()
async def test_user_config_update(serve_instance, router,
                                  mock_controller_with_name):
    """Updating user_config should change what every replica returns."""

    class Customizable:
        def __init__(self):
            # Bug fix: was `self.reval`, but __call__ reads `self.retval`,
            # so any request before reconfigure() raised AttributeError.
            self.retval = ""

        def __call__(self, starlette_request):
            return self.retval

        def reconfigure(self, config):
            self.retval = config["return_val"]

    config = BackendConfig(
        num_replicas=2,
        user_config={
            "return_val": "original",
            "b": 2
        })
    await add_servable_to_router(
        Customizable,
        router,
        mock_controller_with_name[0],
        backend_config=config)
    query_param = make_request_param()
    done = [(await router.assign_request.remote(query_param))
            for _ in range(10)]
    for i in done:
        assert await i == "original"

    config = BackendConfig()
    config.user_config = {"return_val": "new_val"}
    await mock_controller_with_name[1].update_backend.remote("backend", config)

    done = [(await router.assign_request.remote(query_param))
            for _ in range(10)]
    for i in done:
        assert await i == "new_val"
def test_backend_user_config(serve_instance):
    """num_replicas and user_config updates apply to a live deployment."""
    config = BackendConfig(num_replicas=2, user_config={"count": 123, "b": 2})

    @serve.deployment("counter", config=config)
    class Counter:
        def __init__(self):
            self.count = 10

        def __call__(self, starlette_request):
            return self.count, os.getpid()

        def reconfigure(self, config):
            self.count = config["count"]

    Counter.deploy()
    handle = Counter.get_handle()

    def check(val, num_replicas):
        # Every sampled response must report `val`, and the sampled
        # worker PIDs must cover exactly `num_replicas` replicas.
        pids_seen = set()
        for _ in range(100):  # loop index unused; `_` instead of `i`
            result = ray.get(handle.remote())
            if str(result[0]) != val:
                return False
            pids_seen.add(result[1])
        return len(pids_seen) == num_replicas

    wait_for_condition(lambda: check("123", 2))

    config.num_replicas = 3
    Counter = Counter.options(config=config)
    Counter.deploy()
    wait_for_condition(lambda: check("123", 3))

    config.user_config = {"count": 456}
    Counter = Counter.options(config=config)
    Counter.deploy()
    wait_for_condition(lambda: check("456", 3))
def deployment(
        _func_or_class: Optional[Callable] = None,
        name: Optional[str] = None,
        version: Optional[str] = None,
        prev_version: Optional[str] = None,
        num_replicas: Optional[int] = None,
        init_args: Optional[Tuple[Any]] = None,
        route_prefix: Optional[str] = None,
        ray_actor_options: Optional[Dict] = None,
        user_config: Optional[Any] = None,
        max_concurrent_queries: Optional[int] = None,
        _autoscaling_config: Optional[Union[Dict, AutoscalingConfig]] = None,
) -> Callable[[Callable], Deployment]:
    """Define a Serve deployment.

    Can be used bare (``@serve.deployment``) or with arguments
    (``@serve.deployment(name=..., num_replicas=...)``).

    Args:
        name (Optional[str]): Globally-unique name identifying this
            deployment. Defaults to the name of the decorated class or
            function.
        version (Optional[str]): Version of the deployment. Re-deploying
            with a different version triggers a rolling update of the
            replicas. If not provided, every deployment is treated as a
            new version.
        prev_version (Optional[str]): Precondition on the currently
            deployed version; the deploy fails if it does not match the
            existing deployment's version. If not provided, no version
            check is performed.
        num_replicas (Optional[int]): Number of processes handling
            requests to this deployment. Defaults to 1.
        init_args (Optional[Tuple]): Arguments passed to the class
            constructor when replicas start. Can also be given to
            ``.deploy()`` on the returned Deployment.
        route_prefix (Optional[str]): HTTP path prefix routed to this
            deployment (defaults to ``/{name}``). Longest-prefix match
            applies; prefixes must not end with ``/`` except the root
            ``/``, which acts as a catch-all.
        ray_actor_options (dict): Options forwarded to the Ray actor
            constructor, e.g. resource requirements.
        user_config (Optional[Any]): [experimental] Config passed to the
            deployment's reconfigure method; can be updated dynamically
            without a version change or replica restart. Must be
            hashable (hashable types, possibly nested in lists/dicts).
        max_concurrent_queries (Optional[int]): Maximum number of
            outstanding queries per replica. Defaults to 100.

    Example:
    >>> @serve.deployment(name="deployment1", version="v1")
        class MyDeployment:
            pass

    >>> MyDeployment.deploy(*init_args)
    >>> MyDeployment.options(num_replicas=2, init_args=init_args).deploy()

    Returns:
        Deployment
    """
    backend_config = BackendConfig()
    # Only overwrite BackendConfig defaults for options the caller set.
    if num_replicas is not None:
        backend_config.num_replicas = num_replicas
    if user_config is not None:
        backend_config.user_config = user_config
    if max_concurrent_queries is not None:
        backend_config.max_concurrent_queries = max_concurrent_queries
    if _autoscaling_config is not None:
        backend_config.autoscaling_config = _autoscaling_config

    def make_deployment(func_or_class):
        deployment_name = name if name is not None else func_or_class.__name__
        return Deployment(
            func_or_class,
            deployment_name,
            backend_config,
            version=version,
            prev_version=prev_version,
            init_args=init_args,
            route_prefix=route_prefix,
            ray_actor_options=ray_actor_options,
            _internal=True,
        )

    # Support both parametrized and bare decorator usage (see @serve.batch
    # for details).
    if callable(_func_or_class):
        return make_deployment(_func_or_class)
    return make_deployment