async def test_router_use_max_concurrency(serve_instance):
    """Check the router's bookkeeping when ``max_concurrent_queries`` is 1.

    One replica that blocks on a signal is registered for the backend and
    two queries are enqueued. After each release of the signal the router's
    per-replica counter and per-backend queue are inspected.
    """
    BACKEND_NAME = "max-concurrent-test"
    REPLICA_KEY = "max-concurrent-test:replica-tag"

    signal = SignalActor.remote()

    @ray.remote
    class MockWorker:
        async def handle_request(self, request):
            # Stay in flight until the test releases the signal.
            await signal.wait.remote()
            return "DONE"

        def ready(self):
            pass

    class VisibleRouter(Router):
        def get_queues(self):
            # Expose the router's internal counters to the test.
            return self.queries_counter, self.backend_queues

    replica = MockWorker.remote()
    router = ray.remote(VisibleRouter).remote()
    limit_one = BackendConfig({"max_concurrent_queries": 1})

    async def assert_router_state(inflight, buffered):
        # Fetch a fresh snapshot of the router state and compare it.
        counters, queues = await router.get_queues.remote()
        assert counters[REPLICA_KEY] == inflight
        assert len(queues[BACKEND_NAME]) == buffered

    await router.set_traffic.remote("svc", TrafficPolicy({BACKEND_NAME: 1.0}))
    await router.add_new_worker.remote(BACKEND_NAME, "replica-tag", replica)
    await router.set_backend_config.remote(BACKEND_NAME, limit_one)

    # Submit two queries back to back.
    pending = [
        router.enqueue_request.remote(RequestMetadata("svc", None), 1)
        for _ in range(2)
    ]
    first_query, second_query = pending

    # The replica is blocked, so neither result can be ready yet.
    with pytest.raises(ray.exceptions.RayTimeoutError):
        ray.get(pending, timeout=0.2)

    # One query in flight, the other one buffered.
    await assert_router_state(inflight=1, buffered=1)

    # Release the first query.
    await signal.send.remote(clear=True)
    assert await first_query == "DONE"

    # Still one query in flight, but nothing left in the queue.
    await assert_router_state(inflight=1, buffered=0)

    # Release the second query.
    await signal.send.remote(clear=True)
    assert await second_query == "DONE"

    # Everything has drained.
    await assert_router_state(inflight=0, buffered=0)
def test_worker_replica_failure(serve_instance):
    """Kill one of two replicas and check requests are still answered.

    A shared ``Counter`` actor hands every constructed ``Worker`` an index;
    any worker constructed after the first two spins forever in its
    constructor.
    """

    @ray.remote
    class Counter:
        def __init__(self):
            self.count = 0

        def inc_and_get(self):
            self.count += 1
            return self.count

    class Worker:
        # Only the first two constructed workers ever finish __init__;
        # a restarted worker gets index > 2 and hangs in the constructor.
        def __init__(self, counter):
            self.should_hang = False
            self.index = ray.get(counter.inc_and_get.remote())
            if self.index > 2:
                while True:
                    pass

        def __call__(self, *args):
            return self.index

    counter = Counter.remote()
    serve.create_backend("replica_failure", Worker, counter)
    serve.update_backend_config(
        "replica_failure", BackendConfig(num_replicas=2))
    serve.create_endpoint(
        "replica_failure",
        backend="replica_failure",
        route="/replica_failure")

    # Poll for up to 30 seconds until both replica indices were observed.
    seen = set()
    began = time.time()
    both_up = False
    while not both_up and time.time() - began < 30:
        time.sleep(0.1)
        text = request_with_retries("/replica_failure", timeout=1).text
        assert text in ["1", "2"]
        seen.add(text)
        both_up = len(seen) > 1
    if not both_up:
        raise TimeoutError("Timed out waiting for replicas after 30s.")

    # Kill one replica; no_restart=False is passed to ray.kill.
    handles = _get_worker_handles("replica_failure")
    assert len(handles) == 2
    ray.kill(handles[0], no_restart=False)

    # Ten requests must each eventually succeed.
    for _ in range(10):
        served = False
        while not served:
            try:
                # Keep the timeout short: a request that lands on the
                # restarting worker will hang.
                request_with_retries("/replica_failure", timeout=0.1)
            except TimeoutError:
                time.sleep(0.1)
            else:
                served = True
async def test_task_runner_custom_method_batch(serve_instance):
    """Exercise ``call_method`` dispatch against a batch-accepting class.

    Covers two well-behaved methods, a method that returns the wrong number
    of results, a method that returns a non-iterable, and a method that
    returns a numpy array.
    """
    router = RoundRobinPolicyQueueActor.remote()

    @serve.accept_batch
    class Batcher:
        def a(self, _):
            size = serve.context.batch_size
            return ["a-{}".format(idx) for idx in range(size)]

        def b(self, _):
            size = serve.context.batch_size
            return ["b-{}".format(idx) for idx in range(size)]

        def error_different_size(self, _):
            # Twice as many results as there were requests.
            return [""] * (serve.context.batch_size * 2)

        def error_non_iterable(self, _):
            return 42

        def return_np_array(self, _):
            return np.array([1] * serve.context.batch_size).astype(np.int32)

    CONSUMER_NAME = "runner"
    PRODUCER_NAME = "producer"

    worker = setup_worker(CONSUMER_NAME, Batcher)
    await router.set_traffic.remote(PRODUCER_NAME, {CONSUMER_NAME: 1.0})
    batch_config = BackendConfig({"max_batch_size": 10}, accepts_batches=True)
    await router.set_backend_config.remote(CONSUMER_NAME, batch_config)

    def make_request_param(call_method):
        return RequestMetadata(
            PRODUCER_NAME,
            context.TaskContext.Python,
            call_method=call_method)

    # Queue two "a" requests followed by two "b" requests before any worker
    # is registered.
    params = {name: make_request_param(name) for name in ("a", "b")}
    futures = [
        router.enqueue_request.remote(params[name])
        for name in ("a", "a", "b", "b")
    ]

    await router.add_new_worker.remote(CONSUMER_NAME, "replica1", worker)

    gathered = await asyncio.gather(*futures)
    assert set(gathered) == {"a-0", "a-1", "b-0", "b-1"}

    with pytest.raises(RayServeException, match="doesn't preserve batch size"):
        different_size = make_request_param("error_different_size")
        await router.enqueue_request.remote(different_size)

    with pytest.raises(RayServeException, match="iterable"):
        non_iterable = make_request_param("error_non_iterable")
        await router.enqueue_request.remote(non_iterable)

    # Each caller receives a single element of the returned array.
    np_array = make_request_param("return_np_array")
    result_np_value = await router.enqueue_request.remote(np_array)
    assert isinstance(result_np_value, np.int32)
def test_worker_replica_failure(serve_instance):
    """Kill one of two replicas and check requests are still answered.

    Workers coordinate through a counter stored in a temporary file: the
    first two workers record and return their index, any later (restarted)
    worker hangs forever in its constructor.
    """
    client = serve_instance

    class Worker:
        # Assumes that two replicas are started. Will hang forever in the
        # constructor for any workers that are restarted.
        def __init__(self, path):
            self.should_hang = False
            self.index = 1
            if not os.path.exists(path):
                with open(path, "w") as f:
                    f.write("1")
            else:
                with open(path, "r") as f:
                    num = int(f.read())

                if num >= 2:
                    # Leave the counter file untouched so that any further
                    # restart can still parse it and hangs as well.
                    self.should_hang = True
                else:
                    self.index = num + 1
                    with open(path, "w") as f:
                        f.write(str(self.index))

            if self.should_hang:
                while True:
                    pass

        def __call__(self, *args):
            # Returning the index lets the test tell the replicas apart.
            return self.index

    temp_path = os.path.join(tempfile.gettempdir(),
                             serve.utils.get_random_letters())
    client.create_backend("replica_failure", Worker, temp_path)
    client.update_backend_config("replica_failure",
                                 BackendConfig(num_replicas=2))
    client.create_endpoint("replica_failure",
                           backend="replica_failure",
                           route="/replica_failure")

    # Wait until both replicas have been started. The set starts out empty,
    # so the loop must run until two distinct responses were collected; a
    # deadline keeps a broken deployment from hanging the test forever.
    responses = set()
    start = time.time()
    while len(responses) < 2:
        if time.time() - start >= 30:
            raise TimeoutError("Timed out waiting for replicas after 30s.")
        responses.add(
            request_with_retries("/replica_failure", timeout=1).text)
        time.sleep(0.1)

    # Kill one of the replicas.
    handles = _get_worker_handles(client, "replica_failure")
    assert len(handles) == 2
    ray.kill(handles[0], no_restart=False)

    # Check that the other replica still serves requests.
    for _ in range(10):
        while True:
            try:
                # The timeout needs to be small here because the request to
                # the restarting worker will hang.
                request_with_retries("/replica_failure", timeout=0.1)
                break
            except TimeoutError:
                time.sleep(0.1)
def deploy(self,
           name: str,
           backend_def: Union[Callable, Type[Callable], str],
           *init_args: Any,
           ray_actor_options: Optional[Dict] = None,
           config: Optional[Union[BackendConfig, Dict[str, Any]]] = None,
           version: Optional[str] = None,
           _blocking: Optional[bool] = True) -> Optional[GoalId]:
    """Build the replica/backend configuration and ask the controller to
    deploy it.

    Args:
        name: Name forwarded to the controller's ``deploy`` call.
        backend_def: Backend definition handed to ``ReplicaConfig``.
        *init_args: Positional arguments handed to ``ReplicaConfig``.
        ray_actor_options: Actor options handed to ``ReplicaConfig``. The
            dictionary passed in is never modified; defaults are applied to
            a copy.
        config: Either a ``BackendConfig`` or a dictionary of its fields.
            ``None`` is treated as an empty dictionary.
        version: Version forwarded to the controller.
        _blocking: If truthy, wait for the deployment goal before returning.

    Returns:
        ``None`` when ``_blocking`` is truthy, otherwise the reference
        returned by the controller's ``deploy`` call.

    Raises:
        TypeError: If ``config`` is neither a ``BackendConfig`` nor a dict.
    """
    if config is None:
        config = {}

    # Work on copies so that the conda default injected below never leaks
    # back into dictionaries owned by the caller.
    ray_actor_options = ({} if ray_actor_options is None else
                         dict(ray_actor_options))

    # If conda is activated and a conda env is not specified in runtime_env
    # in ray_actor_options, default to conda env of this process (client).
    # Without this code, the backend would run in the controller's conda
    # env, which is likely different from that of the client.
    # If using Ray client, skip this convenience feature because the local
    # client env doesn't create the Ray cluster (so the client env is
    # likely not present on the cluster.)
    if not ray.util.client.ray.is_connected():
        runtime_env = ray_actor_options.get("runtime_env")
        runtime_env = {} if runtime_env is None else dict(runtime_env)
        if runtime_env.get("conda") is None:
            current_env = os.environ.get("CONDA_DEFAULT_ENV")
            if current_env:
                runtime_env["conda"] = current_env
        ray_actor_options["runtime_env"] = runtime_env

    replica_config = ReplicaConfig(
        backend_def, *init_args, ray_actor_options=ray_actor_options)
    metadata = BackendMetadata(
        accepts_batches=replica_config.accepts_batches,
        is_blocking=replica_config.is_blocking,
        is_asgi_app=replica_config.is_asgi_app,
        path_prefix=replica_config.path_prefix,
    )

    if isinstance(config, dict):
        backend_config = BackendConfig.parse_obj({
            **config, "internal_metadata": metadata
        })
    elif isinstance(config, BackendConfig):
        backend_config = config.copy(update={"internal_metadata": metadata})
    else:
        raise TypeError("config must be a BackendConfig or a dictionary.")

    backend_config._validate_complete()
    goal_ref = self._controller.deploy.remote(name, backend_config,
                                              replica_config, version)

    if _blocking:
        self._wait_for_goal(goal_ref)
        return None
    return goal_ref
async def test_graceful_shutdown(serve_instance, router,
                                 mock_controller_with_name):
    """Check that draining a replica blocks while queries are in flight.

    Every request parks on its own ``asyncio.Event``; pushing a user config
    with ``release`` set to a truthy value sets all of those events.
    """

    class KeepInflight:
        def __init__(self):
            self.events = []

        def reconfigure(self, config):
            if not config["release"]:
                return
            for parked in self.events:
                parked.set()

        async def __call__(self, _):
            gate = asyncio.Event()
            self.events.append(gate)
            await gate.wait()

    backend_worker = await add_servable_to_router(
        KeepInflight,
        router,
        mock_controller_with_name[0],
        backend_config=BackendConfig(
            num_replicas=1,
            internal_metadata=BackendMetadata(is_blocking=False),
            user_config={"release": False}))

    query_param = make_request_param()

    # Submit six requests; each one stays parked inside the backend.
    refs = []
    for _ in range(6):
        refs.append(await router.assign_request.remote(query_param))

    shutdown_ref = backend_worker.drain_pending_queries.remote()

    # Draining must not finish while the requests are still parked.
    with pytest.raises(ray.exceptions.GetTimeoutError):
        ray.get(shutdown_ref, timeout=2)

    release_config = BackendConfig()
    release_config.user_config = {"release": True}
    await mock_controller_with_name[1].update_backend.remote(
        "backend", release_config)

    # Every request finishes without error ...
    ray.get(refs)
    # ... and so does the drain call.
    ray.get(shutdown_ref)
async def do_autoscale(self) -> None:
    """Apply each registered autoscaling policy to its backend.

    For every backend in the configuration store that has an autoscaling
    policy, the policy's ``scale`` method is called with the backend's
    stats and its current ``num_replicas``. A positive result is applied
    through ``update_backend_config``; any other result is ignored.
    """
    for tag, backend_info in self.configuration_store.backends.items():
        if tag in self.autoscaling_policies:
            policy = self.autoscaling_policies[tag]
            current_replicas = backend_info.backend_config.num_replicas
            target_replicas = policy.scale(self.backend_stats[tag],
                                           current_replicas)
            if target_replicas > 0:
                await self.update_backend_config(
                    tag, BackendConfig(num_replicas=target_replicas))
def test_scaling_replicas(serve_instance, use_legacy_config):
    """Scale a counting backend from two replicas to one over HTTP.

    The replica count is supplied either as a plain dict or as a
    ``BackendConfig``, depending on ``use_legacy_config``.
    """
    client = serve_instance

    class Counter:
        def __init__(self):
            self.count = 0

        def __call__(self, _):
            self.count += 1
            return self.count

    def replicas(count):
        # Build the config in whichever flavour this test run exercises.
        if use_legacy_config:
            return {"num_replicas": count}
        return BackendConfig(num_replicas=count)

    def sample_counts():
        # Ten sequential requests, returning the decoded JSON bodies.
        return [
            requests.get("http://127.0.0.1:8000/increment").json()
            for _ in range(10)
        ]

    client.create_backend("counter:v1", Counter, config=replicas(2))
    client.create_endpoint("counter", backend="counter:v1", route="/increment")

    # Poll the routing table until /increment shows up.
    while True:
        routes = requests.get("http://127.0.0.1:8000/-/routes").json()
        if "/increment" in routes:
            break
        time.sleep(0.2)

    counter_result = sample_counts()

    # With the load split over two replicas no single counter reaches 10.
    assert max(counter_result) < 10

    client.update_backend_config("counter:v1", replicas(1))

    counter_result = sample_counts()

    # A replica may still be spinning down, but most requests should hit
    # the one that remains.
    assert max(counter_result) - min(counter_result) > 6
async def test_user_config_update(serve_instance, mock_controller_with_name):
    """Check that an updated ``user_config`` reaches the running backend.

    The backend returns whatever ``return_val`` its last ``reconfigure``
    call received. After the config is updated, the test retries for a
    while and finally asserts that the new value is returned.
    """

    class Customizable:
        def __init__(self):
            self.retval = ""

        def __call__(self, starlette_request):
            return self.retval

        def reconfigure(self, config):
            self.retval = config["return_val"]

    config = BackendConfig(
        num_replicas=2, user_config={
            "return_val": "original",
            "b": 2
        })
    worker, router = await add_servable_to_router(
        Customizable, *mock_controller_with_name, backend_config=config)

    query_param = make_request_param()

    done = [(await router.assign_request(query_param)) for _ in range(10)]
    for i in done:
        assert await i == "original"

    config = BackendConfig()
    config.user_config = {"return_val": "new_val"}
    await mock_controller_with_name[1].update_backend.remote("backend", config)

    async def new_val_returned():
        result = await (await router.assign_request(query_param))
        assert "new_val" == result

    for _ in range(10):
        try:
            await new_val_returned()
        except AssertionError:
            # Wait for config to propagate
            await asyncio.sleep(0.5)
        else:
            # The new value is being served; no need to keep retrying.
            break

    # Final check. This must be awaited: a bare call only creates a
    # coroutine object and the assertion inside it would never run.
    await new_val_returned()
def add_new_replica(self,
                    backend_tag,
                    runner_actor,
                    backend_config=None):
    """Register a replica for ``backend_tag`` and publish the new state.

    Args:
        backend_tag: Key under which the replica and config are stored.
        runner_actor: Replica handle appended to the backend's replica list.
        backend_config: Config stored for the backend. When omitted, a
            fresh ``BackendConfig()`` is created for this call, so backends
            never share one default instance.
    """
    if backend_config is None:
        backend_config = BackendConfig()

    self.backend_replicas[backend_tag].append(runner_actor)
    self.backend_configs[backend_tag] = backend_config

    # Publish both updated mappings through the host.
    self.host.notify_changed(
        "worker_handles",
        self.backend_replicas,
    )
    self.host.notify_changed("backend_configs", self.backend_configs)
async def test_user_config_update(serve_instance, router,
                                  mock_controller_with_name):
    """Check that an updated ``user_config`` reaches the running backend.

    The backend returns whatever ``return_val`` its last ``reconfigure``
    call received; the test checks the value before and after the mock
    controller pushes a new config.
    """

    class Customizable:
        def __init__(self):
            # Initialise the attribute that __call__ actually reads.
            self.retval = ""

        def __call__(self, starlette_request):
            return self.retval

        def reconfigure(self, config):
            self.retval = config["return_val"]

    config = BackendConfig(
        num_replicas=2, user_config={
            "return_val": "original",
            "b": 2
        })
    await add_servable_to_router(
        Customizable,
        router,
        mock_controller_with_name[0],
        backend_config=config)

    query_param = make_request_param()

    done = [(await router.assign_request.remote(query_param))
            for _ in range(10)]
    for i in done:
        assert await i == "original"

    config = BackendConfig()
    config.user_config = {"return_val": "new_val"}
    await mock_controller_with_name[1].update_backend.remote("backend", config)

    done = [(await router.assign_request.remote(query_param))
            for _ in range(10)]
    for i in done:
        assert await i == "new_val"
def add_new_replica(self,
                    backend_tag,
                    runner_actor,
                    backend_config=None):
    """Register a replica for ``backend_tag`` and publish the new state.

    Args:
        backend_tag: Key under which the replica and config are stored.
        runner_actor: Replica handle appended to the backend's replica list.
        backend_config: Config stored for the backend. When omitted, a
            fresh ``BackendConfig()`` is created for this call, so backends
            never share one default instance.
    """
    if backend_config is None:
        backend_config = BackendConfig()

    self.backend_replicas[backend_tag].append(runner_actor)
    self.backend_configs[backend_tag] = backend_config

    # Publish both updated mappings under their long-poll keys.
    self.host.notify_changed(
        LongPollKey.REPLICA_HANDLES,
        self.backend_replicas,
    )
    self.host.notify_changed(LongPollKey.BACKEND_CONFIGS,
                             self.backend_configs)
def test_updating_config(serve_instance, use_legacy_config):
    """Check that changing ``max_batch_size`` keeps the same replica tags.

    The configs are supplied either as plain dicts or as ``BackendConfig``
    objects, depending on ``use_legacy_config``.
    """
    client = serve_instance

    class BatchSimple:
        def __init__(self):
            self.count = 0

        @serve.accept_batch
        def __call__(self, request):
            return [1] * len(request)

    if use_legacy_config:
        initial_config = {"max_batch_size": 2, "num_replicas": 3}
        updated_config = {"max_batch_size": 5}
    else:
        initial_config = BackendConfig(max_batch_size=2, num_replicas=3)
        updated_config = BackendConfig(max_batch_size=5)

    client.create_backend("bsimple:v1", BatchSimple, config=initial_config)
    client.create_endpoint("bsimple", backend="bsimple:v1", route="/bsimple")

    controller = client._controller

    def list_replica_tags():
        return ray.get(controller._list_replicas.remote("bsimple:v1"))

    tags_before = set(list_replica_tags())
    client.update_backend_config("bsimple:v1", updated_config)
    tags_after = set(list_replica_tags())

    # Collect the replica tags of every backend known to the controller.
    handles_by_backend = ray.get(controller.get_all_worker_handles.remote())
    all_tags = {
        tag
        for handles in handles_by_backend.values() for tag in handles
    }

    # The original replicas must still exist and still be exactly the
    # replicas of this backend.
    assert tags_before <= all_tags
    assert tags_before == tags_after
def test_serve_graceful_shutdown(serve_instance):
    """Delete a backend with blocked queries and check that they drain.

    Ten queries block on a signal actor. Deletion is started from a
    separate Ray task; the queries must stay pending until the signal is
    sent, after which both the queries and the deletion must complete.
    """
    client = serve_instance
    signal = SignalActor.remote()

    class WaitBackend:
        @serve.accept_batch
        async def __call__(self, requests):
            signal_actor = await requests[0].body()
            await signal_actor.wait.remote()
            return [""] * len(requests)

    shutdown_config = BackendConfig(
        # Allow queries to pile up on the replica side.
        max_concurrent_queries=10,
        max_batch_size=1,
        experimental_graceful_shutdown_wait_loop_s=0.5,
        experimental_graceful_shutdown_timeout_s=1000,
    )
    client.create_backend("wait", WaitBackend, config=shutdown_config)
    client.create_endpoint("wait", backend="wait")
    handle = client.get_handle("wait")
    refs = [handle.remote(signal) for _ in range(10)]

    def assert_still_pending():
        with pytest.raises(ray.exceptions.GetTimeoutError):
            ray.get(refs, timeout=1)

    # Give the queries time to be enqueued; none of them can finish yet.
    assert_still_pending()

    @ray.remote(num_cpus=0)
    def do_blocking_delete():
        client = serve.connect()
        client.delete_endpoint("wait")
        client.delete_backend("wait")

    # Deleting the backend should kick off the shutdown sequence.
    delete_ref = do_blocking_delete.remote()

    # The queries are enqueued but cannot run because the signal actor is
    # still blocking them.
    assert_still_pending()

    signal.send.remote()

    # All queries drain and complete without error ...
    ray.get(refs)
    # ... and the blocking delete finishes too.
    ray.get(delete_ref)
async def update_backend_config(self, backend_tag: BackendTag,
                                config_options: BackendConfig) -> GoalId:
    """Merge new options into a registered backend's config and redeploy.

    Only the fields explicitly set on ``config_options`` override the
    stored config; the existing replica config is reused unchanged.

    Raises:
        ValueError: If no backend is registered under ``backend_tag``.
    """
    async with self.write_lock:
        current = self.backend_state.get_backend(backend_tag)
        if current is None:
            raise ValueError(f"Backend {backend_tag} is not registered.")

        replica_config = current.replica_config
        overrides = config_options.dict(exclude_unset=True)
        merged_config = current.backend_config.copy(update=overrides)

        return self.backend_state.deploy_backend(backend_tag, merged_config,
                                                 replica_config)
async def __init__(self, backend_tag, replica_tag, init_args, init_kwargs,
                   backend_config_proto_bytes: bytes,
                   version: BackendVersion, controller_name: str,
                   detached: bool):
    """Deserialize the backend definition and wrap it in a RayServeReplica.

    ``serialized_backend_def`` is not a parameter; it is read from a scope
    outside this method (presumably an enclosing factory function -- not
    visible here).
    """
    backend = cloudpickle.loads(serialized_backend_def)
    backend_config = BackendConfig.from_proto_bytes(
        backend_config_proto_bytes)

    # Classes and plain functions are the only accepted definitions.
    if inspect.isclass(backend):
        is_function = False
    elif inspect.isfunction(backend):
        is_function = True
    else:
        assert False, ("backend_def must be function, class, or "
                       "corresponding import path.")

    # Register the replica context before running user code so that
    # serve.connect() inside the user's backend connects to the instance
    # this backend is running in.
    ray.serve.api._set_internal_replica_context(
        backend_tag, replica_tag, controller_name, servable_object=None)

    if not is_function:
        # Allocate first and initialise separately so that the backend may
        # define an async __init__ (required for FastAPI backend
        # definitions).
        instance = backend.__new__(backend)
        await sync_to_async(instance.__init__)(*init_args, **init_kwargs)
        _callable = instance
    else:
        _callable = backend

    # Set the context a second time, now with the servable object.
    ray.serve.api._set_internal_replica_context(
        backend_tag, replica_tag, controller_name, servable_object=_callable)

    assert controller_name, "Must provide a valid controller_name"
    namespace = ray.serve.api._get_controller_namespace(detached)
    controller_handle = ray.get_actor(controller_name, namespace=namespace)

    self.backend = RayServeReplica(_callable, backend_tag, replica_tag,
                                   backend_config, backend_config.user_config,
                                   version, is_function, controller_handle)

    # asyncio.Event used to signal that the replica is shutting down.
    self.shutdown_event = asyncio.Event()
def test_backend_user_config(serve_instance):
    """Redeploy a deployment with changed replica count and user_config.

    Each reply carries the configured count and the replica's PID, which
    lets the test verify both the value served and how many distinct
    processes answered.
    """
    config = BackendConfig(num_replicas=2, user_config={"count": 123, "b": 2})

    @serve.deployment("counter", config=config)
    class Counter:
        def __init__(self):
            self.count = 10

        def __call__(self, starlette_request):
            return self.count, os.getpid()

        def reconfigure(self, config):
            self.count = config["count"]

    Counter.deploy()
    handle = Counter.get_handle()

    def check(val, num_replicas):
        # Sample 100 replies; fail fast as soon as one has the wrong count.
        pids = set()
        for _ in range(100):
            count, pid = ray.get(handle.remote())
            if str(count) != val:
                return False
            pids.add(pid)
        return len(pids) == num_replicas

    wait_for_condition(lambda: check("123", 2))

    # Scale out to three replicas with the same user_config.
    config.num_replicas = 3
    Counter = Counter.options(config=config)
    Counter.deploy()
    wait_for_condition(lambda: check("123", 3))

    # Change only the user_config.
    config.user_config = {"count": 456}
    Counter = Counter.options(config=config)
    Counter.deploy()
    wait_for_condition(lambda: check("456", 3))
def create_backend(backend_tag: str,
                   func_or_class: Union[Callable, Type[Callable]],
                   *actor_init_args: Any,
                   ray_actor_options: Optional[Dict] = None,
                   config: Optional[Dict[str, Any]] = None) -> None:
    """Register a new backend that serves requests with func_or_class.

    Args:
        backend_tag (str): unique tag identifying this backend.
        func_or_class (callable, class): a function, or a class that
            implements __call__.
        actor_init_args (optional): arguments passed to the class's
            initialization method.
        ray_actor_options (optional): options passed into the @ray.remote
            decorator for the backend actor.
        config (optional): configuration options for this backend.
            Supported options:
            - "num_replicas": number of worker processes started to handle
              requests to this backend.
            - "max_batch_size": maximum number of requests processed in one
              batch by this backend.
            - "batch_wait_timeout": time in seconds that backend replicas
              wait for a full batch of requests before processing a
              partial batch.
            - "max_concurrent_queries": maximum number of queries sent to a
              replica of this backend without receiving a response.

    Raises:
        ValueError: if a backend with this tag is already registered.
        TypeError: if config is given but is not a dictionary.
    """
    already_registered = backend_tag in list_backends()
    if already_registered:
        raise ValueError(
            "Cannot create backend. "
            "Backend '{}' is already registered.".format(backend_tag))

    options = {} if config is None else config
    if not isinstance(options, dict):
        raise TypeError("config must be a dictionary.")

    replica_config = ReplicaConfig(
        func_or_class, *actor_init_args, ray_actor_options=ray_actor_options)
    backend_config = BackendConfig(options, replica_config.accepts_batches,
                                   replica_config.is_blocking)

    # Block until the controller has finished creating the backend.
    creation = controller.create_backend.remote(backend_tag, backend_config,
                                                replica_config)
    ray.get(creation)
def test_backend_config_validation():
    """Check the errors ``BackendConfig`` raises for invalid options."""
    # An unrecognised key is reported by name.
    with pytest.raises(ValueError, match="unknown_key"):
        BackendConfig({"unknown_key": -1})

    # num_replicas: a valid value first, then the rejected ones.
    BackendConfig({"num_replicas": 1})
    for bad_value, expected_error in (("hello", TypeError), (-1, ValueError)):
        with pytest.raises(expected_error):
            BackendConfig({"num_replicas": bad_value})

    # max_batch_size is accepted when the backend accepts batches and
    # rejected when it does not.
    BackendConfig({"max_batch_size": 10}, accepts_batches=True)
    with pytest.raises(ValueError):
        BackendConfig({"max_batch_size": 10}, accepts_batches=False)

    # max_batch_size: wrong types and out-of-range values.
    rejected_batch_sizes = (
        (1.0, TypeError),
        ("hello", TypeError),
        (0, ValueError),
        (-1, ValueError),
    )
    for bad_value, expected_error in rejected_batch_sizes:
        with pytest.raises(expected_error):
            BackendConfig({"max_batch_size": bad_value})
def test_imported_backend(serve_instance):
    """Create a backend from an import path, then reconfigure and call it."""
    client = serve_instance

    def expected_reply(config_value):
        return {"arg": "input_arg", "config": config_value}

    imported_class = ImportedBackend("ray.serve.utils.MockImportedBackend")
    initial_config = BackendConfig(user_config="config", max_batch_size=2)
    client.create_backend(
        "imported", imported_class, "input_arg", config=initial_config)
    client.create_endpoint("imported", backend="imported")

    # Basic sanity check.
    handle = client.get_handle("imported")
    assert ray.get(handle.remote()) == expected_reply("config")

    # A backend config update must be reflected in the reply.
    client.update_backend_config("imported",
                                 BackendConfig(user_config="new_config"))
    assert ray.get(handle.remote()) == expected_reply("new_config")

    # Methods other than the default one can be called as well.
    handle = handle.options(method_name="other_method")
    assert ray.get(handle.remote("hello")) == "hello"
def test_list_backends(serve_instance):
    """Check ``serve.list_backends`` as backends are created and deleted."""

    def f():
        pass

    # First backend.
    serve.create_backend(
        "backend", f, config=BackendConfig(max_concurrent_queries=10))
    listed = serve.list_backends()
    assert len(listed) == 1
    assert "backend" in listed
    assert listed["backend"].max_concurrent_queries == 10

    # Second backend.
    serve.create_backend(
        "backend2", f, config=BackendConfig(num_replicas=10))
    listed = serve.list_backends()
    assert len(listed) == 2
    assert listed["backend2"].num_replicas == 10

    # Deleting the first backend leaves only the second.
    serve.delete_backend("backend")
    listed = serve.list_backends()
    assert len(listed) == 1
    assert "backend2" in listed

    # Deleting the second leaves nothing.
    serve.delete_backend("backend2")
    assert len(serve.list_backends()) == 0
async def test_task_runner_perform_batch(serve_instance, router):
    """Enqueue three requests with ``max_batch_size=2`` and check the
    batch size each request observed."""

    def batcher(requests):
        # Answer every request in the batch with the size of that batch.
        size = len(requests)
        return [size] * size

    batching_config = BackendConfig(
        max_batch_size=2,
        batch_wait_timeout=10,
        internal_metadata=BackendMetadata(accepts_batches=True))
    await add_servable_to_router(
        batcher, router, backend_config=batching_config)

    query_param = make_request_param()
    pending = [router.enqueue_request.remote(query_param) for _ in range(3)]
    my_batch_sizes = await asyncio.gather(*pending)

    assert my_batch_sizes == [2, 2, 1]
def deploy(self,
           name: str,
           backend_config_proto_bytes: bytes,
           replica_config: ReplicaConfig,
           version: Optional[str],
           prev_version: Optional[str],
           route_prefix: Optional[str],
           deployer_job_id: "Optional[ray._raylet.JobID]" = None
           ) -> Tuple[Optional[GoalId], bool]:
    """Deploy a backend and update the endpoint of the same name.

    When ``prev_version`` is given, the existing deployment must exist,
    carry a version, and that version must equal ``prev_version``.

    Returns:
        The ``(goal_id, updating)`` pair produced by
        ``backend_state_manager.deploy_backend``.

    Raises:
        ValueError: If the ``prev_version`` check fails.
    """
    if route_prefix is not None:
        assert route_prefix.startswith("/")

    backend_config = BackendConfig.from_proto_bytes(
        backend_config_proto_bytes)

    if prev_version is not None:
        current = self.backend_state_manager.get_backend(name)
        if not (current is not None and current.version):
            raise ValueError(
                f"prev_version '{prev_version}' is specified but "
                "there is no existing deployment.")
        if current.version != prev_version:
            raise ValueError(f"prev_version '{prev_version}' "
                             "does not match with the existing "
                             f"version '{current.version}'.")

    replica_actor_def = ray.remote(
        create_replica_wrapper(name, replica_config.serialized_backend_def))
    backend_info = BackendInfo(
        actor_def=replica_actor_def,
        version=version,
        backend_config=backend_config,
        replica_config=replica_config,
        deployer_job_id=deployer_job_id,
        start_time_ms=int(time.time() * 1000))
    # TODO(architkulkarni): When a deployment is redeployed, even if
    # the only change was num_replicas, the start_time_ms is refreshed.
    # This is probably not the desired behavior for an autoscaling
    # deployment, which redeploys very often to change num_replicas.

    goal_id, updating = self.backend_state_manager.deploy_backend(
        name, backend_info)

    self.endpoint_state.update_endpoint(name,
                                        EndpointInfo(route=route_prefix))
    return goal_id, updating
async def deploy(self,
                 name: str,
                 backend_config_proto_bytes: bytes,
                 replica_config: ReplicaConfig,
                 python_methods: List[str],
                 version: Optional[str],
                 prev_version: Optional[str],
                 route_prefix: Optional[str],
                 deployer_job_id: "Optional[ray._raylet.JobID]" = None
                 ) -> Tuple[Optional[GoalId], bool]:
    """Deploy a backend and update the endpoint of the same name.

    The version check, the backend deployment and the endpoint update all
    run while holding ``self.write_lock``. When ``prev_version`` is given,
    the existing deployment must exist, carry a version, and that version
    must equal ``prev_version``.

    Returns:
        The ``(goal_id, updating)`` pair produced by
        ``backend_state_manager.deploy_backend``.

    Raises:
        ValueError: If the ``prev_version`` check fails.
    """
    if route_prefix is not None:
        assert route_prefix.startswith("/")

    backend_config = BackendConfig.from_proto_bytes(
        backend_config_proto_bytes)

    async with self.write_lock:
        if prev_version is not None:
            current = self.backend_state_manager.get_backend(name)
            if not (current is not None and current.version):
                raise ValueError(
                    f"prev_version '{prev_version}' is specified but "
                    "there is no existing deployment.")
            if current.version != prev_version:
                raise ValueError(
                    f"prev_version '{prev_version}' "
                    "does not match with the existing "
                    f"version '{current.version}'.")

        replica_actor_def = ray.remote(
            create_backend_replica(name,
                                   replica_config.serialized_backend_def))
        backend_info = BackendInfo(
            actor_def=replica_actor_def,
            version=version,
            backend_config=backend_config,
            replica_config=replica_config,
            deployer_job_id=deployer_job_id,
            start_time_ms=int(time.time() * 1000))

        goal_id, updating = self.backend_state_manager.deploy_backend(
            name, backend_info)

        endpoint_info = EndpointInfo(
            route=route_prefix, python_methods=python_methods)
        self.endpoint_state.update_endpoint(name, endpoint_info)
        return goal_id, updating
def test_serve_forceful_shutdown(serve_instance):
    """Delete a backend whose handler never returns.

    The in-flight request is expected to fail with ``RayActorError``.
    """

    def sleeper(_):
        # Never returns.
        while True:
            time.sleep(1000)

    short_timeout = BackendConfig(experimental_graceful_shutdown_timeout_s=1)
    serve.create_backend("sleeper", sleeper, config=short_timeout)
    serve.create_endpoint("sleeper", backend="sleeper")

    # Start a request that will still be running at deletion time.
    ref = serve.get_handle("sleeper").remote()

    serve.delete_endpoint("sleeper")
    serve.delete_backend("sleeper")

    with pytest.raises(ray.exceptions.RayActorError):
        ray.get(ref)
def test_batching_exception(serve_instance):
    """A batched backend that returns a non-list must fail the request."""

    class NoListReturned:
        def __init__(self):
            self.count = 0

        @serve.accept_batch
        def __call__(self, requests):
            # Deliberately return an int instead of one result per request.
            return len(requests)

    # Configure the maximum batch size.
    batch_config = BackendConfig(max_batch_size=5)
    serve.create_backend("exception:v1", NoListReturned, config=batch_config)
    serve.create_endpoint("exception-test", backend="exception:v1")

    handle = serve.get_handle("exception-test")
    with pytest.raises(ray.exceptions.RayTaskError):
        assert ray.get(handle.remote(temp=1))
async def update_backend_config(self, backend_tag: BackendTag,
                                config_options: BackendConfig) -> GoalId:
    """Merge new options into a registered backend's config and redeploy.

    Only the fields explicitly set on ``config_options`` override the
    stored config; the actor definition, version and replica config are
    carried over from the existing backend.

    Raises:
        ValueError: If no backend is registered under ``backend_tag``.
    """
    async with self.write_lock:
        current = self.backend_state.get_backend(backend_tag)
        if current is None:
            raise ValueError(f"Backend {backend_tag} is not registered.")

        overrides = config_options.dict(exclude_unset=True)
        merged_config = current.backend_config.copy(update=overrides)
        updated_info = BackendInfo(
            actor_def=current.actor_def,
            version=current.version,
            backend_config=merged_config,
            replica_config=current.replica_config)

        # deploy_backend returns a pair; only the goal id is passed on.
        goal_id, _updating = self.backend_state.deploy_backend(
            backend_tag, updated_info)
        return goal_id
def add_new_replica(self,
                    backend_tag,
                    runner_actor,
                    backend_config=None):
    """Register a replica, push its user config, and publish the new state.

    Args:
        backend_tag: Key under which the replica and config are stored.
        runner_actor: Replica handle appended to the backend's replica
            list; its ``reconfigure`` method is called with the user config.
        backend_config: Config stored for the backend. When omitted, a
            fresh ``BackendConfig()`` is created for this call, so backends
            never share one default instance.
    """
    if backend_config is None:
        backend_config = BackendConfig()

    self.backend_replicas[backend_tag].append(runner_actor)
    self.backend_configs[backend_tag] = backend_config

    # Block until the replica has applied the user config.
    ray.get(runner_actor.reconfigure.remote(backend_config.user_config))

    # Publish the per-backend replica list and config.
    self.host.notify_changed(
        (LongPollNamespace.REPLICA_HANDLES, backend_tag),
        self.backend_replicas[backend_tag],
    )
    self.host.notify_changed(
        (LongPollNamespace.BACKEND_CONFIGS, backend_tag),
        self.backend_configs[backend_tag],
    )
async def test_task_runner_perform_batch(serve_instance,
                                         mock_controller_with_name):
    """Assign three requests with ``max_batch_size=2`` and check the batch
    size each request observed."""

    def batcher(requests):
        # Answer every request in the batch with the size of that batch.
        size = len(requests)
        return [size] * size

    batching_config = BackendConfig(
        max_batch_size=2,
        batch_wait_timeout=10,
        internal_metadata=BackendMetadata(accepts_batches=True))
    worker, router = await add_servable_to_router(
        batcher, *mock_controller_with_name, backend_config=batching_config)

    query_param = make_request_param()

    # Assign the requests one after another, then await all results.
    pending = []
    for _ in range(3):
        pending.append(await router.assign_request(query_param))
    my_batch_sizes = await asyncio.gather(*pending)

    assert my_batch_sizes == [2, 2, 1]
def test_batching_exception(serve_instance):
    """A batched backend that returns a non-list must fail the request."""

    class NoListReturned:
        def __init__(self):
            self.count = 0

        @serve.accept_batch
        def __call__(self, flask_request, temp=None):
            # Deliberately return the batch size instead of a list.
            return serve.context.batch_size

    # Configure the maximum batch size.
    batch_config = BackendConfig(max_batch_size=5)
    serve.create_backend("exception:v1", NoListReturned, config=batch_config)
    serve.create_endpoint(
        "exception-test", backend="exception:v1", route="/noListReturned")

    handle = serve.get_handle("exception-test")
    with pytest.raises(ray.exceptions.RayTaskError):
        assert ray.get(handle.remote(temp=1))