def test_not_killing_replicas(serve_instance):
    """Changing max_batch_size must NOT tear down the existing replicas.

    The replica tag set before and after the config update has to be
    identical, and every old tag must still be present in the actor
    handle cache.
    """

    class BatchSimple:
        def __init__(self):
            self.count = 0

        @serve_benchmark.accept_batch
        def __call__(self, flask_request, temp=None):
            batch_size = serve_benchmark.context.batch_size
            return [1] * batch_size

    serve_benchmark.create_endpoint("bsimple", "/bsimple")
    b_config = BackendConfig(num_replicas=3, max_batch_size=2)
    serve_benchmark.create_backend(
        BatchSimple, "bsimple:v1", backend_config=b_config)

    global_state = serve_benchmark.api._get_global_state()
    old_replica_tag_list = global_state.backend_table.list_replicas(
        "bsimple:v1")

    # Bump only the batching knob; the replica count is untouched, so no
    # replica should be restarted by this update.
    bnew_config = serve_benchmark.get_backend_config("bsimple:v1")
    bnew_config.max_batch_size = 5
    serve_benchmark.set_backend_config("bsimple:v1", bnew_config)

    new_replica_tag_list = global_state.backend_table.list_replicas(
        "bsimple:v1")
    global_state.refresh_actor_handle_cache()
    new_all_tag_list = list(global_state.actor_handle_cache.keys())

    # The old and new replica tag lists should be identical, and both
    # should be a subset of the full actor tag list.
    assert set(old_replica_tag_list) <= set(new_all_tag_list)
    assert set(old_replica_tag_list) == set(new_replica_tag_list)
def test_killing_replicas(serve_instance):
    """Changing num_cpus MUST replace the existing replicas.

    After the resource update, the new replica tags must all live in the
    actor handle cache while the old tags must have been evicted.
    """

    class Simple:
        def __init__(self):
            self.count = 0

        def __call__(self, flask_request, temp=None):
            return temp

    serve_benchmark.create_endpoint("simple", "/simple")
    b_config = BackendConfig(num_replicas=3, num_cpus=2)
    serve_benchmark.create_backend(
        Simple, "simple:v1", backend_config=b_config)

    global_state = serve_benchmark.api._get_global_state()
    old_replica_tag_list = global_state.backend_table.list_replicas(
        "simple:v1")

    # Changing the resource requirement forces replica restarts.
    bnew_config = serve_benchmark.get_backend_config("simple:v1")
    bnew_config.num_cpus = 1
    serve_benchmark.set_backend_config("simple:v1", bnew_config)

    new_replica_tag_list = global_state.backend_table.list_replicas(
        "simple:v1")
    global_state.refresh_actor_handle_cache()
    new_all_tag_list = list(global_state.actor_handle_cache.keys())

    # The new replica tags must be a subset of all live actor tags...
    assert set(new_replica_tag_list) <= set(new_all_tag_list)
    # ...while the old replica tags must no longer be live.
    assert not set(old_replica_tag_list) <= set(new_all_tag_list)
def test_route_decorator(serve_instance):
    """@serve_benchmark.route returns a RayServeHandle that can be scaled.

    A plain (non-batched) function must reject set_max_batch_size with a
    RayServeException.
    """

    @serve_benchmark.route("/hello_world")
    def hello_world(_):
        return ""

    # The decorator hands back a handle, not the raw function.
    assert isinstance(hello_world, RayServeHandle)

    hello_world.scale(2)
    assert (serve_benchmark.get_backend_config("hello_world:v0")
            .num_replicas == 2)

    # Batching is only valid for @accept_batch callables.
    with pytest.raises(
            RayServeException, match="method does not accept batching"):
        hello_world.set_max_batch_size(2)
def test_scaling_replicas(serve_instance):
    """Scaling a backend up then down shifts request load accordingly.

    With two replicas sharing 10 requests, no single counter can reach 10;
    after scaling back to one replica, the surviving replica's counter must
    pull far ahead of the other.
    """

    class Counter:
        def __init__(self):
            self.count = 0

        def __call__(self, _):
            self.count += 1
            return self.count

    serve_benchmark.create_endpoint("counter", "/increment")

    # Keep checking the routing table until /increment is populated.
    # Fix: the original loop was unbounded and would hang the whole test
    # run forever if the route never appeared; bound it with a deadline so
    # a broken router fails fast instead.
    deadline = time.time() + 30
    while ("/increment" not in
           requests.get("http://127.0.0.1:8000/-/routes").json()):
        if time.time() > deadline:
            raise TimeoutError(
                "/increment route was not populated within 30s")
        time.sleep(0.2)

    b_config = BackendConfig(num_replicas=2)
    serve_benchmark.create_backend(
        Counter, "counter:v1", backend_config=b_config)
    serve_benchmark.link("counter", "counter:v1")

    counter_result = []
    for _ in range(10):
        resp = requests.get("http://127.0.0.1:8000/increment").json()
        counter_result.append(resp)
    # If the load is shared among two replicas, the max result cannot be 10.
    assert max(counter_result) < 10

    b_config = serve_benchmark.get_backend_config("counter:v1")
    b_config.num_replicas = 1
    serve_benchmark.set_backend_config("counter:v1", b_config)

    counter_result = []
    for _ in range(10):
        resp = requests.get("http://127.0.0.1:8000/increment").json()
        counter_result.append(resp)
    # Give some time for a replica to spin down; the majority of requests
    # should be served by the only remaining replica, so its counter runs
    # far ahead of the stale minimum.
    assert max(counter_result) - min(counter_result) > 6
def get_backend_config(self, backend_tag=None):
    """Return the BackendConfig of this endpoint's backend.

    When backend_tag is None, the endpoint must have exactly one backend;
    _ensure_backend_unique resolves (and validates) the tag.
    """
    with serve_benchmark.using_router(self.endpoint_name):
        resolved_tag = self._ensure_backend_unique(backend_tag)
        return serve_benchmark.get_backend_config(resolved_tag)
def set_max_batch_size(self, new_max_batch_size, backend_tag=None):
    """Set max_batch_size on this endpoint's backend via a read-modify-write.

    When backend_tag is None, the endpoint must have exactly one backend;
    _ensure_backend_unique resolves (and validates) the tag.
    """
    with serve_benchmark.using_router(self.endpoint_name):
        resolved_tag = self._ensure_backend_unique(backend_tag)
        updated_config = serve_benchmark.get_backend_config(resolved_tag)
        updated_config.max_batch_size = new_max_batch_size
        serve_benchmark.set_backend_config(resolved_tag, updated_config)
def scale(self, new_num_replicas, backend_tag=None):
    """Set num_replicas on this endpoint's backend via a read-modify-write.

    When backend_tag is None, the endpoint must have exactly one backend;
    _ensure_backend_unique resolves (and validates) the tag.
    """
    with serve_benchmark.using_router(self.endpoint_name):
        resolved_tag = self._ensure_backend_unique(backend_tag)
        updated_config = serve_benchmark.get_backend_config(resolved_tag)
        updated_config.num_replicas = new_num_replicas
        serve_benchmark.set_backend_config(resolved_tag, updated_config)