def test_worker_restart(serve_instance):
    serve.init()
    serve.create_endpoint("worker_failure", "/worker_failure")

    class Worker1:
        def __call__(self):
            return os.getpid()

    serve.create_backend("worker_failure:v1", Worker1)
    serve.set_traffic("worker_failure", {"worker_failure:v1": 1.0})

    # Get the PID of the worker.
    old_pid = request_with_retries("/worker_failure", timeout=1).text

    # Kill the worker.
    handles = _get_worker_handles("worker_failure:v1")
    assert len(handles) == 1
    ray.kill(handles[0], no_restart=False)

    # Wait until the worker is killed and a new one is started.
    start = time.time()
    while time.time() - start < 30:
        response = request_with_retries("/worker_failure", timeout=30)
        if response.text != old_pid:
            break
    else:
        assert False, "Timed out waiting for worker to die."
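# The failure tests in this file rely on a few helpers that are not shown in
# this excerpt. Below is a minimal sketch of what they might look like,
# assuming the controller/master actor exposes a way to list worker handles.
# The exact internal API differs across Serve versions, so treat these as
# illustrations rather than the actual test utilities.
import time

import ray
import requests
from ray import serve


def request_with_retries(endpoint, timeout=30):
    # Retry GETs against the local HTTP proxy until one succeeds or the
    # timeout elapses.
    start = time.time()
    while True:
        try:
            return requests.get(
                "http://127.0.0.1:8000" + endpoint, timeout=timeout)
        except requests.RequestException:
            if time.time() - start > timeout:
                raise TimeoutError("Timed out while waiting for a response.")
            time.sleep(0.1)


def _get_worker_handles(backend):
    # Assumption: the controller actor can report the worker (replica)
    # actor handles for a given backend tag.
    controller = serve.api._get_master_actor()
    backend_to_handles = ray.get(controller.get_all_worker_handles.remote())
    return list(backend_to_handles[backend].values())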
def test_e2e(serve_instance):
    serve.init()
    serve.create_endpoint("endpoint", "/api", methods=["GET", "POST"])

    retry_count = 5
    timeout_sleep = 0.5
    while True:
        try:
            resp = requests.get(
                "http://127.0.0.1:8000/-/routes", timeout=0.5).json()
            assert resp == {"/api": ["endpoint", ["GET", "POST"]]}
            break
        except Exception as e:
            time.sleep(timeout_sleep)
            timeout_sleep *= 2
            retry_count -= 1
            if retry_count == 0:
                assert False, ("Route table hasn't been updated after 5 "
                               "tries. The latest error was {}").format(e)

    def function(flask_request):
        return {"method": flask_request.method}

    serve.create_backend("echo:v1", function)
    serve.set_traffic("endpoint", {"echo:v1": 1.0})

    resp = requests.get("http://127.0.0.1:8000/api").json()["method"]
    assert resp == "GET"

    resp = requests.post("http://127.0.0.1:8000/api").json()["method"]
    assert resp == "POST"
def test_router_failure(serve_instance):
    serve.init()
    serve.create_endpoint("router_failure", "/router_failure")

    def function():
        return "hello1"

    serve.create_backend("router_failure:v1", function)
    serve.set_traffic("router_failure", {"router_failure:v1": 1.0})

    assert request_with_retries("/router_failure", timeout=5).text == "hello1"

    for _ in range(10):
        response = request_with_retries("/router_failure", timeout=30)
        assert response.text == "hello1"

    _kill_router()

    for _ in range(10):
        response = request_with_retries("/router_failure", timeout=30)
        assert response.text == "hello1"

    def function():
        return "hello2"

    serve.create_backend("router_failure:v2", function)
    serve.set_traffic("router_failure", {"router_failure:v2": 1.0})

    for _ in range(10):
        response = request_with_retries("/router_failure", timeout=30)
        assert response.text == "hello2"
def test_delete_endpoint(serve_instance, route):
    endpoint_name = "delete_endpoint" + str(route)
    serve.create_endpoint(endpoint_name, route=route)
    serve.delete_endpoint(endpoint_name)

    # Check that we can reuse a deleted endpoint name and route.
    serve.create_endpoint(endpoint_name, route=route)

    def function():
        return "hello"

    serve.create_backend("delete-endpoint:v1", function)
    serve.set_traffic(endpoint_name, {"delete-endpoint:v1": 1.0})

    if route is not None:
        assert requests.get(
            "http://127.0.0.1:8000/delete-endpoint").text == "hello"
    else:
        handle = serve.get_handle(endpoint_name)
        assert ray.get(handle.remote()) == "hello"

    # Check that deleting the endpoint doesn't delete the backend.
    serve.delete_endpoint(endpoint_name)
    serve.create_endpoint(endpoint_name, route=route)
    serve.set_traffic(endpoint_name, {"delete-endpoint:v1": 1.0})

    if route is not None:
        assert requests.get(
            "http://127.0.0.1:8000/delete-endpoint").text == "hello"
    else:
        handle = serve.get_handle(endpoint_name)
        assert ray.get(handle.remote()) == "hello"
def test_batching(serve_instance):
    class BatchingExample:
        def __init__(self):
            self.count = 0

        @serve.accept_batch
        def __call__(self, flask_request, temp=None):
            self.count += 1
            batch_size = serve.context.batch_size
            return [self.count] * batch_size

    serve.create_endpoint("counter1", "/increment2")

    # Keep checking the routing table until /increment2 is populated.
    while "/increment2" not in requests.get(
            "http://127.0.0.1:8000/-/routes").json():
        time.sleep(0.2)

    # Set the max batch size.
    serve.create_backend(
        "counter:v11", BatchingExample, config={"max_batch_size": 5})
    serve.set_traffic("counter1", {"counter:v11": 1.0})

    future_list = []
    handle = serve.get_handle("counter1")
    for _ in range(20):
        f = handle.remote(temp=1)
        future_list.append(f)

    counter_result = ray.get(future_list)
    # `count` is only incremented once per batch of queries, so if at least
    # one __call__ was invoked with a batch size greater than 1, the max
    # counter result will always be less than 20.
    assert max(counter_result) < 20
def serve_new_model(model_dir, checkpoint, config, metrics, day, gpu=False):
    print("Serving checkpoint: {}".format(checkpoint))

    checkpoint_path = _move_checkpoint_to_model_dir(model_dir, checkpoint,
                                                    config, metrics)

    serve.init()
    backend_name = "mnist:day_{}".format(day)

    serve.create_backend(backend_name, MNISTBackend, checkpoint_path, config,
                         metrics, gpu)

    if "mnist" not in serve.list_endpoints():
        # First time we serve a model - create the endpoint.
        serve.create_endpoint(
            "mnist", backend=backend_name, route="/mnist", methods=["POST"])
    else:
        # The endpoint already exists, so route all traffic to the new model.
        # Here you could also implement an incremental rollout, where only
        # part of the traffic is sent to the new backend and the rest is
        # sent to the existing backends.
        serve.set_traffic("mnist", {backend_name: 1.0})

    # Delete previously existing backends.
    for existing_backend in serve.list_backends():
        if existing_backend.startswith("mnist:day") and \
                existing_backend != backend_name:
            serve.delete_backend(existing_backend)

    return True
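# A hypothetical invocation of serve_new_model (the path and variables here
# are made up for illustration): after finishing day 3 of training, deploy
# the new checkpoint and retire the previous day's backend.
serve_new_model(
    model_dir="/tmp/mnist_models",  # hypothetical model directory
    checkpoint=latest_checkpoint,   # checkpoint object from the trainer
    config=config,                  # training config used for this run
    metrics=metrics,                # evaluation metrics for the checkpoint
    day=3,
    gpu=False)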
def test_http_proxy_failure(serve_instance):
    serve.init()
    serve.create_endpoint("proxy_failure", "/proxy_failure", methods=["GET"])

    def function():
        return "hello1"

    serve.create_backend(function, "proxy_failure:v1")
    serve.set_traffic("proxy_failure", {"proxy_failure:v1": 1.0})

    assert request_with_retries(
        "/proxy_failure", timeout=0.1).text == "hello1"

    for _ in range(10):
        response = request_with_retries("/proxy_failure", timeout=30)
        assert response.text == "hello1"

    _kill_http_proxy()

    def function():
        return "hello2"

    serve.create_backend(function, "proxy_failure:v2")
    serve.set_traffic("proxy_failure", {"proxy_failure:v2": 1.0})

    for _ in range(10):
        response = request_with_retries("/proxy_failure", timeout=30)
        assert response.text == "hello2"
def test_worker_replica_failure(serve_instance):
    serve.http_proxy.MAX_ACTOR_DEAD_RETRIES = 0
    serve.init()
    serve.create_endpoint(
        "replica_failure", "/replica_failure", methods=["GET"])

    class Worker:
        # Assumes that two replicas are started. Will hang forever in the
        # constructor for any workers that are restarted.
        def __init__(self, path):
            self.should_hang = False
            if not os.path.exists(path):
                with open(path, "w") as f:
                    f.write("1")
            else:
                with open(path, "r") as f:
                    num = int(f.read())

                with open(path, "w") as f:
                    if num == 2:
                        self.should_hang = True
                    else:
                        f.write(str(num + 1))

            if self.should_hang:
                while True:
                    pass

        def __call__(self):
            pass

    temp_path = tempfile.gettempdir() + "/" + serve.utils.get_random_letters()
    serve.create_backend(Worker, "replica_failure", temp_path)
    backend_config = serve.get_backend_config("replica_failure")
    backend_config.num_replicas = 2
    serve.set_backend_config("replica_failure", backend_config)
    serve.set_traffic("replica_failure", {"replica_failure": 1.0})

    # Wait until both replicas have been started (the set starts empty, so
    # loop until two distinct responses have been seen).
    responses = set()
    while len(responses) < 2:
        responses.add(
            request_with_retries("/replica_failure", timeout=0.1).text)
        time.sleep(0.1)

    # Kill one of the replicas.
    handles = _get_worker_handles("replica_failure")
    assert len(handles) == 2
    ray.kill(handles[0])

    # Check that the other replica still serves requests.
    for _ in range(10):
        while True:
            try:
                # The timeout needs to be small here because the request to
                # the restarting worker will hang.
                request_with_retries("/replica_failure", timeout=0.1)
                break
            except TimeoutError:
                time.sleep(0.1)
def test_list_endpoints(serve_instance):
    serve.init()

    def f():
        pass

    serve.create_endpoint("endpoint", "/api", methods=["GET", "POST"])
    serve.create_endpoint("endpoint2", methods=["POST"])
    serve.create_backend("backend", f)
    serve.set_traffic("endpoint2", {"backend": 1.0})

    endpoints = serve.list_endpoints()
    assert "endpoint" in endpoints
    assert endpoints["endpoint"] == {
        "route": "/api",
        "methods": ["GET", "POST"],
        "traffic": {}
    }

    assert "endpoint2" in endpoints
    assert endpoints["endpoint2"] == {
        "route": None,
        "methods": ["POST"],
        "traffic": {
            "backend": 1.0
        }
    }

    serve.delete_endpoint("endpoint")
    assert "endpoint2" in serve.list_endpoints()

    serve.delete_endpoint("endpoint2")
    assert len(serve.list_endpoints()) == 0
def test_http_proxy_failure(serve_instance):
    serve.init()

    def function():
        return "hello1"

    serve.create_backend("proxy_failure:v1", function)
    serve.create_endpoint(
        "proxy_failure", backend="proxy_failure:v1", route="/proxy_failure")

    assert request_with_retries(
        "/proxy_failure", timeout=1.0).text == "hello1"

    for _ in range(10):
        response = request_with_retries("/proxy_failure", timeout=30)
        assert response.text == "hello1"

    _kill_routers()

    def function():
        return "hello2"

    serve.create_backend("proxy_failure:v2", function)
    serve.set_traffic("proxy_failure", {"proxy_failure:v2": 1.0})

    for _ in range(10):
        response = request_with_retries("/proxy_failure", timeout=30)
        assert response.text == "hello2"
def test_http_proxy_failure(serve_instance):
    def function(_):
        return "hello1"

    serve.create_backend("proxy_failure:v1", function)
    serve.create_endpoint(
        "proxy_failure", backend="proxy_failure:v1", route="/proxy_failure")

    assert request_with_retries(
        "/proxy_failure", timeout=1.0).text == "hello1"

    for _ in range(10):
        response = request_with_retries("/proxy_failure", timeout=30)
        assert response.text == "hello1"

    _kill_http_proxies()

    def function(_):
        return "hello2"

    serve.create_backend("proxy_failure:v2", function)
    serve.set_traffic("proxy_failure", {"proxy_failure:v2": 1.0})

    def check_new():
        for _ in range(10):
            response = request_with_retries("/proxy_failure", timeout=30)
            if response.text != "hello2":
                return False
        return True

    wait_for_condition(check_new)
def test_set_traffic_missing_data(serve_instance):
    endpoint_name = "foobar"
    backend_name = "foo_backend"
    serve.create_backend(backend_name, lambda: 5)
    serve.create_endpoint(endpoint_name, backend=backend_name)

    with pytest.raises(ValueError):
        serve.set_traffic(endpoint_name, {"nonexistent_backend": 1.0})

    with pytest.raises(ValueError):
        serve.set_traffic("nonexistent_endpoint_name", {backend_name: 1.0})
async def test_system_metric_endpoints(serve_instance):
    def test_error_counter(flask_request):
        1 / 0

    serve.create_backend("m:v1", test_error_counter)
    serve.create_endpoint("test_metrics", backend="m:v1", route="/measure")
    serve.set_traffic("test_metrics", {"m:v1": 1})

    # Check that metrics are exposed under the HTTP endpoint.
    def test_metric_endpoint():
        requests.get("http://127.0.0.1:8000/measure", timeout=5)
        in_memory_metric = requests.get(
            "http://127.0.0.1:8000/-/metrics", timeout=5).json()

        # We don't want to check the values since this check might be retried.
        in_memory_metric_without_values = []
        for m in in_memory_metric:
            m.pop("value")
            in_memory_metric_without_values.append(m)

        target_metrics = [{
            "info": {
                "name": "num_http_requests",
                "type": "MetricType.COUNTER",
                "route": "/measure"
            },
        }, {
            "info": {
                "name": "num_router_requests",
                "type": "MetricType.COUNTER",
                "endpoint": "test_metrics"
            },
        }, {
            "info": {
                "name": "backend_error_counter",
                "type": "MetricType.COUNTER",
                "backend": "m:v1"
            },
        }]

        for target in target_metrics:
            assert target in in_memory_metric_without_values

    success = False
    for _ in range(3):
        try:
            test_metric_endpoint()
            success = True
            break
        except (AssertionError, requests.ReadTimeout):
            # Metrics may not have been propagated yet.
            time.sleep(2)
            print("Metric not correct, retrying...")

    if not success:
        test_metric_endpoint()
def test_controller_failure(serve_instance):
    serve.init()

    def function():
        return "hello1"

    serve.create_backend("controller_failure:v1", function)
    serve.create_endpoint(
        "controller_failure",
        backend="controller_failure:v1",
        route="/controller_failure")

    assert request_with_retries(
        "/controller_failure", timeout=1).text == "hello1"

    for _ in range(10):
        response = request_with_retries("/controller_failure", timeout=30)
        assert response.text == "hello1"

    ray.kill(serve.api._get_controller(), no_restart=False)

    for _ in range(10):
        response = request_with_retries("/controller_failure", timeout=30)
        assert response.text == "hello1"

    def function():
        return "hello2"

    ray.kill(serve.api._get_controller(), no_restart=False)

    serve.create_backend("controller_failure:v2", function)
    serve.set_traffic("controller_failure", {"controller_failure:v2": 1.0})

    for _ in range(10):
        response = request_with_retries("/controller_failure", timeout=30)
        assert response.text == "hello2"

    def function():
        return "hello3"

    ray.kill(serve.api._get_controller(), no_restart=False)
    serve.create_backend("controller_failure_2", function)
    ray.kill(serve.api._get_controller(), no_restart=False)
    serve.create_endpoint(
        "controller_failure_2",
        backend="controller_failure_2",
        route="/controller_failure_2")
    ray.kill(serve.api._get_controller(), no_restart=False)

    for _ in range(10):
        response = request_with_retries("/controller_failure", timeout=30)
        assert response.text == "hello2"
        response = request_with_retries("/controller_failure_2", timeout=30)
        assert response.text == "hello3"
def test_no_route(serve_instance):
    serve.create_endpoint("noroute-endpoint")

    def func(_, i=1):
        return 1

    serve.create_backend("backend:1", func)
    serve.set_traffic("noroute-endpoint", {"backend:1": 1.0})

    service_handle = serve.get_handle("noroute-endpoint")
    result = ray.get(service_handle.remote(i=1))
    assert result == 1
def test_controller_failure(serve_instance):
    def function(_):
        return "hello1"

    serve.create_backend("controller_failure:v1", function)
    serve.create_endpoint(
        "controller_failure",
        backend="controller_failure:v1",
        route="/controller_failure")

    assert request_with_retries(
        "/controller_failure", timeout=1).text == "hello1"

    for _ in range(10):
        response = request_with_retries("/controller_failure", timeout=30)
        assert response.text == "hello1"

    ray.kill(serve.api._global_client._controller, no_restart=False)

    for _ in range(10):
        response = request_with_retries("/controller_failure", timeout=30)
        assert response.text == "hello1"

    def function(_):
        return "hello2"

    ray.kill(serve.api._global_client._controller, no_restart=False)

    serve.create_backend("controller_failure:v2", function)
    serve.set_traffic("controller_failure", {"controller_failure:v2": 1.0})

    def check_controller_failure():
        response = request_with_retries("/controller_failure", timeout=30)
        return response.text == "hello2"

    wait_for_condition(check_controller_failure)

    def function(_):
        return "hello3"

    ray.kill(serve.api._global_client._controller, no_restart=False)
    serve.create_backend("controller_failure_2", function)
    ray.kill(serve.api._global_client._controller, no_restart=False)
    serve.create_endpoint(
        "controller_failure_2",
        backend="controller_failure_2",
        route="/controller_failure_2")
    ray.kill(serve.api._global_client._controller, no_restart=False)

    for _ in range(10):
        response = request_with_retries("/controller_failure", timeout=30)
        assert response.text == "hello2"
        response = request_with_retries("/controller_failure_2", timeout=30)
        assert response.text == "hello3"
def test_nonblocking():
    serve.init()
    serve.create_endpoint("nonblocking", "/nonblocking")

    def function(flask_request):
        return {"method": flask_request.method}

    serve.create_backend(function, "nonblocking:v1")
    serve.set_traffic("nonblocking", {"nonblocking:v1": 1.0})

    resp = requests.get("http://127.0.0.1:8000/nonblocking").json()["method"]
    assert resp == "GET"
def create_endpoint(self):
    if len(self.endpoints) == self.max_endpoints:
        endpoint_to_delete = self.endpoints.pop()
        serve.delete_endpoint(endpoint_to_delete)
        serve.delete_backend(endpoint_to_delete)

    new_endpoint = "".join(
        [random.choice(string.ascii_letters) for _ in range(10)])

    def handler(self, *args):
        return new_endpoint

    serve.create_backend(new_endpoint, handler)
    serve.create_endpoint(new_endpoint, "/" + new_endpoint)
    serve.set_traffic(new_endpoint, {new_endpoint: 1.0})

    self.endpoints.append(new_endpoint)
def test_batching_exception(serve_instance):
    class NoListReturned:
        def __init__(self):
            self.count = 0

        @serve.accept_batch
        def __call__(self, flask_request, temp=None):
            batch_size = serve.context.batch_size
            return batch_size

    serve.create_endpoint("exception-test", "/noListReturned")

    # Set the max batch size.
    serve.create_backend(
        "exception:v1", NoListReturned, config={"max_batch_size": 5})
    serve.set_traffic("exception-test", {"exception:v1": 1.0})

    handle = serve.get_handle("exception-test")
    with pytest.raises(ray.exceptions.RayTaskError):
        assert ray.get(handle.remote(temp=1))
def test_call_method(serve_instance):
    serve.create_endpoint("endpoint", "/api")

    class CallMethod:
        def method(self, request):
            return "hello"

    serve.create_backend("backend", CallMethod)
    serve.set_traffic("endpoint", {"backend": 1.0})

    # Test HTTP path.
    resp = requests.get(
        "http://127.0.0.1:8000/api",
        timeout=1,
        headers={"X-SERVE-CALL-METHOD": "method"})
    assert resp.text == "hello"

    # Test serve handle path.
    handle = serve.get_handle("endpoint")
    assert ray.get(handle.options("method").remote()) == "hello"
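# For reference, the same method call can be made from the command line; a
# sketch using curl against the endpoint defined above:
#
#   curl -H "X-SERVE-CALL-METHOD: method" http://127.0.0.1:8000/api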
def test_cluster_name():
    with pytest.raises(TypeError):
        serve.init(cluster_name=1)

    route = "/api"
    backend = "backend"
    endpoint = "endpoint"

    serve.init(cluster_name="cluster1", blocking=True, http_port=8001)
    serve.create_endpoint(endpoint, route=route)

    def function():
        return "hello1"

    serve.create_backend(backend, function)
    serve.set_traffic(endpoint, {backend: 1.0})

    assert requests.get("http://127.0.0.1:8001" + route).text == "hello1"

    # Create a second cluster on port 8002. Create an endpoint and backend
    # with the same names and check that they don't collide.
    serve.init(cluster_name="cluster2", blocking=True, http_port=8002)
    serve.create_endpoint(endpoint, route=route)

    def function():
        return "hello2"

    serve.create_backend(backend, function)
    serve.set_traffic(endpoint, {backend: 1.0})

    assert requests.get("http://127.0.0.1:8001" + route).text == "hello1"
    assert requests.get("http://127.0.0.1:8002" + route).text == "hello2"

    # Check that deleting the backend in the current cluster doesn't affect
    # the other cluster.
    serve.delete_endpoint(endpoint)
    serve.delete_backend(backend)
    assert requests.get("http://127.0.0.1:8001" + route).text == "hello1"

    # Check that we can re-connect to the first cluster.
    serve.init(cluster_name="cluster1")
    serve.delete_endpoint(endpoint)
    serve.delete_backend(backend)
def test_scaling_replicas(serve_instance):
    class Counter:
        def __init__(self):
            self.count = 0

        def __call__(self, _):
            self.count += 1
            return self.count

    serve.create_endpoint("counter", "/increment")

    # Keep checking the routing table until /increment is populated.
    while "/increment" not in requests.get(
            "http://127.0.0.1:8000/-/routes").json():
        time.sleep(0.2)

    b_config = BackendConfig(num_replicas=2)
    serve.create_backend(Counter, "counter:v1", backend_config=b_config)
    serve.set_traffic("counter", {"counter:v1": 1.0})

    counter_result = []
    for _ in range(10):
        resp = requests.get("http://127.0.0.1:8000/increment").json()
        counter_result.append(resp)

    # Since the load is shared between two replicas, the max result cannot
    # be 10.
    assert max(counter_result) < 10

    b_config = serve.get_backend_config("counter:v1")
    b_config.num_replicas = 1
    serve.set_backend_config("counter:v1", b_config)

    counter_result = []
    for _ in range(10):
        resp = requests.get("http://127.0.0.1:8000/increment").json()
        counter_result.append(resp)

    # Give some time for a replica to spin down, but the majority of the
    # requests should be served by the only remaining replica.
    assert max(counter_result) - min(counter_result) > 6
def test_handle_in_endpoint(serve_instance):
    serve.init()

    class Endpoint1:
        def __call__(self, flask_request):
            return "hello"

    class Endpoint2:
        def __init__(self):
            self.handle = serve.get_handle("endpoint1", missing_ok=True)

        def __call__(self):
            return ray.get(self.handle.remote())

    serve.create_endpoint("endpoint1", "/endpoint1", methods=["GET", "POST"])
    serve.create_backend(Endpoint1, "endpoint1:v0")
    serve.set_traffic("endpoint1", {"endpoint1:v0": 1.0})

    serve.create_endpoint("endpoint2", "/endpoint2", methods=["GET", "POST"])
    serve.create_backend(Endpoint2, "endpoint2:v0")
    serve.set_traffic("endpoint2", {"endpoint2:v0": 1.0})

    assert requests.get("http://127.0.0.1:8000/endpoint2").text == "hello"
def test_parallel_start(serve_instance):
    # Test the ability to start multiple replicas in parallel. In the past,
    # when Serve scaled up a backend, it did so one replica at a time,
    # waiting for each replica to initialize. This test guards against that
    # by preventing the first replica from finishing initialization until
    # the second replica has also started.
    @ray.remote
    class Barrier:
        def __init__(self, release_on):
            self.release_on = release_on
            self.current_waiters = 0
            self.event = asyncio.Event()

        async def wait(self):
            self.current_waiters += 1
            if self.current_waiters == self.release_on:
                self.event.set()
            else:
                await self.event.wait()

    barrier = Barrier.remote(release_on=2)

    class LongStartingServable:
        def __init__(self):
            ray.get(barrier.wait.remote(), timeout=10)

        def __call__(self, _):
            return "Ready"

    serve.create_endpoint("test-parallel")
    serve.create_backend(
        "p:v0", LongStartingServable, config={"num_replicas": 2})
    serve.set_traffic("test-parallel", {"p:v0": 1})
    handle = serve.get_handle("test-parallel")

    ray.get(handle.remote(), timeout=10)
def test_shard_key(serve_instance, route):
    # Create five backends that return different integers.
    num_backends = 5
    traffic_dict = {}
    for i in range(num_backends):

        def function():
            return i

        backend_name = "backend-split-" + str(i)
        traffic_dict[backend_name] = 1.0 / num_backends
        serve.create_backend(backend_name, function)

    serve.create_endpoint(
        "endpoint", backend=list(traffic_dict.keys())[0], route=route)
    serve.set_traffic("endpoint", traffic_dict)

    def do_request(shard_key):
        if route is not None:
            url = "http://127.0.0.1:8000" + route
            headers = {"X-SERVE-SHARD-KEY": shard_key}
            result = requests.get(url, headers=headers).text
        else:
            handle = serve.get_handle("endpoint")
            result = ray.get(handle.options(shard_key=shard_key).remote())
        return result

    # Send requests with different shard keys and log the backends they
    # go to.
    shard_keys = [get_random_letters() for _ in range(20)]
    results = {}
    for shard_key in shard_keys:
        results[shard_key] = do_request(shard_key)

    # Check that the shard keys are mapped to the same backends.
    for shard_key in shard_keys:
        assert do_request(shard_key) == results[shard_key]
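# Requests that carry the same shard key are consistently routed to the same
# backend, which is what the assertions above verify. A sketch of issuing a
# sharded request by hand ("user-42" is an arbitrary example key, and the
# route is whatever the endpoint was created with):
#
#   curl -H "X-SERVE-SHARD-KEY: user-42" http://127.0.0.1:8000/<route>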
def test_delete_backend(serve_instance):
    serve.create_endpoint("delete_backend", "/delete-backend")

    def function():
        return "hello"

    serve.create_backend("delete:v1", function)
    serve.set_traffic("delete_backend", {"delete:v1": 1.0})

    assert requests.get(
        "http://127.0.0.1:8000/delete-backend").text == "hello"

    # Check that we can't delete the backend while it's in use.
    with pytest.raises(ValueError):
        serve.delete_backend("delete:v1")

    serve.create_backend("delete:v2", function)
    serve.set_traffic("delete_backend", {"delete:v1": 0.5, "delete:v2": 0.5})

    with pytest.raises(ValueError):
        serve.delete_backend("delete:v1")

    # Check that the backend can be deleted once it's no longer in use.
    serve.set_traffic("delete_backend", {"delete:v2": 1.0})
    serve.delete_backend("delete:v1")

    # Check that we can no longer use the previously deleted backend.
    with pytest.raises(ValueError):
        serve.set_traffic("delete_backend", {"delete:v1": 1.0})

    def function2():
        return "olleh"

    # Check that we can now reuse the previously deleted backend's tag.
    serve.create_backend("delete:v1", function2)
    serve.set_traffic("delete_backend", {"delete:v1": 1.0})

    assert requests.get(
        "http://127.0.0.1:8000/delete-backend").text == "olleh"
def test_delete_backend(serve_instance):
    def function(_):
        return "hello"

    serve.create_backend("delete:v1", function)
    serve.create_endpoint(
        "delete_backend", backend="delete:v1", route="/delete-backend")

    assert requests.get(
        "http://127.0.0.1:8000/delete-backend").text == "hello"

    # Check that we can't delete the backend while it's in use.
    with pytest.raises(ValueError):
        serve.delete_backend("delete:v1")

    serve.create_backend("delete:v2", function)
    serve.set_traffic("delete_backend", {"delete:v1": 0.5, "delete:v2": 0.5})

    with pytest.raises(ValueError):
        serve.delete_backend("delete:v1")

    # Check that the backend can be deleted once it's no longer in use.
    serve.set_traffic("delete_backend", {"delete:v2": 1.0})
    serve.delete_backend("delete:v1")

    # Check that we can no longer use the previously deleted backend.
    with pytest.raises(ValueError):
        serve.set_traffic("delete_backend", {"delete:v1": 1.0})

    def function2(_):
        return "olleh"

    # Check that we can now reuse the previously deleted backend's tag.
    serve.create_backend("delete:v1", function2)
    serve.set_traffic("delete_backend", {"delete:v1": 1.0})

    for _ in range(10):
        try:
            assert requests.get(
                "http://127.0.0.1:8000/delete-backend").text == "olleh"
            break
        except AssertionError:
            time.sleep(0.5)  # Wait for the traffic policy to propagate.
    else:
        assert requests.get(
            "http://127.0.0.1:8000/delete-backend").text == "olleh"
def test_master_failure(serve_instance):
    serve.init()
    serve.create_endpoint("master_failure", "/master_failure")

    def function():
        return "hello1"

    serve.create_backend("master_failure:v1", function)
    serve.set_traffic("master_failure", {"master_failure:v1": 1.0})

    assert request_with_retries("/master_failure", timeout=1).text == "hello1"

    for _ in range(10):
        response = request_with_retries("/master_failure", timeout=30)
        assert response.text == "hello1"

    ray.kill(serve.api._get_master_actor())

    for _ in range(10):
        response = request_with_retries("/master_failure", timeout=30)
        assert response.text == "hello1"

    def function():
        return "hello2"

    ray.kill(serve.api._get_master_actor())

    serve.create_backend("master_failure:v2", function)
    serve.set_traffic("master_failure", {"master_failure:v2": 1.0})

    for _ in range(10):
        response = request_with_retries("/master_failure", timeout=30)
        assert response.text == "hello2"

    def function():
        return "hello3"

    ray.kill(serve.api._get_master_actor())
    serve.create_endpoint("master_failure_2", "/master_failure_2")
    ray.kill(serve.api._get_master_actor())
    serve.create_backend("master_failure_2", function)
    ray.kill(serve.api._get_master_actor())
    serve.set_traffic("master_failure_2", {"master_failure_2": 1.0})

    for _ in range(10):
        response = request_with_retries("/master_failure", timeout=30)
        assert response.text == "hello2"
        response = request_with_retries("/master_failure_2", timeout=30)
        assert response.text == "hello3"
def echo_v1(_):
    return "v1"


def echo_v2(_):
    return "v2"


serve.start()

serve.create_backend("echo:v1", echo_v1)
serve.create_endpoint("my_endpoint", backend="echo:v1", route="/echo")

for _ in range(3):
    resp = requests.get("http://127.0.0.1:8000/echo").json()
    print(pformat_color_json(resp))

    print("...Sleeping for 2 seconds...")
    time.sleep(2)

serve.create_backend("echo:v2", echo_v2)
serve.set_traffic("my_endpoint", {"echo:v1": 0.5, "echo:v2": 0.5})

while True:
    resp = requests.get("http://127.0.0.1:8000/echo").json()
    print(pformat_color_json(resp))

    print("...Sleeping for 2 seconds...")
    time.sleep(2)
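# The example above leaves traffic split 50/50 indefinitely. To complete the
# rollout, traffic could later be shifted entirely to the new backend, e.g.:
#
#   serve.set_traffic("my_endpoint", {"echo:v2": 1.0})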
import ray
from ray import serve


class ComposedModel:
    def __init__(self):
        self.detector_handle = serve.get_handle("object")
        self.alpr_handle = serve.get_handle("alpr")

    async def __call__(self, flask_request):
        image_data = flask_request.data

        object_found = await self.detector_handle.remote(data=image_data)
        if object_found["label"] != "car":
            return {"contains_car": False}
        if object_found["score"] < 0.4:
            return {"contains_car": False}

        license_plate = await self.alpr_handle.remote(data=image_data)
        return {"contains_car": True, "license_plate": license_plate}


ray.init(address="auto")
serve.init()

serve.create_endpoint("composed", "/composed", methods=["POST"])
serve.create_backend("composed:v1", ComposedModel, config={"num_replicas": 2})
serve.set_traffic("composed", {"composed:v1": 1})
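# A sketch of a client call against the composed endpoint defined above
# ("example.jpg" is a hypothetical image file):
import requests

with open("example.jpg", "rb") as f:
    resp = requests.post("http://127.0.0.1:8000/composed", data=f.read())
print(resp.json())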