def test_shutdown(serve_instance):
    """Check that serve.shutdown() disables the API and removes Serve actors.

    A named instance is started with one backend and one endpoint, then shut
    down. Afterwards API calls must raise, and none of the instance's named
    actors may still be retrievable.
    """
    instance_name = "shutdown"

    def f():
        pass

    serve.init(name=instance_name, http_port=8002)
    serve.create_backend("backend", f)
    serve.create_endpoint("endpoint", backend="backend")

    serve.shutdown()

    # After shutdown the API must tell the caller to initialize again.
    with pytest.raises(RayServeException, match="Please run serve.init"):
        serve.list_backends()

    serve_actor_names = (
        constants.SERVE_MASTER_NAME,
        constants.SERVE_PROXY_NAME,
        constants.SERVE_ROUTER_NAME,
        constants.SERVE_METRIC_SINK_NAME,
    )

    def _still_registered(actor_name):
        # A ValueError from the lookup is taken to mean the actor is gone.
        try:
            ray.get_actor(format_actor_name(actor_name, instance_name))
        except ValueError:
            return False
        return True

    def check_dead():
        # Stops at the first actor that can still be looked up.
        return not any(_still_registered(name) for name in serve_actor_names)

    assert wait_for_condition(check_dead)
def test_list_backends(serve_instance):
    """Check that serve.list_backends() tracks backend creation and deletion.

    Backends are created with dict configs; the listing is expected to map
    each backend name to a config that can be indexed by option name.
    """
    serve.init()

    @serve.accept_batch
    def f():
        pass

    # One backend: listed under its name with the config it was given.
    serve.create_backend("backend", f, config={"max_batch_size": 10})
    listing = serve.list_backends()
    assert len(listing) == 1
    assert "backend" in listing
    assert listing["backend"]["max_batch_size"] == 10

    # A second backend shows up next to the first one.
    serve.create_backend("backend2", f, config={"num_replicas": 10})
    listing = serve.list_backends()
    assert len(listing) == 2
    assert listing["backend2"]["num_replicas"] == 10

    # Deleting the first backend leaves only the second.
    serve.delete_backend("backend")
    listing = serve.list_backends()
    assert len(listing) == 1
    assert "backend2" in listing

    # Deleting the last backend empties the listing.
    serve.delete_backend("backend2")
    assert len(serve.list_backends()) == 0
def test_list_backends(serve_instance):
    """Check that serve.list_backends() tracks backend creation and deletion.

    Backends are created with BackendConfig objects; the listing is expected
    to map each backend name to an object exposing the options as attributes.
    """

    @serve.accept_batch
    def f():
        pass

    # One backend: listed under its name with the config it was given.
    batch_config = BackendConfig(max_batch_size=10)
    serve.create_backend("backend", f, config=batch_config)
    listing = serve.list_backends()
    assert len(listing) == 1
    assert "backend" in listing
    assert listing["backend"].max_batch_size == 10

    # A second backend shows up next to the first one.
    replica_config = BackendConfig(num_replicas=10)
    serve.create_backend("backend2", f, config=replica_config)
    listing = serve.list_backends()
    assert len(listing) == 2
    assert listing["backend2"].num_replicas == 10

    # Deleting the first backend leaves only the second.
    serve.delete_backend("backend")
    listing = serve.list_backends()
    assert len(listing) == 1
    assert "backend2" in listing

    # Deleting the last backend empties the listing.
    serve.delete_backend("backend2")
    assert len(serve.list_backends()) == 0
def test_start_idempotent(serve_instance):
    """Check that repeated serve.start() calls keep existing deployments."""

    @serve.deployment(name="start")
    def func(*args):
        pass

    func.deploy()
    assert "start" in serve.list_backends()

    # Alternate detached and non-detached starts, two rounds in a row.
    for _ in range(2):
        serve.start(detached=True)
        serve.start()

    # The deployment made before the extra starts must still be listed.
    assert "start" in serve.list_backends()
def serve_new_model(model_dir, checkpoint, config, metrics, day, gpu=False):
    """Serve ``checkpoint`` as the backend behind the "mnist" endpoint.

    The checkpoint is first moved into ``model_dir``; a backend named
    ``mnist:day_<day>`` is created from it and receives all "mnist" traffic.
    Every other backend whose name starts with ``mnist:day`` is then deleted.

    Args:
        model_dir: Passed to ``_move_checkpoint_to_model_dir``.
        checkpoint: Checkpoint to serve.
        config: Passed to the checkpoint move and to ``MNISTBackend``.
        metrics: Passed to the checkpoint move and to ``MNISTBackend``.
        day: Used to build the backend name.
        gpu: Passed to ``MNISTBackend`` as its last argument.

    Returns:
        Always ``True``.
    """
    print("Serving checkpoint: {}".format(checkpoint))

    checkpoint_path = _move_checkpoint_to_model_dir(
        model_dir, checkpoint, config, metrics)

    serve.init()

    backend_name = "mnist:day_{}".format(day)
    serve.create_backend(
        backend_name, MNISTBackend, checkpoint_path, config, metrics, gpu)

    endpoint_exists = "mnist" in serve.list_endpoints()
    if endpoint_exists:
        # Route all traffic to the new model. An incremental rollout could be
        # implemented here instead, sending only part of the traffic to the
        # new backend and the rest to the existing ones.
        serve.set_traffic("mnist", {backend_name: 1.0})
    else:
        # First model ever served: the endpoint has to be created.
        serve.create_endpoint(
            "mnist", backend=backend_name, route="/mnist", methods=["POST"])

    # Remove every earlier daily backend, keeping only the one just created.
    stale_backends = [
        name for name in serve.list_backends()
        if name.startswith("mnist:day") and name != backend_name
    ]
    for stale_name in stale_backends:
        serve.delete_backend(stale_name)

    return True
def test_ray_client(ray_client_instance):
    """Drive Serve through Ray Client from separate driver scripts.

    Each script connects to ``ray_client_instance`` by itself and is executed
    with ``run_string_as_driver``; this process then checks the outcome
    through its own Serve connection and over HTTP.
    """
    ray.util.connect(ray_client_instance)

    # Driver 1: start a detached Serve instance.
    start = """
import ray
ray.util.connect("{}")

from ray import serve

serve.start(detached=True)
""".format(ray_client_instance)
    run_string_as_driver(start)

    # Attach this process to the instance the driver started.
    serve.connect()

    # Driver 2: deploy a function under the /hello route.
    deploy = """
import ray
ray.util.connect("{}")

from ray import serve

@serve.deployment(name="test1", route_prefix="/hello")
def f(*args):
    return "hello"

f.deploy()
""".format(ray_client_instance)
    run_string_as_driver(deploy)

    assert "test1" in serve.list_backends()
    assert "test1" in serve.list_endpoints()
    hello_response = requests.get("http://localhost:8000/hello")
    assert hello_response.text == "hello"

    # Driver 3: delete the deployment again.
    delete = """
import ray
ray.util.connect("{}")

from ray import serve

serve.get_deployment("test1").delete()
""".format(ray_client_instance)
    run_string_as_driver(delete)

    assert "test1" not in serve.list_backends()
    assert "test1" not in serve.list_endpoints()
def test_shutdown(ray_shutdown):
    """Check that serve.shutdown() disables the API and removes Serve actors.

    The controller actor and one proxy actor must be retrievable by name
    while Serve is running and must no longer be retrievable after shutdown.
    """
    ray.init(num_cpus=16)
    serve.start(http_options={"port": 8003})

    @serve.deployment
    def f():
        pass

    f.deploy()

    controller_name = serve.api._global_client._controller_name
    first_node_id = get_all_node_ids()[0][0]
    actor_names = [
        controller_name,
        format_actor_name(SERVE_PROXY_NAME, controller_name, first_node_id),
    ]

    def _actor_exists(name):
        # A ValueError from the lookup is taken to mean "no such actor".
        try:
            ray.get_actor(name)
        except ValueError:
            return False
        return True

    def check_alive():
        # The list is built in full so that every actor is probed on each
        # poll, even after one has been found missing.
        return all([_actor_exists(name) for name in actor_names])

    def check_dead():
        # Stops at the first actor that can still be looked up.
        return not any(_actor_exists(name) for name in actor_names)

    wait_for_condition(check_alive)

    serve.shutdown()

    # With Serve shut down, API calls are expected to fail.
    with pytest.raises(RayServeException):
        serve.list_backends()

    wait_for_condition(check_dead)
def test_connect(detached, ray_shutdown):
    """Check that Serve API calls can be made from within a deployment.

    Runs for whatever value of ``detached`` is supplied, so both detached
    and non-detached instances can be covered.
    """
    ray.init(num_cpus=16, namespace="serve")
    serve.start(detached=detached)

    @serve.deployment
    def connect_in_backend(*args):
        # Deploy a copy of this deployment under a new name from the inside.
        nested = connect_in_backend.options(name="backend-ception")
        nested.deploy()

    connect_in_backend.deploy()

    # Invoke the deployment once so that the nested deploy actually runs.
    handle = connect_in_backend.get_handle()
    ray.get(handle.remote())

    assert "backend-ception" in serve.list_backends()
def test_connect(detached, ray_shutdown):
    """Check that the Serve API can be used from within a backend.

    Runs for whatever value of ``detached`` is supplied, so both detached
    and non-detached instances can be covered.
    """
    ray.init(num_cpus=16)
    serve.start(detached=detached)

    def connect_in_backend(_):
        # Register a second backend from inside the running backend.
        serve.create_backend("backend-ception", connect_in_backend)

    serve.create_backend("connect_in_backend", connect_in_backend)
    serve.create_endpoint("endpoint", backend="connect_in_backend")

    # Send one request so that the backend body actually executes.
    handle = serve.get_handle("endpoint")
    ray.get(handle.remote())

    assert "backend-ception" in serve.list_backends()
# Exercises Serve through Ray Client: every script string below is executed
# as a separate driver via run_string_as_driver, and this process checks the
# outcome through serve.list_backends() / serve.list_endpoints() and HTTP.
# NOTE(review): this line is whitespace-collapsed, and part of it has been
# replaced by a "*****:*****" mask. The text between the "/hello" request and
# the `@app.get("/")` script is missing; the `fastapi` variable passed to
# run_string_as_driver at the end is never assigned in the visible text.
# Restore the original from version control before editing. The code is left
# untouched here.
def test_ray_client(ray_client_instance): ray.util.connect(ray_client_instance, namespace="") start = """ import ray ray.util.connect("{}", namespace="") from ray import serve serve.start(detached=True) """.format(ray_client_instance) run_string_as_driver(start) serve.connect() deploy = """ import ray ray.util.connect("{}", namespace="") from ray import serve @serve.deployment(name="test1", route_prefix="/hello") def f(*args): return "hello" f.deploy() """.format(ray_client_instance) run_string_as_driver(deploy) assert "test1" in serve.list_backends() assert "test1" in serve.list_endpoints() assert requests.get("http://*****:*****@app.get("/") def hello(): return "hello" @serve.deployment @serve.ingress(app) class A: pass A.deploy() """.format(ray_client_instance) run_string_as_driver(fastapi) assert requests.get("http://localhost:8000/A").json() == "hello"