def test_list_endpoints(serve_instance):
    """Verify the route/methods/traffic listing returned for each endpoint.

    Registers one routed endpoint that has no traffic assigned and one
    unrouted endpoint whose traffic goes entirely to a single backend,
    compares ``serve.list_endpoints()`` against the expected entries, and
    finally deletes the endpoints one by one.
    """
    serve.init()

    def f():
        pass

    serve.create_endpoint("endpoint", "/api", methods=["GET", "POST"])
    serve.create_endpoint("endpoint2", methods=["POST"])
    serve.create_backend("backend", f)
    serve.set_traffic("endpoint2", {"backend": 1.0})

    # Expected listing per endpoint, checked in registration order.
    expected_listing = {
        "endpoint": {
            "route": "/api",
            "methods": ["GET", "POST"],
            "traffic": {},
        },
        "endpoint2": {
            "route": None,
            "methods": ["POST"],
            "traffic": {"backend": 1.0},
        },
    }

    listed = serve.list_endpoints()
    for endpoint_name, expected_config in expected_listing.items():
        assert endpoint_name in listed
        assert listed[endpoint_name] == expected_config

    # Removing the first endpoint must leave the second one registered.
    serve.delete_endpoint("endpoint")
    assert "endpoint2" in serve.list_endpoints()

    serve.delete_endpoint("endpoint2")
    assert len(serve.list_endpoints()) == 0
def serve_new_model(model_dir, checkpoint, config, metrics, day, gpu=False):
    """Publish ``checkpoint`` as the backend that serves the "mnist" endpoint.

    The checkpoint is first moved into ``model_dir``, then registered as a
    backend named after ``day``. The "mnist" endpoint is created on first use
    or otherwise re-pointed at the new backend, after which every other
    ``mnist:day`` backend is deleted.

    Returns:
        Always ``True``.
    """
    print(f"Serving checkpoint: {checkpoint}")

    checkpoint_path = _move_checkpoint_to_model_dir(
        model_dir, checkpoint, config, metrics)

    serve.init()
    backend_name = f"mnist:day_{day}"
    serve.create_backend(
        backend_name, MNISTBackend, checkpoint_path, config, metrics, gpu)

    if "mnist" in serve.list_endpoints():
        # The endpoint already exists, so route all traffic to the new model.
        # An incremental rollout could be implemented here instead, sending
        # only part of the traffic to the new backend and the rest to the
        # existing backends.
        serve.set_traffic("mnist", {backend_name: 1.0})
    else:
        # First time a model is served: create the endpoint.
        serve.create_endpoint(
            "mnist", backend=backend_name, route="/mnist", methods=["POST"])

    # Delete the previously existing backends, keeping only the new one.
    for candidate in serve.list_backends():
        if not candidate.startswith("mnist:day") or candidate == backend_name:
            continue
        serve.delete_backend(candidate)

    return True
def test_ray_client(ray_client_instance):
    """Drive Serve from separate driver scripts connected via Ray client.

    Three driver scripts are run in turn: one starts a detached Serve
    instance, one deploys ``test1`` under ``/hello``, and one deletes it.
    After the deploy and delete steps the backend and endpoint listings seen
    from this process are checked.
    """
    ray.util.connect(ray_client_instance)

    # Step 1: start a detached Serve instance from a separate driver.
    start_script = """
import ray
ray.util.connect("{}")

from ray import serve

serve.start(detached=True)
""".format(ray_client_instance)
    run_string_as_driver(start_script)

    serve.connect()

    # Step 2: deploy "test1" from another driver and check it is visible here.
    deploy_script = """
import ray
ray.util.connect("{}")

from ray import serve

@serve.deployment(name="test1", route_prefix="/hello")
def f(*args):
    return "hello"

f.deploy()
""".format(ray_client_instance)
    run_string_as_driver(deploy_script)

    assert "test1" in serve.list_backends()
    assert "test1" in serve.list_endpoints()
    assert requests.get("http://localhost:8000/hello").text == "hello"

    # Step 3: delete the deployment from a third driver; it must disappear.
    delete_script = """
import ray
ray.util.connect("{}")

from ray import serve

serve.get_deployment("test1").delete()
""".format(ray_client_instance)
    run_string_as_driver(delete_script)

    assert "test1" not in serve.list_backends()
    assert "test1" not in serve.list_endpoints()
def main(num_replicas: Optional[int], trial_length: Optional[str],
         max_batch_size: Optional[int]):
    """Deploy replicas, run a wrk trial on all nodes and save the metrics.

    Args:
        num_replicas: Target replica count; a falsy value selects the default
            for the active mode (smoke test or full test).
        trial_length: Duration passed to the wrk runner; a falsy value selects
            the default for the active mode.
        max_batch_size: Forwarded unchanged to ``deploy_replicas``.

    The mode is chosen by the ``IS_SMOKE_TEST`` environment variable, which
    defaults to ``"1"`` (smoke test) when unset.
    """
    # Default cluster parameters depend on the smoke-test setting; values the
    # user provided explicitly take precedence.
    # IS_SMOKE_TEST is set by args of releaser's e2e.py
    is_smoke_test = os.environ.get("IS_SMOKE_TEST", "1") == "1"

    if is_smoke_test:
        num_replicas = num_replicas or DEFAULT_SMOKE_TEST_NUM_REPLICA
        trial_length = trial_length or DEFAULT_SMOKE_TEST_TRIAL_LENGTH
        logger.info(
            f"Running local / smoke test with {num_replicas} replicas ..\n")

        # Choose cluster setup based on user config. The local test uses
        # Cluster() to mock actors, which requires the number of nodes to be
        # specified; ray client does not need it.
        num_nodes = int(math.ceil(num_replicas / NUM_CPU_PER_NODE))
        logger.info(
            f"Setting up local ray cluster with {num_nodes} nodes ..\n")
        serve_client = setup_local_single_node_cluster(num_nodes)
    else:
        num_replicas = num_replicas or DEFAULT_FULL_TEST_NUM_REPLICA
        trial_length = trial_length or DEFAULT_FULL_TEST_TRIAL_LENGTH
        logger.info(f"Running full test with {num_replicas} replicas ..\n")
        logger.info("Setting up anyscale ray cluster .. \n")
        serve_client = setup_anyscale_cluster()

    http_config = serve_client._http_config
    http_host = str(http_config.host)
    http_port = str(http_config.port)
    logger.info(f"Ray serve http_host: {http_host}, http_port: {http_port}")

    logger.info(f"Deploying with {num_replicas} target replicas ....\n")
    deploy_replicas(num_replicas, max_batch_size)

    logger.info("Warming up cluster ....\n")
    # NOTE(review): the value returned by .remote() is discarded, so nothing
    # here waits for the warm-up to finish -- confirm that is intended.
    warm_up_one_cluster.remote(10, http_host, http_port, "echo")

    logger.info(f"Starting wrk trial on all nodes for {trial_length} ....\n")
    # For detailed discussion, see https://github.com/wg/wrk/issues/205
    # TODO:(jiaodong) What's the best number to use here ?
    endpoint_names = list(serve.list_endpoints().keys())
    all_metrics, all_wrk_stdout = run_wrk_on_all_nodes(
        trial_length,
        NUM_CONNECTIONS,
        http_host,
        http_port,
        all_endpoints=endpoint_names)

    aggregated_metrics = aggregate_all_metrics(all_metrics)

    logger.info("Wrk stdout on each node: ")
    for node_stdout in all_wrk_stdout:
        logger.info(node_stdout)

    logger.info("Final aggregated metrics: ")
    for metric_name, metric_value in aggregated_metrics.items():
        logger.info(f"{metric_name}: {metric_value}")

    save_test_results(
        aggregated_metrics,
        default_output_file="/tmp/single_deployment_1k_noop_replica.json")
def test_list_endpoints(serve_instance):
    """Verify the endpoint listing, including shadow traffic, then deletion.

    Three backends are created; ``endpoint`` is routed at ``/api`` and backed
    by ``backend`` with ``backend3`` shadowing half of its traffic, while
    ``endpoint2`` has no route and is backed by ``backend2``. The listing is
    compared against the expected entries and both endpoints are then
    deleted one at a time.
    """

    def f():
        pass

    for backend_tag in ("backend", "backend2", "backend3"):
        serve.create_backend(backend_tag, f)

    serve.create_endpoint(
        "endpoint", backend="backend", route="/api", methods=["GET", "POST"])
    serve.create_endpoint("endpoint2", backend="backend2", methods=["POST"])
    serve.shadow_traffic("endpoint", "backend3", 0.5)

    # Expected listing per endpoint, checked in registration order.
    expected_listing = {
        "endpoint": {
            "route": "/api",
            "methods": ["GET", "POST"],
            "traffic": {"backend": 1.0},
            "shadows": {"backend3": 0.5},
            "python_methods": [],
        },
        "endpoint2": {
            "route": None,
            "methods": ["POST"],
            "traffic": {"backend2": 1.0},
            "shadows": {},
            "python_methods": [],
        },
    }

    listed = serve.list_endpoints()
    for endpoint_name, expected_config in expected_listing.items():
        assert endpoint_name in listed
        assert listed[endpoint_name] == expected_config

    # Removing the first endpoint must leave the second one registered.
    serve.delete_endpoint("endpoint")
    assert "endpoint2" in serve.list_endpoints()

    serve.delete_endpoint("endpoint2")
    assert len(serve.list_endpoints()) == 0
def fetch_resources(self):
    """Read the resource counter when at least one endpoint carries traffic.

    Returns:
        The value obtained from ``self.counter.read``, or an empty dict when
        there is no client, when listing the endpoints raises
        ``AttributeError``, or when no endpoint has any traffic assigned.

    Side effects:
        When ``self.tui`` is truthy, the fetched value is also stored on
        ``self.tui.resources_by_endpoint``.
    """
    if not self.client:
        return {}

    try:
        endpoint_configs = serve.list_endpoints().values()
    except AttributeError:
        return {}

    # An endpoint may exist with an empty traffic dict; it is simply ignored
    # here. Indexing the first key of an empty dict used to raise IndexError.
    if not any(config["traffic"] for config in endpoint_configs):
        return {}

    resources = ray.get(self.counter.read.remote())
    if self.tui:
        self.tui.resources_by_endpoint = resources
    return resources
def test_ray_client(ray_client_instance):
    """Drive Serve from separate driver scripts connected via Ray client.

    Every connection uses ``namespace=""``. Driver scripts start a detached
    Serve instance, deploy ``test1`` under ``/hello``, delete it again, and
    finally deploy a FastAPI-backed class ``A`` whose root route is queried
    over HTTP.
    """
    ray.util.connect(ray_client_instance, namespace="")

    # Step 1: start a detached Serve instance from a separate driver.
    start = """
import ray
ray.util.connect("{}", namespace="")

from ray import serve

serve.start(detached=True)
""".format(ray_client_instance)
    run_string_as_driver(start)

    serve.connect()

    # Step 2: deploy "test1" from another driver and check it is visible here.
    deploy = """
import ray
ray.util.connect("{}", namespace="")

from ray import serve

@serve.deployment(name="test1", route_prefix="/hello")
def f(*args):
    return "hello"

f.deploy()
""".format(ray_client_instance)
    run_string_as_driver(deploy)

    assert "test1" in serve.list_backends()
    assert "test1" in serve.list_endpoints()
    # NOTE(review): from this request URL up to the `@app.get("/")` line in
    # the FastAPI script below, the reviewed text was unreadable (masked as
    # `*****:*****@`). That stretch was rebuilt from the steps around it and
    # from the names the rest of the script needs -- confirm against version
    # control before merging.
    assert requests.get("http://localhost:8000/hello").text == "hello"

    # Step 3: delete the deployment from a third driver; it must disappear.
    delete = """
import ray
ray.util.connect("{}", namespace="")

from ray import serve

serve.get_deployment("test1").delete()
""".format(ray_client_instance)
    run_string_as_driver(delete)

    assert "test1" not in serve.list_backends()
    assert "test1" not in serve.list_endpoints()

    # Step 4: deploy a class wrapped around a FastAPI app and query it.
    fastapi = """
import ray
ray.util.connect("{}", namespace="")

from ray import serve
from fastapi import FastAPI

app = FastAPI()

@app.get("/")
def hello():
    return "hello"

@serve.deployment
@serve.ingress(app)
class A:
    pass

A.deploy()
""".format(ray_client_instance)
    run_string_as_driver(fastapi)

    assert requests.get("http://localhost:8000/A").json() == "hello"