def test_snapshot_always_written_to_internal_kv(
    ray_start_with_dashboard, ray_shutdown  # noqa: F811
):
    # https://github.com/ray-project/ray/issues/19752
    _, tmp_path = mkstemp()

    @serve.deployment()
    def hello(_):
        return "hello"

    def check():
        try:
            resp = requests.get("http://localhost:8000/hello")
            assert resp.text == "hello"
            return True
        except Exception:
            return False

    serve.start(detached=True, _checkpoint_path=f"file://{tmp_path}")
    hello.deploy()
    # Wait until the deployment is actually serving traffic before
    # inspecting the snapshot.
    wait_for_condition(check)

    webui_url = ray_start_with_dashboard["webui_url"]

    def get_deployment_snapshot():
        snapshot = requests.get(f"http://{webui_url}/api/snapshot").json()["data"][
            "snapshot"
        ]
        return snapshot["deployments"]

    # Make sure /api/snapshot returns a non-empty deployment status.
    def verify_snapshot():
        return get_deployment_snapshot() != {}

    wait_for_condition(verify_snapshot)

    # Sanity-check that the snapshot is correct.
    snapshot = get_deployment_snapshot()
    assert len(snapshot) == 1
    hello_deployment = list(snapshot.values())[0]
    assert hello_deployment["name"] == "hello"
    assert hello_deployment["status"] == "RUNNING"


def _shared_serve_instance():
    # Uncomment the line below to turn on debug log for tests.
    # os.environ["SERVE_LOG_DEBUG"] = "1"
    # Overriding task_retry_delay_ms to relaunch actors more quickly
    ray.init(
        num_cpus=36,
        _metrics_export_port=9999,
        _system_config={
            "metrics_report_interval_ms": 1000,
            "task_retry_delay_ms": 50,
        },
    )
    yield serve.start(detached=True)


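# A sketch of how a per-test fixture might layer on top of the shared
# instance above. This is an assumption, not part of the original snippet:
# it presumes `_shared_serve_instance` is registered as a (session-scoped)
# pytest fixture, and it uses the public `Deployment.delete()` API to keep
# tests isolated without tearing down the long-lived Serve instance.
import pytest

from ray import serve


@pytest.fixture
def serve_instance(_shared_serve_instance):
    yield _shared_serve_instance
    # Assumed cleanup: remove every deployment after each test.
    for deployment in serve.list_deployments().values():
        deployment.delete()

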
def test_start_idempotent(serve_instance):
    @serve.deployment(name="start")
    def func(*args):
        pass

    func.deploy()

    assert "start" in serve.list_backends()
    serve.start(detached=True)
    serve.start()
    serve.start(detached=True)
    serve.start()
    assert "start" in serve.list_backends()


def test_update_num_replicas_anonymous_namespace(shutdown_ray, detached):
    """Test updating num_replicas with anonymous namespace."""

    ray.init()
    serve.start(detached=detached)

    @serve.deployment(num_replicas=1)
    def f(*args):
        return "got f"

    f.deploy()

    num_actors = len(ray.util.list_named_actors(all_namespaces=True))

    for _ in range(5):
        f.deploy()
        assert num_actors == len(ray.util.list_named_actors(all_namespaces=True))

    serve.shutdown()


def start(
    address,
    http_host,
    http_port,
    http_location,
    checkpoint_path,
):
    ray.init(
        address=address,
        namespace=SERVE_NAMESPACE,
    )
    serve.start(
        detached=True,
        http_options=dict(
            host=http_host,
            port=http_port,
            location=http_location,
        ),
        _checkpoint_path=checkpoint_path,
    )


async def startup_event():
    ray.init(address="auto")  # Connect to the running Ray cluster.
    serve.start(http_host=None)  # Start the Ray Serve instance.

    # Define a callable class to use for our Ray Serve backend.
    class GPT2:
        def __init__(self):
            self.nlp_model = pipeline("text-generation", model="gpt2")

        async def __call__(self, request):
            return self.nlp_model(await request.body(), max_length=50)

    # Set up a Ray Serve backend with the desired number of replicas.
    backend_config = serve.BackendConfig(num_replicas=2)
    serve.create_backend("gpt-2", GPT2, config=backend_config)
    serve.create_endpoint("generate", backend="gpt-2")

    # Get a handle to our Ray Serve endpoint so we can query it in Python.
    global serve_handle
    serve_handle = serve.get_handle("generate")


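# For context, the global `serve_handle` set above would typically be
# consumed by a FastAPI route. A minimal sketch, assuming `app` is the
# surrounding FastAPI instance and that `startup_event` is registered via
# `@app.on_event("startup")` (neither is shown in the snippet above):
from fastapi import FastAPI

app = FastAPI()


@app.get("/generate")
async def generate(query: str):
    # Forward the query to the Serve endpoint through the handle, awaiting
    # the result instead of blocking the event loop with ray.get().
    return await serve_handle.remote(query)

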
def test_serve_shutdown(ray_shutdown):
    serve.start(detached=True)

    @serve.deployment
    class A:
        def __call__(self, *args):
            return "hi"

    A.deploy()

    assert len(serve.list_deployments()) == 1

    serve.shutdown()
    serve.start(detached=True)

    assert len(serve.list_deployments()) == 0

    A.deploy()

    assert len(serve.list_deployments()) == 1


def test_deploy_with_overridden_namespace(shutdown_ray, detached):
    """Test deployments with an overridden namespace."""

    ray_namespace = "ray_namespace"
    controller_namespace = "controller_namespace"

    ray.init(namespace=ray_namespace)
    serve.start(detached=detached, _override_controller_namespace=controller_namespace)

    for iteration in range(2):

        @serve.deployment
        def f(*args):
            return f"{iteration}"

        f.deploy()
        assert requests.get("http://localhost:8000/f").text == f"{iteration}"

    serve.shutdown()


def test_http_head_only(ray_cluster):
    cluster = ray_cluster
    head_node = cluster.add_node(num_cpus=4)
    cluster.add_node(num_cpus=4)

    ray.init(head_node.address)
    node_ids = ray.state.node_ids()
    assert len(node_ids) == 2

    serve.start(http_options={"port": new_port(), "location": "HeadOnly"})

    # Only the controller and head node actor should be started.
    assert len(ray.state.actors()) == 2

    # They should all be placed on the head node without consuming any CPUs,
    # so both nodes should still have all 4 CPUs available.
    cpu_per_nodes = {
        r["CPU"] for r in ray.state.state._available_resources_per_node().values()
    }
    assert cpu_per_nodes == {4}


def test_shutdown(ray_shutdown):
    def f():
        pass

    ray.init(num_cpus=16)
    serve.start(http_port=8003)
    serve.create_backend("backend", f)
    serve.create_endpoint("endpoint", backend="backend")

    actor_names = [
        serve.api._global_client._controller_name,
        format_actor_name(
            SERVE_PROXY_NAME,
            serve.api._global_client._controller_name,
            get_all_node_ids()[0][0],
        ),
    ]

    def check_alive():
        alive = True
        for actor_name in actor_names:
            try:
                ray.get_actor(actor_name)
            except ValueError:
                alive = False
        return alive

    wait_for_condition(check_alive)

    serve.shutdown()
    with pytest.raises(RayServeException):
        serve.list_backends()

    def check_dead():
        for actor_name in actor_names:
            try:
                ray.get_actor(actor_name)
                return False
            except ValueError:
                pass
        return True

    wait_for_condition(check_dead)


def test_no_http(serve_instance):
    client = serve.start(http_host=None)
    assert len(ray.get(client._controller.get_http_proxies.remote())) == 0

    def hello(*args):
        return "hello"

    client.create_backend("backend", hello)
    client.create_endpoint("endpoint", backend="backend")
    assert ray.get(client.get_handle("endpoint").remote()) == "hello"


def test_controller_starts_java_replica(shutdown_only):  # noqa: F811
    ray.init(
        num_cpus=8,
        namespace="default_test_namespace",
        # A dummy code search path to enable cross language.
        job_config=JobConfig(code_search_path=["."]),
    )
    client = serve.start(detached=True)
    controller = client._controller

    config = DeploymentConfig()
    config.deployment_language = JAVA
    config.is_cross_language = True

    replica_config = ReplicaConfig.create(
        "io.ray.serve.util.ExampleEchoDeployment",
        init_args=["my_prefix "],
    )

    # Deploy it.
    deployment_name = "my_java"
    updating = ray.get(
        controller.deploy.remote(
            name=deployment_name,
            deployment_config_proto_bytes=config.to_proto_bytes(),
            replica_config_proto_bytes=replica_config.to_proto_bytes(),
            route_prefix=None,
            deployer_job_id=ray.get_runtime_context().job_id,
        )
    )
    assert updating
    client._wait_for_deployment_healthy(deployment_name)

    # Let's try to call it!
    all_handles = ray.get(controller._all_running_replicas.remote())
    backend_handle = all_handles["my_java"][0].actor_handle
    out = backend_handle.handleRequest.remote(
        RequestMetadata(
            request_id="id-1",
            endpoint="endpoint",
            call_method="call",
        ).SerializeToString(),
        RequestWrapper(body=msgpack_serialize("hello")).SerializeToString(),
    )
    assert ray.get(out) == "my_prefix hello"

    handle = serve.get_deployment("my_java").get_handle()
    handle_out = handle.remote("hello handle")
    assert ray.get(handle_out) == "my_prefix hello handle"

    ray.get(controller.delete_deployment.remote(deployment_name))
    client._wait_for_deployment_deleted(deployment_name)


def test_deploy_function_no_params_call_with_param(serve_instance, use_async):
    serve.start()

    if use_async:
        expected_output = "async!"
        deployment_cls = async_d
    else:
        expected_output = "sync!"
        deployment_cls = sync_d

    handle = serve.run(deployment_cls.bind())

    assert (
        requests.get(f"http://localhost:8000/{deployment_cls.name}").text
        == expected_output
    )
    with pytest.raises(
        TypeError, match=r"\(\) takes 0 positional arguments but 1 was given"
    ):
        assert ray.get(handle.remote(1)) == expected_output

    with pytest.raises(TypeError, match=r"\(\) got an unexpected keyword argument"):
        assert ray.get(handle.remote(key=1)) == expected_output


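# The snippet above relies on module-level `sync_d`/`async_d` deployments
# and a `use_async` parametrization that are not shown. A hypothetical
# reconstruction consistent with the asserted outputs:
import pytest

from ray import serve


@serve.deployment
def sync_d():
    return "sync!"


@serve.deployment
async def async_d():
    return "async!"


# Reusable marker that would supply `use_async` to the test, e.g. applied
# as `@use_async` above the test definition:
use_async = pytest.mark.parametrize("use_async", [False, True])

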
def test_serve_namespace(ray_start_stop):
    """
    Check that the Dashboard's Serve API can interact with the Python API
    when they both start in the "serve" namespace.
    """

    one = dict(
        name="one",
        num_replicas=1,
        route_prefix="/one",
        ray_actor_options={"runtime_env": {"py_modules": [test_module_uri]}},
        import_path="test_module.test.one",
    )

    put_response = requests.put(GET_OR_PUT_URL, json={"deployments": [one]}, timeout=30)
    assert put_response.status_code == 200

    ray.init(address="auto", namespace="serve")
    serve.start()

    deployments = serve.list_deployments()
    assert len(deployments) == 1
    assert "one" in deployments

    serve.shutdown()


def test_http_root_url(ray_shutdown):
    @serve.deployment
    def f(_):
        pass

    root_url = "https://my.domain.dev/prefix"
    port = new_port()
    os.environ[SERVE_ROOT_URL_ENV_KEY] = root_url
    serve.start(http_options=dict(port=port))
    f.deploy()
    assert f.url == root_url + "/f"
    serve.shutdown()

    del os.environ[SERVE_ROOT_URL_ENV_KEY]
    port = new_port()
    serve.start(http_options=dict(port=port))
    f.deploy()
    assert f.url != root_url + "/f"
    assert f.url == f"http://127.0.0.1:{port}/f"
    serve.shutdown()


def test_serve_shutdown(ray_shutdown):
    ray.init(namespace="serve")
    serve.start(detached=True)

    @serve.deployment
    class A:
        def __call__(self, *args):
            return "hi"

    serve.run(A.bind())

    assert len(serve.list_deployments()) == 1

    serve.shutdown()
    serve.start(detached=True)

    assert len(serve.list_deployments()) == 0

    serve.run(A.bind())

    assert len(serve.list_deployments()) == 1


def test_quickstart_counter(serve_with_client):
    serve.start()

    @serve.deployment
    class Counter:
        def __init__(self):
            self.count = 0

        def __call__(self, *args):
            self.count += 1
            return {"count": self.count}

    # Deploy our class.
    Counter.deploy()
    print("deploy finished")

    # Query our endpoint in two different ways: from HTTP and from Python.
    assert requests.get("http://127.0.0.1:8000/Counter").json() == {"count": 1}
    print("query 1 finished")
    assert ray.get(Counter.get_handle().remote()) == {"count": 2}
    print("query 2 finished")


def test_serve_controller_namespace(
    ray_shutdown, namespace: Optional[str], detached: bool
):
    """
    Tests that the Serve controller is started in the given namespace if one
    is specified, in the "serve" namespace if started detached without a
    namespace, and in the current (anonymous) namespace otherwise. When the
    controller is in the "serve" namespace, this also tests that it can be
    retrieved from another namespace.
    """

    ray.init(namespace=namespace)
    serve.start(detached=detached)
    client = serve.api._global_client
    if namespace:
        controller_namespace = namespace
    elif detached:
        controller_namespace = "serve"
    else:
        controller_namespace = ray.get_runtime_context().namespace

    assert ray.get_actor(client._controller_name, namespace=controller_namespace)


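# The `namespace` and `detached` arguments above come from pytest
# parametrization that is not shown. A sketch of the assumed setup; the
# concrete values are illustrative:
import pytest


@pytest.mark.parametrize("namespace", [None, "test_namespace"])
@pytest.mark.parametrize("detached", [True, False])
def test_namespace_detached_combinations(namespace, detached):
    # Each (namespace, detached) pair exercises a different branch of the
    # controller-namespace resolution logic in the test above.
    assert detached in (True, False)

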
def test_scale_up(ray_cluster):
    cluster = ray_cluster
    head_node = cluster.add_node(num_cpus=3)

    @serve.deployment("D", version="1", num_replicas=1)
    def D(*args):
        return os.getpid()

    def get_pids(expected, timeout=30):
        pids = set()
        start = time.time()
        while len(pids) < expected:
            pids.add(requests.get("http://localhost:8000/D").text)
            if time.time() - start >= timeout:
                raise TimeoutError("Timed out waiting for pids.")
        return pids

    ray.init(head_node.address)
    serve.start(detached=True)
    client = serve.connect()

    D.deploy()
    pids1 = get_pids(1)

    goal_ref = D.options(num_replicas=3).deploy(_blocking=False)
    assert not client._wait_for_goal(goal_ref, timeout=0.1)
    assert get_pids(1) == pids1

    # Add a node with another CPU, another replica should get placed.
    cluster.add_node(num_cpus=1)
    assert not client._wait_for_goal(goal_ref, timeout=0.1)
    pids2 = get_pids(2)
    assert pids1.issubset(pids2)

    # Add a node with another CPU, the final replica should get placed
    # and the deploy goal should be done.
    cluster.add_node(num_cpus=1)
    assert client._wait_for_goal(goal_ref)
    pids3 = get_pids(3)
    assert pids2.issubset(pids3)


def test_fixed_number_proxies(ray_cluster):
    cluster = ray_cluster
    head_node = cluster.add_node(num_cpus=4)
    cluster.add_node(num_cpus=4)
    cluster.add_node(num_cpus=4)

    ray.init(head_node.address)
    node_ids = ray._private.state.node_ids()
    assert len(node_ids) == 3

    with pytest.raises(
        pydantic.ValidationError,
        match="you must specify the `fixed_number_replicas` parameter.",
    ):
        serve.start(
            http_options={
                "location": "FixedNumber",
            }
        )

    serve.start(
        http_options={
            "port": new_port(),
            "location": "FixedNumber",
            "fixed_number_replicas": 2,
        }
    )

    # Only the controller and two HTTP proxies should be started.
    controller_handle = get_global_client()._controller
    node_to_http_actors = ray.get(controller_handle.get_http_proxies.remote())
    assert len(node_to_http_actors) == 2

    proxy_names_bytes = ray.get(controller_handle.get_http_proxy_names.remote())
    proxy_names = ActorNameList.FromString(proxy_names_bytes)
    assert len(proxy_names.names) == 2

    serve.shutdown()
    ray.shutdown()
    cluster.shutdown()


def test_http_root_url(ray_shutdown):
    @serve.deployment
    def f(_):
        pass

    root_url = "https://my.domain.dev/prefix"
    port = new_port()
    os.environ[SERVE_ROOT_URL_ENV_KEY] = root_url
    serve.start(http_options=dict(port=port))
    serve.run(f.bind())
    assert f.url == root_url + "/f"
    serve.shutdown()
    ray.shutdown()

    del os.environ[SERVE_ROOT_URL_ENV_KEY]
    port = new_port()
    serve.start(http_options=dict(port=port))
    serve.run(f.bind())
    assert f.url != root_url + "/f"
    assert f.url == f"http://127.0.0.1:{port}/f"
    serve.shutdown()
    ray.shutdown()

    ray.init(runtime_env={"env_vars": {SERVE_ROOT_URL_ENV_KEY: root_url}})
    port = new_port()
    serve.start(http_options=dict(port=port))
    serve.run(f.bind())
    assert f.url == root_url + "/f"
    serve.shutdown()
    ray.shutdown()


def test_deployment_to_schema_to_deployment():
    @serve.deployment(
        num_replicas=3,
        route_prefix="/hello",
        ray_actor_options={
            "runtime_env": {
                "working_dir": (
                    "https://github.com/shrekris-anyscale/"
                    "test_module/archive/HEAD.zip"
                ),
                "py_modules": [
                    (
                        "https://github.com/shrekris-anyscale/"
                        "test_deploy_group/archive/HEAD.zip"
                    ),
                ],
            }
        },
    )
    def f():
        # The body of this function doesn't matter. It gets replaced by
        # global_f() when the import path in f._func_or_class is overwritten.
        # This function is used as a convenience to apply the @serve.deployment
        # decorator without converting global_f() into a Deployment object.
        pass

    f._func_or_class = "ray.dashboard.modules.serve.tests.test_schema.global_f"

    deployment = schema_to_deployment(deployment_to_schema(f))

    assert deployment.num_replicas == 3
    assert deployment.route_prefix == "/hello"
    assert deployment.ray_actor_options["runtime_env"]["working_dir"] == (
        "https://github.com/shrekris-anyscale/test_module/archive/HEAD.zip"
    )
    assert deployment.ray_actor_options["runtime_env"]["py_modules"] == [
        "https://github.com/shrekris-anyscale/test_deploy_group/archive/HEAD.zip",
        "https://github.com/shrekris-anyscale/test_module/archive/HEAD.zip",
    ]

    serve.start()
    deployment.deploy()
    assert ray.get(deployment.get_handle().remote()) == "Hello world!"
    assert requests.get("http://localhost:8000/hello").text == "Hello world!"
    serve.shutdown()


def run(
    config_or_import_path: str,
    runtime_env: str,
    runtime_env_json: str,
    working_dir: str,
    app_dir: str,
    address: str,
    host: str,
    port: int,
    blocking: bool,
):
    sys.path.insert(0, app_dir)

    final_runtime_env = parse_runtime_env_args(
        runtime_env=runtime_env,
        runtime_env_json=runtime_env_json,
        working_dir=working_dir,
    )

    if pathlib.Path(config_or_import_path).is_file():
        config_path = config_or_import_path
        cli_logger.print(f'Deploying from config file: "{config_path}".')

        with open(config_path, "r") as config_file:
            config = ServeApplicationSchema.parse_obj(yaml.safe_load(config_file))
        is_config = True
    else:
        import_path = config_or_import_path
        cli_logger.print(f'Deploying from import path: "{import_path}".')
        node = import_attr(import_path)
        is_config = False

    # Setting the runtime_env here will set defaults for the deployments.
    ray.init(address=address, namespace=SERVE_NAMESPACE, runtime_env=final_runtime_env)
    client = serve.start(detached=True)

    try:
        if is_config:
            client.deploy_app(config)
        else:
            serve.run(node, host=host, port=port)
        cli_logger.success("Deployed successfully.")

        if blocking:
            while True:
                # Block, letting Ray print logs to the terminal.
                time.sleep(10)

    except KeyboardInterrupt:
        cli_logger.info("Got KeyboardInterrupt, shutting down...")
        serve.shutdown()
        sys.exit()


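# For reference, the config-file branch above expects a YAML document
# matching ServeApplicationSchema. A minimal sketch that validates an
# in-memory config the same way; the import path and deployment shown are
# hypothetical, and the schema import location assumes recent Ray versions:
import yaml

from ray.serve.schema import ServeApplicationSchema

config_yaml = """
import_path: my_module:my_dag_node
deployments:
  - name: my_deployment
    num_replicas: 2
"""

config = ServeApplicationSchema.parse_obj(yaml.safe_load(config_yaml))

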
def test_fastapi_serialization(shutdown_ray):
    # https://github.com/ray-project/ray/issues/15511
    app = FastAPI()

    @serve.deployment(name="custom_service")
    @serve.ingress(app)
    class CustomService:
        def deduplicate(self, data):
            data.drop_duplicates(inplace=True)
            return data

        @app.post("/deduplicate")
        def _deduplicate(self, request):
            data = request["data"]
            columns = request["columns"]
            import pandas as pd

            data = pd.DataFrame(data, columns=columns)
            data.drop_duplicates(inplace=True)
            return data.values.tolist()

    serve.start()
    CustomService.deploy()


def test_http_root_path(ray_shutdown):
    @serve.deployment
    def hello():
        return "hello"

    port = new_port()
    root_path = "/serve"
    serve.start(http_options=dict(root_path=root_path, port=port))
    hello.deploy()

    # Check that the URL is prefixed correctly.
    assert hello.url == f"http://127.0.0.1:{port}{root_path}/hello"

    # Check that routing works as expected.
    resp = requests.get(hello.url)
    assert resp.status_code == 200
    assert resp.text == "hello"

    # Check that advertised routes are prefixed correctly.
    resp = requests.get(f"http://127.0.0.1:{port}{root_path}/-/routes")
    assert resp.status_code == 200
    assert resp.json() == {"/hello": "hello"}


def main(
    num_replicas: int,
    num_queries: Optional[int],
    max_concurrent_queries: Optional[int],
    blocking: bool,
):
    serve.start()

    print(f"num_replicas={num_replicas}")
    print(f"max_concurrent_queries={max_concurrent_queries}")

    @serve.deployment(
        num_replicas=num_replicas, max_concurrent_queries=max_concurrent_queries
    )
    def noop(_):
        return "hello world"

    noop.deploy()
    url = "{}/noop".format(DEFAULT_HTTP_ADDRESS)

    if num_queries:
        run_http_benchmark(url, num_queries)
    if blocking:
        print("Endpoint {} is ready.".format(url))
        while True:
            time.sleep(5)


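# run_http_benchmark is referenced above but not shown. A hypothetical
# sketch of such a helper, assuming sequential requests and simple mean
# latency reporting:
import time

import requests


def run_http_benchmark(url: str, num_queries: int) -> None:
    latencies = []
    for _ in range(num_queries):
        start = time.time()
        requests.get(url)
        latencies.append(time.time() - start)
    print(
        f"Mean latency over {num_queries} queries: "
        f"{sum(latencies) / len(latencies):.4f}s"
    )

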
def serve_ha(external_redis, monkeypatch):  # noqa: F811
    monkeypatch.setenv("RAY_SERVE_KV_TIMEOUT_S", "1")
    address_info = ray.init(
        num_cpus=36,
        namespace="default_test_namespace",
        _metrics_export_port=9999,
        _system_config={
            "metrics_report_interval_ms": 1000,
            "task_retry_delay_ms": 50,
        },
    )
    yield (address_info, serve.start(detached=True))
    ray.shutdown()


def test_deploy_async_class_no_params(serve_instance):
    @serve.deployment
    class AsyncCounter:
        async def __init__(self):
            await asyncio.sleep(5)
            self.count = 0

        async def __call__(self):
            self.count += 1
            await asyncio.sleep(5)
            return {"count": self.count}

    serve.start()
    AsyncCounter.deploy()

    assert requests.get("http://127.0.0.1:8000/AsyncCounter").json() == {"count": 1}
    assert requests.get("http://127.0.0.1:8000/AsyncCounter").json() == {"count": 2}
    assert ray.get(AsyncCounter.get_handle().remote()) == {"count": 3}


def test_replica_spread(ray_cluster):
    cluster = ray_cluster
    cluster.add_node(num_cpus=2)

    # NOTE(edoakes): we need to start serve before adding the worker node to
    # guarantee that the controller is placed on the head node (we should be
    # able to tolerate being placed on workers, but there's currently a bug).
    # We should add an explicit test for that in the future when it's fixed.
    cluster.connect(namespace=SERVE_NAMESPACE)
    serve.start(detached=True)

    worker_node = cluster.add_node(num_cpus=2)

    @serve.deployment(num_replicas=2)
    def get_node_id():
        return os.getpid(), ray.get_runtime_context().node_id.hex()

    h = serve.run(get_node_id.bind())

    def get_num_nodes():
        pids = set()
        node_ids = set()
        while len(pids) < 2:
            pid, node = ray.get(h.remote())
            pids.add(pid)
            node_ids.add(node)

        return len(node_ids)

    # Check that the two replicas are spread across the two nodes.
    wait_for_condition(lambda: get_num_nodes() == 2)

    # Kill the worker node. The second replica should get rescheduled on
    # the head node.
    cluster.remove_node(worker_node)

    # Check that the replica on the dead node can be rescheduled.
    wait_for_condition(lambda: get_num_nodes() == 1)


def setup_anyscale_cluster():
    """Set up a Ray cluster on Anyscale via ray.client().

    Note this is large-scale by default and should be kicked off
    less frequently.
    """
    # TODO: Ray client doesn't work with the releaser script yet because
    # we cannot connect to the anyscale cluster from its headnode.
    # ray.client().env({}).connect()
    ray.init(address="auto")
    serve_client = serve.start()

    return serve_client