import time

import requests

from ray.experimental import serve


def test_e2e(serve_instance):
    serve.init()  # so we have access to global state
    serve.create_endpoint("endpoint", "/api", blocking=True)
    result = serve.api._get_global_state().route_table.list_service()
    assert result["/api"] == "endpoint"

    # Poll until the HTTP proxy has picked up the new route, backing off
    # exponentially between retries.
    retry_count = 5
    timeout_sleep = 0.5
    while True:
        try:
            resp = requests.get("http://127.0.0.1:8000/", timeout=0.5).json()
            assert resp == result
            break
        except Exception:
            time.sleep(timeout_sleep)
            timeout_sleep *= 2
            retry_count -= 1
            if retry_count == 0:
                assert False, "Route table hasn't been updated after 5 retries."

    def function(flask_request):
        return "OK"

    serve.create_backend(function, "echo:v1")
    serve.link("endpoint", "echo:v1")
    resp = requests.get("http://127.0.0.1:8000/api").json()["result"]
    assert resp == "OK"
import os
import tempfile

import pytest

from ray.experimental import serve


@pytest.fixture
def serve_instance():
    _, new_db_path = tempfile.mkstemp(suffix=".test.db")
    serve.init(
        kv_store_path=new_db_path,
        blocking=True,
        ray_init_kwargs={"num_cpus": 36})
    yield
    os.remove(new_db_path)
def scale(backend_tag, num_replicas):
    if num_replicas <= 0:
        # Abort with a user-facing error instead of silently continuing.
        raise click.ClickException(
            "Cannot set the number of replicas to be smaller than or equal "
            "to 0.")
    ray.init(address="auto")
    serve.init()
    serve.scale(backend_tag, num_replicas)
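# For illustration, a minimal sketch of how the scale command above could be
# wired up as a click subcommand. The decorator stack and the name
# `scale_command` below are assumptions for this sketch; the snippet above
# only shows the command body.
import click


@click.command()
@click.argument("backend_tag")
@click.argument("num_replicas", type=int)
def scale_command(backend_tag, num_replicas):
    scale(backend_tag, num_replicas)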
""" import time import requests import ray from ray.experimental import serve from ray.experimental.serve.utils import pformat_color_json def echo(_): raise Exception("Something went wrong...") serve.init(blocking=True) serve.create_endpoint("my_endpoint", "/echo", blocking=True) serve.create_backend(echo, "echo:v1") serve.link("my_endpoint", "echo:v1") for _ in range(2): resp = requests.get("http://127.0.0.1:8000/echo").json() print(pformat_color_json(resp)) print("...Sleeping for 2 seconds...") time.sleep(2) handle = serve.get_handle("my_endpoint") print("Invoke from python will raise exception with traceback:") ray.get(handle.remote())
import time

import requests

from ray.experimental import serve
from ray.experimental.serve.utils import pformat_color_json


def echo_v1(_):
    return "v1"


def echo_v2(_):
    return "v2"


# Specify the router policy as RoundRobin.
serve.init(blocking=True, queueing_policy=serve.RoutePolicy.RoundRobin)

# Create a service.
serve.create_endpoint("my_endpoint", "/echo", blocking=True)

# Create the first backend.
serve.create_backend(echo_v1, "echo:v1")
# Create the second backend.
serve.create_backend(echo_v2, "echo:v2")

# Link the service to the two backends, splitting traffic 50/50.
serve.split("my_endpoint", {"echo:v1": 0.5, "echo:v2": 0.5})

while True:
    resp = requests.get("http://127.0.0.1:8000/echo").json()
    print(pformat_color_json(resp))
    time.sleep(0.5)
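# A small verification sketch (an addition, assuming the "result" key used by
# the e2e test earlier in this section): with a 50/50 split under RoundRobin,
# a batch of requests should land evenly on the two backends.
from collections import Counter

counts = Counter(
    requests.get("http://127.0.0.1:8000/echo").json()["result"]
    for _ in range(20))
print(counts)  # Expect roughly Counter({'v1': 10, 'v2': 10}).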
import os
import tempfile

import pytest

from ray.experimental import serve


@pytest.fixture
def serve_instance():
    _, new_db_path = tempfile.mkstemp(suffix=".test.db")
    serve.init(kv_store_path=new_db_path, blocking=True)
    yield
    os.remove(new_db_path)
@pytest.fixture
def serve_instance():
    serve.init(blocking=True)
    yield
import time

import requests

from ray.experimental import serve
from ray.experimental.serve.utils import pformat_color_json


def echo_v1(_):
    return "v1"


def echo_v2(_):
    return "v2"


# Specify the router policy as FixedPacking with a packing number of 5:
# requests are sent to the same backend in groups of 5 before switching.
serve.init(
    blocking=True,
    queueing_policy=serve.RoutePolicy.FixedPacking,
    policy_kwargs={"packing_num": 5})

# Create a service.
serve.create_endpoint("my_endpoint", "/echo", blocking=True)

# Create the first backend.
serve.create_backend(echo_v1, "echo:v1")
# Create the second backend.
serve.create_backend(echo_v2, "echo:v2")

# Link the service to the two backends, splitting traffic 50/50.
serve.split("my_endpoint", {"echo:v1": 0.5, "echo:v2": 0.5})

while True:
    resp = requests.get("http://127.0.0.1:8000/echo").json()
    print(pformat_color_json(resp))
    time.sleep(0.5)
def benchmark(func, name):
    for _ in range(NUM_WARMUPS):
        func()
    for _ in range(NUM_REPEATS):
        with profile(name):
            func()


def work(_):
    time.sleep(0.05)


@ray.remote
def work_ray():
    time.sleep(0.05)


serve.init()
serve.create_endpoint("sleep", "/")
serve.create_backend(work, "sleep:v1")
serve.link("sleep", "sleep:v1")
handle = serve.get_handle("sleep")

benchmark(lambda: ray.get(handle.remote()), "serve_sleep")
benchmark(lambda: ray.get(work_ray.remote()), "ray_sleep")

summarize_profile()
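# profile, summarize_profile, NUM_WARMUPS, and NUM_REPEATS are defined outside
# this snippet. A minimal sketch of what such helpers might look like (the
# constant values, names, and output format are assumptions for illustration):
import time
from collections import defaultdict
from contextlib import contextmanager

NUM_WARMUPS = 10
NUM_REPEATS = 50
_timings = defaultdict(list)


@contextmanager
def profile(name):
    # Record the wall-clock duration of the body under the given name.
    start = time.perf_counter()
    yield
    _timings[name].append(time.perf_counter() - start)


def summarize_profile():
    # Print the mean duration collected for each benchmark name.
    for name, durations in sorted(_timings.items()):
        mean = sum(durations) / len(durations)
        print("{}: mean {:.4f}s over {} runs".format(
            name, mean, len(durations)))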
    num_cpus=8,
    num_gpus=0,
    resources={str(i): 2},
    object_store_memory=object_store_memory,
    redis_max_memory=redis_max_memory,
    webui_host="0.0.0.0")

print("Downloading load testing tool")
subprocess.call([
    "bash", "-c", "rm hey_linux_amd64 || true;"
    "wget https://storage.googleapis.com/hey-release/hey_linux_amd64;"
    "chmod +x hey_linux_amd64"
])

ray.init(address=cluster.address, include_webui=True, webui_host="0.0.0.0")
serve.init(
    blocking=True, kv_store_connector=lambda ns: RayInternalKVStore(ns))


@serve.route("/echo")
@serve.accept_batch
def echo(_):
    time.sleep(0.01)  # Sleep for 10ms.
    ray.show_in_webui(str(serve.context.batch_size), key="Current batch size")
    return ["hi {}".format(i) for i in range(serve.context.batch_size)]


print("Scaling to 30 replicas")
config = serve.get_backend_config("echo:v0")
config.num_replicas = 30
config.max_batch_size = 16
serve.set_backend_config("echo:v0", config)
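# The step above downloads the `hey` load generator, but this excerpt does not
# show it being run. One way it could be pointed at the endpoint (the duration
# and concurrency values below are assumptions for this sketch; -z and -c are
# hey's duration and concurrency flags):
subprocess.call([
    "bash", "-c", "./hey_linux_amd64 -z 30s -c 100 http://127.0.0.1:8000/echo"
])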
def split(endpoint, traffic):
    ray.init(address="auto")
    serve.init()
    serve.split(endpoint, json.loads(traffic))
def init():
    ray.init(address="auto")
    serve.init(blocking=True)
@pytest.fixture
def serve_instance():
    serve.init()
    serve.global_state.wait_until_http_ready()
    yield