def echo_v1(_):
    """Backend that always replies with the string "v1"."""
    return "v1"


def echo_v2(_):
    """Backend that always replies with the string "v2"."""
    return "v2"


# Start Serve and wire the /echo route exclusively to the v1 backend.
serve.init(blocking=True)
serve.create_endpoint("my_endpoint", "/echo")
serve.create_backend(echo_v1, "echo:v1")
serve.link("my_endpoint", "echo:v1")

# Poll the endpoint a few times; every response should come from v1.
for _ in range(3):
    resp = requests.get("http://127.0.0.1:8000/echo").json()
    print(pformat_color_json(resp))
    print("...Sleeping for 2 seconds...")
    time.sleep(2)

# Register the v2 backend, then split traffic 50/50 between the versions.
serve.create_backend(echo_v2, "echo:v2")
serve.split("my_endpoint", {"echo:v1": 0.5, "echo:v2": 0.5})

# Poll forever; responses should now mix v1 and v2 answers.
while True:
    resp = requests.get("http://127.0.0.1:8000/echo").json()
    print(pformat_color_json(resp))
    print("...Sleeping for 2 seconds...")
    time.sleep(2)
def split(endpoint, traffic):
    """Reassign an endpoint's traffic weights on a running Serve cluster.

    Connects to the existing Ray cluster, attaches to the Serve instance,
    and applies the backend weights parsed from the JSON string *traffic*
    (e.g. '{"backend:v1": 0.5, "backend:v2": 0.5}') to *endpoint*.
    """
    ray.init(address="auto")
    serve.init()
    weights = json.loads(traffic)
    serve.split(endpoint, weights)
from ray.serve.constants import DEFAULT_HTTP_ADDRESS
import requests
import time
import pandas as pd
from tqdm import tqdm

serve.init(blocking=True)


def noop(_):
    """Backend that returns an empty string — measures pure serving overhead."""
    return ""


# Route all /noop traffic to the single noop backend.
serve.create_endpoint("noop", "/noop")
serve.create_backend(noop, "noop")
serve.split("noop", {"noop": 1.0})

url = "{}/noop".format(DEFAULT_HTTP_ADDRESS)

# Block until the HTTP proxy has actually picked up the new route.
while requests.get(url).status_code == 404:
    time.sleep(1)
    print("Waiting for noop route to showup.")

# Record end-to-end request latency for 5200 sequential GETs.
latency = []
for _ in tqdm(range(5200)):
    start = time.perf_counter()
    resp = requests.get(url)
    end = time.perf_counter()
    latency.append(end - start)

# Remove initial samples
latency = latency[200:]
from ray import serve
import requests

serve.init()


class Counter:
    """Stateful backend that reports its request counter.

    NOTE(review): ``count`` starts at 0 and is never incremented anywhere
    in this example, so every response reports 0.
    """

    def __init__(self):
        self.count = 0

    def __call__(self, flask_request):
        return {"current_counter": self.count}


# Expose the Counter backend on the /counter route with 100% of traffic.
serve.create_endpoint("counter", "/counter")
serve.create_backend(Counter, "counter")
serve.split("counter", {"counter": 1.0})

requests.get("http://127.0.0.1:8000/counter").json()
# > {"current_counter": 0}