Example #1
def test_e2e(serve_instance):
    serve.init()  # so we have access to global state
    serve.create_endpoint("endpoint", "/api", blocking=True)
    result = serve.api._get_global_state().route_table.list_service()
    assert result["/api"] == "endpoint"

    retry_count = 5
    timeout_sleep = 0.5
    while True:
        try:
            resp = requests.get("http://127.0.0.1:8000/", timeout=0.5).json()
            assert resp == result
            break
        except Exception:
            time.sleep(timeout_sleep)
            timeout_sleep *= 2
            retry_count -= 1
            if retry_count == 0:
                assert False, "Route table hasn't been updated after 5 tries."

    def function(flask_request):
        return "OK"

    serve.create_backend(function, "echo:v1")
    serve.link("endpoint", "echo:v1")

    resp = requests.get("http://127.0.0.1:8000/api").json()["result"]
    assert resp == "OK"
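
The polling loop above implements exponential backoff with a bounded retry count. A minimal sketch of the same pattern factored into a reusable helper; the helper name and signature are my own, not part of the test suite:

import time


def poll_until(condition, retries=5, initial_delay=0.5):
    """Retry `condition` until it passes, doubling the delay each attempt."""
    delay = initial_delay
    for _ in range(retries):
        try:
            condition()
            return
        except Exception:
            time.sleep(delay)
            delay *= 2
    raise AssertionError("Condition not met after {} tries.".format(retries))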
Example #2
def serve_instance():
    _, new_db_path = tempfile.mkstemp(suffix=".test.db")
    serve.init(kv_store_path=new_db_path,
               blocking=True,
               ray_init_kwargs={"num_cpus": 36})
    yield
    os.remove(new_db_path)
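
serve_instance reads like a pytest fixture: Example #1 takes it as a test argument. A minimal sketch of the decoration and a consuming test, assuming pytest (the decorator is not shown in this excerpt):

import os
import tempfile

import pytest

from ray.experimental import serve


@pytest.fixture
def serve_instance():
    _, new_db_path = tempfile.mkstemp(suffix=".test.db")
    serve.init(kv_store_path=new_db_path,
               blocking=True,
               ray_init_kwargs={"num_cpus": 36})
    yield
    os.remove(new_db_path)


def test_something(serve_instance):
    # serve.init() has already run; the test can use the serve API directly.
    pass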
Example #3
def scale(backend_tag, num_replicas):
    if num_replicas <= 0:
        # The exception must be raised, not just constructed; ClickException
        # also surfaces the message to the user, which click.Abort does not.
        raise click.ClickException(
            "Cannot set the number of replicas to be less than or equal to 0.")
    ray.init(address="auto")
    serve.init()

    serve.scale(backend_tag, num_replicas)
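
The use of a click exception implies this function is a click command. A minimal sketch of how it might be wired up; the decorator options and flag names are assumptions rather than the actual CLI definition:

import click

import ray
from ray.experimental import serve


@click.command()
@click.option("--backend-tag", required=True, type=str)
@click.option("--num-replicas", required=True, type=int)
def scale(backend_tag, num_replicas):
    if num_replicas <= 0:
        raise click.ClickException(
            "Cannot set the number of replicas to be less than or equal to 0.")
    ray.init(address="auto")
    serve.init()
    serve.scale(backend_tag, num_replicas)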
Example #4
"""

import time

import requests

import ray
from ray.experimental import serve
from ray.experimental.serve.utils import pformat_color_json


def echo(_):
    raise Exception("Something went wrong...")


serve.init(blocking=True)

serve.create_endpoint("my_endpoint", "/echo", blocking=True)
serve.create_backend(echo, "echo:v1")
serve.link("my_endpoint", "echo:v1")

for _ in range(2):
    resp = requests.get("http://127.0.0.1:8000/echo").json()
    print(pformat_color_json(resp))

    print("...Sleeping for 2 seconds...")
    time.sleep(2)

handle = serve.get_handle("my_endpoint")
print("Invoke from python will raise exception with traceback:")
ray.get(handle.remote())
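
Calling ray.get on the handle re-raises the backend's exception in the caller. A minimal sketch of catching it, assuming a Ray version that exposes the wrapper class as ray.exceptions.RayTaskError:

try:
    ray.get(handle.remote())
except ray.exceptions.RayTaskError as e:
    # The original traceback from the echo backend is embedded in the error.
    print("Backend raised:", e)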
Example #5
import requests

from ray.experimental import serve
from ray.experimental.serve.utils import pformat_color_json


def echo_v1(_):
    return "v1"


def echo_v2(_):
    return "v2"


# specify the router policy as RoundRobin
serve.init(blocking=True, queueing_policy=serve.RoutePolicy.RoundRobin)

# create a service
serve.create_endpoint("my_endpoint", "/echo", blocking=True)

# create first backend
serve.create_backend(echo_v1, "echo:v1")

# create second backend
serve.create_backend(echo_v2, "echo:v2")

# link and split the service to two backends
serve.split("my_endpoint", {"echo:v1": 0.5, "echo:v2": 0.5})

while True:
    resp = requests.get("http://127.0.0.1:8000/echo").json()
    print(pformat_color_json(resp))
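
Since the traffic is split 50/50, the observed distribution can be checked with a bounded sample instead of the infinite loop. A sketch assuming the response JSON exposes the backend's return value under a "result" key, as in Example #1:

from collections import Counter

counts = Counter(
    requests.get("http://127.0.0.1:8000/echo").json()["result"]
    for _ in range(200))
print(counts)  # Expect roughly 100 each for "v1" and "v2".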
Example #6
def serve_instance():
    _, new_db_path = tempfile.mkstemp(suffix=".test.db")
    serve.init(kv_store_path=new_db_path, blocking=True)
    yield
    os.remove(new_db_path)
Example #7
def serve_instance():
    serve.init(blocking=True)
    yield
Example #8
import requests

from ray.experimental import serve
from ray.experimental.serve.utils import pformat_color_json


def echo_v1(_):
    return "v1"


def echo_v2(_):
    return "v2"


# specify the router policy as FixedPacking with packing num as 5
serve.init(blocking=True,
           queueing_policy=serve.RoutePolicy.FixedPacking,
           policy_kwargs={"packing_num": 5})

# create a service
serve.create_endpoint("my_endpoint", "/echo", blocking=True)

# create first backend
serve.create_backend(echo_v1, "echo:v1")

# create second backend
serve.create_backend(echo_v2, "echo:v2")

# link and split the service to two backends
serve.split("my_endpoint", {"echo:v1": 0.5, "echo:v2": 0.5})

while True:
    resp = requests.get("http://127.0.0.1:8000/echo").json()
    print(pformat_color_json(resp))
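
With FixedPacking and a packing_num of 5, the router should send five consecutive requests to the same backend before switching. A sketch that makes the runs visible, again assuming the "result" key from Example #1:

results = [
    requests.get("http://127.0.0.1:8000/echo").json()["result"]
    for _ in range(20)
]
print(results)  # Expect runs of five, e.g. five "v1"s then five "v2"s.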
Example #9
import time

import ray
from ray.experimental import serve


def benchmark(func, name):
    for _ in range(NUM_WARMUPS):
        func()

    for _ in range(NUM_REPEATS):
        with profile(name):
            func()


def work(_):
    time.sleep(0.05)


@ray.remote
def work_ray():
    time.sleep(0.05)


serve.init()
serve.create_endpoint('sleep', '/')
serve.create_backend(work, 'sleep:v1')
serve.link('sleep', 'sleep:v1')

handle = serve.get_handle('sleep')

benchmark(lambda: ray.get(handle.remote()), "serve_sleep")
benchmark(lambda: ray.get(work_ray.remote()), "ray_sleep")

summarize_profile()
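
NUM_WARMUPS, NUM_REPEATS, profile, and summarize_profile are referenced but never defined in this excerpt. A minimal sketch of compatible stand-ins; these are assumptions, not the benchmark's actual helpers:

import time
from collections import defaultdict
from contextlib import contextmanager

NUM_WARMUPS = 10  # assumed value
NUM_REPEATS = 50  # assumed value

_timings = defaultdict(list)


@contextmanager
def profile(name):
    # Record wall-clock time for the enclosed block under `name`.
    start = time.perf_counter()
    yield
    _timings[name].append(time.perf_counter() - start)


def summarize_profile():
    for name, samples in _timings.items():
        print("{}: mean {:.4f}s over {} runs".format(
            name, sum(samples) / len(samples), len(samples)))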
Example #10
                     num_cpus=8,
                     num_gpus=0,
                     resources={str(i): 2},
                     object_store_memory=object_store_memory,
                     redis_max_memory=redis_max_memory,
                     webui_host="0.0.0.0")

print("Downloading load testing tool")
subprocess.call([
    "bash", "-c", "rm hey_linux_amd64 || true;"
    "wget https://storage.googleapis.com/hey-release/hey_linux_amd64;"
    "chmod +x hey_linux_amd64"
])

ray.init(address=cluster.address, include_webui=True, webui_host="0.0.0.0")
serve.init(blocking=True, kv_store_connector=lambda ns: RayInternalKVStore(ns))


@serve.route("/echo")
@serve.accept_batch
def echo(_):
    time.sleep(0.01)  # Sleep for 10ms
    ray.show_in_webui(str(serve.context.batch_size), key="Current batch size")
    return ["hi {}".format(i) for i in range(serve.context.batch_size)]


print("Scaling to 30 replicas")
config = serve.get_backend_config("echo:v0")
config.num_replicas = 30
config.max_batch_size = 16
serve.set_backend_config("echo:v0", config)
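
The downloaded hey binary is never actually invoked in this excerpt. A minimal sketch of driving it from the same script, mirroring the subprocess call above; the duration and concurrency values are arbitrary choices:

print("Load testing with hey")
subprocess.call([
    "bash", "-c", "./hey_linux_amd64 -z 30s -c 100 http://127.0.0.1:8000/echo"
])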
Example #11
def split(endpoint, traffic):
    ray.init(address="auto")
    serve.init()

    serve.split(endpoint, json.loads(traffic))
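
Like scale in Example #3, this looks like a click command that takes the traffic dictionary as a JSON string. A hypothetical shell invocation, where the "serve" group name and argument order are assumptions:

# serve split my_endpoint '{"echo:v1": 0.5, "echo:v2": 0.5}'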
Example #12
def init():
    ray.init(address="auto")
    serve.init(blocking=True)
Example #13
def serve_instance():
    serve.init()
    serve.global_state.wait_until_http_ready()
    yield