def test_starlette_response(serve_instance):
    def basic_response(_):
        return starlette.responses.Response(
            "Hello, world!", media_type="text/plain")

    serve.create_backend("basic_response", basic_response)
    serve.create_endpoint(
        "basic_response", backend="basic_response", route="/basic_response")
    assert requests.get(
        "http://127.0.0.1:8000/basic_response").text == "Hello, world!"

    def html_response(_):
        return starlette.responses.HTMLResponse(
            "<html><body><h1>Hello, world!</h1></body></html>")

    serve.create_backend("html_response", html_response)
    serve.create_endpoint(
        "html_response", backend="html_response", route="/html_response")
    assert requests.get(
        "http://127.0.0.1:8000/html_response"
    ).text == "<html><body><h1>Hello, world!</h1></body></html>"

    def plain_text_response(_):
        return starlette.responses.PlainTextResponse("Hello, world!")

    serve.create_backend("plain_text_response", plain_text_response)
    serve.create_endpoint(
        "plain_text_response",
        backend="plain_text_response",
        route="/plain_text_response")
    assert requests.get(
        "http://127.0.0.1:8000/plain_text_response").text == "Hello, world!"

    def json_response(_):
        return starlette.responses.JSONResponse({"hello": "world"})

    serve.create_backend("json_response", json_response)
    serve.create_endpoint(
        "json_response", backend="json_response", route="/json_response")
    assert requests.get(
        "http://127.0.0.1:8000/json_response").json()["hello"] == "world"

    def redirect_response(_):
        return starlette.responses.RedirectResponse(
            url="http://127.0.0.1:8000/basic_response")

    serve.create_backend("redirect_response", redirect_response)
    serve.create_endpoint(
        "redirect_response",
        backend="redirect_response",
        route="/redirect_response")
    assert requests.get(
        "http://127.0.0.1:8000/redirect_response").text == "Hello, world!"

    def streaming_response(_):
        async def slow_numbers():
            for number in range(1, 4):
                yield str(number)
                await asyncio.sleep(0.01)

        return starlette.responses.StreamingResponse(
            slow_numbers(), media_type="text/plain", status_code=418)

    serve.create_backend("streaming_response", streaming_response)
    serve.create_endpoint(
        "streaming_response",
        backend="streaming_response",
        route="/streaming_response")
    resp = requests.get("http://127.0.0.1:8000/streaming_response")
    assert resp.text == "123"
    assert resp.status_code == 418
def connect_in_backend(_):
    serve.create_backend("backend-ception", connect_in_backend)
""" Example service that prints out http context. """ import time import requests from ray import serve from ray.serve.utils import pformat_color_json def echo(flask_request): return "hello " + flask_request.args.get("name", "serve!") serve.init() serve.create_backend("echo:v1", echo) serve.create_endpoint("my_endpoint", backend="echo:v1", route="/echo") while True: resp = requests.get("http://127.0.0.1:8000/echo").json() print(pformat_color_json(resp)) print("...Sleeping for 2 seconds...") time.sleep(2)
payload["petal length"], payload["petal width"], ] prediction = self.model.predict([input_vector])[0] human_name = self.label_list[prediction] return {"result": human_name} # connect to our existing Ray cluster # note that the password will be different for your redis instance! ray.init(address="auto") # now we initialize /connect to the Ray service # listen on 0.0.0.0 to make the HTTP server accessible from other machines. serve.init(http_host="0.0.0.0") serve.create_backend("lr:v1", BoostingModel) serve.create_endpoint("iris_classifier", backend="lr:v1", route="/regressor") # __doc_create_deploy_end__ # __doc_query_begin__ import requests # noqa: E402 sample_request_input = { "sepal length": 1.2, "sepal width": 1.0, "petal length": 1.1, "petal width": 0.9, } response = requests.get( "http://localhost:8000/regressor", json=sample_request_input) print(response.text)
    return colorful_json


class MagicCounter:
    def __init__(self, increment):
        self.increment = increment

    def __call__(self, flask_request, base_number=None):
        if serve.context.web:
            base_number = int(flask_request.args.get("base_number", "0"))
        return base_number + self.increment


serve.init()

serve.create_backend("counter:v1", MagicCounter, 42)  # increment=42
serve.create_endpoint("magic_counter", backend="counter:v1", route="/counter")

print("Sending ten queries via HTTP")
for i in range(10):
    url = "http://127.0.0.1:8000/counter?base_number={}".format(i)
    print("> Pinging {}".format(url))
    resp = requests.get(url).json()
    print(pformat_color_json(resp))

    time.sleep(0.2)

print("Sending ten queries via Python")
handle = serve.get_handle("magic_counter")
for i in range(10):
    print("> Pinging handle.remote(base_number={})".format(i))
# blocking=True will wait for the HTTP server to be ready to serve requests.
serve.init(blocking=True)

# an endpoint is associated with an http URL.
serve.create_endpoint("my_endpoint", "/echo")


# a backend can be a function or class.
# it can be made to be invoked from web as well as python.
def echo_v1(flask_request, response="hello from python!"):
    if serve.context.web:
        response = flask_request.url
    return response


serve.create_backend(echo_v1, "echo:v1")
serve.set_traffic("my_endpoint", {"echo:v1": 1.0})

# wait for routing table to get populated
time.sleep(2)

# a relative slo (10 ms deadline) can be specified via http
slo_ms = 10.0
# an absolute slo deadline can also be specified via http
abs_slo_ms = 11.9

print("> [HTTP] Pinging http://127.0.0.1:8000/"
      "echo?relative_slo_ms={}".format(slo_ms))
print(
    requests.get("http://127.0.0.1:8000/"
                 "echo?relative_slo_ms={}".format(slo_ms)).json())

print("> [HTTP] Pinging http://127.0.0.1:8000/"
"chmod +x hey_linux_amd64" ]) ray.init(address=cluster.address, dashboard_host="0.0.0.0") serve.init() @serve.accept_batch def echo(_): time.sleep(0.01) # Sleep for 10ms ray.show_in_webui(str(serve.context.batch_size), key="Current batch size") return ["hi {}".format(i) for i in range(serve.context.batch_size)] config = {"num_replicas": 30, "max_batch_size": 16} serve.create_backend("echo:v1", echo, config=config) serve.create_endpoint("echo", backend="echo:v1", route="/echo") print("Warming up") for _ in range(5): resp = requests.get("http://127.0.0.1:8000/echo").text print(resp) time.sleep(0.5) connections = int(config["num_replicas"] * config["max_batch_size"] * 0.75) while True: proc = subprocess.Popen([ "./hey_linux_amd64", "-c", str(connections), "-z", "60m", "http://127.0.0.1:8000/echo" ],
            [self.preprocessor(i).unsqueeze(0) for i in pil_images])
        print("[2/3] Images transformed, tensor shape {}".format(
            input_tensor.shape))

        with torch.no_grad():
            output_tensor = self.model(input_tensor)
        print("[3/3] Inference done!")
        return {"class_index": int(torch.argmax(output_tensor[0]))}
# __doc_define_servable_end__

ray.init(num_cpus=8)

# __doc_deploy_begin__
serve.start()
serve.create_backend("resnet18:v0", ImageModel)
serve.create_endpoint(
    "predictor",
    backend="resnet18:v0",
    route="/image_predict",
    methods=["POST"])
# __doc_deploy_end__

# __doc_query_begin__
ray_logo_bytes = requests.get(
    "https://github.com/ray-project/ray/raw/"
    "master/doc/source/images/ray_header_logo.png").content

resp = requests.post(
    "http://localhost:8000/image_predict", data=ray_logo_bytes)
print(resp.json())
# Output
        if serve.context.batch_size is not None:
            batch_size = serve.context.batch_size
            result = []
            for base_num in base_number:
                ret_str = "Number: {} Batch size: {}".format(
                    base_num, batch_size)
                result.append(ret_str)
            return result
        return ""


serve.init(blocking=True)
serve.create_endpoint("magic_counter", "/counter", blocking=True)
# specify max_batch_size in BackendConfig
b_config = BackendConfig(max_batch_size=5)
serve.create_backend(
    MagicCounter, "counter:v1", 42, backend_config=b_config)  # increment=42
print("Backend Config for backend: 'counter:v1'")
print(b_config)
serve.link("magic_counter", "counter:v1")
handle = serve.get_handle("magic_counter")

future_list = []

# fire 30 requests
for r in range(30):
    print("> [REMOTE] Pinging handle.remote(base_number={})".format(r))
    f = handle.remote(base_number=r)
    future_list.append(f)

# get results of queries as they complete
left_futures = future_list
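
# --- Added sketch (not the original file's continuation) ---
# One way the remaining futures could be drained as they complete, assuming
# `ray` is imported at the top of the file. ray.wait returns a (ready,
# remaining) pair, blocking until at least one future is ready.
while left_futures:
    ready, left_futures = ray.wait(left_futures)
    print(ray.get(ready[0]))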