def test_get_after_delete(self, serve_instance, use_list_api):
    """Looking up a deployment by name after deleting it raises ``KeyError``."""
    name = "test"

    @serve.deployment(name=name, version="1")
    def d(*args):
        return "1", os.getpid()

    serve.run(d.bind())
    # Drop the local object; from here on the deployment is fetched by name.
    del d

    fetched = self.get_deployment(name, use_list_api)
    fetched.delete()
    del fetched

    with pytest.raises(KeyError):
        self.get_deployment(name, use_list_api)
def test_passing_handle(serve_instance, use_build):
    """Serve a driver whose target was bound with another bound deployment."""
    adder_node = Adder.bind(1)
    taker_node = TakeHandle.bind(adder_node)
    driver_node = DAGDriver.bind(taker_node, input_schema=json_resolver)

    handle = serve.run(driver_node)

    # The same input is checked through the handle and through HTTP.
    assert ray.get(handle.predict.remote(1)) == 2
    http_response = requests.post("http://127.0.0.1:8000/", json=1)
    assert http_response.json() == 2
def test_single_func_deployment_dag(serve_instance, use_build):
    """Serve a DAG made of one ``combine.bind`` call fed by two input fields."""
    with InputNode() as dag_input:
        first, second = dag_input[0], dag_input[1]
        dag = combine.bind(first, second, kwargs_output=1)
        serve_dag = DAGDriver.bind(dag, input_schema=json_resolver)

    handle = serve.run(serve_dag)

    # The same input is checked through the handle and through HTTP.
    assert ray.get(handle.predict.remote([1, 2])) == 4
    http_response = requests.post("http://127.0.0.1:8000/", json=[1, 2])
    assert http_response.json() == 4
def test_user_defined_method_hangs(serve_instance):
    """Call ``set_should_hang``, wait for ``check_new_actor_started``, then send 100 calls."""
    handle = serve.run(Patient.bind())
    original_actor = ray.get(handle.remote())

    ray.get(handle.set_should_hang.remote())
    wait_for_condition(
        check_new_actor_started,
        handle=handle,
        original_actors=original_actor,
    )

    # All of these calls must complete once the condition above holds.
    pending = [handle.remote() for _ in range(100)]
    ray.get(pending)
def test_batching(serve_instance):
    """Send 20 calls to a batched deployment and check its counter stays below 20."""

    @serve.deployment
    class BatchingExample:
        def __init__(self):
            self.count = 0

        @serve.batch(max_batch_size=5, batch_wait_timeout_s=1)
        async def handle_batch(self, requests):
            # One increment per batch; every caller in the batch gets the
            # same counter value back.
            self.count += 1
            return [self.count] * len(requests)

        async def __call__(self, request):
            return await self.handle_batch(request)

    handle = serve.run(BatchingExample.bind())

    future_list = [handle.remote(1) for _ in range(20)]
    counter_result = ray.get(future_list)

    # The counter is only updated once per batch of queries, so if at least
    # one batch held more than one query, the largest counter value returned
    # is always below the 20 queries sent.
    assert max(counter_result) < 20
def test_single_func_no_input(serve_instance, use_build):
    """Serve ``fn_hello`` behind ``NoargDriver`` and read it via handle and HTTP."""
    serve_dag = NoargDriver.bind(fn_hello.bind())
    target = maybe_build(serve_dag, use_build)

    handle = serve.run(target)

    assert ray.get(handle.remote()) == "hello"
    http_response = requests.get("http://127.0.0.1:8000/")
    assert http_response.text == "hello"
def test_e2e(serve_instance):
    """Send 100 calls to a slow deployment and wait for the reported metric to exceed 50."""

    @serve.deployment(
        autoscaling_config={
            "metrics_interval_s": 0.1,
            "min_replicas": 1,
            "max_replicas": 1,
        },
        # We will send over a lot of queries. This will make sure replicas are
        # killed quickly during cleanup.
        _graceful_shutdown_timeout_s=1,
        max_concurrent_queries=1000,
        version="v1",
    )
    class A:
        def __call__(self):
            time.sleep(0.5)

    handle = serve.run(A.bind())

    # Fire-and-forget: the results are never collected.
    for _ in range(100):
        handle.remote()

    def get_data():
        controller = serve_instance._controller
        return ray.get(controller._dump_autoscaling_metrics_for_testing.remote())

    # Wait for metrics to propagate
    wait_for_condition(lambda: len(get_data()) > 0)

    def last_timestamp_value():
        data = get_data()
        only_key = list(data)[0]
        latest = data[only_key][-1]
        print(latest)
        return latest

    # Many queries should be inflight.
    wait_for_condition(lambda: last_timestamp_value().value > 50)
def test_deployment_error_handling(serve_instance):
    """An invalid ``ray_actor_options`` value surfaces as ``ValidationError``."""

    @serve.deployment
    def f():
        pass

    # This is an invalid configuration since dynamic upload of working
    # directories is not supported. The error this causes in the controller
    # code should be caught and reported back to the `deploy` caller.
    invalid_actor_options = {"runtime_env": {"working_dir": "."}}

    with pytest.raises(
        ValidationError,
        match="1 validation error for RayActorOptionsSchema.*",
    ):
        serve.run(f.options(ray_actor_options=invalid_actor_options).bind())
def run_graph():
    """Deploys a Serve application to the controller's Ray cluster.

    The graph is imported from ``test_config_files.pizza.serve_dag``, built
    into an application, and every deployment in it gets
    ``ray_actor_options={"num_cpus": 0.1}`` before the application is run.
    """
    from ray import serve
    from ray._private.utils import import_attr
    from ray.serve.api import build

    # Import and build the graph
    graph = import_attr("test_config_files.pizza.serve_dag")
    app = build(graph)

    # Override options for each deployment
    for deployment in app.deployments.values():
        deployment.set_options(ray_actor_options={"num_cpus": 0.1})

    # Run the built application (not the raw graph) so that the option
    # overrides applied above are actually part of what gets deployed.
    serve.start(detached=True)
    serve.run(app)
def test_empty_decorator(serve_instance):
    """A bare ``@serve.deployment`` names the deployment after what it decorates."""

    @serve.deployment
    def func(*args):
        return "hi"

    @serve.deployment
    class Class:
        def ping(self, *args):
            return "pong"

    assert (func.name, Class.name) == ("func", "Class")

    # Deploy and call each one in turn.
    assert ray.get(serve.run(func.bind()).remote()) == "hi"
    assert ray.get(serve.run(Class.bind()).ping.remote()) == "pong"
def run_graph(import_path: str, deployment_override_options: List[Dict]):
    """Deploys a Serve application to the controller's Ray cluster.

    Args:
        import_path: Passed to ``import_attr`` to load the graph.
        deployment_override_options: One dict per deployment to override.
            Each dict's ``"name"`` entry selects the deployment, and the
            whole dict is forwarded to that deployment's ``set_options``.
    """
    from ray import serve
    from ray.serve.api import build

    # Import and build the graph
    app = build(import_attr(import_path))

    # Override options for each deployment
    for overrides in deployment_override_options:
        target = app.deployments[overrides["name"]]
        target.set_options(**overrides)

    # Run the graph locally on the cluster
    serve.start(_override_controller_namespace="serve")
    serve.run(app)
def test_pass_handle_to_multiple(serve_instance, use_build):
    """Bind one child node into both ``Parent`` and ``GrandParent`` and serve it."""
    child_node = Child.bind()
    parent_node = Parent.bind(child_node)
    # The same child node is passed to the grandparent directly and via the parent.
    root = maybe_build(GrandParent.bind(child_node, parent_node), use_build)

    handle = serve.run(root)

    assert ray.get(handle.remote()) == "ok"
def run(
    config_or_import_path: str,
    runtime_env: str,
    runtime_env_json: str,
    working_dir: str,
    app_dir: str,
    address: str,
    host: str,
    port: int,
    blocking: bool,
):
    """Deploy a Serve application from a config file or an import path.

    Args:
        config_or_import_path: If this names an existing file, it is read as
            a YAML config; otherwise it is handed to ``import_attr``.
        runtime_env: Forwarded to ``parse_runtime_env_args``.
        runtime_env_json: Forwarded to ``parse_runtime_env_args``.
        working_dir: Forwarded to ``parse_runtime_env_args``.
        app_dir: Inserted at the front of ``sys.path`` before anything is
            imported.
        address: Passed to ``ray.init``.
        host: Passed to ``serve.run``.
        port: Passed to ``serve.run``.
        blocking: When true, sleep in a loop after deploying until a
            ``KeyboardInterrupt`` arrives.
    """
    sys.path.insert(0, app_dir)

    final_runtime_env = parse_runtime_env_args(
        runtime_env=runtime_env,
        runtime_env_json=runtime_env_json,
        working_dir=working_dir,
    )

    if pathlib.Path(config_or_import_path).is_file():
        config_path = config_or_import_path
        cli_logger.print(f"Deploying from config file: '{config_path}'.")
        with open(config_path, "r") as config_file:
            app_or_node = Application.from_yaml(config_file)
    else:
        import_path = config_or_import_path
        cli_logger.print(f"Deploying from import path: '{import_path}'.")
        app_or_node = import_attr(import_path)

    # Setting the runtime_env here will set defaults for the deployments.
    ray.init(address=address, namespace="serve", runtime_env=final_runtime_env)

    try:
        serve.run(app_or_node, host=host, port=port)
        cli_logger.success("Deployed successfully.")

        while blocking:
            # Block, letting Ray print logs to the terminal.
            time.sleep(10)
    except KeyboardInterrupt:
        cli_logger.info("Got KeyboardInterrupt, shutting down...")
        serve.shutdown()
        sys.exit()
async def put_all_deployments(self, req: Request) -> Response:
    """Deploy the application described by the JSON body of ``req``.

    A body whose parsed schema has an ``import_path`` is deployed through the
    global client; any other body is turned into an ``Application`` and run
    without blocking. An empty ``Response`` is returned in both cases.
    """
    from ray import serve
    from ray.serve.application import Application
    from ray.serve.context import get_global_client
    from ray.serve.schema import ServeApplicationSchema

    config = ServeApplicationSchema.parse_obj(await req.json())

    if config.import_path is None:
        # TODO (shrekris-anyscale): Remove this conditional path
        app = Application.from_dict(await req.json())
        serve.run(app, _blocking=False)
        return Response()

    client = get_global_client(_override_controller_namespace="serve")
    client.deploy_app(config)
    return Response()
def test_nested_actors(serve_instance):
    """An actor created inside a deployment's constructor gets to run."""
    started_signal = SignalActor.remote()

    @ray.remote(num_cpus=1)
    class CustomActor:
        def __init__(self) -> None:
            # Report back to the test as soon as this actor is constructed.
            started_signal.send.remote()

    @serve.deployment
    class A:
        def __init__(self) -> None:
            self.a = CustomActor.remote()

    serve.run(A.bind())

    # The nested actor should start successfully.
    wait_ref = started_signal.wait.remote()
    ray.get(wait_ref, timeout=10)
def test_simple_class_with_class_method(serve_instance, use_build):
    """Serve a DAG that routes the input through ``Model.forward``."""
    with InputNode() as dag_input:
        model_node = Model.bind(2, ratio=0.3)
        forward_node = model_node.forward.bind(dag_input)
        serve_dag = DAGDriver.bind(forward_node, http_adapter=json_resolver)

    handle = serve.run(serve_dag)

    # The same input is checked through the handle and through HTTP.
    assert ray.get(handle.predict.remote(1)) == 0.6
    http_response = requests.post("http://127.0.0.1:8000/", json=1)
    assert http_response.json() == 0.6
def test_reconfigure_with_exception(serve_instance):
    """Running a deployment whose ``reconfigure`` rejects its ``user_config`` fails."""

    @serve.deployment
    class A:
        def __init__(self):
            self.config = "yoo"

        def reconfigure(self, config):
            # "hi" is the one value this deployment refuses to accept.
            if config == "hi":
                raise Exception("oops")
            self.config = config

        def __call__(self, *args):
            return self.config

    # NOTE(review): reconfigure raises a plain Exception; confirm that Serve
    # reports this to the caller as ValidationError rather than RuntimeError.
    with pytest.raises(ValidationError):
        rejected = A.options(user_config="hi")
        serve.run(rejected.bind())
def test_shutdown(ray_shutdown):
    """``serve.shutdown()`` removes the controller and proxy actors."""
    ray.init(num_cpus=16)
    serve.start(http_options={"port": 8003})

    @serve.deployment
    def f():
        pass

    serve.run(f.bind())

    serve_controller_name = serve.context._global_client._controller_name
    proxy_name = format_actor_name(
        SERVE_PROXY_NAME,
        serve_controller_name,
        get_all_node_ids()[0][0],
    )
    actor_names = [serve_controller_name, proxy_name]

    def actor_exists(actor_name):
        # ray.get_actor raises ValueError when the lookup fails.
        try:
            ray.get_actor(actor_name, namespace=SERVE_NAMESPACE)
        except ValueError:
            return False
        return True

    def check_alive():
        # A list (not a generator) so every actor is looked up on each poll.
        return all([actor_exists(actor_name) for actor_name in actor_names])

    def check_dead():
        return not any(actor_exists(actor_name) for actor_name in actor_names)

    wait_for_condition(check_alive)

    serve.shutdown()
    with pytest.raises(RayServeException):
        serve.list_deployments()

    wait_for_condition(check_dead)
def test_deploy_from_dict(self, serve_instance):
    """Round-trip a YAML config through ``Application`` dicts and deploy it."""
    config_dir = os.path.join(os.path.dirname(__file__), "test_config_files")
    config_file_name = os.path.join(config_dir, "two_deployments.yaml")

    with open(config_file_name, "r") as config_file:
        config_dict = yaml.safe_load(config_file)

    app = Application.from_dict(config_dict)
    app_dict = app.to_dict()

    # The dict produced by the application is compared with the loaded config.
    compare_specified_options(config_dict, app_dict)

    serve.run(app.from_dict(app_dict))

    shallow_response = requests.get("http://localhost:8000/shallow")
    assert shallow_response.text == "Hello shallow world!"
    one_response = requests.get("http://localhost:8000/one")
    assert one_response.text == "2"
def test_serve_namespace(shutdown_ray, detached, ray_namespace):
    """Test that Serve starts in SERVE_NAMESPACE regardless of driver namespace."""
    with ray.init(namespace=ray_namespace):

        @serve.deployment
        def f(*args):
            return "got f"

        serve.run(f.bind())

        named_actors = ray.util.list_named_actors(all_namespaces=True)
        assert len(named_actors) == 3
        # Every named actor, across all namespaces, must be in Serve's namespace.
        for actor in named_actors:
            assert actor["namespace"] == SERVE_NAMESPACE

        http_response = requests.get("http://localhost:8000/f")
        assert http_response.text == "got f"

        serve.shutdown()
def test_run_delete_old_deployments(serve_instance):
    """Check that serve.run() can remove all old deployments"""

    @serve.deployment(name="f", route_prefix="/test1")
    def f():
        return "got f"

    @serve.deployment(name="g", route_prefix="/test2")
    def g():
        return "got g"

    f_handle = serve.run(f.bind())
    assert ray.get(f_handle.remote()) == "got f"

    g_handle = serve.run(g.bind())
    assert ray.get(g_handle.remote()) == "got g"

    # After the second run only "g" may still be listed.
    assert "g" in serve.list_deployments()
    assert "f" not in serve.list_deployments()
async def put_all_deployments(self, req: Request) -> Response:
    """Deploy the application in the request body and delete all other deployments.

    The JSON body of ``req`` is turned into an ``Application`` and run without
    blocking. Every listed deployment whose name is not part of that
    application is then deleted through the global client.
    """
    from ray import serve
    from ray.serve.application import Application
    from ray.serve.context import get_global_client

    app = Application.from_dict(await req.json())
    serve.run(app, _blocking=False)

    new_names = {deployment.name for deployment in app.deployments.values()}

    all_deployments = serve.list_deployments()
    # Anything currently listed but absent from the new application is stale.
    names_to_delete = set(all_deployments.keys()) - new_names
    get_global_client().delete_deployments(names_to_delete)

    return Response()
def test_deploy_empty_version(self, serve_instance, use_list_api):
    """Re-running a version-less deployment fetched by name yields a new PID."""
    name = "test"

    @serve.deployment(name=name)
    def d(*args):
        return "1", os.getpid()

    first_handle = serve.run(d.bind())
    val1, pid1 = ray.get(first_handle.remote())
    assert val1 == "1"

    # Drop the local object and fetch the deployment back by name.
    del d
    fetched = self.get_deployment(name, use_list_api)

    second_handle = serve.run(fetched.bind())
    val2, pid2 = ray.get(second_handle.remote())
    assert val2 == "1"
    assert pid2 != pid1
def test_class_factory(serve_instance):
    """Serve a deployment built from the class returned by ``class_factory()``."""
    with InputNode():
        produced_cls = class_factory()
        instance = serve.deployment(produced_cls).bind(3)
        serve_dag = NoargDriver.bind(instance.get.bind())

    handle = serve.run(serve_dag)

    # The handle returns the int; the HTTP body carries it as text.
    assert ray.get(handle.remote()) == 3
    http_response = requests.get("http://127.0.0.1:8000/")
    assert http_response.text == "3"
def test_single_node_driver_sucess(serve_instance, use_build):
    """Chain two ``Adder.forward`` nodes behind a ``DAGDriver`` and query it."""
    add_one = Adder.bind(1)
    add_two = Adder.bind(2)

    with InputNode() as input_node:
        after_first = add_one.forward.bind(input_node)
        out = add_two.forward.bind(after_first)

    driver = DAGDriver.bind(out, http_adapter=json_resolver)
    handle = serve.run(driver)

    # The same input is checked through the handle and through HTTP.
    assert ray.get(handle.predict.remote(39)) == 42
    http_response = requests.post("http://127.0.0.1:8000/", json=39)
    assert http_response.json() == 42
def test_shared_deployment_handle(serve_instance, use_build):
    """Pass the same ``Model`` node to ``Combine`` twice and serve the result."""
    with InputNode() as dag_input:
        shared_model = Model.bind(2)
        # One bound node is used for both the positional and the m2 argument.
        combine_node = Combine.bind(shared_model, m2=shared_model)
        combine_output = combine_node.__call__.bind(dag_input)
        serve_dag = DAGDriver.bind(combine_output, http_adapter=json_resolver)

    handle = serve.run(serve_dag)

    assert ray.get(handle.predict.remote(1)) == 4
    http_response = requests.post("http://127.0.0.1:8000/", json=1)
    assert http_response.json() == 4
def test_scale_replicas(self, serve_instance, use_list_api):
    """Re-running with ``num_replicas=2`` makes calls land on two distinct PIDs."""
    name = "test"

    @serve.deployment(name=name)
    def d(*args):
        return os.getpid()

    def check_num_replicas(num):
        # Count the distinct PIDs that answer 50 calls.
        handle = self.get_deployment(name, use_list_api).get_handle()
        pids = ray.get([handle.remote() for _ in range(50)])
        assert len(set(pids)) == num

    serve.run(d.bind())
    check_num_replicas(1)

    # Drop the local object and fetch the deployment back by name.
    del d
    fetched = self.get_deployment(name, use_list_api)

    serve.run(fetched.options(num_replicas=2).bind())
    check_num_replicas(2)
def test_http_proxy_request_cancellation(serve_instance):
    """HTTP requests that time out while queued must not reach the replica."""
    # https://github.com/ray-project/ray/issues/21425
    signal = SignalActor.remote()

    @serve.deployment(max_concurrent_queries=1)
    class A:
        def __init__(self) -> None:
            self.counter = 0

        async def __call__(self):
            # Record this call's sequence number before blocking on the signal.
            self.counter += 1
            ret_val = self.counter
            await signal.wait.remote()
            return ret_val

    serve.run(A.bind())

    url = "http://127.0.0.1:8000/A"
    with ThreadPoolExecutor() as pool:
        # Send the first request, it should block for the result
        first_blocking_fut = pool.submit(requests.get, url, timeout=100)
        time.sleep(1)
        assert not first_blocking_fut.done()

        # Send more requests, these should be queued in handle.
        # But because first request is hanging and these have low timeout.
        # They should all disconnect from http connection.
        # These requests should never reach the replica.
        rest_blocking_futs = []
        for _ in range(3):
            rest_blocking_futs.append(pool.submit(requests.get, url, timeout=0.5))
        time.sleep(1)
        assert all(fut.done() for fut in rest_blocking_futs)

        # Now unblock the first request.
        ray.get(signal.send.remote())
        assert first_blocking_fut.result().text == "1"

    # Sending another request to verify that only one request has been
    # processed so far.
    assert requests.get(url).text == "2"
def test_chained_function(serve_instance, use_build):
    """A function DAG is rejected without a driver and served with ``DAGDriver``."""

    @serve.deployment
    def func_1(input):
        return input

    @serve.deployment
    def func_2(input):
        return input * 2

    with InputNode() as dag_input:
        output_1 = func_1.bind(dag_input)
        output_2 = func_2.bind(dag_input)
        serve_dag = combine.bind(output_1, output_2)

    # Running the bare DAG must be refused with a pointer to the driver.
    with pytest.raises(ValueError, match="Please provide a driver class"):
        serve.run(serve_dag)

    driver_node = DAGDriver.bind(serve_dag, input_schema=json_resolver)
    handle = serve.run(driver_node)

    assert ray.get(handle.predict.remote(2)) == 6  # 2 + 2*2
    http_response = requests.post("http://127.0.0.1:8000/", json=2)
    assert http_response.json() == 6
def test_run_get_ingress_app(serve_instance):
    """Check that serve.run() with an app returns the ingress."""

    @serve.deployment(route_prefix=None)
    def f():
        return "got f"

    @serve.deployment(route_prefix="/g")
    def g():
        return "got g"

    # Only g has a route prefix here, and the returned handle reaches g.
    single_route_app = Application([f, g])
    g_handle = serve.run(single_route_app)
    assert ray.get(g_handle.remote()) == "got g"

    serve_instance.delete_deployments(["f", "g"])

    # With a route prefix on both deployments, no handle is returned.
    routed_f = f.options(route_prefix="/f")
    no_ingress_app = Application([routed_f, g])
    assert serve.run(no_ingress_app) is None