def test_deployment_properties():
    class DClass:
        pass

    D = serve.deployment(
        name="name",
        init_args=("hello", 123),
        version="version",
        num_replicas=2,
        user_config="hi",
        max_concurrent_queries=100,
        route_prefix="/hello",
        ray_actor_options={"num_cpus": 2},
    )(DClass)
    assert D.name == "name"
    assert D.init_args == ("hello", 123)
    assert D.version == "version"
    assert D.num_replicas == 2
    assert D.user_config == "hi"
    assert D.max_concurrent_queries == 100
    assert D.route_prefix == "/hello"
    assert D.ray_actor_options == {"num_cpus": 2}

    D = serve.deployment(
        version=None,
        route_prefix=None,
    )(DClass)
    assert D.version is None
    assert D.route_prefix is None
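# For reference, a minimal sketch (not part of the test above) of the
# decorator form that serve.deployment(...)(DClass) is equivalent to.
# "MyDeployment" is a hypothetical name; the options mirror the ones
# asserted on in the test:
#
#   @serve.deployment(name="name", num_replicas=2, route_prefix="/hello")
#   class MyDeployment:
#       pass
#
# Calling serve.deployment(...) directly, as the test does, constructs the
# same Deployment object without the decorator syntax.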
def test_import_path_deployment(self, serve_instance):
    test_env_uri = (
        "https://github.com/shrekris-anyscale/test_deploy_group/archive/HEAD.zip"
    )
    test_module_uri = (
        "https://github.com/shrekris-anyscale/test_module/archive/HEAD.zip"
    )
    ray_actor_options = {
        "runtime_env": {"py_modules": [test_env_uri, test_module_uri]}
    }

    shallow = serve.deployment(
        name="shallow",
        ray_actor_options=ray_actor_options,
    )("test_env.shallow_import.ShallowClass")

    deep = serve.deployment(
        name="deep",
        ray_actor_options=ray_actor_options,
    )("test_env.subdir1.subdir2.deep_import.DeepClass")

    one = serve.deployment(
        name="one",
        ray_actor_options=ray_actor_options,
    )("test_module.test.one")

    deployments = [shallow, deep, one]
    responses = ["Hello shallow world!", "Hello deep world!", 2]

    self.deploy_and_check_responses(deployments, responses)
def test_simple_adder(serve_instance):
    serve.deployment(name="Adder")(ModelWrapper).deploy(
        predictor_cls=AdderPredictor,
        checkpoint=AdderCheckpoint.from_dict({"increment": 2}),
    )
    resp = ray.get(send_request.remote(json={"array": [40]}))
    assert resp == {"value": [42], "batch_size": 1}
def test_batching(serve_instance):
    serve.deployment(name="Adder")(ModelWrapper).deploy(
        predictor_cls=AdderPredictor,
        checkpoint=AdderCheckpoint.from_dict({"increment": 2}),
        batching_params=dict(max_batch_size=2, batch_wait_timeout_s=1000),
    )
    refs = [send_request.remote(json={"array": [40]}) for _ in range(2)]
    for resp in ray.get(refs):
        assert resp == {"value": [42], "batch_size": 2}
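# Note on the batching test above: with max_batch_size=2 and a deliberately
# long batch_wait_timeout_s, the two concurrent requests are held until they
# can be served together, so each response reports batch_size=2 (contrast
# batch_size=1 in the unbatched test_simple_adder).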
def test_get_import_path_nested_actor(self):
    d = serve.deployment(name="actor")(DecoratedActor)

    # CI may change the parent path, so check only that the suffix matches.
    assert get_deployment_import_path(d).endswith(
        "ray.serve.tests.test_util.DecoratedActor"
    )
def test_import_path_deployment_decorated(self, serve_instance):
    func = serve.deployment(
        name="decorated_func",
    )("ray.serve.tests.test_application.decorated_func")

    clss = serve.deployment(
        name="decorated_clss",
    )("ray.serve.tests.test_application.DecoratedClass")

    deployments = [func, clss]
    responses = ["got decorated func", "got decorated class"]

    self.deploy_and_check_responses(deployments, responses)

    # Check that non-default decorated values were overwritten
    assert serve.get_deployment("decorated_func").max_concurrent_queries != 17
    assert serve.get_deployment("decorated_clss").max_concurrent_queries != 17
def test_class_factory(serve_instance):
    with InputNode() as _:
        instance = serve.deployment(class_factory()).bind(3)
        output = instance.get.bind()
        serve_dag = NoargDriver.bind(output)

    handle = serve.run(serve_dag)
    assert ray.get(handle.remote()) == 3
    assert requests.get("http://127.0.0.1:8000/").text == "3"
def make_ingress_deployment(
    name: str, serve_dag_root_json: str, input_schema_path: str
):
    return serve.deployment(ModelWrapper).options(
        name=name,
        init_kwargs={
            "predictor_cls": _get_import_path(PipelineIngressModel),
            "checkpoint": {
                "checkpoint_cls": _get_import_path(PipelineIngressCheckpoint),
                "uri": serve_dag_root_json,
            },
            "input_schema": input_schema_path,
            "batching_params": False,
        },
    )
def test_serve_pipeline_class_factory_plot():
    with InputNode() as _:
        instance = serve.deployment(class_factory()).bind(3)
        output = instance.get.bind()
        serve_dag = NoargDriver.bind(output)
    serve_dag = ray_dag_to_serve_dag(serve_dag)

    with tempfile.TemporaryDirectory() as tmpdir:
        to_file = os.path.join(tmpdir, "tmp.png")
        ray.experimental.dag.plot(serve_dag, to_file)
        assert os.path.isfile(to_file)

    graph = ray.experimental.dag.vis_utils.dag_to_dot(serve_dag)
    to_string = graph.to_string()
    assert "MyInlineClass -> get" in to_string
    assert "get -> NoargDriver" in to_string
def create_deployment(
    address: str,
    namespace: str,
    runtime_env_json: str,
    deployment: str,
    options_json: str,
):
    ray.init(
        address=address,
        namespace=namespace,
        runtime_env=json.loads(runtime_env_json),
    )
    deployment_cls = import_attr(deployment)
    if not isinstance(deployment_cls, Deployment):
        deployment_cls = serve.deployment(deployment_cls)
    options = json.loads(options_json)
    deployment_cls.options(**options).deploy()
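# A minimal usage sketch for create_deployment above; the import path and
# option values are hypothetical, not taken from the source:
#
#   create_deployment(
#       address="auto",
#       namespace="serve",
#       runtime_env_json="{}",
#       deployment="my_module.MyModel",
#       options_json='{"num_replicas": 2, "route_prefix": "/model"}',
#   )
#
# If my_module.MyModel is a plain class or function rather than a Deployment,
# it is wrapped with serve.deployment() before the options are applied.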
def init(
    backend,
    num_replicas: int,
    max_concurrent_queries: int,
    resources,
    counter,
    conda_env,
):
    backend_path = backend.split(".")[1:-2]
    route = "/".join(backend_path).lower()
    backend_module, backend_class = backend.rsplit(".", 1)
    backend_module = importlib.import_module(backend_module)
    backend_class = getattr(backend_module, backend_class)

    ray_actor_options = {}
    if "cpu" in resources:
        ray_actor_options["num_cpus"] = resources["cpu"]
    if "gpu" in resources:
        ray_actor_options["num_gpus"] = resources["gpu"]
    if conda_env:
        conda_env = ray.serve.CondaEnv(conda_env)

    deployment = serve.deployment(backend_class)
    deployment.options(
        name=route,
        route_prefix="/" + route,
        ray_actor_options=ray_actor_options,
        num_replicas=num_replicas,
        max_concurrent_queries=max_concurrent_queries,
    ).deploy(
        counter,
        route,
        max_concurrent_queries,
    )
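# Worked example for the route derivation in init() above (the dotted path
# is hypothetical): for backend = "proj.models.adder.AdderBackend",
# backend.split(".") is ["proj", "models", "adder", "AdderBackend"], so the
# [1:-2] slice keeps ["models"] and route becomes "models". rsplit(".", 1)
# then splits the same string into module "proj.models.adder" and class
# "AdderBackend" for the importlib lookup.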
def create_deployment(deployment: str, options_json: str):
    deployment_cls = import_attr(deployment)
    if not isinstance(deployment_cls, Deployment):
        deployment_cls = serve.deployment(deployment_cls)
    options = json.loads(options_json)
    deployment_cls.options(**options).deploy()
def run(
    config_or_import_path: str,
    args_and_kwargs: Tuple[str],
    runtime_env: str,
    runtime_env_json: str,
    working_dir: str,
    address: str,
):
    # Check if path provided is for config or import
    is_config = pathlib.Path(config_or_import_path).is_file()
    args, kwargs = _process_args_and_kwargs(args_and_kwargs)

    # Calculate deployments' runtime env updates requested via args
    runtime_env_updates = parse_runtime_env_args(
        runtime_env=runtime_env,
        runtime_env_json=runtime_env_json,
        working_dir=working_dir,
    )

    # Create ray.init()'s runtime_env
    if "working_dir" in runtime_env_updates:
        ray_runtime_env = {"working_dir": runtime_env_updates.pop("working_dir")}
    else:
        ray_runtime_env = {}

    if is_config:
        config_path = config_or_import_path
        # Delay serve.start() to catch invalid inputs without waiting
        if len(args) + len(kwargs) > 0:
            raise ValueError(
                "ARGS_AND_KWARGS cannot be defined for a "
                "config file deployment. Please specify the "
                "init_args and init_kwargs inside the config file."
            )

        cli_logger.print(f"Deploying application in config file at {config_path}.")
        with open(config_path, "r") as config_file:
            app = Application.from_yaml(config_file)
    else:
        import_path = config_or_import_path
        if "." not in import_path:
            raise ValueError(
                "Import paths must be of the form "
                '"module.submodule_1...submodule_n.MyClassOrFunction".'
            )

        cli_logger.print(f'Deploying function or class imported from "{import_path}".')
        deployment_name = import_path[import_path.rfind(".") + 1 :]
        deployment = serve.deployment(name=deployment_name)(import_path)
        app = Application([deployment.options(init_args=args, init_kwargs=kwargs)])

    ray.init(address=address, namespace="serve", runtime_env=ray_runtime_env)

    for deployment in app:
        _configure_runtime_env(deployment, runtime_env_updates)
    app.run(logger=cli_logger)
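# run() above dispatches on whether its first argument is an existing file.
# Hypothetical invocations, with illustrative argument values that are not
# from the source:
#
#   run("serve_config.yaml", (), "", "", "", "auto")   # config-file mode
#   run("my_module.MyClass", (), "", "", "", "auto")   # import-path mode
#
# In import-path mode the deployment name is the final dotted component
# ("MyClass" here), and any positional/keyword args become init_args and
# init_kwargs of the deployment.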
def test_redeploy_multiple_replicas(serve_instance, use_handle):
    # Tests that redeploying a deployment with multiple replicas performs
    # a rolling update.
    client = serve_instance
    name = "test"

    @ray.remote(num_cpus=0)
    def call(block=False):
        if use_handle:
            handle = serve.get_handle(name, missing_ok=True)
            ret = ray.get(handle.remote(block=str(block)))
        else:
            ret = requests.get(
                f"http://localhost:8000/{name}", params={"block": block}
            ).text

        return ret.split("|")[0], ret.split("|")[1]

    signal_name = f"signal-{get_random_letters()}"
    signal = SignalActor.options(name=signal_name).remote()

    async def v1(request):
        if request.query_params["block"] == "True":
            signal = ray.get_actor(signal_name)
            await signal.wait.remote()
        return f"1|{os.getpid()}"

    def v2(*args):
        return f"2|{os.getpid()}"

    def make_nonblocking_calls(expected, expect_blocking=False):
        # Returns dict[val, set(pid)].
        blocking = []
        responses = defaultdict(set)
        start = time.time()
        while time.time() - start < 30:
            refs = [call.remote(block=False) for _ in range(10)]
            ready, not_ready = ray.wait(refs, timeout=0.5)
            for ref in ready:
                val, pid = ray.get(ref)
                responses[val].add(pid)
            blocking.extend(not_ready)

            if all(len(responses[val]) == num for val, num in expected.items()) and (
                expect_blocking is False or len(blocking) > 0
            ):
                break
        else:
            assert False, f"Timed out, responses: {responses}."

        return responses, blocking

    v1 = serve.deployment(name=name, version="1", num_replicas=2)(v1)
    v1.deploy()
    responses1, _ = make_nonblocking_calls({"1": 2})
    pids1 = responses1["1"]

    # ref2 will block a single replica until the signal is sent. Check that
    # some requests are now blocking.
    ref2 = call.remote(block=True)
    responses2, blocking2 = make_nonblocking_calls({"1": 1}, expect_blocking=True)
    assert list(responses2["1"])[0] in pids1

    # Redeploy new version. Since there is one replica blocking, only one new
    # replica should be started up.
    v2 = v1.options(backend_def=v2, version="2")
    goal_ref = v2.deploy(_blocking=False)
    assert not client._wait_for_goal(goal_ref, timeout=0.1)
    responses3, blocking3 = make_nonblocking_calls({"1": 1}, expect_blocking=True)

    # Signal the original call to exit.
    ray.get(signal.send.remote())
    val, pid = ray.get(ref2)
    assert val == "1"
    assert pid in responses1["1"]

    # Now the goal and requests to the new version should complete.
    # We should have two running replicas of the new version.
    assert client._wait_for_goal(goal_ref)
    make_nonblocking_calls({"2": 2})