def test_autoscaling_0_replica(serve_instance):
    """A graph whose deployments can scale to zero still serves a request.

    Both the model deployment and the DAGDriver are configured with
    min_replicas=0; a single predict call must cold-start replicas and
    return the correct result.
    """
    scaling_cfg = {
        "metrics_interval_s": 0.1,
        "min_replicas": 0,
        "max_replicas": 2,
        "look_back_period_s": 0.4,
        "downscale_delay_s": 0,
        "upscale_delay_s": 0,
    }

    @serve.deployment(
        autoscaling_config=scaling_cfg,
    )
    class Model:
        def __init__(self, weight):
            self.weight = weight

        def forward(self, input):
            return input + self.weight

    # Build a one-node graph: user input -> Model.forward.
    with InputNode() as user_input:
        model_node = Model.bind(1)
        forward_out = model_node.forward.bind(user_input)
        serve_dag = DAGDriver.options(
            route_prefix="/my-dag",
            autoscaling_config=scaling_cfg,
        ).bind(forward_out)

    graph_handle = serve.run(serve_dag)
    # 1 (input) + 1 (weight) == 2 proves a replica spun up from zero.
    assert ray.get(graph_handle.predict.remote(1)) == 2
def test_deploy_nullify_route_prefix(serve_instance, prefixes):
    """Redeploying with route_prefix=None removes the HTTP route.

    For each prefix in the parametrized sequence, the graph is redeployed;
    the HTTP endpoint must 404 when the prefix is None and serve the
    response body otherwise, while the Python handle keeps working in
    both cases.
    """

    @serve.deployment
    def f(*args):
        return "got me"

    for prefix in prefixes:
        graph = DAGDriver.options(route_prefix=prefix).bind(f.bind())
        handle = serve.run(graph)

        response = requests.get("http://localhost:8000/f")
        if prefix is None:
            # No HTTP route should be registered.
            assert response.status_code == 404
        else:
            # JSON-encoded string body from the deployment.
            assert response.text == '"got me"'

        # The handle path is independent of HTTP routing.
        assert ray.get(handle.predict.remote()) == "got me"
def test_autoscaling_with_chain_nodes(min_replicas, serve_instance):
    """Autoscaling across a chained graph: driver -> Model1 -> Model2.

    Model1 blocks on a signal so queued requests force upscaling through
    the chain; releasing the signal lets traffic reach Model2, after
    which every deployment must scale back down to ``min_replicas``.
    """
    signal = SignalActor.remote()
    scaling_cfg = {
        "metrics_interval_s": 0.1,
        "min_replicas": min_replicas,
        "max_replicas": 2,
        "look_back_period_s": 0.4,
        "downscale_delay_s": 30,
        "upscale_delay_s": 0,
    }

    @serve.deployment(
        autoscaling_config=scaling_cfg,
        _graceful_shutdown_timeout_s=1,
    )
    class Model1:
        def __init__(self, weight):
            self.weight = weight

        def forward(self, input):
            # Block until the test releases the signal, keeping requests
            # queued so the autoscaler sees sustained load.
            ray.get(signal.wait.remote())
            return input + self.weight

    @serve.deployment(
        autoscaling_config=scaling_cfg,
        _graceful_shutdown_timeout_s=1,
    )
    class Model2:
        def __init__(self, weight):
            self.weight = weight

        def forward(self, input):
            return input + self.weight

    # Chain the two models behind the driver.
    with InputNode() as user_input:
        m1 = Model1.bind(0)
        m2 = Model2.bind(1)
        m1_out = m1.forward.bind(user_input)
        m2_out = m2.forward.bind(m1_out)
        serve_dag = DAGDriver.options(
            route_prefix="/my-dag",
            autoscaling_config=scaling_cfg,
            _graceful_shutdown_timeout_s=1,
        ).bind(m2_out)

    dag_handle = serve.run(serve_dag)
    controller = serve_instance._controller

    def replicas(deployment_name):
        # Current count of RUNNING replicas for a deployment.
        return get_num_running_replicas(controller, deployment_name)

    # upscaling: fire request batches and wait for replicas to appear.
    for _ in range(10):
        dag_handle.predict.remote(0)
    wait_for_condition(lambda: replicas(DAGDriver.name) >= 1)
    for _ in range(10):
        dag_handle.predict.remote(0)
    wait_for_condition(lambda: replicas(DAGDriver.name) >= 2)
    wait_for_condition(lambda: replicas(Model1.name) >= 1, timeout=40)
    wait_for_condition(lambda: replicas(Model1.name) >= 2, timeout=40)

    # Unblock Model1 so requests flow through to Model2.
    signal.send.remote()
    wait_for_condition(lambda: replicas(Model2.name) >= 1, timeout=40)

    # downscaling: with traffic drained, everything returns to min_replicas.
    wait_for_condition(
        lambda: replicas(DAGDriver.name) == min_replicas,
        timeout=60,
    )
    wait_for_condition(
        lambda: replicas(Model1.name) == min_replicas,
        timeout=60,
    )
    wait_for_condition(
        lambda: replicas(Model2.name) == min_replicas,
        timeout=60,
    )
def test_autoscaling_with_ensemble_nodes(serve_instance):
    """Autoscaling across an ensemble graph: two Models fan into combine.

    All deployments start at 0 replicas. ``combine`` blocks on a signal
    so queued requests force every node in the ensemble to upscale;
    releasing the signal drains traffic and everything must scale back
    to zero.
    """
    signal = SignalActor.remote()
    autoscaling_config = {
        "metrics_interval_s": 0.1,
        "min_replicas": 0,
        "max_replicas": 2,
        "look_back_period_s": 0.4,
        "downscale_delay_s": 30,
        "upscale_delay_s": 0,
    }

    # NOTE: use the public `autoscaling_config` keyword for consistency
    # with the other tests in this file (the `_autoscaling_config` spelling
    # was a leftover of the old private parameter name).
    @serve.deployment(
        autoscaling_config=autoscaling_config,
        _graceful_shutdown_timeout_s=1,
    )
    class Model:
        def __init__(self, weight):
            self.weight = weight

        def forward(self, input):
            return input + self.weight

    @serve.deployment(
        autoscaling_config=autoscaling_config,
        _graceful_shutdown_timeout_s=1,
    )
    def combine(value_refs):
        # Block until the test releases the signal so requests queue up
        # and the autoscaler sees sustained load.
        ray.get(signal.wait.remote())
        return sum(ray.get(value_refs))

    # Ensemble: the same input feeds two Model instances whose outputs
    # are summed by combine.
    with InputNode() as user_input:
        model1 = Model.bind(0)
        model2 = Model.bind(1)
        output1 = model1.forward.bind(user_input)
        output2 = model2.forward.bind(user_input)
        output = combine.bind([output1, output2])
        serve_dag = DAGDriver.options(
            route_prefix="/my-dag",
            autoscaling_config=autoscaling_config,
            _graceful_shutdown_timeout_s=1,
        ).bind(output)

    dag_handle = serve.run(serve_dag)
    controller = serve_instance._controller

    # min_replicas=0: nothing should be running before traffic arrives.
    assert get_num_running_replicas(controller, "Model") == 0
    assert get_num_running_replicas(controller, "Model_1") == 0
    assert get_num_running_replicas(controller, "combine") == 0

    # upscaling
    [dag_handle.predict.remote(0) for _ in range(10)]
    wait_for_condition(
        lambda: get_num_running_replicas(controller, DAGDriver.name) >= 1)
    wait_for_condition(
        lambda: get_num_running_replicas(controller, "Model") >= 1, timeout=40)
    wait_for_condition(
        lambda: get_num_running_replicas(controller, "Model_1") >= 1,
        timeout=40)
    wait_for_condition(
        lambda: get_num_running_replicas(controller, "combine") >= 2,
        timeout=40)

    # Release the signal so queued requests complete and traffic drains.
    signal.send.remote()

    # downscaling: everything returns to zero replicas.
    wait_for_condition(
        lambda: get_num_running_replicas(controller, DAGDriver.name) == 0,
        timeout=60,
    )
    wait_for_condition(
        lambda: get_num_running_replicas(controller, "Model") == 0,
        timeout=60,
    )
    wait_for_condition(
        lambda: get_num_running_replicas(controller, "Model_1") == 0,
        timeout=60,
    )
    wait_for_condition(
        lambda: get_num_running_replicas(controller, "combine") == 0,
        timeout=60)
def forward(self, input: ModelInputData): return input.model_input1 + len(input.model_input2) + self.weight @serve.deployment def combine(value_refs): return sum(ray.get(value_refs)) with InputNode() as user_input: model1 = Model.bind(0) model2 = Model.bind(1) output1 = model1.forward.bind(user_input) output2 = model2.forward.bind(user_input) dag = combine.bind([output1, output2]) serve_dag = DAGDriver.options(route_prefix="/my-dag").bind( dag, http_adapter=ModelInputData) dag_handle = serve.run(serve_dag) print( ray.get( dag_handle.predict.remote( ModelInputData(model_input1=1, model_input2="test")))) print( requests.post("http://127.0.0.1:8000/my-dag", json={ "model_input1": 1, "model_input2": "test" }).text)