def test_prefix_match(mock_longest_prefix_router):
    """Exercise prefix matching against nested routes.

    Registers "/test/test2", "/test" and "/" and checks, for a series of
    request paths, which (route, endpoint) pair ``match_route`` reports.
    """
    router = mock_longest_prefix_router
    router.update_routes({
        "endpoint1": EndpointInfo({"POST"}, route="/test/test2"),
        "endpoint2": EndpointInfo({"POST"}, route="/test"),
        "endpoint3": EndpointInfo({"POST"}, route="/"),
    })

    # (request path, expected matched route, expected endpoint)
    expectations = [
        ("/test/test2/subpath", "/test/test2", "endpoint1"),
        ("/test/test2/", "/test/test2", "endpoint1"),
        ("/test/test2", "/test/test2", "endpoint1"),
        ("/test/subpath", "/test", "endpoint2"),
        ("/test/", "/test", "endpoint2"),
        ("/test", "/test", "endpoint2"),
        ("/test2", "/", "endpoint3"),
        ("/", "/", "endpoint3"),
    ]
    for path, expected_route, expected_endpoint in expectations:
        matched_route, matched_endpoint = router.match_route(path, "POST")
        assert matched_route == expected_route
        assert matched_endpoint == expected_endpoint
def test_listen_for_change_java(serve_instance):
    """Check the serialized results of ``listen_for_change_java``.

    Three keys are published on a ``LongPollHost`` actor through
    ``notify_changed`` and then polled with a serialized
    ``LongPollRequest``: a plain key, the route table, and the running
    replicas of one deployment.
    """
    host = ray.remote(LongPollHost).remote()

    def poll(snapshot_ids):
        # Send a serialized request carrying the given snapshot ids and
        # parse the returned bytes into a LongPollResult.
        request = LongPollRequest(keys_to_snapshot_ids=snapshot_ids)
        pending_ref = host.listen_for_change_java.remote(
            request.SerializeToString())
        raw_result: bytes = ray.get(pending_ref)
        return LongPollResult.FromString(raw_result)

    # 1. Plain key whose snapshot is decoded as text.
    ray.get(host.notify_changed.remote("key_1", 999))
    plain_result = poll({"key_1": -1})
    assert set(plain_result.updated_objects.keys()) == {"key_1"}
    plain_snapshot = plain_result.updated_objects["key_1"].object_snapshot
    assert plain_snapshot.decode() == "999"

    # 2. Route table, parsed back as an EndpointSet.
    endpoints: Dict[EndpointTag, EndpointInfo] = {
        "deployment_name": EndpointInfo(route="/test/xlang/poll"),
        "deployment_name1": EndpointInfo(route="/test/xlang/poll1"),
    }
    ray.get(
        host.notify_changed.remote(LongPollNamespace.ROUTE_TABLE, endpoints))
    route_result = poll({"ROUTE_TABLE": -1})
    assert set(route_result.updated_objects.keys()) == {"ROUTE_TABLE"}
    endpoint_set = EndpointSet.FromString(
        route_result.updated_objects["ROUTE_TABLE"].object_snapshot)
    assert set(endpoint_set.endpoints.keys()) == {
        "deployment_name", "deployment_name1"
    }
    assert (endpoint_set.endpoints["deployment_name"].route ==
            "/test/xlang/poll")

    # 3. Running replicas, parsed back as an ActorNameList.
    replicas = [
        RunningReplicaInfo(
            deployment_name="deployment_name",
            replica_tag=str(index),
            actor_handle=host,
            max_concurrent_queries=1,
        ) for index in range(2)
    ]
    replicas_key = (LongPollNamespace.RUNNING_REPLICAS, "deployment_name")
    ray.get(host.notify_changed.remote(replicas_key, replicas))
    replica_result = poll({"(RUNNING_REPLICAS,deployment_name)": -1})
    replica_name_list = ActorNameList.FromString(
        replica_result.updated_objects["(RUNNING_REPLICAS,deployment_name)"]
        .object_snapshot)
    assert replica_name_list.names == [
        "SERVE_REPLICA::0", "SERVE_REPLICA::1"
    ]
def test_update_routes(mock_longest_prefix_router):
    """Check that a second ``update_routes`` call replaces the first table.

    Endpoints are registered without an explicit route; the assertions
    expect each one to be matched at "/<endpoint name>".
    """
    router = mock_longest_prefix_router

    # First table: only "endpoint" is known.
    router.update_routes({"endpoint": EndpointInfo({"POST"})})
    matched_route, matched_endpoint = router.match_route("/endpoint", "POST")
    assert matched_route == "/endpoint"
    assert matched_endpoint == "endpoint"

    # Second table: "endpoint" is gone, "endpoint2" takes its place.
    router.update_routes({"endpoint2": EndpointInfo({"POST"})})
    matched_route, matched_endpoint = router.match_route("/endpoint", "POST")
    assert matched_route is None
    assert matched_endpoint is None

    matched_route, matched_endpoint = router.match_route("/endpoint2", "POST")
    assert matched_route == "/endpoint2"
    assert matched_endpoint == "endpoint2"
def test_trailing_slash(mock_longest_prefix_router):
    """Check how a trailing slash affects matching in both directions.

    A route registered as "/test" is expected to match the path "/test/",
    while a route registered as "/test/" is expected not to match "/test".
    """
    router = mock_longest_prefix_router

    # Route without trailing slash, request path with one.
    router.update_routes({
        "endpoint": EndpointInfo({"POST"}, route="/test"),
    })
    matched_route, matched_endpoint = router.match_route("/test/", "POST")
    assert matched_route == "/test"
    assert matched_endpoint == "endpoint"

    # Route with trailing slash, request path without one.
    router.update_routes({
        "endpoint": EndpointInfo({"POST"}, route="/test/"),
    })
    matched_route, matched_endpoint = router.match_route("/test", "POST")
    assert matched_route is None
    assert matched_endpoint is None
async def deploy(
        self,
        name: str,
        backend_config: BackendConfig,
        replica_config: ReplicaConfig,
        python_methods: List[str],
        version: Optional[str],
        route_prefix: Optional[str]) -> Tuple[Optional[GoalId], bool]:
    """Deploy backend ``name`` and update the endpoint of the same name.

    Args:
        name: Name used for both the backend and the endpoint.
        backend_config: Stored on the ``BackendInfo``.
        replica_config: Supplies ``serialized_backend_def`` for the replica
            actor and is stored on the ``BackendInfo``.
        python_methods: Forwarded to ``EndpointInfo``.
        version: Stored on the ``BackendInfo``.
        route_prefix: Route for the endpoint; when not None it must start
            with "/".

    Returns:
        The ``(goal_id, updating)`` pair produced by
        ``self.backend_state.deploy_backend``.
    """
    assert route_prefix is None or route_prefix.startswith("/")

    async with self.write_lock:
        replica_actor_def = ray.remote(
            create_backend_replica(name,
                                   replica_config.serialized_backend_def))
        backend_info = BackendInfo(
            actor_def=replica_actor_def,
            version=version,
            backend_config=backend_config,
            replica_config=replica_config)
        goal_id, updating = self.backend_state.deploy_backend(
            name, backend_info)

        endpoint_info = EndpointInfo(
            ALL_HTTP_METHODS,
            route=route_prefix,
            python_methods=python_methods,
            legacy=False)
        # The endpoint sends all of its traffic to the backend of the
        # same name.
        traffic_policy = TrafficPolicy({name: 1.0})
        self.endpoint_state.update_endpoint(name, endpoint_info,
                                            traffic_policy)
        return goal_id, updating
async def create_endpoint(
        self,
        endpoint: str,
        traffic_dict: Dict[str, float],
        route: Optional[str],
        methods: Set[str],
) -> None:
    """Register a new endpoint with the given route and HTTP methods.

    A route of None makes this a "headless" endpoint: it is not exposed
    over HTTP and can only be reached via a handle.
    """
    async with self.write_lock:
        self._validate_traffic_dict(traffic_dict)

        message = (
            "Registering route '{}' to endpoint '{}' with methods '{}'.".
            format(route, endpoint, methods))
        logger.info(message)

        endpoint_info = EndpointInfo(methods, route=route)
        traffic_policy = TrafficPolicy(traffic_dict)
        self.endpoint_state.create_endpoint(endpoint, endpoint_info,
                                            traffic_policy)

    # NOTE(review): this wait is placed outside the write-lock section;
    # confirm that matches the intended locking scope.
    # TODO(simon): Use GoalID mechanism for this so client can check for
    # goal id and http_state complete the goal id.
    await self.http_state.ensure_http_route_exists(endpoint, timeout_s=30)
def deploy(
        self,
        name: str,
        deployment_config_proto_bytes: bytes,
        replica_config: ReplicaConfig,
        version: Optional[str],
        prev_version: Optional[str],
        route_prefix: Optional[str],
        deployer_job_id: "ray._raylet.JobID",
) -> Tuple[Optional[GoalId], bool]:
    """Deploy ``name`` from a serialized deployment config.

    Args:
        name: Deployment name, also used as the endpoint name.
        deployment_config_proto_bytes: Parsed with
            ``DeploymentConfig.from_proto_bytes``.
        replica_config: Supplies ``serialized_deployment_def`` and is
            stored on the ``DeploymentInfo``.
        version: Version recorded for this deployment.
        prev_version: When not None, the existing deployment must have
            exactly this version.
        route_prefix: When not None it must start with "/" and the
            endpoint is updated to use it.
        deployer_job_id: Stored on the ``DeploymentInfo``.

    Returns:
        The ``(goal_id, updating)`` pair produced by
        ``self.deployment_state_manager.deploy``.

    Raises:
        ValueError: ``prev_version`` is given but there is no existing
            versioned deployment, or its version differs.
    """
    assert route_prefix is None or route_prefix.startswith("/")

    deployment_config = DeploymentConfig.from_proto_bytes(
        deployment_config_proto_bytes)

    if prev_version is not None:
        current = self.deployment_state_manager.get_deployment(name)
        current_version = None if current is None else current.version
        if not current_version:
            raise ValueError(
                f"prev_version '{prev_version}' is specified but "
                "there is no existing deployment.")
        if current_version != prev_version:
            raise ValueError(
                f"prev_version '{prev_version}' "
                "does not match with the existing "
                f"version '{current_version}'.")

    autoscaling_policy = None
    autoscaling_config = deployment_config.autoscaling_config
    if autoscaling_config is not None:
        # TODO: is this the desired behaviour? Should this be a setting?
        deployment_config.num_replicas = autoscaling_config.min_replicas
        autoscaling_policy = BasicAutoscalingPolicy(autoscaling_config)

    deployment_info = DeploymentInfo(
        actor_name=name,
        serialized_deployment_def=replica_config.serialized_deployment_def,
        version=version,
        deployment_config=deployment_config,
        replica_config=replica_config,
        deployer_job_id=deployer_job_id,
        start_time_ms=int(time.time() * 1000),
        autoscaling_policy=autoscaling_policy,
    )
    # TODO(architkulkarni): When a deployment is redeployed, even if
    # the only change was num_replicas, the start_time_ms is refreshed.
    # Is this the desired behaviour?
    goal_id, updating = self.deployment_state_manager.deploy(
        name, deployment_info)

    if route_prefix is not None:
        self.endpoint_state.update_endpoint(
            name, EndpointInfo(route=route_prefix))

    return goal_id, updating
async def deploy(self, name: str, backend_config: BackendConfig,
                 replica_config: ReplicaConfig, version: Optional[str],
                 route_prefix: Optional[str]) -> Optional[GoalId]:
    """Deploy backend ``name`` and update the endpoint of the same name.

    When ``replica_config.backend_def`` is a class, the names of its
    function members are collected and passed to ``EndpointInfo`` as
    ``python_methods``; otherwise that list is empty.

    Args:
        name: Name used for both the backend and the endpoint.
        backend_config: Stored on the ``BackendInfo``.
        replica_config: Supplies ``backend_def`` and is stored on the
            ``BackendInfo``.
        version: Stored on the ``BackendInfo``.
        route_prefix: Route for the endpoint; when not None it must start
            with "/".

    Returns:
        Whatever ``self.backend_state.deploy_backend`` returns.
    """
    assert route_prefix is None or route_prefix.startswith("/")

    if inspect.isclass(replica_config.backend_def):
        python_methods = [
            member_name for member_name, _ in inspect.getmembers(
                replica_config.backend_def, inspect.isfunction)
        ]
    else:
        python_methods = []

    async with self.write_lock:
        worker_class = create_backend_replica(replica_config.backend_def)
        backend_info = BackendInfo(
            worker_class=worker_class,
            version=version,
            backend_config=backend_config,
            replica_config=replica_config)
        goal_id = self.backend_state.deploy_backend(name, backend_info)

        endpoint_info = EndpointInfo(
            ALL_HTTP_METHODS,
            route=route_prefix,
            python_methods=python_methods)
        # The endpoint sends all of its traffic to the backend of the
        # same name.
        traffic_policy = TrafficPolicy({name: 1.0})
        self.endpoint_state.update_endpoint(name, endpoint_info,
                                            traffic_policy)
        return goal_id
def test_default_route(mock_longest_prefix_router):
    """Check matching for a single registered route, without a method.

    An unknown path is expected to yield ``(None, None)``; the registered
    path is expected to yield its route and endpoint name.
    """
    router = mock_longest_prefix_router
    router.update_routes({"endpoint": EndpointInfo(route="/endpoint")})

    matched_route, matched_endpoint = router.match_route("/nonexistent")
    assert matched_route is None
    assert matched_endpoint is None

    matched_route, matched_endpoint = router.match_route("/endpoint")
    assert matched_route == "/endpoint"
    assert matched_endpoint == "endpoint"
def test_match_method(mock_longest_prefix_router):
    """Check that the HTTP method takes part in route matching.

    "/test" accepts POST and GET, "/" accepts PATCH; a request for "/test"
    is expected to land on whichever route accepts its method, and on
    nothing for OPTIONS.
    """
    router = mock_longest_prefix_router
    router.update_routes({
        "endpoint": EndpointInfo({"POST", "GET"}, route="/test"),
        "endpoint2": EndpointInfo({"PATCH"}, route="/"),
    })

    # (HTTP method, expected matched route, expected endpoint)
    expectations = [
        ("POST", "/test", "endpoint"),
        ("GET", "/test", "endpoint"),
        ("PATCH", "/", "endpoint2"),
    ]
    for http_method, expected_route, expected_endpoint in expectations:
        matched_route, matched_endpoint = router.match_route(
            "/test", http_method)
        assert matched_route == expected_route
        assert matched_endpoint == expected_endpoint

    # No registered route accepts OPTIONS.
    matched_route, matched_endpoint = router.match_route("/test", "OPTIONS")
    assert matched_route is None
    assert matched_endpoint is None
def deploy(
        self,
        name: str,
        deployment_config_proto_bytes: bytes,
        replica_config_proto_bytes: bytes,
        route_prefix: Optional[str],
        deployer_job_id: Union["ray._raylet.JobID", bytes],
) -> bool:
    """Deploy ``name`` from serialized deployment and replica configs.

    Args:
        name: Deployment name, also used as the endpoint name.
        deployment_config_proto_bytes: Parsed with
            ``DeploymentConfig.from_proto_bytes``; also the source of the
            deployment version.
        replica_config_proto_bytes: Parsed with
            ``ReplicaConfig.from_proto_bytes``.
        route_prefix: When not None it must start with "/" and the
            endpoint is updated to use it; when None the endpoint is
            deleted.
        deployer_job_id: A job id, or its bytes form, which is converted
            with ``ray.JobID.from_int`` after a little-endian decode.

    Returns:
        Whatever ``self.deployment_state_manager.deploy`` returns.
    """
    assert route_prefix is None or route_prefix.startswith("/")

    deployment_config = DeploymentConfig.from_proto_bytes(
        deployment_config_proto_bytes
    )
    version = deployment_config.version
    replica_config = ReplicaConfig.from_proto_bytes(
        replica_config_proto_bytes, deployment_config.needs_pickle()
    )

    autoscaling_policy = None
    autoscaling_config = deployment_config.autoscaling_config
    if autoscaling_config is not None:
        # TODO: is this the desired behaviour? Should this be a setting?
        deployment_config.num_replicas = autoscaling_config.min_replicas
        autoscaling_policy = BasicAutoscalingPolicy(autoscaling_config)

    if isinstance(deployer_job_id, bytes):
        job_id_as_int = int.from_bytes(deployer_job_id, "little")
        deployer_job_id = ray.JobID.from_int(job_id_as_int)

    deployment_info = DeploymentInfo(
        actor_name=name,
        version=version,
        deployment_config=deployment_config,
        replica_config=replica_config,
        deployer_job_id=deployer_job_id,
        start_time_ms=int(time.time() * 1000),
        autoscaling_policy=autoscaling_policy,
    )
    # TODO(architkulkarni): When a deployment is redeployed, even if
    # the only change was num_replicas, the start_time_ms is refreshed.
    # Is this the desired behaviour?
    updating = self.deployment_state_manager.deploy(name, deployment_info)

    if route_prefix is None:
        self.endpoint_state.delete_endpoint(name)
    else:
        self.endpoint_state.update_endpoint(
            name, EndpointInfo(route=route_prefix)
        )

    return updating
async def deploy(self,
                 name: str,
                 backend_config: BackendConfig,
                 replica_config: ReplicaConfig,
                 python_methods: List[str],
                 version: Optional[str],
                 prev_version: Optional[str],
                 route_prefix: Optional[str],
                 deployer_job_id: "Optional[ray._raylet.JobID]" = None
                 ) -> Tuple[Optional[GoalId], bool]:
    """Deploy backend ``name`` and update the endpoint of the same name.

    Args:
        name: Name used for both the backend and the endpoint.
        backend_config: Stored on the ``BackendInfo``.
        replica_config: Supplies ``serialized_backend_def`` for the replica
            actor and is stored on the ``BackendInfo``.
        python_methods: Forwarded to ``EndpointInfo``.
        version: Version recorded for this backend.
        prev_version: When not None, the existing backend must have
            exactly this version.
        route_prefix: Route for the endpoint; when not None it must start
            with "/".
        deployer_job_id: Stored on the ``BackendInfo``.

    Returns:
        The ``(goal_id, updating)`` pair produced by
        ``self.backend_state.deploy_backend``.

    Raises:
        ValueError: ``prev_version`` is given but there is no existing
            versioned backend, or its version differs.
    """
    assert route_prefix is None or route_prefix.startswith("/")

    async with self.write_lock:
        if prev_version is not None:
            current = self.backend_state.get_backend(name)
            current_version = None if current is None else current.version
            if not current_version:
                raise ValueError(
                    f"prev_version '{prev_version}' is specified but "
                    "there is no existing deployment.")
            if current_version != prev_version:
                raise ValueError(
                    f"prev_version '{prev_version}' "
                    "does not match with the existing "
                    f"version '{current_version}'.")

        replica_actor_def = ray.remote(
            create_backend_replica(name,
                                   replica_config.serialized_backend_def))
        backend_info = BackendInfo(
            actor_def=replica_actor_def,
            version=version,
            backend_config=backend_config,
            replica_config=replica_config,
            deployer_job_id=deployer_job_id,
            start_time_ms=int(time.time() * 1000))

        goal_id, updating = self.backend_state.deploy_backend(
            name, backend_info)

        endpoint_info = EndpointInfo(
            ALL_HTTP_METHODS,
            route=route_prefix,
            python_methods=python_methods,
            legacy=False)
        # The endpoint sends all of its traffic to the backend of the
        # same name.
        traffic_policy = TrafficPolicy({name: 1.0})
        self.endpoint_state.update_endpoint(name, endpoint_info,
                                            traffic_policy)
        return goal_id, updating
def deploy(
        self,
        name: str,
        backend_config_proto_bytes: bytes,
        replica_config: ReplicaConfig,
        version: Optional[str],
        prev_version: Optional[str],
        route_prefix: Optional[str],
        deployer_job_id: "Optional[ray._raylet.JobID]" = None
) -> Tuple[Optional[GoalId], bool]:
    """Deploy backend ``name`` from a serialized backend config.

    Args:
        name: Name used for both the backend and the endpoint.
        backend_config_proto_bytes: Parsed with
            ``BackendConfig.from_proto_bytes``.
        replica_config: Supplies ``serialized_backend_def`` for the replica
            actor and is stored on the ``BackendInfo``.
        version: Version recorded for this backend.
        prev_version: When not None, the existing backend must have
            exactly this version.
        route_prefix: Route for the endpoint; when not None it must start
            with "/". The endpoint is updated whether or not it is None.
        deployer_job_id: Stored on the ``BackendInfo``.

    Returns:
        The ``(goal_id, updating)`` pair produced by
        ``self.backend_state_manager.deploy_backend``.

    Raises:
        ValueError: ``prev_version`` is given but there is no existing
            versioned backend, or its version differs.
    """
    assert route_prefix is None or route_prefix.startswith("/")

    backend_config = BackendConfig.from_proto_bytes(
        backend_config_proto_bytes)

    if prev_version is not None:
        current = self.backend_state_manager.get_backend(name)
        current_version = None if current is None else current.version
        if not current_version:
            raise ValueError(
                f"prev_version '{prev_version}' is specified but "
                "there is no existing deployment.")
        if current_version != prev_version:
            raise ValueError(f"prev_version '{prev_version}' "
                             "does not match with the existing "
                             f"version '{current_version}'.")

    replica_actor_def = ray.remote(
        create_replica_wrapper(name, replica_config.serialized_backend_def))
    backend_info = BackendInfo(
        actor_def=replica_actor_def,
        version=version,
        backend_config=backend_config,
        replica_config=replica_config,
        deployer_job_id=deployer_job_id,
        start_time_ms=int(time.time() * 1000))
    # TODO(architkulkarni): When a deployment is redeployed, even if
    # the only change was num_replicas, the start_time_ms is refreshed.
    # This is probably not the desired behavior for an autoscaling
    # deployment, which redeploys very often to change num_replicas.
    goal_id, updating = self.backend_state_manager.deploy_backend(
        name, backend_info)

    self.endpoint_state.update_endpoint(name,
                                        EndpointInfo(route=route_prefix))
    return goal_id, updating
async def deploy(self, name: str, backend_config: BackendConfig,
                 replica_config: ReplicaConfig, version: Optional[str],
                 route_prefix: Optional[str]) -> Optional[GoalId]:
    """Deploy backend ``name`` and update the endpoint of the same name.

    The endpoint's HTTP methods depend on ``replica_config.is_asgi_app``,
    and its ``python_methods`` are the names of the function members of
    ``replica_config.backend_def`` when that is a class (empty otherwise).

    Args:
        name: Name used for both the backend and the endpoint.
        backend_config: Stored on the ``BackendInfo``.
        replica_config: Supplies ``is_asgi_app`` and ``backend_def`` and is
            stored on the ``BackendInfo``.
        version: Stored on the ``BackendInfo``.
        route_prefix: Route for the endpoint; when not None it must start
            with "/".

    Returns:
        Whatever ``self.backend_state.deploy_backend`` returns.
    """
    assert route_prefix is None or route_prefix.startswith("/")

    # When the backend is asgi application, we want to proxy it with a
    # prefixed path as well as proxy all HTTP methods. Generic endpoint
    # should support a limited subset of HTTP methods.
    http_methods = (ALL_HTTP_METHODS
                    if replica_config.is_asgi_app else ["GET", "POST"])

    if inspect.isclass(replica_config.backend_def):
        python_methods = [
            member_name for member_name, _ in inspect.getmembers(
                replica_config.backend_def, inspect.isfunction)
        ]
    else:
        python_methods = []

    async with self.write_lock:
        worker_class = create_backend_replica(replica_config.backend_def)
        backend_info = BackendInfo(
            worker_class=worker_class,
            version=version,
            backend_config=backend_config,
            replica_config=replica_config)
        goal_id = self.backend_state.deploy_backend(name, backend_info)

        endpoint_info = EndpointInfo(
            http_methods,
            route=route_prefix,
            python_methods=python_methods)
        # The endpoint sends all of its traffic to the backend of the
        # same name.
        traffic_policy = TrafficPolicy({name: 1.0})
        self.endpoint_state.update_endpoint(name, endpoint_info,
                                            traffic_policy)
        return goal_id
def test_no_match(mock_longest_prefix_router):
    """Check that an unregistered path yields ``(None, None)``."""
    router = mock_longest_prefix_router
    router.update_routes({
        "endpoint": EndpointInfo({"POST"}, route="/hello"),
    })

    matched_route, matched_endpoint = router.match_route(
        "/nonexistent", "POST")
    assert matched_route is None
    assert matched_endpoint is None