def test_with_proto():
    """BackendConfig must survive a serialize/deserialize round trip via proto.

    Covers two representative configurations: plain numeric fields, and an
    arbitrary picklable ``user_config`` payload.
    """
    cases = [
        BackendConfig(num_replicas=100, max_concurrent_queries=16),
        BackendConfig(user_config={"python": ("native", ["objects"])}),
    ]
    for original in cases:
        restored = BackendConfig.from_proto_bytes(original.to_proto_bytes())
        assert original == restored
def set_max_concurrent_queries(self, backend_config_bytes: bytes):
    """Refresh ``max_concurrent_queries`` from a serialized BackendConfig.

    This is a no-op when the decoded value matches the current setting.
    Otherwise the new limit is stored and ``config_updated_event`` is set
    so that anything waiting on a config change wakes up.
    """
    decoded_config = BackendConfig.from_proto_bytes(backend_config_bytes)
    new_value: int = decoded_config.max_concurrent_queries
    # Guard clause: nothing to do if the limit is unchanged.
    if new_value == self.max_concurrent_queries:
        return
    self.max_concurrent_queries = new_value
    logger.debug(
        f"ReplicaSet: changing max_concurrent_queries to {new_value}")
    self.config_updated_event.set()
async def __init__(self, backend_tag, replica_tag, init_args,
                   init_kwargs, backend_config_proto_bytes: bytes,
                   version: BackendVersion, controller_name: str,
                   detached: bool):
    """Construct the replica actor: deserialize the backend definition,
    run the user's (possibly async) constructor, and wire up the
    RayServeReplica wrapper plus the shutdown event.
    """
    # NOTE(review): `serialized_backend_def` is not a parameter of this
    # method — it is presumably captured from an enclosing factory scope
    # (e.g. a create-replica wrapper); confirm against the factory that
    # defines this actor class.
    backend = cloudpickle.loads(serialized_backend_def)
    backend_config = BackendConfig.from_proto_bytes(
        backend_config_proto_bytes)
    # The deserialized backend must be either a plain function or a class;
    # anything else (e.g. an unresolved import path) is a programming error.
    if inspect.isfunction(backend):
        is_function = True
    elif inspect.isclass(backend):
        is_function = False
    else:
        assert False, ("backend_def must be function, class, or "
                       "corresponding import path.")

    # Set the controller name so that serve.connect() in the user's
    # backend code will connect to the instance that this backend is
    # running in.
    ray.serve.api._set_internal_replica_context(backend_tag, replica_tag,
                                                controller_name,
                                                servable_object=None)
    if is_function:
        _callable = backend
    else:
        # This allows backends to define an async __init__ method
        # (required for FastAPI backend definition).
        _callable = backend.__new__(backend)
        await sync_to_async(_callable.__init__)(*init_args, **init_kwargs)
    # Setting the context again to update the servable_object.
    ray.serve.api._set_internal_replica_context(
        backend_tag,
        replica_tag,
        controller_name,
        servable_object=_callable)

    assert controller_name, "Must provide a valid controller_name"

    # Look up the controller actor in the namespace that matches the
    # detached/non-detached deployment mode.
    controller_namespace = ray.serve.api._get_controller_namespace(
        detached)
    controller_handle = ray.get_actor(controller_name,
                                      namespace=controller_namespace)
    self.backend = RayServeReplica(_callable, backend_tag, replica_tag,
                                   backend_config,
                                   backend_config.user_config, version,
                                   is_function, controller_handle)

    # asyncio.Event used to signal that the replica is shutting down.
    self.shutdown_event = asyncio.Event()
def deploy(
        self,
        name: str,
        backend_config_proto_bytes: bytes,
        replica_config: ReplicaConfig,
        version: Optional[str],
        prev_version: Optional[str],
        route_prefix: Optional[str],
        deployer_job_id: "Optional[ray._raylet.JobID]" = None
) -> Tuple[Optional[GoalId], bool]:
    """Deploy (or redeploy) the named backend and register its route.

    Returns the ``(goal_id, updating)`` pair produced by the backend
    state manager.  Raises ValueError when ``prev_version`` is given but
    does not match the currently deployed version.
    """
    if route_prefix is not None:
        assert route_prefix.startswith("/")

    backend_config = BackendConfig.from_proto_bytes(
        backend_config_proto_bytes)

    # Optimistic-concurrency check: the caller may pin the version they
    # believe is currently deployed; mismatch aborts the deployment.
    if prev_version is not None:
        existing_backend_info = self.backend_state_manager.get_backend(
            name)
        no_existing = (existing_backend_info is None
                       or not existing_backend_info.version)
        if no_existing:
            raise ValueError(
                f"prev_version '{prev_version}' is specified but "
                "there is no existing deployment.")
        if existing_backend_info.version != prev_version:
            raise ValueError(f"prev_version '{prev_version}' "
                             "does not match with the existing "
                             f"version '{existing_backend_info.version}'.")

    backend_info = BackendInfo(
        actor_def=ray.remote(
            create_replica_wrapper(name,
                                   replica_config.serialized_backend_def)),
        version=version,
        backend_config=backend_config,
        replica_config=replica_config,
        deployer_job_id=deployer_job_id,
        start_time_ms=int(time.time() * 1000))
    # TODO(architkulkarni): When a deployment is redeployed, even if
    # the only change was num_replicas, the start_time_ms is refreshed.
    # This is probably not the desired behavior for an autoscaling
    # deployment, which redeploys very often to change num_replicas.

    goal_id, updating = self.backend_state_manager.deploy_backend(
        name, backend_info)

    self.endpoint_state.update_endpoint(name,
                                        EndpointInfo(route=route_prefix))
    return goal_id, updating
async def deploy(
        self,
        name: str,
        backend_config_proto_bytes: bytes,
        replica_config: ReplicaConfig,
        python_methods: List[str],
        version: Optional[str],
        prev_version: Optional[str],
        route_prefix: Optional[str],
        deployer_job_id: "Optional[ray._raylet.JobID]" = None
) -> Tuple[Optional[GoalId], bool]:
    """Deploy (or redeploy) the named backend under the write lock.

    Registers the endpoint route together with the backend's exposed
    Python methods.  Returns the ``(goal_id, updating)`` pair from the
    backend state manager; raises ValueError when ``prev_version`` is
    given but does not match the currently deployed version.
    """
    if route_prefix is not None:
        assert route_prefix.startswith("/")

    backend_config = BackendConfig.from_proto_bytes(
        backend_config_proto_bytes)

    # All state mutation happens while holding the write lock so that
    # concurrent deploy calls are serialized.
    async with self.write_lock:
        # Optimistic-concurrency check against the caller-supplied
        # expected version.
        if prev_version is not None:
            existing_backend_info = self.backend_state_manager.get_backend(
                name)
            no_existing = (existing_backend_info is None
                           or not existing_backend_info.version)
            if no_existing:
                raise ValueError(
                    f"prev_version '{prev_version}' is specified but "
                    "there is no existing deployment.")
            if existing_backend_info.version != prev_version:
                raise ValueError(
                    f"prev_version '{prev_version}' "
                    "does not match with the existing "
                    f"version '{existing_backend_info.version}'.")

        backend_info = BackendInfo(
            actor_def=ray.remote(
                create_backend_replica(
                    name, replica_config.serialized_backend_def)),
            version=version,
            backend_config=backend_config,
            replica_config=replica_config,
            deployer_job_id=deployer_job_id,
            start_time_ms=int(time.time() * 1000))

        goal_id, updating = self.backend_state_manager.deploy_backend(
            name, backend_info)

        endpoint_info = EndpointInfo(route=route_prefix,
                                     python_methods=python_methods)
        self.endpoint_state.update_endpoint(name, endpoint_info)
        return goal_id, updating
def _update_backend_configs(self, new_config_bytes: bytes) -> None:
    """Replace this replica's backend_config with the deserialized bytes."""
    updated_config = BackendConfig.from_proto_bytes(new_config_bytes)
    self.backend_config = updated_config