def test_with_proto():
    """A DeploymentConfig must compare equal to its protobuf round trip."""
    # Scalar options only.
    scalar_config = DeploymentConfig(num_replicas=100, max_concurrent_queries=16)
    restored = DeploymentConfig.from_proto_bytes(scalar_config.to_proto_bytes())
    assert scalar_config == restored

    # A user_config holding nested native Python objects.
    config_with_user_config = DeploymentConfig(
        user_config={"python": ("native", ["objects"])}
    )
    restored = DeploymentConfig.from_proto_bytes(
        config_with_user_config.to_proto_bytes()
    )
    assert config_with_user_config == restored
def from_proto(cls, proto: DeploymentInfoProto):
    """Construct an instance of ``cls`` from a ``DeploymentInfoProto`` message.

    Fields that still hold their protobuf default (empty string, empty bytes
    or zero) are passed to the constructor as ``None``. ``deployer_job_id`` is
    not read from the proto; it is taken from the current Ray runtime context.
    """

    def _unset_to_none(value, proto_default):
        # A field equal to its protobuf default is reported as "not set".
        return None if value == proto_default else value

    deployment_config = None
    if proto.deployment_config:
        deployment_config = DeploymentConfig.from_proto(proto.deployment_config)

    # Without a deployment config, the replica config is parsed as Python.
    if deployment_config:
        language = deployment_config.deployment_language
    else:
        language = DeploymentLanguage.PYTHON

    return cls(
        deployment_config=deployment_config,
        replica_config=ReplicaConfig.from_proto(proto.replica_config, language),
        start_time_ms=proto.start_time_ms,
        actor_name=_unset_to_none(proto.actor_name, ""),
        serialized_deployment_def=_unset_to_none(
            proto.serialized_deployment_def, b""
        ),
        version=_unset_to_none(proto.version, ""),
        end_time_ms=_unset_to_none(proto.end_time_ms, 0),
        deployer_job_id=ray.get_runtime_context().job_id,
    )
def test_invalid_use_sync_handle():
    """Non-boolean USE_SYNC_HANDLE_KEY values are rejected by both node types."""
    deployment = Deployment(Actor, "test", DeploymentConfig(), _internal=True)
    expected_message = (
        f"{USE_SYNC_HANDLE_KEY} should only be set with a boolean value"
    )

    # A dict is not a boolean.
    dict_valued_options = {USE_SYNC_HANDLE_KEY: {"options_a": "hii"}}
    with pytest.raises(ValueError, match=expected_message):
        DeploymentNode(
            Actor, "test", [], {}, {}, other_args_to_resolve=dict_valued_options
        )

    # Neither is None.
    none_valued_options = {USE_SYNC_HANDLE_KEY: None}
    with pytest.raises(ValueError, match=expected_message):
        DeploymentMethodNode(
            deployment,
            "method",
            [],
            {},
            {},
            other_args_to_resolve=none_valued_options,
        )
def deploy(
    self,
    name: str,
    deployment_config_proto_bytes: bytes,
    replica_config: ReplicaConfig,
    version: Optional[str],
    prev_version: Optional[str],
    route_prefix: Optional[str],
    deployer_job_id: "ray._raylet.JobID",
) -> Tuple[Optional[GoalId], bool]:
    """Register (or update) a deployment with the deployment state manager.

    Args:
        name: Deployment name; also used as the actor name.
        deployment_config_proto_bytes: Serialized ``DeploymentConfig``.
        replica_config: Replica configuration for the deployment.
        version: Version recorded on the new ``DeploymentInfo``.
        prev_version: When given, the currently stored deployment must exist
            and have exactly this version.
        route_prefix: When given, must start with "/"; an endpoint with this
            route is registered after the deployment call.
        deployer_job_id: Job id recorded on the new ``DeploymentInfo``.

    Returns:
        The ``(goal_id, updating)`` pair produced by the state manager.

    Raises:
        ValueError: If ``prev_version`` is given but no versioned deployment
            exists, or the existing version differs from it.
    """
    has_route = route_prefix is not None
    if has_route:
        assert route_prefix.startswith("/")

    deployment_config = DeploymentConfig.from_proto_bytes(
        deployment_config_proto_bytes
    )

    if prev_version is not None:
        existing = self.deployment_state_manager.get_deployment(name)
        if existing is None or not existing.version:
            raise ValueError(
                f"prev_version '{prev_version}' is specified but "
                "there is no existing deployment."
            )
        if existing.version != prev_version:
            raise ValueError(
                f"prev_version '{prev_version}' "
                "does not match with the existing "
                f"version '{existing.version}'."
            )

    autoscaling_policy = None
    autoscaling_config = deployment_config.autoscaling_config
    if autoscaling_config is not None:
        # TODO: is this the desired behaviour? Should this be a setting?
        deployment_config.num_replicas = autoscaling_config.min_replicas
        autoscaling_policy = BasicAutoscalingPolicy(autoscaling_config)

    # TODO(architkulkarni): When a deployment is redeployed, even if
    # the only change was num_replicas, the start_time_ms is refreshed.
    # Is this the desired behaviour?
    info_fields = dict(
        actor_name=name,
        serialized_deployment_def=replica_config.serialized_deployment_def,
        version=version,
        deployment_config=deployment_config,
        replica_config=replica_config,
        deployer_job_id=deployer_job_id,
        start_time_ms=int(time.time() * 1000),
        autoscaling_policy=autoscaling_policy,
    )
    goal_id, updating = self.deployment_state_manager.deploy(
        name, DeploymentInfo(**info_fields)
    )

    if has_route:
        self.endpoint_state.update_endpoint(name, EndpointInfo(route=route_prefix))

    return goal_id, updating
def from_proto(cls, proto: DeploymentInfoProto):
    """Construct an instance of ``cls`` from a ``DeploymentInfoProto`` message.

    Fields that still hold their protobuf default (empty string or zero) are
    passed to the constructor as ``None``. ``deployer_job_id`` is not read from
    the proto; it is taken from the current Ray runtime context.
    """

    def _unset_to_none(value, proto_default):
        # A field equal to its protobuf default is reported as "not set".
        return None if value == proto_default else value

    deployment_config = None
    if proto.deployment_config:
        deployment_config = DeploymentConfig.from_proto(proto.deployment_config)

    # Without a deployment config, assume the replica config needs pickling.
    if deployment_config:
        needs_pickle = deployment_config.needs_pickle()
    else:
        needs_pickle = True

    return cls(
        deployment_config=deployment_config,
        replica_config=ReplicaConfig.from_proto(proto.replica_config, needs_pickle),
        start_time_ms=proto.start_time_ms,
        actor_name=_unset_to_none(proto.actor_name, ""),
        version=_unset_to_none(proto.version, ""),
        end_time_ms=_unset_to_none(proto.end_time_ms, 0),
        deployer_job_id=ray.get_runtime_context().job_id,
    )
def schema_to_deployment(s: DeploymentSchema) -> Deployment:
    """Build a ``Deployment`` from the fields of a ``DeploymentSchema``.

    The schema's ``import_path`` becomes the deployment's ``func_or_class``.
    Schema fields that are ``None`` do not override the ``DeploymentConfig``
    defaults (``ignore_none=True``). Init args and kwargs are left empty.
    """
    actor_options = s.ray_actor_options
    if actor_options is not None:
        actor_options = actor_options.dict(exclude_unset=True)

    config_fields = {
        "num_replicas": s.num_replicas,
        "user_config": s.user_config,
        "max_concurrent_queries": s.max_concurrent_queries,
        "autoscaling_config": s.autoscaling_config,
        "graceful_shutdown_wait_loop_s": s.graceful_shutdown_wait_loop_s,
        "graceful_shutdown_timeout_s": s.graceful_shutdown_timeout_s,
        "health_check_period_s": s.health_check_period_s,
        "health_check_timeout_s": s.health_check_timeout_s,
    }
    config = DeploymentConfig.from_default(ignore_none=True, **config_fields)

    return Deployment(
        func_or_class=s.import_path,
        name=s.name,
        config=config,
        init_args=(),
        init_kwargs={},
        route_prefix=s.route_prefix,
        ray_actor_options=actor_options,
        _internal=True,
    )
def __init__(
    self,
    func_body: Union[Callable, str],
    deployment_name,
    func_args,
    func_kwargs,
    func_options,
    other_args_to_resolve=None,
):
    """Create a DAG node that wraps a function as a Serve deployment.

    Args:
        func_body: The function itself, or a string standing in for it.
        deployment_name: Name to give the deployment unless the schema
            supplies a different user-specified one.
        func_args: Positional args bound to the node.
        func_kwargs: Keyword args bound to the node.
        func_options: Options bound to the node; also used as the
            deployment's ``ray_actor_options`` when no schema is supplied.
        other_args_to_resolve: Extra arguments; a ``"deployment_schema"``
            entry, if present, configures the deployment.
    """
    self._body = func_body
    self._deployment_name = deployment_name
    super().__init__(
        func_args,
        func_kwargs,
        func_options,
        other_args_to_resolve=other_args_to_resolve,
    )
    if "deployment_schema" in self._bound_other_args_to_resolve:
        deployment_schema: DeploymentSchema = self._bound_other_args_to_resolve[
            "deployment_schema"
        ]
        deployment_shell = schema_to_deployment(deployment_schema)

        # Prefer user specified name to override the generated one.
        if (
            inspect.isfunction(func_body)
            and deployment_shell.name != func_body.__name__
        ):
            self._deployment_name = deployment_shell.name

        # Set the route prefix, prefer the one user supplied,
        # otherwise set it to /deployment_name
        if (
            deployment_shell.route_prefix is None
            or deployment_shell.route_prefix != f"/{deployment_shell.name}"
        ):
            route_prefix = deployment_shell.route_prefix
        else:
            # Use the final deployment name (which may just have been
            # overridden by the schema) so that the route and the name of
            # the deployment cannot disagree.
            route_prefix = f"/{self._deployment_name}"

        self._deployment = deployment_shell.options(
            func_or_class=func_body,
            name=self._deployment_name,
            init_args=(),
            init_kwargs={},
            route_prefix=route_prefix,
        )
    else:
        self._deployment: Deployment = Deployment(
            func_body,
            deployment_name,
            DeploymentConfig(),
            init_args=tuple(),
            init_kwargs=dict(),
            ray_actor_options=func_options,
            _internal=True,
        )
    # TODO (jiaodong): Polish with async handle support later
    self._deployment_handle = RayServeLazySyncHandle(self._deployment.name)
def test_zero_default_proto():
    """Options set to zero keep that value across a protobuf round trip.

    Zero is the protobuf default value, so it must not be mistaken for
    "unset" and replaced by the option's own default.
    """
    autoscaling_options = {
        "min_replicas": 1,
        "max_replicas": 2,
        "smoothing_factor": 0.123,
        "downscale_delay_s": 0,
    }
    original = DeploymentConfig(autoscaling_config=autoscaling_options)

    round_tripped = DeploymentConfig.from_proto_bytes(original.to_proto_bytes())
    new_delay_s = round_tripped.autoscaling_config.downscale_delay_s
    assert new_delay_s == 0

    # Check that this test is not spuriously passing: the default must be
    # something other than zero for the assertion above to mean anything.
    assert new_delay_s != AutoscalingConfig().downscale_delay_s
def test_from_default(self, ignore_none):
    """from_default() accepts known fields, rejects unknown ones, validates."""
    # Known fields are applied.
    known_fields = {"num_replicas": 5, "is_cross_language": True}
    config = DeploymentConfig.from_default(ignore_none=ignore_none, **known_fields)
    assert config.num_replicas == 5
    assert config.is_cross_language is True

    # An unknown field name raises TypeError.
    unknown_fields = {"num_replicas": 5, "is_xlang": True}
    with pytest.raises(TypeError):
        DeploymentConfig.from_default(ignore_none=ignore_none, **unknown_fields)

    # Field validation still runs.
    with pytest.raises(ValidationError):
        DeploymentConfig.from_default(
            ignore_none=ignore_none, num_replicas="hello world"
        )
def deploy(
    self,
    name: str,
    deployment_config_proto_bytes: bytes,
    replica_config_proto_bytes: bytes,
    route_prefix: Optional[str],
    deployer_job_id: Union["ray._raylet.JobID", bytes],
) -> bool:
    """Register (or update) a deployment and keep its endpoint in sync.

    Args:
        name: Deployment name; also used as the actor name.
        deployment_config_proto_bytes: Serialized ``DeploymentConfig``; the
            deployment version is read from it.
        replica_config_proto_bytes: Serialized ``ReplicaConfig``.
        route_prefix: When given, must start with "/" and an endpoint with
            this route is registered; when ``None`` the endpoint for ``name``
            is deleted.
        deployer_job_id: Job id, either as an object or as little-endian
            bytes that are converted with ``ray.JobID.from_int``.

    Returns:
        The ``updating`` value produced by the deployment state manager.
    """
    expose_route = route_prefix is not None
    if expose_route:
        assert route_prefix.startswith("/")

    deployment_config = DeploymentConfig.from_proto_bytes(
        deployment_config_proto_bytes
    )
    version = deployment_config.version
    replica_config = ReplicaConfig.from_proto_bytes(
        replica_config_proto_bytes, deployment_config.needs_pickle()
    )

    autoscaling_policy = None
    autoscaling_config = deployment_config.autoscaling_config
    if autoscaling_config is not None:
        # TODO: is this the desired behaviour? Should this be a setting?
        deployment_config.num_replicas = autoscaling_config.min_replicas
        autoscaling_policy = BasicAutoscalingPolicy(autoscaling_config)

    if isinstance(deployer_job_id, bytes):
        job_id_as_int = int.from_bytes(deployer_job_id, "little")
        deployer_job_id = ray.JobID.from_int(job_id_as_int)

    # TODO(architkulkarni): When a deployment is redeployed, even if
    # the only change was num_replicas, the start_time_ms is refreshed.
    # Is this the desired behaviour?
    info_fields = dict(
        actor_name=name,
        version=version,
        deployment_config=deployment_config,
        replica_config=replica_config,
        deployer_job_id=deployer_job_id,
        start_time_ms=int(time.time() * 1000),
        autoscaling_policy=autoscaling_policy,
    )
    updating = self.deployment_state_manager.deploy(
        name, DeploymentInfo(**info_fields)
    )

    if expose_route:
        self.endpoint_state.update_endpoint(name, EndpointInfo(route=route_prefix))
    else:
        self.endpoint_state.delete_endpoint(name)

    return updating
def from_json(cls, input_json, object_hook=None):
    """Rebuild a node of type ``cls`` from its JSON dictionary form.

    ``input_json`` must be tagged as a ``DeploymentMethodNode``. The wrapped
    deployment is recreated with a default ``DeploymentConfig``. The
    ``object_hook`` argument is accepted but not used here.
    """
    assert input_json[DAGNODE_TYPE_KEY] == DeploymentMethodNode.__name__

    deployment = Deployment(
        input_json["import_path"],
        input_json["deployment_name"],
        # TODO: (jiaodong) Support deployment config from user input
        DeploymentConfig(),
        init_args=input_json["args"],
        init_kwargs=input_json["kwargs"],
        ray_actor_options=input_json["options"],
        _internal=True,
    )
    node_positional_args = (
        deployment,
        input_json["deployment_method_name"],
        input_json["args"],
        input_json["kwargs"],
        input_json["options"],
    )
    return cls(
        *node_positional_args,
        other_args_to_resolve=input_json["other_args_to_resolve"],
    )
def test_from_default_ignore_none(self):
    """Check from_default()'s ignore_none parameter."""
    default = DeploymentConfig()

    # Valid parameter with None passed in should be ignored, leaving the
    # field at its default. (Previously this result was never asserted on.)
    dc = DeploymentConfig.from_default(ignore_none=True, num_replicas=None)
    assert dc.num_replicas == default.num_replicas

    # Invalid parameter should raise TypeError no matter what
    with pytest.raises(TypeError):
        DeploymentConfig.from_default(ignore_none=True, fake=5)

    with pytest.raises(TypeError):
        DeploymentConfig.from_default(ignore_none=False, fake=5)

    # Validators should run no matter what
    dc = DeploymentConfig.from_default(ignore_none=True, max_concurrent_queries=None)
    assert dc.max_concurrent_queries == default.max_concurrent_queries

    dc = DeploymentConfig.from_default(ignore_none=False, max_concurrent_queries=None)
    assert dc.max_concurrent_queries is not None
    assert dc.max_concurrent_queries == default.max_concurrent_queries
def schema_to_deployment(s: DeploymentSchema) -> Deployment:
    """Create a deployment carrying the parameters specified in a schema.

    The returned deployment CANNOT be deployed immediately. Its
    func_or_class value is an empty string (""), which is not a valid import
    path, and must be overwritten with a valid function or class before the
    deployment can be deployed.

    Schema fields that are ``None`` do not override the ``DeploymentConfig``
    defaults (``ignore_none=True``).
    """
    actor_options = s.ray_actor_options
    if actor_options is not None:
        actor_options = actor_options.dict(exclude_unset=True)

    config_fields = {
        "num_replicas": s.num_replicas,
        "user_config": s.user_config,
        "max_concurrent_queries": s.max_concurrent_queries,
        "autoscaling_config": s.autoscaling_config,
        "graceful_shutdown_wait_loop_s": s.graceful_shutdown_wait_loop_s,
        "graceful_shutdown_timeout_s": s.graceful_shutdown_timeout_s,
        "health_check_period_s": s.health_check_period_s,
        "health_check_timeout_s": s.health_check_timeout_s,
    }
    config = DeploymentConfig.from_default(ignore_none=True, **config_fields)

    return Deployment(
        func_or_class="",
        name=s.name,
        config=config,
        init_args=(),
        init_kwargs={},
        route_prefix=s.route_prefix,
        ray_actor_options=actor_options,
        _internal=True,
    )
async def __init__(
    self,
    deployment_name,
    replica_tag,
    serialized_deployment_def: bytes,
    serialized_init_args: bytes,
    serialized_init_kwargs: bytes,
    deployment_config_proto_bytes: bytes,
    version: DeploymentVersion,
    controller_name: str,
    detached: bool,
):
    """Prepare a replica actor without running the user's code yet.

    Configures logging, unpickles the deployment definition and its init
    arguments, parses the deployment config, registers the replica context
    and looks up the controller actor. Construction of the user callable is
    deferred: it is packaged in a closure stored on
    ``self._initialize_replica`` and ``self.replica`` stays ``None`` until
    that closure has been awaited.

    Args:
        deployment_name: Name of the deployment this replica belongs to.
        replica_tag: Identifier of this replica.
        serialized_deployment_def: cloudpickled function, class, or import
            path string of the deployment.
        serialized_init_args: cloudpickled positional init args.
        serialized_init_kwargs: cloudpickled keyword init args.
        deployment_config_proto_bytes: Serialized ``DeploymentConfig``.
        version: Deployment version handed to ``RayServeReplica``.
        controller_name: Name of the controller actor; must be non-empty.
        detached: Not referenced in this constructor body.
    """
    configure_component_logger(
        component_type="deployment",
        component_name=deployment_name,
        component_id=replica_tag,
    )
    deployment_def = cloudpickle.loads(serialized_deployment_def)
    if isinstance(deployment_def, str):
        # The definition was given as an import path: import the target.
        import_path = deployment_def
        module_name, attr_name = parse_import_path(import_path)
        deployment_def = getattr(import_module(module_name), attr_name)
        # For ray or serve decorated class or function, strip to return
        # original body
        if isinstance(deployment_def, RemoteFunction):
            deployment_def = deployment_def._function
        elif isinstance(deployment_def, ActorClass):
            deployment_def = deployment_def.__ray_metadata__.modified_class
        elif isinstance(deployment_def, Deployment):
            logger.warning(
                f'The import path "{import_path}" contains a '
                "decorated Serve deployment. The decorator's settings "
                "are ignored when deploying via import path.")
            deployment_def = deployment_def.func_or_class

    init_args = cloudpickle.loads(serialized_init_args)
    init_kwargs = cloudpickle.loads(serialized_init_kwargs)

    deployment_config = DeploymentConfig.from_proto_bytes(
        deployment_config_proto_bytes)

    # Functions are used as-is; classes are instantiated later, inside
    # initialize_replica().
    if inspect.isfunction(deployment_def):
        is_function = True
    elif inspect.isclass(deployment_def):
        is_function = False
    else:
        assert False, (
            "deployment_def must be function, class, or "
            "corresponding import path. Instead, it's type was "
            f"{type(deployment_def)}.")

    # Set the controller name so that serve.connect() in the user's
    # code will connect to the instance that this deployment is running
    # in.
    ray.serve.context.set_internal_replica_context(
        deployment_name,
        replica_tag,
        controller_name,
        servable_object=None,
    )

    assert controller_name, "Must provide a valid controller_name"

    controller_handle = ray.get_actor(controller_name, namespace=SERVE_NAMESPACE)

    # This closure initializes user code and finalizes replica
    # startup. By splitting the initialization step like this,
    # we can already access this actor before the user code
    # has finished initializing.
    # The supervising state manager can then wait
    # for allocation of this replica by using the `is_allocated`
    # method. After that, it calls `reconfigure` to trigger
    # user code initialization.
    async def initialize_replica():
        if is_function:
            _callable = deployment_def
        else:
            # This allows deployments to define an async __init__
            # method (required for FastAPI).
            _callable = deployment_def.__new__(deployment_def)
            await sync_to_async(_callable.__init__)(*init_args, **init_kwargs)

        # Setting the context again to update the servable_object.
        ray.serve.context.set_internal_replica_context(
            deployment_name,
            replica_tag,
            controller_name,
            servable_object=_callable,
        )

        self.replica = RayServeReplica(
            _callable,
            deployment_name,
            replica_tag,
            deployment_config,
            deployment_config.user_config,
            version,
            is_function,
            controller_handle,
        )

    # Is it fine that replica is None here?
    # Should we add a check in all methods that use self.replica
    # or, alternatively, create an async get_replica() method?
    self.replica = None
    self._initialize_replica = initialize_replica
def deployment(
    _func_or_class: Optional[Callable] = None,
    name: Optional[str] = None,
    version: Optional[str] = None,
    prev_version: Optional[str] = None,
    num_replicas: Optional[int] = None,
    init_args: Optional[Tuple[Any]] = None,
    init_kwargs: Optional[Dict[Any, Any]] = None,
    route_prefix: Optional[str] = None,
    ray_actor_options: Optional[Dict] = None,
    user_config: Optional[Any] = None,
    max_concurrent_queries: Optional[int] = None,
    _autoscaling_config: Optional[Union[Dict, AutoscalingConfig]] = None,
    _graceful_shutdown_wait_loop_s: Optional[float] = None,
    _graceful_shutdown_timeout_s: Optional[float] = None
) -> Callable[[Callable], Deployment]:
    """Define a Serve deployment.

    Usable both bare (``@serve.deployment``) and with arguments
    (``@serve.deployment(name=...)``).

    Args:
        name (Optional[str]): Globally-unique name identifying this
            deployment. If not provided, the name of the class or function
            will be used.
        version (Optional[str]): Version of the deployment. This is used to
            indicate a code change for the deployment; when it is re-deployed
            with a version change, a rolling update of the replicas will be
            performed. If not provided, every deployment will be treated as a
            new version.
        prev_version (Optional[str]): Version of the existing deployment
            which is used as a precondition for the next deployment. If
            prev_version does not match with the existing deployment's
            version, the deployment will fail. If not provided, deployment
            procedure will not check the existing deployment's version.
        num_replicas (Optional[int]): The number of processes to start up
            that will handle requests to this deployment. Defaults to 1.
        init_args (Optional[Tuple]): Positional args to be passed to the
            class constructor when starting up deployment replicas. These can
            also be passed when you call `.deploy()` on the returned
            Deployment.
        init_kwargs (Optional[Dict]): Keyword args to be passed to the class
            constructor when starting up deployment replicas. These can also
            be passed when you call `.deploy()` on the returned Deployment.
        route_prefix (Optional[str]): Requests to paths under this HTTP path
            prefix will be routed to this deployment. Defaults to '/{name}'.
            Routing is done based on longest-prefix match, so if you have
            deployment A with a prefix of '/a' and deployment B with a prefix
            of '/a/b', requests to '/a', '/a/', and '/a/c' go to A and
            requests to '/a/b', '/a/b/', and '/a/b/c' go to B. Routes must
            not end with a '/' unless they're the root (just '/'), which acts
            as a catch-all.
        ray_actor_options (dict): Options to be passed to the Ray actor
            constructor such as resource requirements.
        user_config (Optional[Any]): [experimental] Config to pass to the
            reconfigure method of the deployment. This can be updated
            dynamically without changing the version of the deployment and
            restarting its replicas. The user_config needs to be hashable to
            keep track of updates, so it must only contain hashable types, or
            hashable types nested in lists and dictionaries.
        max_concurrent_queries (Optional[int]): The maximum number of queries
            that will be sent to a replica of this deployment without
            receiving a response. Defaults to 100.

    Example:
        >>> @serve.deployment(name="deployment1", version="v1")
        ... class MyDeployment:
        ...     pass

        >>> MyDeployment.deploy(*init_args)
        >>> MyDeployment.options(num_replicas=2, init_args=init_args).deploy()

    Returns:
        Deployment

    Raises:
        ValueError: If both num_replicas and _autoscaling_config are given.
    """
    replicas_given = num_replicas is not None
    autoscaling_given = _autoscaling_config is not None
    if replicas_given and autoscaling_given:
        raise ValueError("Manually setting num_replicas is not allowed when "
                         "_autoscaling_config is provided.")

    # Start from the defaults and overwrite only the options the caller
    # actually supplied, in a fixed order.
    config = DeploymentConfig()
    requested_options = {
        "num_replicas": num_replicas,
        "user_config": user_config,
        "max_concurrent_queries": max_concurrent_queries,
        "autoscaling_config": _autoscaling_config,
        "graceful_shutdown_wait_loop_s": _graceful_shutdown_wait_loop_s,
        "graceful_shutdown_timeout_s": _graceful_shutdown_timeout_s,
    }
    for option_name, option_value in requested_options.items():
        if option_value is not None:
            setattr(config, option_name, option_value)

    def decorator(_func_or_class):
        deployment_name = name
        if deployment_name is None:
            deployment_name = _func_or_class.__name__
        return Deployment(
            _func_or_class,
            deployment_name,
            config,
            version=version,
            prev_version=prev_version,
            init_args=init_args,
            init_kwargs=init_kwargs,
            route_prefix=route_prefix,
            ray_actor_options=ray_actor_options,
            _internal=True,
        )

    # This handles both parametrized and non-parametrized usage of the
    # decorator. See the @serve.batch code for more details.
    if callable(_func_or_class):
        return decorator(_func_or_class)
    return decorator
async def __init__(
    self,
    deployment_name,
    replica_tag,
    init_args,
    init_kwargs,
    deployment_config_proto_bytes: bytes,
    version: DeploymentVersion,
    controller_name: str,
    controller_namespace: str,
    detached: bool,
):
    """Prepare a replica actor without running the user's code yet.

    Resolves the deployment definition, parses the deployment config,
    registers the replica context and looks up the controller actor.
    Construction of the user callable is deferred: it is packaged in a
    closure stored on ``self._initialize_replica`` and ``self.replica``
    stays ``None`` until that closure has been awaited.

    Args:
        deployment_name: Name of the deployment this replica belongs to.
        replica_tag: Identifier of this replica.
        init_args: Positional args for the user class constructor.
        init_kwargs: Keyword args for the user class constructor.
        deployment_config_proto_bytes: Serialized ``DeploymentConfig``.
        version: Deployment version handed to ``RayServeReplica``.
        controller_name: Name of the controller actor; must be non-empty.
        controller_namespace: Namespace in which the controller actor is
            looked up.
        detached: Not referenced in this constructor body.
    """
    # NOTE(review): `import_path` and `serialized_deployment_def` are not
    # parameters of this method; presumably they are captured from an
    # enclosing scope that is not visible here -- confirm.
    if import_path is not None:
        module_name, attr_name = parse_import_path(import_path)
        deployment_def = getattr(import_module(module_name), attr_name)
    else:
        deployment_def = cloudpickle.loads(serialized_deployment_def)

    deployment_config = DeploymentConfig.from_proto_bytes(
        deployment_config_proto_bytes)

    # Functions are used as-is; classes are instantiated later, inside
    # initialize_replica().
    if inspect.isfunction(deployment_def):
        is_function = True
    elif inspect.isclass(deployment_def):
        is_function = False
    else:
        assert False, (
            "deployment_def must be function, class, or "
            "corresponding import path. Instead, it's type was "
            f"{type(deployment_def)}.")

    # Set the controller name so that serve.connect() in the user's
    # code will connect to the instance that this deployment is running
    # in.
    ray.serve.api._set_internal_replica_context(
        deployment_name,
        replica_tag,
        controller_name,
        controller_namespace,
        servable_object=None,
    )

    assert controller_name, "Must provide a valid controller_name"

    controller_handle = ray.get_actor(controller_name,
                                      namespace=controller_namespace)

    # This closure initializes user code and finalizes replica
    # startup. By splitting the initialization step like this,
    # we can already access this actor before the user code
    # has finished initializing.
    # The supervising state manager can then wait
    # for allocation of this replica by using the `is_allocated`
    # method. After that, it calls `reconfigure` to trigger
    # user code initialization.
    async def initialize_replica():
        if is_function:
            _callable = deployment_def
        else:
            # This allows deployments to define an async __init__
            # method (required for FastAPI).
            _callable = deployment_def.__new__(deployment_def)
            await sync_to_async(_callable.__init__)(*init_args, **init_kwargs)

        # Setting the context again to update the servable_object.
        ray.serve.api._set_internal_replica_context(
            deployment_name,
            replica_tag,
            controller_name,
            controller_namespace,
            servable_object=_callable,
        )

        self.replica = RayServeReplica(
            _callable,
            deployment_name,
            replica_tag,
            deployment_config,
            deployment_config.user_config,
            version,
            is_function,
            controller_handle,
        )

    # Is it fine that replica is None here?
    # Should we add a check in all methods that use self.replica
    # or, alternatively, create an async get_replica() method?
    self.replica = None
    self._initialize_replica = initialize_replica
def __init__(
    self,
    # For serve structured deployment, deployment body can be import path
    # to the class or function instead.
    func_or_class: Union[Callable, str],
    deployment_name: str,
    deployment_init_args: Tuple[Any],
    deployment_init_kwargs: Dict[str, Any],
    ray_actor_options: Dict[str, Any],
    other_args_to_resolve: Optional[Dict[str, Any]] = None,
):
    """Create a DAG node that wraps a class (or import path) as a deployment.

    Args:
        func_or_class: The deployment body, or an import path to it.
        deployment_name: Name to give the deployment unless the schema
            supplies a different user-specified one.
        deployment_init_args: Positional init args; any deployment DAG nodes
            inside are replaced with handles.
        deployment_init_kwargs: Keyword init args; treated like the
            positional ones.
        ray_actor_options: Actor options for the deployment when no schema
            is supplied.
        other_args_to_resolve: Extra arguments; a ``"deployment_schema"``
            entry, if present, configures the deployment.

    Raises:
        ValueError: If an InputNode is used anywhere in the arguments.
    """
    # Assign instance variables in base class constructor.
    super().__init__(
        deployment_init_args,
        deployment_init_kwargs,
        ray_actor_options,
        other_args_to_resolve=other_args_to_resolve,
    )
    if self._contains_input_node():
        raise ValueError(
            "InputNode handles user dynamic input the the DAG, and "
            "cannot be used as args, kwargs, or other_args_to_resolve "
            "in the DeploymentNode constructor because it is not available "
            "at class construction or binding time.")

    # Deployment can be passed into other DAGNodes as init args. This is
    # supported pattern in ray DAG that user can instantiate and pass class
    # instances as init args to others.
    # However in ray serve we send init args via .remote() that requires
    # pickling, and all DAGNode types are not picklable by design.
    # Thus we need convert all DeploymentNode used in init args into
    # deployment handles (executable and picklable) in ray serve DAG to make
    # serve DAG end to end executable.
    def replace_with_handle(node):
        if isinstance(node, DeploymentNode):
            return node._get_serve_deployment_handle(
                node._deployment, node._bound_other_args_to_resolve)
        elif isinstance(node, (DeploymentMethodNode, DeploymentFunctionNode)):
            from ray.serve.pipeline.json_serde import DAGNodeEncoder

            serve_dag_root_json = json.dumps(node, cls=DAGNodeEncoder)
            return RayServeDAGHandle(serve_dag_root_json)

    (
        replaced_deployment_init_args,
        replaced_deployment_init_kwargs,
    ) = self.apply_functional(
        [deployment_init_args, deployment_init_kwargs],
        predictate_fn=lambda node: isinstance(node, (
            DeploymentNode, DeploymentMethodNode, DeploymentFunctionNode)),
        apply_fn=replace_with_handle,
    )

    if "deployment_schema" in self._bound_other_args_to_resolve:
        deployment_schema: DeploymentSchema = self._bound_other_args_to_resolve[
            "deployment_schema"]
        deployment_shell = schema_to_deployment(deployment_schema)

        # Prefer user specified name to override the generated one.
        if (inspect.isclass(func_or_class)
                and deployment_shell.name != func_or_class.__name__):
            deployment_name = deployment_shell.name

        # Set the route prefix, prefer the one user supplied,
        # otherwise set it to /deployment_name
        if (deployment_shell.route_prefix is None
                or deployment_shell.route_prefix != f"/{deployment_shell.name}"):
            route_prefix = deployment_shell.route_prefix
        else:
            route_prefix = f"/{deployment_name}"

        self._deployment = deployment_shell.options(
            func_or_class=func_or_class,
            name=deployment_name,
            init_args=replaced_deployment_init_args,
            init_kwargs=replaced_deployment_init_kwargs,
            route_prefix=route_prefix,
        )
    else:
        self._deployment: Deployment = Deployment(
            func_or_class,
            deployment_name,
            # TODO: (jiaodong) Support deployment config from user input
            DeploymentConfig(),
            init_args=replaced_deployment_init_args,
            init_kwargs=replaced_deployment_init_kwargs,
            ray_actor_options=ray_actor_options,
            _internal=True,
        )

    # Use the mapping bound by the base constructor, not the raw argument:
    # the raw argument defaults to None, while the bound mapping is what the
    # rest of this constructor (and replace_with_handle for child nodes)
    # already relies on.
    self._deployment_handle: Union[
        RayServeLazySyncHandle, RayServeHandle, RayServeSyncHandle
    ] = self._get_serve_deployment_handle(
        self._deployment, self._bound_other_args_to_resolve)
def get_deploy_args(
    self,
    name: str,
    deployment_def: Union[Callable, Type[Callable], str],
    init_args: Tuple[Any],
    init_kwargs: Dict[Any, Any],
    ray_actor_options: Optional[Dict] = None,
    config: Optional[Union[DeploymentConfig, Dict[str, Any]]] = None,
    version: Optional[str] = None,
    route_prefix: Optional[str] = None,
) -> Dict:
    """Build the keyword arguments the controller needs to deploy.

    The current job's runtime environment is merged into
    ``ray_actor_options``: it is used wholesale when the options carry no
    ``runtime_env``, otherwise only its ``working_dir`` is filled in (and only
    when it is not ``None``). ``version`` is stored on the deployment config.
    A warning is logged when autoscaling can never trigger.

    Returns:
        A dict with the keys ``name``, ``deployment_config_proto_bytes``,
        ``replica_config_proto_bytes``, ``route_prefix`` and
        ``deployer_job_id``.

    Raises:
        TypeError: If ``config`` is neither a dict nor a DeploymentConfig.
    """
    config = {} if config is None else config
    ray_actor_options = {} if ray_actor_options is None else ray_actor_options

    job_runtime_env = ray.get_runtime_context().runtime_env
    if "runtime_env" not in ray_actor_options:
        ray_actor_options["runtime_env"] = job_runtime_env
    else:
        # It is illegal to set field working_dir to None.
        job_working_dir = job_runtime_env.get("working_dir")
        if job_working_dir is not None:
            ray_actor_options["runtime_env"].setdefault(
                "working_dir", job_working_dir
            )

    replica_config = ReplicaConfig.create(
        deployment_def,
        init_args=init_args,
        init_kwargs=init_kwargs,
        ray_actor_options=ray_actor_options,
    )

    if not isinstance(config, (dict, DeploymentConfig)):
        raise TypeError("config must be a DeploymentConfig or a dictionary.")
    if isinstance(config, dict):
        deployment_config = DeploymentConfig.parse_obj(config)
    else:
        deployment_config = config

    deployment_config.version = version

    autoscaling_config = deployment_config.autoscaling_config
    if autoscaling_config is not None:
        target = autoscaling_config.target_num_ongoing_requests_per_replica
        if deployment_config.max_concurrent_queries < target:
            logger.warning(
                "Autoscaling will never happen, "
                "because 'max_concurrent_queries' is less than "
                "'target_num_ongoing_requests_per_replica' now."
            )

    return {
        "name": name,
        "deployment_config_proto_bytes": deployment_config.to_proto_bytes(),
        "replica_config_proto_bytes": replica_config.to_proto_bytes(),
        "route_prefix": route_prefix,
        "deployer_job_id": ray.get_runtime_context().job_id,
    }
def deploy(
        self,
        name: str,
        deployment_def: Union[Callable, Type[Callable], str],
        init_args: Tuple[Any],
        init_kwargs: Dict[Any, Any],
        ray_actor_options: Optional[Dict] = None,
        config: Optional[Union[DeploymentConfig, Dict[str, Any]]] = None,
        version: Optional[str] = None,
        prev_version: Optional[str] = None,
        route_prefix: Optional[str] = None,
        url: Optional[str] = None,
        _blocking: Optional[bool] = True) -> Optional[GoalId]:
    """Send a deployment to the controller and optionally wait for it.

    The current job's runtime environment is merged into
    ``ray_actor_options``: it is used wholesale when the options carry no
    ``runtime_env``, otherwise only its ``working_dir`` is filled in.

    Args:
        name: Deployment name.
        deployment_def: Function, class, or import path to deploy.
        init_args: Positional init args for the replica config.
        init_kwargs: Keyword init args for the replica config.
        ray_actor_options: Actor options; defaults to an empty dict.
        config: DeploymentConfig or a dict parsed into one; defaults to an
            empty dict.
        version: Version passed to the controller and used in log messages.
        prev_version: Previous version passed to the controller.
        route_prefix: Route prefix passed to the controller.
        url: Only used in the "is ready" log message.
        _blocking: When truthy, wait for the deployment goal and return
            ``None``; otherwise return the goal id immediately.

    Returns:
        The goal id when ``_blocking`` is falsy, otherwise ``None``.

    Raises:
        TypeError: If ``config`` is neither a dict nor a DeploymentConfig.
    """
    if config is None:
        config = {}
    if ray_actor_options is None:
        ray_actor_options = {}

    curr_job_env = ray.get_runtime_context().runtime_env
    if "runtime_env" in ray_actor_options:
        # Only inherit the job's working_dir when there is one: writing
        # working_dir=None into a runtime_env is not a legal value.
        job_working_dir = curr_job_env.get("working_dir")
        if job_working_dir is not None:
            ray_actor_options["runtime_env"].setdefault(
                "working_dir", job_working_dir)
    else:
        ray_actor_options["runtime_env"] = curr_job_env

    replica_config = ReplicaConfig(
        deployment_def,
        init_args=init_args,
        init_kwargs=init_kwargs,
        ray_actor_options=ray_actor_options)

    if isinstance(config, dict):
        deployment_config = DeploymentConfig.parse_obj(config)
    elif isinstance(config, DeploymentConfig):
        deployment_config = config
    else:
        raise TypeError(
            "config must be a DeploymentConfig or a dictionary.")

    if deployment_config.autoscaling_config is not None and \
        deployment_config.max_concurrent_queries < deployment_config. \
            autoscaling_config.target_num_ongoing_requests_per_replica:
        logger.warning("Autoscaling will never happen, "
                       "because 'max_concurrent_queries' is less than "
                       "'target_num_ongoing_requests_per_replica' now.")

    goal_id, updating = ray.get(
        self._controller.deploy.remote(name,
                                       deployment_config.to_proto_bytes(),
                                       replica_config, version, prev_version,
                                       route_prefix,
                                       ray.get_runtime_context().job_id))

    tag = f"component=serve deployment={name}"

    if updating:
        msg = f"Updating deployment '{name}'"
        if version is not None:
            msg += f" to version '{version}'"
        logger.info(f"{msg}. {tag}")
    else:
        logger.info(f"Deployment '{name}' is already at version "
                    f"'{version}', not updating. {tag}")

    if _blocking:
        self._wait_for_goal(goal_id)

        if url is not None:
            url_part = f" at `{url}`"
        else:
            url_part = ""
        logger.info(
            f"Deployment '{name}{':'+version if version else ''}' is ready"
            f"{url_part}. {tag}")
    else:
        return goal_id
def deployment(
    _func_or_class: Optional[Callable] = None,
    name: Optional[str] = None,
    version: Optional[str] = None,
    num_replicas: Optional[int] = None,
    init_args: Optional[Tuple[Any]] = None,
    init_kwargs: Optional[Dict[Any, Any]] = None,
    route_prefix: Union[str, None, DEFAULT] = DEFAULT.VALUE,
    ray_actor_options: Optional[Dict] = None,
    user_config: Optional[Any] = None,
    max_concurrent_queries: Optional[int] = None,
    _autoscaling_config: Optional[Union[Dict, AutoscalingConfig]] = None,
    _graceful_shutdown_wait_loop_s: Optional[float] = None,
    _graceful_shutdown_timeout_s: Optional[float] = None,
    _health_check_period_s: Optional[float] = None,
    _health_check_timeout_s: Optional[float] = None,
) -> Callable[[Callable], Deployment]:
    """Define a Serve deployment.

    Usable both bare (``@serve.deployment``) and with arguments
    (``@serve.deployment(name=...)``).

    Args:
        name (Optional[str]): Globally-unique name identifying this
            deployment. If not provided, the name of the class or function
            will be used.
        version (Optional[str]): Version of the deployment. This is used to
            indicate a code change for the deployment; when it is re-deployed
            with a version change, a rolling update of the replicas will be
            performed. If not provided, every deployment will be treated as a
            new version.
        num_replicas (Optional[int]): The number of processes to start up
            that will handle requests to this deployment. Defaults to 1.
        init_args (Optional[Tuple]): Positional args to be passed to the
            class constructor when starting up deployment replicas. These can
            also be passed when you call `.deploy()` on the returned
            Deployment.
        init_kwargs (Optional[Dict]): Keyword args to be passed to the class
            constructor when starting up deployment replicas. These can also
            be passed when you call `.deploy()` on the returned Deployment.
        route_prefix (Optional[str]): Requests to paths under this HTTP path
            prefix will be routed to this deployment. Defaults to '/{name}'.
            When set to 'None', no HTTP endpoint will be created. Routing is
            done based on longest-prefix match, so if you have deployment A
            with a prefix of '/a' and deployment B with a prefix of '/a/b',
            requests to '/a', '/a/', and '/a/c' go to A and requests to
            '/a/b', '/a/b/', and '/a/b/c' go to B. Routes must not end with a
            '/' unless they're the root (just '/'), which acts as a
            catch-all.
        ray_actor_options: Options to be passed to the Ray actor constructor
            such as resource requirements.
        user_config (Optional[Any]): [experimental] Config to pass to the
            reconfigure method of the deployment. This can be updated
            dynamically without changing the version of the deployment and
            restarting its replicas. The user_config needs to be hashable to
            keep track of updates, so it must only contain hashable types, or
            hashable types nested in lists and dictionaries.
        max_concurrent_queries (Optional[int]): The maximum number of queries
            that will be sent to a replica of this deployment without
            receiving a response. Defaults to 100.

    Example:
        >>> from ray import serve
        >>> @serve.deployment(name="deployment1", version="v1") # doctest: +SKIP
        ... class MyDeployment: # doctest: +SKIP
        ...     pass # doctest: +SKIP

        >>> MyDeployment.deploy(*init_args) # doctest: +SKIP
        >>> MyDeployment.options( # doctest: +SKIP
        ...     num_replicas=2, init_args=init_args).deploy()

    Returns:
        Deployment

    Raises:
        ValueError: If num_replicas is 0, or if both num_replicas and
            _autoscaling_config are given.
    """
    # Num of replicas should not be 0.
    # TODO(Sihan) separate num_replicas attribute from internal and api
    if num_replicas == 0:
        raise ValueError("num_replicas is expected to larger than 0")

    replicas_given = num_replicas is not None
    autoscaling_given = _autoscaling_config is not None
    if replicas_given and autoscaling_given:
        raise ValueError(
            "Manually setting num_replicas is not allowed when "
            "_autoscaling_config is provided."
        )

    # Options left as None keep the DeploymentConfig defaults.
    requested_options = {
        "num_replicas": num_replicas,
        "user_config": user_config,
        "max_concurrent_queries": max_concurrent_queries,
        "autoscaling_config": _autoscaling_config,
        "graceful_shutdown_wait_loop_s": _graceful_shutdown_wait_loop_s,
        "graceful_shutdown_timeout_s": _graceful_shutdown_timeout_s,
        "health_check_period_s": _health_check_period_s,
        "health_check_timeout_s": _health_check_timeout_s,
    }
    config = DeploymentConfig.from_default(ignore_none=True, **requested_options)

    def decorator(_func_or_class):
        deployment_name = name
        if deployment_name is None:
            deployment_name = _func_or_class.__name__
        return Deployment(
            _func_or_class,
            deployment_name,
            config,
            version=version,
            init_args=init_args,
            init_kwargs=init_kwargs,
            route_prefix=route_prefix,
            ray_actor_options=ray_actor_options,
            _internal=True,
        )

    # This handles both parametrized and non-parametrized usage of the
    # decorator. See the @serve.batch code for more details.
    if callable(_func_or_class):
        return decorator(_func_or_class)
    return decorator
def __init__(
    self,
    # For serve structured deployment, deployment body can be import path
    # to the class or function instead.
    func_or_class: Union[Callable, str],
    deployment_name: str,
    deployment_init_args: Tuple[Any],
    deployment_init_kwargs: Dict[str, Any],
    ray_actor_options: Dict[str, Any],
    other_args_to_resolve: Optional[Dict[str, Any]] = None,
):
    """Create the node and the ``Deployment`` object it wraps.

    The constructor forwards the bound arguments to the base class, swaps
    every deployment-related DAG node found in the init args / kwargs for
    a handle (or ``None``), and then builds ``self._deployment`` and
    ``self._deployment_handle``.

    Args:
        func_or_class: Deployment body; a callable, or a string import
            path to one.
        deployment_name: Name given to the deployment. It may be replaced
            by the name carried by a ``"deployment_schema"`` entry (see
            the inline comments below).
        deployment_init_args: Positional init args of the deployment.
        deployment_init_kwargs: Keyword init args of the deployment.
        ray_actor_options: Actor options forwarded to the base class and,
            when no schema is present, to the ``Deployment``.
        other_args_to_resolve: Extra bound arguments forwarded to the base
            class. NOTE(review): presumably exposed afterwards as
            ``self._bound_other_args_to_resolve``, which is where the
            optional ``"deployment_schema"`` entry is read from - confirm
            against the base class.
    """
    # Instance variables for the bound arguments are set up by the base
    # class constructor.
    super().__init__(
        deployment_init_args,
        deployment_init_kwargs,
        ray_actor_options,
        other_args_to_resolve=other_args_to_resolve,
    )

    # A deployment may be handed to other DAGNodes as an init arg: ray DAG
    # supports instantiating a class and passing the instance onwards.
    # Ray serve, however, ships init args through .remote(), which pickles
    # them, and DAGNode types are unpicklable by design. Every
    # DeploymentNode appearing in the init args is therefore converted to
    # a deployment handle (executable and picklable) so that the serve DAG
    # can run end to end.
    # TODO(jiaodong): This part does some magic for DAGDriver and will
    # throw error with weird pickle replace table error. Move this out.

    def is_deployment_dag_node(node):
        # Match every deployment DAGNode flavour, including the ones that
        # `to_handle` maps to None: no replacement may run into an already
        # re-resolved child DAGNode, which would otherwise end in a
        # KeyError.
        return isinstance(
            node,
            (
                DeploymentNode,
                DeploymentMethodNode,
                DeploymentFunctionNode,
                DeploymentExecutorNode,
                DeploymentFunctionExecutorNode,
                DeploymentMethodExecutorNode,
            ),
        )

    def to_handle(node):
        if isinstance(node, DeploymentNode):
            return RayServeLazySyncHandle(node._deployment.name)
        if isinstance(node, DeploymentExecutorNode):
            return node._deployment_handle
        # Any other matched node type is deliberately replaced by None.
        return None

    handle_ready_args, handle_ready_kwargs = self.apply_functional(
        [deployment_init_args, deployment_init_kwargs],
        predictate_fn=is_deployment_dag_node,
        apply_fn=to_handle,
    )

    if "deployment_schema" not in self._bound_other_args_to_resolve:
        # No schema supplied: wrap the body in a deployment with the
        # default config.
        self._deployment: Deployment = Deployment(
            func_or_class,
            deployment_name,
            # TODO: (jiaodong) Support deployment config from user input
            DeploymentConfig(),
            init_args=handle_ready_args,
            init_kwargs=handle_ready_kwargs,
            ray_actor_options=ray_actor_options,
            _internal=True,
        )
    else:
        schema: DeploymentSchema = self._bound_other_args_to_resolve[
            "deployment_schema"
        ]
        shell = schema_to_deployment(schema)

        # A name set by the user in the schema wins over the generated
        # one; this only applies when the body is a class whose __name__
        # differs from the schema's name.
        if inspect.isclass(func_or_class):
            if shell.name != func_or_class.__name__:
                deployment_name = shell.name

        # Keep a route prefix the user supplied (None included). Only a
        # prefix that is exactly "/<schema name>" is rewritten to follow
        # the final deployment name.
        prefix_follows_name = (
            shell.route_prefix is not None
            and shell.route_prefix == f"/{shell.name}"
        )
        route_prefix = (
            f"/{deployment_name}" if prefix_follows_name else shell.route_prefix
        )

        self._deployment = shell.options(
            func_or_class=func_or_class,
            name=deployment_name,
            init_args=handle_ready_args,
            init_kwargs=handle_ready_kwargs,
            route_prefix=route_prefix,
        )

    self._deployment_handle = RayServeLazySyncHandle(self._deployment.name)