Пример #1
0
def test_with_proto():
    # Test roundtrip
    config = DeploymentConfig(num_replicas=100, max_concurrent_queries=16)
    assert config == DeploymentConfig.from_proto_bytes(config.to_proto_bytes())

    # Test user_config object
    config = DeploymentConfig(user_config={"python": ("native", ["objects"])})
    assert config == DeploymentConfig.from_proto_bytes(config.to_proto_bytes())
Пример #2
0
    def deploy(
        self,
        name: str,
        deployment_config_proto_bytes: bytes,
        replica_config: ReplicaConfig,
        version: Optional[str],
        prev_version: Optional[str],
        route_prefix: Optional[str],
        deployer_job_id: "ray._raylet.JobID",
    ) -> Tuple[Optional[GoalId], bool]:
        if route_prefix is not None:
            assert route_prefix.startswith("/")

        deployment_config = DeploymentConfig.from_proto_bytes(
            deployment_config_proto_bytes)

        if prev_version is not None:
            existing_deployment_info = self.deployment_state_manager.get_deployment(
                name)
            if existing_deployment_info is None or not existing_deployment_info.version:
                raise ValueError(
                    f"prev_version '{prev_version}' is specified but "
                    "there is no existing deployment.")
            if existing_deployment_info.version != prev_version:
                raise ValueError(
                    f"prev_version '{prev_version}' "
                    "does not match with the existing "
                    f"version '{existing_deployment_info.version}'.")

        autoscaling_config = deployment_config.autoscaling_config
        if autoscaling_config is not None:
            # TODO: is this the desired behaviour? Should this be a setting?
            deployment_config.num_replicas = autoscaling_config.min_replicas

            autoscaling_policy = BasicAutoscalingPolicy(autoscaling_config)
        else:
            autoscaling_policy = None

        deployment_info = DeploymentInfo(
            actor_name=name,
            serialized_deployment_def=replica_config.serialized_deployment_def,
            version=version,
            deployment_config=deployment_config,
            replica_config=replica_config,
            deployer_job_id=deployer_job_id,
            start_time_ms=int(time.time() * 1000),
            autoscaling_policy=autoscaling_policy,
        )
        # TODO(architkulkarni): When a deployment is redeployed, even if
        # the only change was num_replicas, the start_time_ms is refreshed.
        # Is this the desired behaviour?

        goal_id, updating = self.deployment_state_manager.deploy(
            name, deployment_info)

        if route_prefix is not None:
            endpoint_info = EndpointInfo(route=route_prefix)
            self.endpoint_state.update_endpoint(name, endpoint_info)

        return goal_id, updating
Пример #3
0
    def deploy(
        self,
        name: str,
        deployment_config_proto_bytes: bytes,
        replica_config_proto_bytes: bytes,
        route_prefix: Optional[str],
        deployer_job_id: Union["ray._raylet.JobID", bytes],
    ) -> bool:
        if route_prefix is not None:
            assert route_prefix.startswith("/")

        deployment_config = DeploymentConfig.from_proto_bytes(
            deployment_config_proto_bytes
        )
        version = deployment_config.version
        replica_config = ReplicaConfig.from_proto_bytes(
            replica_config_proto_bytes, deployment_config.needs_pickle()
        )

        autoscaling_config = deployment_config.autoscaling_config
        if autoscaling_config is not None:
            # TODO: is this the desired behaviour? Should this be a setting?
            deployment_config.num_replicas = autoscaling_config.min_replicas

            autoscaling_policy = BasicAutoscalingPolicy(autoscaling_config)
        else:
            autoscaling_policy = None
        if isinstance(deployer_job_id, bytes):
            deployer_job_id = ray.JobID.from_int(
                int.from_bytes(deployer_job_id, "little")
            )
        deployment_info = DeploymentInfo(
            actor_name=name,
            version=version,
            deployment_config=deployment_config,
            replica_config=replica_config,
            deployer_job_id=deployer_job_id,
            start_time_ms=int(time.time() * 1000),
            autoscaling_policy=autoscaling_policy,
        )
        # TODO(architkulkarni): When a deployment is redeployed, even if
        # the only change was num_replicas, the start_time_ms is refreshed.
        # Is this the desired behaviour?
        updating = self.deployment_state_manager.deploy(name, deployment_info)

        if route_prefix is not None:
            endpoint_info = EndpointInfo(route=route_prefix)
            self.endpoint_state.update_endpoint(name, endpoint_info)
        else:
            self.endpoint_state.delete_endpoint(name)

        return updating
Пример #4
0
def test_zero_default_proto():
    # Test that options set to zero (protobuf default value) still retain their
    # original value after being serialized and deserialized.
    config = DeploymentConfig(
        autoscaling_config={
            "min_replicas": 1,
            "max_replicas": 2,
            "smoothing_factor": 0.123,
            "downscale_delay_s": 0
        })
    serialized_config = config.to_proto_bytes()
    deserialized_config = DeploymentConfig.from_proto_bytes(serialized_config)
    new_delay_s = deserialized_config.autoscaling_config.downscale_delay_s
    assert new_delay_s == 0

    # Check that this test is not spuriously passing.
    default_downscale_delay_s = AutoscalingConfig().downscale_delay_s
    assert new_delay_s != default_downscale_delay_s
Пример #5
0
        async def __init__(
            self,
            deployment_name,
            replica_tag,
            serialized_deployment_def: bytes,
            serialized_init_args: bytes,
            serialized_init_kwargs: bytes,
            deployment_config_proto_bytes: bytes,
            version: DeploymentVersion,
            controller_name: str,
            detached: bool,
        ):
            configure_component_logger(
                component_type="deployment",
                component_name=deployment_name,
                component_id=replica_tag,
            )

            deployment_def = cloudpickle.loads(serialized_deployment_def)

            if isinstance(deployment_def, str):
                import_path = deployment_def
                module_name, attr_name = parse_import_path(import_path)
                deployment_def = getattr(import_module(module_name), attr_name)
                # For ray or serve decorated class or function, strip to return
                # original body
                if isinstance(deployment_def, RemoteFunction):
                    deployment_def = deployment_def._function
                elif isinstance(deployment_def, ActorClass):
                    deployment_def = deployment_def.__ray_metadata__.modified_class
                elif isinstance(deployment_def, Deployment):
                    logger.warning(
                        f'The import path "{import_path}" contains a '
                        "decorated Serve deployment. The decorator's settings "
                        "are ignored when deploying via import path.")
                    deployment_def = deployment_def.func_or_class

            init_args = cloudpickle.loads(serialized_init_args)
            init_kwargs = cloudpickle.loads(serialized_init_kwargs)

            deployment_config = DeploymentConfig.from_proto_bytes(
                deployment_config_proto_bytes)

            if inspect.isfunction(deployment_def):
                is_function = True
            elif inspect.isclass(deployment_def):
                is_function = False
            else:
                assert False, (
                    "deployment_def must be function, class, or "
                    "corresponding import path. Instead, it's type was "
                    f"{type(deployment_def)}.")

            # Set the controller name so that serve.connect() in the user's
            # code will connect to the instance that this deployment is running
            # in.
            ray.serve.context.set_internal_replica_context(
                deployment_name,
                replica_tag,
                controller_name,
                servable_object=None,
            )

            assert controller_name, "Must provide a valid controller_name"

            controller_handle = ray.get_actor(controller_name,
                                              namespace=SERVE_NAMESPACE)

            # This closure initializes user code and finalizes replica
            # startup. By splitting the initialization step like this,
            # we can already access this actor before the user code
            # has finished initializing.
            # The supervising state manager can then wait
            # for allocation of this replica by using the `is_allocated`
            # method. After that, it calls `reconfigure` to trigger
            # user code initialization.
            async def initialize_replica():
                if is_function:
                    _callable = deployment_def
                else:
                    # This allows deployments to define an async __init__
                    # method (required for FastAPI).
                    _callable = deployment_def.__new__(deployment_def)
                    await sync_to_async(_callable.__init__)(*init_args,
                                                            **init_kwargs)

                # Setting the context again to update the servable_object.
                ray.serve.context.set_internal_replica_context(
                    deployment_name,
                    replica_tag,
                    controller_name,
                    servable_object=_callable,
                )

                self.replica = RayServeReplica(
                    _callable,
                    deployment_name,
                    replica_tag,
                    deployment_config,
                    deployment_config.user_config,
                    version,
                    is_function,
                    controller_handle,
                )

            # Is it fine that replica is None here?
            # Should we add a check in all methods that use self.replica
            # or, alternatively, create an async get_replica() method?
            self.replica = None
            self._initialize_replica = initialize_replica
Пример #6
0
        async def __init__(
            self,
            deployment_name,
            replica_tag,
            init_args,
            init_kwargs,
            deployment_config_proto_bytes: bytes,
            version: DeploymentVersion,
            controller_name: str,
            controller_namespace: str,
            detached: bool,
        ):

            if import_path is not None:
                module_name, attr_name = parse_import_path(import_path)
                deployment_def = getattr(import_module(module_name), attr_name)
            else:
                deployment_def = cloudpickle.loads(serialized_deployment_def)

            deployment_config = DeploymentConfig.from_proto_bytes(
                deployment_config_proto_bytes)

            if inspect.isfunction(deployment_def):
                is_function = True
            elif inspect.isclass(deployment_def):
                is_function = False
            else:
                assert False, (
                    "deployment_def must be function, class, or "
                    "corresponding import path. Instead, it's type was "
                    f"{type(deployment_def)}.")

            # Set the controller name so that serve.connect() in the user's
            # code will connect to the instance that this deployment is running
            # in.
            ray.serve.api._set_internal_replica_context(
                deployment_name,
                replica_tag,
                controller_name,
                controller_namespace,
                servable_object=None,
            )

            assert controller_name, "Must provide a valid controller_name"

            controller_handle = ray.get_actor(controller_name,
                                              namespace=controller_namespace)

            # This closure initializes user code and finalizes replica
            # startup. By splitting the initialization step like this,
            # we can already access this actor before the user code
            # has finished initializing.
            # The supervising state manager can then wait
            # for allocation of this replica by using the `is_allocated`
            # method. After that, it calls `reconfigure` to trigger
            # user code initialization.
            async def initialize_replica():
                if is_function:
                    _callable = deployment_def
                else:
                    # This allows deployments to define an async __init__
                    # method (required for FastAPI).
                    _callable = deployment_def.__new__(deployment_def)
                    await sync_to_async(_callable.__init__)(*init_args,
                                                            **init_kwargs)

                # Setting the context again to update the servable_object.
                ray.serve.api._set_internal_replica_context(
                    deployment_name,
                    replica_tag,
                    controller_name,
                    controller_namespace,
                    servable_object=_callable,
                )

                self.replica = RayServeReplica(
                    _callable,
                    deployment_name,
                    replica_tag,
                    deployment_config,
                    deployment_config.user_config,
                    version,
                    is_function,
                    controller_handle,
                )

            # Is it fine that replica is None here?
            # Should we add a check in all methods that use self.replica
            # or, alternatively, create an async get_replica() method?
            self.replica = None
            self._initialize_replica = initialize_replica