def test_refresh_controller_after_death(shutdown_ray, detached):
    """Check if serve.start() refreshes the controller handle if it's dead."""

    ray.init(namespace="ray_namespace")
    serve.shutdown()  # Ensure serve isn't running before beginning the test
    serve.start(detached=detached)

    old_handle = get_global_client()._controller
    ray.kill(old_handle, no_restart=True)

    def controller_died(handle):
        try:
            ray.get(handle.check_alive.remote())
            return False
        except RayActorError:
            return True

    wait_for_condition(controller_died, handle=old_handle, timeout=15)

    # Call start again to refresh handle
    serve.start(detached=detached)

    new_handle = get_global_client()._controller
    assert new_handle is not old_handle

    # Health check should not error
    ray.get(new_handle.check_alive.remote())

    serve.shutdown()
    ray.shutdown()
async def put_all_deployments(self, req: Request) -> Response:
    from ray.serve.context import get_global_client
    from ray.serve.schema import ServeApplicationSchema

    config = ServeApplicationSchema.parse_obj(await req.json())
    get_global_client().deploy_app(config)

    return Response()
def construct_wide_fanout_graph_with_pure_handle(
    fanout_degree, sync_handle: bool, init_delay_secs=0, compute_delay_secs=0
) -> RayServeSyncHandle:

    nodes = []
    for id in range(fanout_degree):
        Node.options(name=str(id)).deploy(id, init_delay_secs=init_delay_secs)
        nodes.append(get_global_client().get_handle(str(id), sync=sync_handle))

    CombineNode.options(name="combine").deploy(
        nodes, compute_delay_secs, sync_handle=sync_handle
    )
    return get_global_client().get_handle("combine", sync=sync_handle)
def deploy(self, *init_args, _blocking=True, **init_kwargs):
    """Deploy or update this deployment.

    Args:
        init_args: args to pass to the class __init__ method. Not valid if
            this deployment wraps a function.
        init_kwargs: kwargs to pass to the class __init__ method. Not valid
            if this deployment wraps a function.
    """
    if len(init_args) == 0 and self._init_args is not None:
        init_args = self._init_args
    if len(init_kwargs) == 0 and self._init_kwargs is not None:
        init_kwargs = self._init_kwargs

    return get_global_client().deploy(
        self._name,
        self._func_or_class,
        init_args,
        init_kwargs,
        ray_actor_options=self._ray_actor_options,
        config=self._config,
        version=self._version,
        prev_version=self._prev_version,
        route_prefix=self.route_prefix,
        url=self.url,
        _blocking=_blocking,
    )
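# A minimal usage sketch for Deployment.deploy(), assuming a local Ray cluster
# and the decorator-based deployment API used elsewhere in this file. The
# `Counter` class, its name, and its init argument are illustrative only.
import ray
from ray import serve

ray.init()
serve.start()


@serve.deployment(name="counter", route_prefix="/counter")
class Counter:  # hypothetical example deployment
    def __init__(self, start: int):
        self.count = start

    def __call__(self, *args) -> int:
        self.count += 1
        return self.count


# Positional args passed to deploy() are forwarded to Counter.__init__.
Counter.deploy(0)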
def url(self) -> Optional[str]:
    """Full HTTP url for this deployment."""
    if self._route_prefix is None:
        # this deployment is not exposed over HTTP
        return None

    return get_global_client().root_url + self.route_prefix
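# Hedged sketch of using the url property, assuming the hypothetical `Counter`
# deployment from the previous sketch was deployed with route_prefix="/counter".
# `requests` is used only for illustration.
import requests

# Roughly "http://127.0.0.1:8000/counter" with default HTTP options.
print(Counter.url)
resp = requests.get(Counter.url)
print(resp.text)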
async def get_all_deployments(self, req: Request) -> Response:
    from ray.serve.context import get_global_client

    client = get_global_client()

    return Response(
        text=json.dumps(client.get_app_config()),
        content_type="application/json",
    )
async def put_all_deployments(self, req: Request) -> Response:
    from ray import serve
    from ray.serve.context import get_global_client
    from ray.serve.application import Application

    app = Application.from_dict(await req.json())
    serve.run(app, _blocking=False)

    new_names = set()
    for deployment in app.deployments.values():
        new_names.add(deployment.name)

    all_deployments = serve.list_deployments()
    all_names = set(all_deployments.keys())
    names_to_delete = all_names.difference(new_names)
    get_global_client().delete_deployments(names_to_delete)

    return Response()
def construct_long_chain_graph_with_pure_handle(
    chain_length, sync_handle: bool, init_delay_secs=0, compute_delay_secs=0
):
    prev_handle = None
    for id in range(chain_length):
        Node.options(name=str(id)).deploy(
            id, prev_handle, init_delay_secs, compute_delay_secs, sync_handle
        )
        prev_handle = get_global_client().get_handle(str(id), sync=sync_handle)

    return prev_handle
async def get_all_deployment_statuses(self, req: Request) -> Response:
    from ray.serve.context import get_global_client
    from ray.serve.schema import serve_status_to_schema

    client = get_global_client()

    serve_status_schema = serve_status_to_schema(client.get_serve_status())
    return Response(
        text=serve_status_schema.json(),
        content_type="application/json",
    )
def test_handle_cache_out_of_scope(serve_instance):
    # https://github.com/ray-project/ray/issues/18980
    initial_num_cached = len(get_global_client().handle_cache)

    @serve.deployment(name="f")
    def f():
        return "hi"

    handle = serve.run(f.bind())
    handle_cache = get_global_client().handle_cache
    assert len(handle_cache) == initial_num_cached + 1

    def sender_where_handle_goes_out_of_scope():
        f = serve.get_deployment("f").get_handle()
        assert f is handle
        assert ray.get(f.remote()) == "hi"

    [sender_where_handle_goes_out_of_scope() for _ in range(30)]
    assert len(handle_cache) == initial_num_cached + 1
def test_override_namespace(shutdown_ray, detached):
    """Test the _override_controller_namespace flag in serve.start()."""
    ray_namespace = "ray_namespace"
    controller_namespace = "controller_namespace"

    ray.init(namespace=ray_namespace)
    serve.start(
        detached=detached, _override_controller_namespace=controller_namespace
    )

    controller_name = get_global_client()._controller_name
    ray.get_actor(controller_name, namespace=controller_namespace)

    serve.shutdown()
def get_handle(
    self, sync: Optional[bool] = True
) -> Union[RayServeHandle, RayServeSyncHandle]:
    """Get a ServeHandle to this deployment to invoke it from Python.

    Args:
        sync: If true, then Serve will return a ServeHandle that works
            everywhere. Otherwise, Serve will return an asyncio-optimized
            ServeHandle that's only usable in an asyncio loop.

    Returns:
        ServeHandle
    """
    return get_global_client().get_handle(self._name, missing_ok=True, sync=sync)
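# Hedged sketch of invoking a deployment through a handle, assuming the
# hypothetical `Counter` deployment from the earlier sketch has been deployed.
import ray

# sync=True returns a handle usable outside an asyncio event loop;
# .remote() returns an ObjectRef that ray.get() resolves to the result.
handle = Counter.get_handle(sync=True)
print(ray.get(handle.remote()))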
def shutdown() -> None:
    """Completely shut down the connected Serve instance.

    Shuts down all processes and deletes all state associated with the
    instance.
    """
    try:
        client = get_global_client()
    except RayServeException:
        logger.info(
            "Nothing to shut down. There's no Serve application "
            "running on this Ray cluster."
        )
        return

    client.shutdown()
    set_global_client(None)
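# Minimal sketch of tearing down a Serve instance, assuming a detached instance
# was started earlier on a long-running cluster (the address is an assumption).
import ray
from ray import serve

ray.init(address="auto")
serve.start(detached=True)

# ... deploy and use deployments ...

# Removes the controller, HTTP proxies, and all replicas for this instance.
serve.shutdown()
ray.shutdown()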
async def put_all_deployments(self, req: Request) -> Response:
    from ray import serve
    from ray.serve.context import get_global_client
    from ray.serve.schema import ServeApplicationSchema
    from ray.serve.application import Application

    config = ServeApplicationSchema.parse_obj(await req.json())
    if config.import_path is not None:
        client = get_global_client(_override_controller_namespace="serve")
        client.deploy_app(config)
    else:
        # TODO (shrekris-anyscale): Remove this conditional path
        app = Application.from_dict(await req.json())
        serve.run(app, _blocking=False)

    return Response()
def get_deployment_statuses() -> Dict[str, DeploymentStatusInfo]:
    """Returns a dictionary of deployment statuses.

    A deployment's status is one of {UPDATING, UNHEALTHY, HEALTHY}.

    Example:
    >>> from ray.serve.api import get_deployment_statuses
    >>> statuses = get_deployment_statuses() # doctest: +SKIP
    >>> status_info = statuses["deployment_name"] # doctest: +SKIP
    >>> status = status_info.status # doctest: +SKIP
    >>> message = status_info.message # doctest: +SKIP

    Returns:
        Dict[str, DeploymentStatusInfo]: This dictionary maps the running
            deployment's name to a DeploymentStatusInfo object containing its
            status and a message explaining the status.
    """
    return get_global_client().get_deployment_statuses()
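# Hedged sketch of polling deployment statuses, assuming the hypothetical
# "counter" deployment from the earlier sketches is running on a live Serve
# instance.
import time

from ray.serve.api import get_deployment_statuses

# Wait until the illustrative "counter" deployment reports HEALTHY.
while True:
    status_info = get_deployment_statuses().get("counter")
    if status_info is not None and status_info.status == "HEALTHY":
        break
    time.sleep(1)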
def test_get_serve_status(shutdown_ray):
    ray.init()

    @serve.deployment
    def f(*args):
        return "Hello world"

    serve.run(f.bind())

    client = get_global_client()
    status_info_1 = client.get_serve_status()
    assert status_info_1.app_status.status == "RUNNING"
    assert status_info_1.deployment_statuses[0].name == "f"
    assert status_info_1.deployment_statuses[0].status in {"UPDATING", "HEALTHY"}

    serve.shutdown()
    ray.shutdown()
def test_fixed_number_proxies(ray_cluster):
    cluster = ray_cluster
    head_node = cluster.add_node(num_cpus=4)
    cluster.add_node(num_cpus=4)
    cluster.add_node(num_cpus=4)

    ray.init(head_node.address)
    node_ids = ray._private.state.node_ids()
    assert len(node_ids) == 3

    with pytest.raises(
        pydantic.ValidationError,
        match="you must specify the `fixed_number_replicas` parameter.",
    ):
        serve.start(
            http_options={
                "location": "FixedNumber",
            }
        )

    serve.start(
        http_options={
            "port": new_port(),
            "location": "FixedNumber",
            "fixed_number_replicas": 2,
        }
    )

    # Only the controller and two HTTP proxies should be started.
    controller_handle = get_global_client()._controller
    node_to_http_actors = ray.get(controller_handle.get_http_proxies.remote())
    assert len(node_to_http_actors) == 2

    proxy_names_bytes = ray.get(controller_handle.get_http_proxy_names.remote())
    proxy_names = ActorNameList.FromString(proxy_names_bytes)
    assert len(proxy_names.names) == 2

    serve.shutdown()
    ray.shutdown()
    cluster.shutdown()
def list_deployments() -> Dict[str, Deployment]:
    """Returns a dictionary of all active deployments.

    Dictionary maps deployment name to Deployment objects.
    """
    infos = get_global_client().list_deployments()

    deployments = {}
    for name, (deployment_info, route_prefix) in infos.items():
        deployments[name] = Deployment(
            deployment_info.replica_config.deployment_def,
            name,
            deployment_info.deployment_config,
            version=deployment_info.version,
            init_args=deployment_info.replica_config.init_args,
            init_kwargs=deployment_info.replica_config.init_kwargs,
            route_prefix=route_prefix,
            ray_actor_options=deployment_info.replica_config.ray_actor_options,
            _internal=True,
        )

    return deployments
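# Hedged sketch of using list_deployments() to inspect what is currently
# running, assuming a live Serve instance in this Ray cluster.
from ray import serve

for name, deployment in serve.list_deployments().items():
    # num_replicas and route_prefix are assumed to be available as Deployment
    # attributes in this API version.
    print(name, deployment.num_replicas, deployment.route_prefix)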
def get_deployment(name: str) -> Deployment:
    """Dynamically fetch a handle to a Deployment object.

    This can be used to update and redeploy a deployment without access to
    the original definition.

    Example:
    >>> from ray import serve
    >>> MyDeployment = serve.get_deployment("name") # doctest: +SKIP
    >>> MyDeployment.options(num_replicas=10).deploy() # doctest: +SKIP

    Args:
        name(str): name of the deployment. This must have already been
            deployed.

    Returns:
        Deployment
    """
    try:
        (
            deployment_info,
            route_prefix,
        ) = get_global_client().get_deployment_info(name)
    except KeyError:
        raise KeyError(
            f"Deployment {name} was not found. Did you call Deployment.deploy()?"
        )
    return Deployment(
        cloudpickle.loads(deployment_info.replica_config.serialized_deployment_def),
        name,
        deployment_info.deployment_config,
        version=deployment_info.version,
        init_args=deployment_info.replica_config.init_args,
        init_kwargs=deployment_info.replica_config.init_kwargs,
        route_prefix=route_prefix,
        ray_actor_options=deployment_info.replica_config.ray_actor_options,
        _internal=True,
    )
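# Hedged sketch expanding on the docstring example above: fetch a deployment by
# name from a separate script and redeploy it with more replicas. Assumes a
# detached Serve instance where a deployment named "counter" already exists.
import ray
from ray import serve

ray.init(address="auto")
serve.start(detached=True)  # connects to the existing instance

counter = serve.get_deployment("counter")
# deploy() with no args reuses the stored init_args, per the deploy() method above.
counter.options(num_replicas=3).deploy()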
def start(
    detached: bool = False,
    http_options: Optional[Union[dict, HTTPOptions]] = None,
    dedicated_cpu: bool = False,
    _checkpoint_path: str = DEFAULT_CHECKPOINT_PATH,
    **kwargs,
) -> ServeControllerClient:
    """Initialize a serve instance.

    By default, the instance will be scoped to the lifetime of the returned
    Client object (or when the script exits). If detached is set to True, the
    instance will instead persist until serve.shutdown() is called. This is
    only relevant if connecting to a long-running Ray cluster (e.g., with
    ray.init(address="auto") or ray.init("ray://<remote_addr>")).

    Args:
        detached: Whether or not the instance should be detached from this
            script. If set, the instance will live on the Ray cluster until it
            is explicitly stopped with serve.shutdown().
        http_options (Optional[Dict, serve.HTTPOptions]): Configuration options
            for HTTP proxy. You can pass in a dictionary or HTTPOptions object
            with fields:

            - host(str, None): Host for HTTP servers to listen on. Defaults to
              "127.0.0.1". To expose Serve publicly, you probably want to set
              this to "0.0.0.0".
            - port(int): Port for HTTP server. Defaults to 8000.
            - root_path(str): Root path to mount the serve application
              (for example, "/serve"). All deployment routes will be prefixed
              with this path. Defaults to "".
            - middlewares(list): A list of Starlette middlewares that will be
              applied to the HTTP servers in the cluster. Defaults to [].
            - location(str, serve.config.DeploymentMode): The deployment
              location of HTTP servers:

                - "HeadOnly": start one HTTP server on the head node. Serve
                  assumes the head node is the node you executed serve.start
                  on. This is the default.
                - "EveryNode": start one HTTP server per node.
                - "NoServer" or None: disable HTTP server.

            - num_cpus (int): The number of CPU cores to reserve for each
              internal Serve HTTP proxy actor. Defaults to 0.
        dedicated_cpu: Whether to reserve a CPU core for the internal Serve
            controller actor. Defaults to False.
    """
    usage_lib.record_library_usage("serve")

    http_deprecated_args = ["http_host", "http_port", "http_middlewares"]
    for key in http_deprecated_args:
        if key in kwargs:
            raise ValueError(
                f"{key} is deprecated, please use serve.start(http_options="
                f'{{"{key}": {kwargs[key]}}}) instead.'
            )

    # Initialize ray if needed.
    ray._private.worker.global_worker.filter_logs_by_job = False
    if not ray.is_initialized():
        ray.init(namespace=SERVE_NAMESPACE)

    try:
        client = get_global_client(_health_check_controller=True)
        logger.info(
            f'Connecting to existing Serve app in namespace "{SERVE_NAMESPACE}".'
        )

        _check_http_and_checkpoint_options(client, http_options, _checkpoint_path)
        return client
    except RayServeException:
        pass

    if detached:
        controller_name = SERVE_CONTROLLER_NAME
    else:
        controller_name = format_actor_name(
            get_random_letters(), SERVE_CONTROLLER_NAME
        )

    if isinstance(http_options, dict):
        http_options = HTTPOptions.parse_obj(http_options)
    if http_options is None:
        http_options = HTTPOptions()

    controller = ServeController.options(
        num_cpus=1 if dedicated_cpu else 0,
        name=controller_name,
        lifetime="detached" if detached else None,
        max_restarts=-1,
        max_task_retries=-1,
        # Pin Serve controller on the head node.
        resources={get_current_node_resource_key(): 0.01},
        namespace=SERVE_NAMESPACE,
        max_concurrency=CONTROLLER_MAX_CONCURRENCY,
    ).remote(
        controller_name,
        http_options,
        _checkpoint_path,
        detached=detached,
    )

    proxy_handles = ray.get(controller.get_http_proxies.remote())
    if len(proxy_handles) > 0:
        try:
            ray.get(
                [handle.ready.remote() for handle in proxy_handles.values()],
                timeout=HTTP_PROXY_TIMEOUT,
            )
        except ray.exceptions.GetTimeoutError:
            raise TimeoutError(
                f"HTTP proxies not available after {HTTP_PROXY_TIMEOUT}s."
            )

    client = ServeControllerClient(
        controller,
        controller_name,
        detached=detached,
    )
    set_global_client(client)
    logger.info(
        f"Started{' detached ' if detached else ' '}Serve instance in "
        f'namespace "{SERVE_NAMESPACE}".'
    )
    return client
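# Hedged sketch of starting a detached Serve instance with custom HTTP options,
# assuming a long-running Ray cluster reachable at the default address. The
# host, port, and location values are illustrative, drawn from the docstring
# above.
import ray
from ray import serve

ray.init(address="auto")
serve.start(
    detached=True,
    http_options={
        "host": "0.0.0.0",  # expose the proxy outside localhost
        "port": 8000,
        "location": "EveryNode",  # one HTTP proxy per node
    },
)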
def delete(self):
    """Delete this deployment."""
    return get_global_client().delete_deployments([self._name])
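# Minimal sketch of removing a single deployment, assuming the hypothetical
# "counter" deployment from the earlier sketches is still running.
from ray import serve

serve.get_deployment("counter").delete()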