def _get_or_start_router(self):
    """Bind ``self.router`` to the serve router actor, starting it if needed.

    The actor registered under ``SERVE_ROUTER_NAME`` is looked up first. If
    that lookup raises ``ValueError``, a new detached ``Router`` actor is
    created under the same name and stored on ``self.router`` instead.
    """
    try:
        self.router = ray.util.get_actor(SERVE_ROUTER_NAME)
    except ValueError:
        # A ValueError from the lookup is treated as "no router yet".
        start_message = "Starting router with name '{}'".format(
            SERVE_ROUTER_NAME)
        logger.info(start_message)

        retryable_router = async_retryable(ray.remote(Router))
        configured_router = retryable_router.options(
            detached=True,
            name=SERVE_ROUTER_NAME,
            max_concurrency=ASYNC_CONCURRENCY,
            # NOTE(review): -1 presumably means "restart without limit";
            # confirm against the Ray actor options documentation.
            max_restarts=-1,
        )
        self.router = configured_router.remote()
def _get_or_start_router(self):
    """Bind ``self.router`` to this cluster's router actor, starting it if needed.

    The actor name is derived from ``SERVE_ROUTER_NAME`` and
    ``self.cluster_name`` via ``format_actor_name``. If looking that name up
    raises ``ValueError``, a new ``Router`` actor is created under it, with
    ``cluster_name`` forwarded to the actor's constructor.
    """
    router_name = format_actor_name(SERVE_ROUTER_NAME, self.cluster_name)
    try:
        self.router = ray.get_actor(router_name)
    except ValueError:
        # A ValueError from the lookup is treated as "no router yet".
        start_message = "Starting router with name '{}'".format(router_name)
        logger.info(start_message)

        retryable_router = async_retryable(ray.remote(Router))
        configured_router = retryable_router.options(
            name=router_name,
            max_concurrency=ASYNC_CONCURRENCY,
            # NOTE(review): -1 presumably means "restart without limit";
            # confirm against the Ray actor options documentation.
            max_restarts=-1,
        )
        self.router = configured_router.remote(
            cluster_name=self.cluster_name)
def _get_or_start_http_proxy(self, host, port):
    """Bind ``self.http_proxy`` to the HTTP proxy actor, starting it if needed.

    The actor registered under ``SERVE_PROXY_NAME`` is looked up first. If
    that lookup raises ``ValueError``, a new detached ``HTTPProxyActor`` is
    created under the same name.

    Args:
        host: Forwarded as the first constructor argument of the proxy
            actor; only used when a new proxy is started.
        port: Forwarded as the second constructor argument of the proxy
            actor; only used when a new proxy is started.
    """
    try:
        self.http_proxy = ray.util.get_actor(SERVE_PROXY_NAME)
    except ValueError:
        # A ValueError from the lookup is treated as "no proxy yet".
        start_message = "Starting HTTP proxy with name '{}'".format(
            SERVE_PROXY_NAME)
        logger.info(start_message)

        retryable_proxy = async_retryable(HTTPProxyActor)
        configured_proxy = retryable_proxy.options(
            detached=True,
            name=SERVE_PROXY_NAME,
            max_concurrency=ASYNC_CONCURRENCY,
            max_reconstructions=ray.ray_constants.INFINITE_RECONSTRUCTION,
        )
        self.http_proxy = configured_proxy.remote(host, port)
def _get_or_start_router(self, router_class, router_kwargs):
    """Bind ``self.router`` to the serve router actor, starting it if needed.

    The actor registered under ``SERVE_ROUTER_NAME`` is looked up first. If
    that lookup raises ``ValueError``, a new detached actor is created from
    ``router_class`` under the same name.

    Args:
        router_class: Actor class wrapped with ``async_retryable`` when a
            new router has to be started.
        router_kwargs: Mapping expanded into keyword arguments for the new
            router's constructor; unused when the router already exists.
    """
    try:
        self.router = ray.util.get_actor(SERVE_ROUTER_NAME)
    except ValueError:
        # A ValueError from the lookup is treated as "no router yet".
        start_message = "Starting router with name '{}'".format(
            SERVE_ROUTER_NAME)
        logger.info(start_message)

        retryable_router = async_retryable(router_class)
        configured_router = retryable_router.options(
            detached=True,
            name=SERVE_ROUTER_NAME,
            max_concurrency=ASYNC_CONCURRENCY,
            max_reconstructions=ray.ray_constants.INFINITE_RECONSTRUCTION,
        )
        self.router = configured_router.remote(**router_kwargs)
async def _start_backend_worker(self, backend_tag, replica_tag):
    """Launch one backend replica actor and wait until it reports ready.

    ``self.backends[backend_tag]`` must already hold a 3-tuple of
    (worker class, backend config, replica config) for the backend.

    Args:
        backend_tag: Key into ``self.backends``; also passed to the worker's
            constructor.
        replica_tag: Used as the actor's name and passed to the worker's
            constructor.

    Returns:
        The handle of the newly created worker actor, after its ``ready``
        method has been awaited.
    """
    logger.debug("Starting worker '{}' for backend '{}'.".format(
        replica_tag, backend_tag))

    # The backend config (middle element) is not needed to create the actor.
    worker_cls, _backend_config, replica_cfg = self.backends[backend_tag]

    retryable_worker = async_retryable(ray.remote(worker_cls))
    configured_worker = retryable_worker.options(
        detached=True,
        name=replica_tag,
        # NOTE(review): -1 presumably means "restart without limit";
        # confirm against the Ray actor options documentation.
        max_restarts=-1,
        **replica_cfg.ray_actor_options)
    handle = configured_worker.remote(
        backend_tag, replica_tag, replica_cfg.actor_init_args)

    # TODO(edoakes): we should probably have a timeout here.
    await handle.ready.remote()
    return handle
def _get_or_start_http_proxy(self, node_id, host, port):
    """Bind ``self.http_proxy`` to the HTTP proxy actor, starting it if needed.

    The actor registered under ``SERVE_PROXY_NAME`` is looked up first. If
    that lookup raises ``ValueError``, a new detached ``HTTPProxyActor`` is
    created under the same name.

    Args:
        node_id: Name of the resource requested for a newly started proxy
            (also included in the log message).
        host: Forwarded as the first constructor argument of the proxy
            actor; only used when a new proxy is started.
        port: Forwarded as the second constructor argument of the proxy
            actor; only used when a new proxy is started.
    """
    try:
        self.http_proxy = ray.util.get_actor(SERVE_PROXY_NAME)
    except ValueError:
        # A ValueError from the lookup is treated as "no proxy yet".
        start_message = (
            "Starting HTTP proxy with name '{}' on node '{}'".format(
                SERVE_PROXY_NAME, node_id))
        logger.info(start_message)

        retryable_proxy = async_retryable(HTTPProxyActor)
        # NOTE(review): requesting a small amount of the resource named
        # after node_id presumably pins the proxy to that node; confirm.
        placement_resources = {node_id: 0.01}
        configured_proxy = retryable_proxy.options(
            detached=True,
            name=SERVE_PROXY_NAME,
            max_concurrency=ASYNC_CONCURRENCY,
            max_restarts=-1,
            resources=placement_resources,
        )
        self.http_proxy = configured_proxy.remote(host, port)
async def _start_backend_worker(self, backend_tag, replica_tag):
    """Launch one backend replica actor and wait until it reports ready.

    ``self.backends[backend_tag]`` must already hold a 3-tuple of
    (worker creator, init args, config dict) for the backend.

    Args:
        backend_tag: Key into ``self.backends``; also passed to the worker's
            constructor.
        replica_tag: Used as the actor's name and passed to the worker's
            constructor.

    Returns:
        The handle of the newly created worker actor, after its ``ready``
        method has been awaited.
    """
    logger.debug("Starting worker '{}' for backend '{}'.".format(
        replica_tag, backend_tag))

    creator, worker_init_args, raw_config = self.backends[backend_tag]

    # TODO(edoakes): just store the BackendConfig in self.backends.
    creation_kwargs = BackendConfig(**raw_config).get_actor_creation_args()

    retryable_worker = async_retryable(ray.remote(creator))
    configured_worker = retryable_worker.options(
        detached=True,
        name=replica_tag,
        max_reconstructions=ray.ray_constants.INFINITE_RECONSTRUCTION,
        **creation_kwargs)
    handle = configured_worker.remote(
        backend_tag, replica_tag, worker_init_args)

    # TODO(edoakes): we should probably have a timeout here.
    await handle.ready.remote()
    return handle