def __init__(
    self,
    controller_handle: ActorHandle,
    deployment_name: str,
    event_loop: asyncio.AbstractEventLoop = None,
):
    """Router process incoming queries: assign a replica.

    Args:
        controller_handle(ActorHandle): The controller handle.
        deployment_name(str): The deployment whose replicas this router
            assigns requests to.
        event_loop(asyncio.AbstractEventLoop): Loop in which the replica
            set and long poll callbacks run. May be None, in which case
            downstream components fall back to the current loop.
    """
    self._event_loop = event_loop
    self._replica_set = ReplicaSet(deployment_name, event_loop)

    # -- Metrics Registration -- #
    self.num_router_requests = metrics.Counter(
        "serve_num_router_requests",
        description="The number of requests processed by the router.",
        tag_keys=("deployment", ))
    self.num_router_requests.set_default_tags(
        {"deployment": deployment_name})

    # Keep the running-replica membership fresh by long polling the
    # controller.
    self.long_poll_client = LongPollClient(
        controller_handle,
        {
            (LongPollNamespace.RUNNING_REPLICAS, deployment_name):
            self._replica_set.update_running_replicas,
        },
        call_in_event_loop=event_loop,
    )
def __init__(
    self,
    controller_handle: ActorHandle,
    endpoint_tag: EndpointTag,
    loop: asyncio.AbstractEventLoop = None,
):
    """Router process incoming queries: choose backend, and assign replica.

    Args:
        controller_handle(ActorHandle): The controller handle.
        endpoint_tag(EndpointTag): The endpoint this router serves.
        loop(asyncio.AbstractEventLoop): Loop used for the pending
            endpoint event and the long poll callbacks; defaults to the
            current event loop.
    """
    self.controller = controller_handle
    self.endpoint_tag = endpoint_tag
    self.endpoint_policy: Optional[EndpointPolicy] = None
    self.backend_replicas: Dict[BackendTag, ReplicaSet] = dict()
    # NOTE(review): the `loop=` argument to asyncio.Event is deprecated
    # in Python 3.8 and removed in 3.10 — this should eventually create
    # the event from within the target loop instead; kept as-is here to
    # preserve behavior on the supported Python versions.
    self._pending_endpoint_registered = asyncio.Event(loop=loop)
    self._loop = loop or asyncio.get_event_loop()

    # -- Metrics Registration -- #
    self.num_router_requests = metrics.Counter(
        "serve_num_router_requests",
        description="The number of requests processed by the router.",
        tag_keys=("endpoint", ))

    # Subscribe to traffic policy updates for this endpoint.
    self.long_poll_client = LongPollClient(
        self.controller,
        {
            (LongPollNamespace.TRAFFIC_POLICIES, endpoint_tag):
            self._update_traffic_policy,
        },
        call_in_event_loop=self._loop,
    )
def __init__(
    self,
    controller_handle: ActorHandle,
    backend_tag: BackendTag,
    event_loop: asyncio.AbstractEventLoop = None,
):
    """Router process incoming queries: choose backend, and assign replica.

    Args:
        controller_handle(ActorHandle): The controller handle.
        backend_tag(BackendTag): The backend whose replicas this router
            assigns queries to.
        event_loop(asyncio.AbstractEventLoop): Loop in which the replica
            set and long poll callbacks run; may be None.
    """
    self._event_loop = event_loop
    self._replica_set = ReplicaSet(backend_tag, event_loop)

    # -- Metrics Registration -- #
    self.num_router_requests = metrics.Counter(
        "serve_num_router_requests",
        description="The number of requests processed by the router.",
        tag_keys=("deployment", ))
    self.num_router_requests.set_default_tags({"deployment": backend_tag})

    # Keep the replica set's concurrency config and replica handles
    # fresh by long polling the controller.
    self.long_poll_client = LongPollClient(
        controller_handle,
        {
            (LongPollNamespace.BACKEND_CONFIGS, backend_tag):
            self._replica_set.set_max_concurrent_queries,
            (LongPollNamespace.REPLICA_HANDLES, backend_tag):
            self._replica_set.update_worker_replicas,
        },
        call_in_event_loop=event_loop,
    )
def __init__(self, controller_name: str, controller_namespace: str):
    """Set up routing state, route-table long polling, and HTTP metrics."""
    # Connect serve API calls made from this proxy to the controller
    # instance this proxy is running in.
    ray.serve.api._set_internal_replica_context(None, None,
                                                controller_name, None)

    # Used only for displaying the route table.
    self.route_info: Dict[str, EndpointTag] = {}

    def _get_handle(name):
        client = serve.api._get_global_client()
        return client.get_handle(
            name,
            sync=False,
            missing_ok=True,
            _internal_pickled_http_request=True,
        )

    self.prefix_router = LongestPrefixRouter(_get_handle)

    controller_actor = ray.get_actor(
        controller_name, namespace=controller_namespace)
    self.long_poll_client = LongPollClient(
        controller_actor,
        {LongPollNamespace.ROUTE_TABLE: self._update_routes},
        call_in_event_loop=asyncio.get_event_loop())

    self.request_counter = metrics.Counter(
        "serve_num_http_requests",
        description="The number of HTTP requests processed.",
        tag_keys=("route", ))
def __init__(self, controller_name: str):
    """Initialize routing state, route-table subscription, and metrics."""
    # Point serve at the controller instance this proxy is running in.
    ray.serve.api._set_internal_replica_context(None, None,
                                                controller_name, None)

    # Used only for displaying the route table.
    self.route_info: Dict[str, Tuple[EndpointTag, List[str]]] = {}

    # NOTE(edoakes): we currently have both a Starlette router and a
    # longest-prefix router to maintain compatibility with the old API.
    # We first match on the Starlette router (which contains routes using
    # the old API) and then fall back to the prefix router. The Starlette
    # router can be removed once we deprecate the old API.
    self.prefix_router = LongestPrefixRouter()
    self.starlette_router = starlette.routing.Router(
        default=self._fallback_to_prefix_router)

    controller = ray.get_actor(controller_name)
    self.long_poll_client = LongPollClient(
        controller,
        {LongPollNamespace.ROUTE_TABLE: self._update_routes},
        call_in_event_loop=asyncio.get_event_loop())

    self.request_counter = metrics.Counter(
        "serve_num_http_requests",
        description="The number of HTTP requests processed.",
        tag_keys=("route", ))
def __init__(self, controller_name: str):
    """Wire this proxy to its controller: router, long poll, metrics."""
    # Ensure serve API calls from this proxy reach the controller
    # instance this proxy is running in.
    ray.serve.api._set_internal_replica_context(None, None,
                                                controller_name, None)

    self.router = LongestPrefixRouter()

    # Keep the router's route table synchronized with the controller.
    self.long_poll_client = LongPollClient(
        ray.get_actor(controller_name),
        {LongPollNamespace.ROUTE_TABLE: self.router.update_routes},
        call_in_event_loop=asyncio.get_event_loop())

    self.request_counter = metrics.Counter(
        "serve_num_http_requests",
        description="The number of HTTP requests processed.",
        tag_keys=("route", ))
async def test_client_threadsafe(serve_instance):
    """The client must fire callbacks registered against this event loop
    even though notifications originate from another thread/actor."""
    host = ray.remote(LongPollHost).remote()
    ray.get(host.notify_changed.remote("key_1", 100))

    notified = asyncio.Event()

    def on_key_1(_):
        notified.set()

    client = LongPollClient(
        host,
        {"key_1": on_key_1},
        call_in_event_loop=asyncio.get_event_loop(),
    )

    await notified.wait()
def __init__( self, controller_handle, backend_tag, event_loop: asyncio.AbstractEventLoop, ): self.backend_tag = backend_tag # NOTE(simon): We have to do this because max_concurrent_queries # and the replica handles come from different long poll keys. self.max_concurrent_queries: int = 8 self.in_flight_queries: Dict[ActorHandle, set] = dict() # The iterator used for load balancing among replicas. Using itertools # cycle, we implements a round-robin policy, skipping overloaded # replicas. # NOTE(simon): We can make this more pluggable and consider different # policies like: min load, pick min of two replicas, pick replicas on # the same node. self.replica_iterator = itertools.cycle(self.in_flight_queries.keys()) # Used to unblock this replica set waiting for free replicas. A newly # added replica or updated max_concurrent_queries value means the # query that waits on a free replica might be unblocked on. self.config_updated_event = asyncio.Event(loop=event_loop) self.num_queued_queries = 0 self.num_queued_queries_gauge = metrics.Gauge( "serve_deployment_queued_queries", description=( "The current number of queries to this deployment waiting" " to be assigned to a replica."), tag_keys=("deployment", "endpoint")) self.num_queued_queries_gauge.set_default_tags({ "deployment": self.backend_tag }) self.long_poll_client = LongPollClient( controller_handle, { (LongPollNamespace.BACKEND_CONFIGS, backend_tag): self. set_max_concurrent_queries, (LongPollNamespace.REPLICA_HANDLES, backend_tag): self. update_worker_replicas, }, call_in_event_loop=event_loop, )
async def test_client(serve_instance):
    """End-to-end check of LongPollClient snapshots and callbacks."""
    host = ray.remote(LongPollHost).remote()

    # Seed the host with two values.
    ray.get(host.notify_changed.remote("key_1", 100))
    ray.get(host.notify_changed.remote("key_2", 999))

    received = dict()

    def on_key_1(value):
        received["key_1"] = value

    def on_key_2(value):
        received["key_2"] = value

    client = LongPollClient(
        host,
        {
            "key_1": on_key_1,
            "key_2": on_key_2,
        },
        call_in_event_loop=asyncio.get_event_loop(),
    )

    # Block until the first snapshot arrives.
    while len(client.object_snapshots) == 0:
        time.sleep(0.1)
    assert client.object_snapshots["key_1"] == 100
    assert client.object_snapshots["key_2"] == 999

    # Push an update and poll until the client observes the new value.
    ray.get(host.notify_changed.remote("key_2", 1999))
    observed = set()
    for _ in range(3):
        observed.add(client.object_snapshots["key_2"])
        if 1999 in observed:
            break
        await asyncio.sleep(1)
    assert 1999 in observed
    assert received == {"key_1": 100, "key_2": 1999}
def __init__(self, controller_name: str):
    """Connect this proxy to its controller and set up routing/metrics."""
    # Set the controller name so that serve.connect() will connect to the
    # controller instance this proxy is running in.
    ray.serve.api._set_internal_replica_context(None, None,
                                                controller_name, None)
    controller = ray.get_actor(controller_name)

    self.router = starlette.routing.Router(default=self._not_found)

    # route -> (endpoint_tag, methods). Updated via long polling.
    self.route_table: Dict[str, Tuple[EndpointTag, List[str]]] = {}
    self.long_poll_client = LongPollClient(
        controller,
        {LongPollNamespace.ROUTE_TABLE: self._update_route_table})

    self.request_counter = metrics.Counter(
        "serve_num_http_requests",
        description="The number of HTTP requests processed.",
        tag_keys=("route", ))
def __init__(self, _callable: Callable, backend_config: BackendConfig,
             is_function: bool, controller_handle: ActorHandle) -> None:
    """Set up replica state, metrics, config long polling, and logging.

    Args:
        _callable(Callable): the user's deployment callable (class
            instance or function) this replica wraps.
        backend_config(BackendConfig): initial config; kept fresh via the
            BACKEND_CONFIGS long poll registered below.
        is_function(bool): whether `_callable` is a plain function rather
            than a class instance.
        controller_handle(ActorHandle): handle to the Serve controller.
    """
    # Deployment/replica identity comes from the replica context set up
    # by the Serve runtime before this constructor runs.
    self.backend_tag = ray.serve.api.get_replica_context().deployment
    self.replica_tag = ray.serve.api.get_replica_context().replica_tag
    self.callable = _callable
    self.is_function = is_function
    self.config = backend_config
    self.num_ongoing_requests = 0

    # -- Metrics Registration -- #
    self.request_counter = metrics.Counter(
        "serve_deployment_request_counter",
        description=("The number of queries that have been "
                     "processed in this replica."),
        tag_keys=("deployment", "replica"))
    self.request_counter.set_default_tags({
        "deployment": self.backend_tag,
        "replica": self.replica_tag
    })

    self.loop = asyncio.get_event_loop()
    # Receive backend config updates pushed by the controller.
    self.long_poll_client = LongPollClient(
        controller_handle,
        {
            (LongPollNamespace.BACKEND_CONFIGS, self.backend_tag):
            self._update_backend_configs,
        },
        call_in_event_loop=self.loop,
    )

    self.error_counter = metrics.Counter(
        "serve_deployment_error_counter",
        description=("The number of exceptions that have "
                     "occurred in this replica."),
        tag_keys=("deployment", "replica"))
    self.error_counter.set_default_tags({
        "deployment": self.backend_tag,
        "replica": self.replica_tag
    })

    self.restart_counter = metrics.Counter(
        "serve_deployment_replica_starts",
        description=("The number of times this replica "
                     "has been restarted due to failure."),
        tag_keys=("deployment", "replica"))
    self.restart_counter.set_default_tags({
        "deployment": self.backend_tag,
        "replica": self.replica_tag
    })

    self.processing_latency_tracker = metrics.Histogram(
        "serve_deployment_processing_latency_ms",
        description="The latency for queries to be processed.",
        boundaries=DEFAULT_LATENCY_BUCKET_MS,
        tag_keys=("deployment", "replica"))
    self.processing_latency_tracker.set_default_tags({
        "deployment": self.backend_tag,
        "replica": self.replica_tag
    })

    self.num_processing_items = metrics.Gauge(
        "serve_replica_processing_queries",
        description="The current number of queries being processed.",
        tag_keys=("deployment", "replica"))
    self.num_processing_items.set_default_tags({
        "deployment": self.backend_tag,
        "replica": self.replica_tag
    })

    # Every (re)construction of this replica counts as one start.
    self.restart_counter.inc()

    # Tag every line emitted through the "ray" logger with this
    # replica's deployment name and replica id.
    ray_logger = logging.getLogger("ray")
    for handler in ray_logger.handlers:
        handler.setFormatter(
            logging.Formatter(
                handler.formatter._fmt +
                f" component=serve deployment={self.backend_tag} "
                f"replica={self.replica_tag}"))