def __init__(
    self,
    controller_name: str,
    controller_namespace: str,
):
    """Initialize the HTTP proxy: routing state, long-poll client, metrics.

    Args:
        controller_name: Name of the Serve controller actor to attach to.
        controller_namespace: Ray namespace the controller actor lives in.
    """
    # Set the controller name so that serve will connect to the
    # controller instance this proxy is running in.
    ray.serve.context.set_internal_replica_context(
        None, None, controller_name, controller_namespace, None)

    # Used only for displaying the route table.
    self.route_info: Dict[str, EndpointTag] = dict()

    def get_handle(name):
        # Async handle (sync=False) since the proxy runs in an event loop;
        # missing_ok avoids raising for routes that are not yet deployed.
        return serve.context.get_global_client().get_handle(
            name,
            sync=False,
            missing_ok=True,
            _internal_pickled_http_request=True,
        )

    self.prefix_router = LongestPrefixRouter(get_handle)

    # Keep the route table current via long polling on the controller.
    self.long_poll_client = LongPollClient(
        ray.get_actor(controller_name, namespace=controller_namespace),
        {
            LongPollNamespace.ROUTE_TABLE: self._update_routes,
        },
        call_in_event_loop=asyncio.get_event_loop(),
    )

    # Request metrics, tagged by route (and error code / deployment for
    # the error counters).
    self.request_counter = metrics.Counter(
        "serve_num_http_requests",
        description="The number of HTTP requests processed.",
        tag_keys=("route", ),
    )
    self.request_error_counter = metrics.Counter(
        "serve_num_http_error_requests",
        description="The number of non-200 HTTP responses.",
        tag_keys=("route", "error_code"),
    )
    self.deployment_request_error_counter = metrics.Counter(
        "serve_num_deployment_http_error_requests",
        description=
        ("The number of non-200 HTTP responses returned by each deployment."
         ),
        tag_keys=("deployment", ),
    )
def __init__(self, controller_name: str):
    """Initialize the HTTP proxy's routers, long-poll client, and metrics."""
    # Point serve at the controller instance this proxy is running in.
    ray.serve.api._set_internal_replica_context(None, None,
                                                controller_name, None)

    # route -> (endpoint, methods); used only for displaying the route table.
    self.route_info: Dict[str, Tuple[EndpointTag, List[str]]] = {}

    # NOTE(edoakes): we currently have both a Starlette router and a
    # longest-prefix router to maintain compatibility with the old API.
    # We first match on the Starlette router (which contains routes using
    # the old API) and then fall back to the prefix router. The Starlette
    # router can be removed once we deprecate the old API.
    self.starlette_router = starlette.routing.Router(
        default=self._fallback_to_prefix_router)
    self.prefix_router = LongestPrefixRouter()

    # Receive route-table updates from the controller via long polling.
    controller_actor = ray.get_actor(controller_name)
    route_callbacks = {
        LongPollNamespace.ROUTE_TABLE: self._update_routes,
    }
    self.long_poll_client = LongPollClient(
        controller_actor,
        route_callbacks,
        call_in_event_loop=asyncio.get_event_loop())

    self.request_counter = metrics.Counter(
        "serve_num_http_requests",
        description="The number of HTTP requests processed.",
        tag_keys=("route", ))
def __init__(self):
    """Register a custom request counter tagged with this deployment."""
    counter = metrics.Counter(
        "my_counter",
        description="The number of excellent requests to this backend.",
        tag_keys=("deployment", ),
    )
    # Stamp every increment with the deployment this replica belongs to.
    counter.set_default_tags(
        {"deployment": serve.get_current_deployment()})
    self.my_counter = counter
def __init__(
        self,
        controller_handle: ActorHandle,
        deployment_name: str,
        event_loop: asyncio.BaseEventLoop = None,
):
    """Route incoming queries to replicas of a single deployment.

    Args:
        controller_handle(ActorHandle): The controller handle.
    """
    self._event_loop = event_loop
    replica_set = ReplicaSet(deployment_name, event_loop)
    self._replica_set = replica_set

    # -- Metrics Registration -- #
    requests_counter = metrics.Counter(
        "serve_num_router_requests",
        description="The number of requests processed by the router.",
        tag_keys=("deployment", ))
    requests_counter.set_default_tags({"deployment": deployment_name})
    self.num_router_requests = requests_counter

    # Track replica membership changes via long polling on the controller.
    membership_key = (LongPollNamespace.RUNNING_REPLICAS, deployment_name)
    self.long_poll_client = LongPollClient(
        controller_handle,
        {membership_key: replica_set.update_running_replicas},
        call_in_event_loop=event_loop,
    )
def __init__(
        self,
        controller_handle: ActorHandle,
        endpoint_tag: EndpointTag,
        loop: asyncio.BaseEventLoop = None,
):
    """Router process incoming queries: choose backend, and assign replica.

    Args:
        controller_handle(ActorHandle): The controller handle.
        endpoint_tag: The endpoint this router serves.
        loop: Event loop to run callbacks in; defaults to the current loop.
    """
    self.controller = controller_handle
    self.endpoint_tag = endpoint_tag
    self.endpoint_policy: Optional[EndpointPolicy] = None
    self.backend_replicas: Dict[BackendTag, ReplicaSet] = dict()
    # FIX: asyncio.Event's `loop` argument was deprecated in Python 3.8
    # and removed in 3.10 (passing it raises TypeError there). The event
    # binds to the running loop when first awaited, so no kwarg is needed.
    self._pending_endpoint_registered = asyncio.Event()
    self._loop = loop or asyncio.get_event_loop()

    # -- Metrics Registration -- #
    self.num_router_requests = metrics.Counter(
        "serve_num_router_requests",
        description="The number of requests processed by the router.",
        tag_keys=("endpoint", ))

    # Receive traffic-policy updates for this endpoint via long polling.
    self.long_poll_client = LongPollClient(
        self.controller,
        {
            (LongPollNamespace.TRAFFIC_POLICIES, endpoint_tag):
            self._update_traffic_policy,
        },
        call_in_event_loop=self._loop,
    )
def __init__(
        self,
        controller_handle: ActorHandle,
        endpoint_name: EndpointTag,
        handle_options: Optional[HandleOptions] = None,
        *,
        known_python_methods: Optional[List[str]] = None,
        _router: Optional[EndpointRouter] = None,
        _internal_use_serve_request: Optional[bool] = True,
        _internal_pickled_http_request: bool = False,
):
    """Construct a handle for invoking an endpoint.

    Args:
        controller_handle(ActorHandle): The controller handle.
        endpoint_name: The endpoint this handle targets.
        handle_options: Per-handle options; defaults to HandleOptions().
        known_python_methods: Method names known to exist on the endpoint;
            defaults to an empty list (fresh per instance).
        _router: Internal router override.
        _internal_use_serve_request: Internal flag stored on the handle.
        _internal_pickled_http_request: Internal flag for proxied HTTP
            requests.
    """
    self.controller_handle = controller_handle
    self.endpoint_name = endpoint_name
    self.handle_options = handle_options or HandleOptions()
    # FIX: the previous default `known_python_methods: List[str] = []`
    # was a mutable default argument shared across every handle created
    # without the argument; use None as the sentinel instead.
    self.known_python_methods = (known_python_methods
                                 if known_python_methods is not None else [])
    # Unique tag for this handle instance, used as a metrics tag below.
    self.handle_tag = f"{self.endpoint_name}#{get_random_letters()}"
    self._use_serve_request = _internal_use_serve_request
    self._pickled_http_request = _internal_pickled_http_request

    self.request_counter = metrics.Counter(
        "serve_handle_request_counter",
        description=("The number of handle.remote() calls that have been "
                     "made on this handle."),
        tag_keys=("handle", "endpoint"))
    self.request_counter.set_default_tags({
        "handle": self.handle_tag,
        "endpoint": self.endpoint_name
    })

    self.router: EndpointRouter = _router or self._make_router()
def __init__(
        self,
        controller_handle: ActorHandle,
        backend_tag: BackendTag,
        event_loop: asyncio.BaseEventLoop = None,
):
    """Router process incoming queries: choose backend, and assign replica.

    Args:
        controller_handle(ActorHandle): The controller handle.
    """
    self._event_loop = event_loop
    replica_set = ReplicaSet(backend_tag, event_loop)
    self._replica_set = replica_set

    # -- Metrics Registration -- #
    requests_counter = metrics.Counter(
        "serve_num_router_requests",
        description="The number of requests processed by the router.",
        tag_keys=("deployment", ))
    requests_counter.set_default_tags({"deployment": backend_tag})
    self.num_router_requests = requests_counter

    # Long-poll the controller for config and replica-handle updates.
    callbacks = {
        (LongPollNamespace.BACKEND_CONFIGS, backend_tag):
        replica_set.set_max_concurrent_queries,
        (LongPollNamespace.REPLICA_HANDLES, backend_tag):
        replica_set.update_worker_replicas,
    }
    self.long_poll_client = LongPollClient(
        controller_handle,
        callbacks,
        call_in_event_loop=event_loop,
    )
def __init__(
        self,
        controller_handle: ActorHandle,
        deployment_name: EndpointTag,
        handle_options: Optional[HandleOptions] = None,
        *,
        _router: Optional[Router] = None,
        _internal_pickled_http_request: bool = False,
):
    """Construct a handle for sending requests to a deployment."""
    self.controller_handle = controller_handle
    self.deployment_name = deployment_name
    self.handle_options = handle_options or HandleOptions()
    self._pickled_http_request = _internal_pickled_http_request
    # Unique per-handle tag, used to distinguish handles in metrics.
    self.handle_tag = f"{self.deployment_name}#{get_random_letters()}"

    counter = metrics.Counter(
        "serve_handle_request_counter",
        description=("The number of handle.remote() calls that have been "
                     "made on this handle."),
        tag_keys=("handle", "deployment"))
    default_tags = {
        "handle": self.handle_tag,
        "deployment": self.deployment_name
    }
    counter.set_default_tags(default_tags)
    self.request_counter = counter

    self.router: Router = _router or self._make_router()
def __init__(self, controller_name: str):
    """Initialize the HTTP proxy's router, long-poll client, and metrics."""
    # Point serve at the controller instance this proxy is running in.
    ray.serve.api._set_internal_replica_context(None, None,
                                                controller_name, None)

    self.router = LongestPrefixRouter()

    # Keep the router's route table current via long polling.
    route_callbacks = {
        LongPollNamespace.ROUTE_TABLE: self.router.update_routes,
    }
    self.long_poll_client = LongPollClient(
        ray.get_actor(controller_name),
        route_callbacks,
        call_in_event_loop=asyncio.get_event_loop())

    self.request_counter = metrics.Counter(
        "serve_num_http_requests",
        description="The number of HTTP requests processed.",
        tag_keys=("route", ))
def __init__(
    self,
    controller_handle: ActorHandle,
    deployment_name: EndpointTag,
    handle_options: Optional[HandleOptions] = None,
    *,
    _router: Optional[Router] = None,
    _internal_pickled_http_request: bool = False,
):
    """Construct a handle for sending requests to a deployment.

    Args:
        controller_handle: Handle to the Serve controller actor.
        deployment_name: The deployment this handle targets.
        handle_options: Per-handle options; defaults to HandleOptions().
        _router: Internal router override.
        _internal_pickled_http_request: Internal flag for proxied HTTP
            requests.
    """
    self.controller_handle = controller_handle
    self.deployment_name = deployment_name
    self.handle_options = handle_options or HandleOptions()
    # Unique tag for this handle instance, used as a metrics tag below.
    self.handle_tag = f"{self.deployment_name}#{get_random_letters()}"
    self._pickled_http_request = _internal_pickled_http_request

    self.request_counter = metrics.Counter(
        "serve_handle_request_counter",
        description=("The number of handle.remote() calls that have been "
                     "made on this handle."),
        tag_keys=("handle", "deployment"),
    )
    self.request_counter.set_default_tags({
        "handle": self.handle_tag,
        "deployment": self.deployment_name
    })

    self.router: Router = _router or self._make_router()

    # Fetch the deployment's info from the controller (blocking ray.get)
    # to decide whether handle metrics must be pushed for autoscaling.
    deployment_route = DeploymentRoute.FromString(
        ray.get(
            self.controller_handle.get_deployment_info.remote(
                self.deployment_name)))
    deployment_info = DeploymentInfo.from_proto(
        deployment_route.deployment_info)

    self._stop_event: Optional[threading.Event] = None
    self._pusher: Optional[threading.Thread] = None
    remote_func = self.controller_handle.record_handle_metrics.remote
    if deployment_info.deployment_config.autoscaling_config:
        # Only deployments with autoscaling enabled need queue-length
        # metrics pushed to the controller from this handle.
        self._stop_event = threading.Event()
        self._pusher = start_metrics_pusher(
            interval_s=HANDLE_METRIC_PUSH_INTERVAL_S,
            collection_callback=self._collect_handle_queue_metrics,
            metrics_process_func=remote_func,
            stop_event=self._stop_event,
        )
def __init__(self, controller_handle: ActorHandle):
    """Router process incoming queries: choose backend, and assign replica.

    Args:
        controller_handle(ActorHandle): The controller handle.
    """
    self.controller = controller_handle

    # Routing state, keyed by endpoint / backend name.
    self.endpoint_policies: Dict[str, EndpointPolicy] = {}
    self.backend_replicas: Dict[str, ReplicaSet] = {}
    # Futures resolved once an endpoint finishes registering.
    self._pending_endpoints: Dict[str, asyncio.Future] = {}

    # -- Metrics Registration -- #
    requests_counter = metrics.Counter(
        "serve_num_router_requests",
        description="The number of requests processed by the router.",
        tag_keys=("endpoint", ))
    self.num_router_requests = requests_counter
def __init__(self,
             router,  # ThreadProxiedRouter
             endpoint_name,
             handle_options: Optional[HandleOptions] = None):
    """Construct a handle that sends requests through the given router."""
    self.router = router
    self.endpoint_name = endpoint_name
    self.handle_options = handle_options if handle_options else HandleOptions()
    # Unique per-handle tag, used to distinguish handles in metrics.
    self.handle_tag = f"{self.endpoint_name}#{get_random_letters()}"

    counter = metrics.Counter(
        "serve_handle_request_counter",
        description=("The number of handle.remote() calls that have been "
                     "made on this handle."),
        tag_keys=("handle", "endpoint"))
    counter.set_default_tags({
        "handle": self.handle_tag,
        "endpoint": self.endpoint_name
    })
    self.request_counter = counter
def __init__(self, controller_name: str):
    """Initialize the HTTP proxy's route table, long-poll client, metrics."""
    # Set the controller name so that serve.connect() will connect to the
    # controller instance this proxy is running in.
    ray.serve.api._set_internal_replica_context(None, None,
                                                controller_name, None)

    self.router = starlette.routing.Router(default=self._not_found)

    # route -> (endpoint_tag, methods). Updated via long polling.
    self.route_table: Dict[str, Tuple[EndpointTag, List[str]]] = {}

    controller = ray.get_actor(controller_name)
    route_callbacks = {
        LongPollNamespace.ROUTE_TABLE: self._update_route_table,
    }
    self.long_poll_client = LongPollClient(controller, route_callbacks)

    self.request_counter = metrics.Counter(
        "serve_num_http_requests",
        description="The number of HTTP requests processed.",
        tag_keys=("route", ))
def __init__(
    self,
    _callable: Callable,
    deployment_name: str,
    replica_tag: ReplicaTag,
    deployment_config: DeploymentConfig,
    user_config: Any,
    version: DeploymentVersion,
    is_function: bool,
    controller_handle: ActorHandle,
) -> None:
    """Initialize replica state: health check, metrics, autoscaling, logging.

    Args:
        _callable: The user's deployment callable (class instance or
            function) this replica wraps.
        deployment_name: Name of the deployment this replica belongs to.
        replica_tag: Unique tag identifying this replica.
        deployment_config: Config for this deployment (shutdown/autoscaling
            settings are read below).
        user_config: The user-provided config object, stored as-is.
        version: Deployment version of this replica.
        is_function: Whether the callable is a plain function.
        controller_handle: Handle to the Serve controller actor.
    """
    self.deployment_config = deployment_config
    self.deployment_name = deployment_name
    self.replica_tag = replica_tag
    self.callable = _callable
    self.is_function = is_function
    self.user_config = user_config
    self.version = version
    self.rwlock = aiorwlock.RWLock()

    # Use the callable's health-check method if it defines one; fall back
    # to a no-op so health checks always succeed by default.
    user_health_check = getattr(_callable, HEALTH_CHECK_METHOD, None)
    if not callable(user_health_check):

        def user_health_check():
            pass

    self.user_health_check = sync_to_async(user_health_check)

    self.num_ongoing_requests = 0

    # Per-replica metrics, all tagged with (deployment, replica).
    self.request_counter = metrics.Counter(
        "serve_deployment_request_counter",
        description=
        ("The number of queries that have been processed in this replica."
         ),
        tag_keys=("deployment", "replica"),
    )
    self.request_counter.set_default_tags({
        "deployment": self.deployment_name,
        "replica": self.replica_tag
    })

    self.error_counter = metrics.Counter(
        "serve_deployment_error_counter",
        description=(
            "The number of exceptions that have occurred in this replica."
        ),
        tag_keys=("deployment", "replica"),
    )
    self.error_counter.set_default_tags({
        "deployment": self.deployment_name,
        "replica": self.replica_tag
    })

    self.restart_counter = metrics.Counter(
        "serve_deployment_replica_starts",
        description=
        ("The number of times this replica has been restarted due to failure."
         ),
        tag_keys=("deployment", "replica"),
    )
    self.restart_counter.set_default_tags({
        "deployment": self.deployment_name,
        "replica": self.replica_tag
    })

    self.processing_latency_tracker = metrics.Histogram(
        "serve_deployment_processing_latency_ms",
        description="The latency for queries to be processed.",
        boundaries=DEFAULT_LATENCY_BUCKET_MS,
        tag_keys=("deployment", "replica"),
    )
    self.processing_latency_tracker.set_default_tags({
        "deployment": self.deployment_name,
        "replica": self.replica_tag
    })

    self.num_processing_items = metrics.Gauge(
        "serve_replica_processing_queries",
        description="The current number of queries being processed.",
        tag_keys=("deployment", "replica"),
    )
    self.num_processing_items.set_default_tags({
        "deployment": self.deployment_name,
        "replica": self.replica_tag
    })

    # Record this (re)start.
    self.restart_counter.inc()

    self._shutdown_wait_loop_s = deployment_config.graceful_shutdown_wait_loop_s

    # Only autoscaling deployments push replica metrics to the controller.
    if deployment_config.autoscaling_config:
        process_remote_func = controller_handle.record_autoscaling_metrics.remote
        config = deployment_config.autoscaling_config
        start_metrics_pusher(
            interval_s=config.metrics_interval_s,
            collection_callback=self._collect_autoscaling_metrics,
            metrics_process_func=process_remote_func,
        )

    # NOTE(edoakes): we used to recommend that users use the "ray" logger
    # and tagged the logs with metadata as below. We now recommend using
    # the "ray.serve" 'component logger' (as of Ray 1.13). This is left to
    # maintain backwards compatibility with users who were using the
    # existing logger. We can consider removing it in Ray 2.0.
    ray_logger = logging.getLogger("ray")
    for handler in ray_logger.handlers:
        handler.setFormatter(
            logging.Formatter(
                handler.formatter._fmt +
                f" component=serve deployment={self.deployment_name} "
                f"replica={self.replica_tag}"))
def __init__(self, _callable: Callable, backend_config: BackendConfig,
             is_function: bool, controller_handle: ActorHandle) -> None:
    """Initialize the backend replica: batching queue, metrics, main loop.

    Args:
        _callable: The user's backend callable this replica wraps.
        backend_config: Config for this backend (batching and user config
            are read below).
        is_function: Whether the callable is a plain function.
        controller_handle: Handle to the Serve controller actor.
    """
    # Identity comes from the replica context set up by the runtime.
    self.backend_tag = ray.serve.api.get_replica_context().backend_tag
    self.replica_tag = ray.serve.api.get_replica_context().replica_tag
    self.callable = _callable
    self.is_function = is_function
    self.config = backend_config
    # Batch size of 0/None falls back to 1 (no batching).
    self.batch_queue = _BatchQueue(self.config.max_batch_size or 1,
                                   self.config.batch_wait_timeout)
    # Apply the initial user config before serving any queries.
    self.reconfigure(self.config.user_config)

    self.num_ongoing_requests = 0

    self.request_counter = metrics.Counter(
        "serve_backend_request_counter",
        description=("The number of queries that have been "
                     "processed in this replica."),
        tag_keys=("backend", ))
    self.request_counter.set_default_tags({"backend": self.backend_tag})

    # Receive backend-config updates from the controller via long polling.
    self.long_poll_client = LongPollAsyncClient(controller_handle, {
        LongPollKey.BACKEND_CONFIGS: self._update_backend_configs,
    })

    self.error_counter = metrics.Counter(
        "serve_backend_error_counter",
        description=("The number of exceptions that have "
                     "occurred in the backend."),
        tag_keys=("backend", ))
    self.error_counter.set_default_tags({"backend": self.backend_tag})

    self.restart_counter = metrics.Counter(
        "serve_backend_replica_starts",
        description=("The number of times this replica "
                     "has been restarted due to failure."),
        tag_keys=("backend", "replica"))
    self.restart_counter.set_default_tags({
        "backend": self.backend_tag,
        "replica": self.replica_tag
    })

    self.queuing_latency_tracker = metrics.Histogram(
        "serve_backend_queuing_latency_ms",
        description=("The latency for queries in the replica's queue "
                     "waiting to be processed or batched."),
        boundaries=DEFAULT_LATENCY_BUCKET_MS,
        tag_keys=("backend", "replica"))
    self.queuing_latency_tracker.set_default_tags({
        "backend": self.backend_tag,
        "replica": self.replica_tag
    })

    self.processing_latency_tracker = metrics.Histogram(
        "serve_backend_processing_latency_ms",
        description="The latency for queries to be processed.",
        boundaries=DEFAULT_LATENCY_BUCKET_MS,
        tag_keys=("backend", "replica", "batch_size"))
    self.processing_latency_tracker.set_default_tags({
        "backend": self.backend_tag,
        "replica": self.replica_tag
    })

    self.num_queued_items = metrics.Gauge(
        "serve_replica_queued_queries",
        description=("The current number of queries queued in "
                     "the backend replicas."),
        tag_keys=("backend", "replica"))
    self.num_queued_items.set_default_tags({
        "backend": self.backend_tag,
        "replica": self.replica_tag
    })

    self.num_processing_items = metrics.Gauge(
        "serve_replica_processing_queries",
        description="The current number of queries being processed.",
        tag_keys=("backend", "replica"))
    self.num_processing_items.set_default_tags({
        "backend": self.backend_tag,
        "replica": self.replica_tag
    })

    # Record this (re)start.
    self.restart_counter.inc()

    # Tag the shared "ray" logger's output with this replica's identity.
    ray_logger = logging.getLogger("ray")
    for handler in ray_logger.handlers:
        handler.setFormatter(
            logging.Formatter(
                handler.formatter._fmt +
                f" component=serve backend={self.backend_tag} "
                f"replica={self.replica_tag}"))

    # Start the replica's query-processing loop on the current event loop.
    asyncio.get_event_loop().create_task(self.main_loop())
def __init__(
    self,
    _callable: Callable,
    deployment_name: str,
    replica_tag: ReplicaTag,
    deployment_config: DeploymentConfig,
    user_config: Any,
    version: DeploymentVersion,
    is_function: bool,
    controller_handle: ActorHandle,
) -> None:
    """Initialize replica state: health check, metrics, autoscaling, logging.

    Args:
        _callable: The user's deployment callable this replica wraps.
        deployment_name: Name of the deployment this replica belongs to.
        replica_tag: Unique tag identifying this replica.
        deployment_config: Config for this deployment (shutdown/autoscaling
            settings are read below).
        user_config: The user-provided config object, stored as-is.
        version: Deployment version of this replica.
        is_function: Whether the callable is a plain function.
        controller_handle: Handle to the Serve controller actor.
    """
    self.deployment_config = deployment_config
    self.deployment_name = deployment_name
    self.replica_tag = replica_tag
    self.callable = _callable
    self.is_function = is_function
    self.user_config = user_config
    self.version = version
    self.rwlock = aiorwlock.RWLock()

    # Use the callable's health-check method if it defines one; fall back
    # to a no-op so health checks always succeed by default.
    user_health_check = getattr(_callable, HEALTH_CHECK_METHOD, None)
    if not callable(user_health_check):

        def user_health_check():
            pass

    self.user_health_check = sync_to_async(user_health_check)

    self.num_ongoing_requests = 0

    # Per-replica metrics, all tagged with (deployment, replica).
    self.request_counter = metrics.Counter(
        "serve_deployment_request_counter",
        description=("The number of queries that have been "
                     "processed in this replica."),
        tag_keys=("deployment", "replica"),
    )
    self.request_counter.set_default_tags({
        "deployment": self.deployment_name,
        "replica": self.replica_tag
    })

    self.error_counter = metrics.Counter(
        "serve_deployment_error_counter",
        description=("The number of exceptions that have "
                     "occurred in this replica."),
        tag_keys=("deployment", "replica"),
    )
    self.error_counter.set_default_tags({
        "deployment": self.deployment_name,
        "replica": self.replica_tag
    })

    self.restart_counter = metrics.Counter(
        "serve_deployment_replica_starts",
        description=("The number of times this replica "
                     "has been restarted due to failure."),
        tag_keys=("deployment", "replica"),
    )
    self.restart_counter.set_default_tags({
        "deployment": self.deployment_name,
        "replica": self.replica_tag
    })

    self.processing_latency_tracker = metrics.Histogram(
        "serve_deployment_processing_latency_ms",
        description="The latency for queries to be processed.",
        boundaries=DEFAULT_LATENCY_BUCKET_MS,
        tag_keys=("deployment", "replica"),
    )
    self.processing_latency_tracker.set_default_tags({
        "deployment": self.deployment_name,
        "replica": self.replica_tag
    })

    self.num_processing_items = metrics.Gauge(
        "serve_replica_processing_queries",
        description="The current number of queries being processed.",
        tag_keys=("deployment", "replica"),
    )
    self.num_processing_items.set_default_tags({
        "deployment": self.deployment_name,
        "replica": self.replica_tag
    })

    # Record this (re)start.
    self.restart_counter.inc()

    self._shutdown_wait_loop_s = deployment_config.graceful_shutdown_wait_loop_s

    # Only autoscaling deployments push replica metrics to the controller.
    if deployment_config.autoscaling_config:
        config = deployment_config.autoscaling_config
        start_metrics_pusher(
            interval_s=config.metrics_interval_s,
            collection_callback=self._collect_autoscaling_metrics,
            controller_handle=controller_handle,
        )

    # Tag the shared "ray" logger's output with this replica's identity.
    ray_logger = logging.getLogger("ray")
    for handler in ray_logger.handlers:
        handler.setFormatter(
            logging.Formatter(
                handler.formatter._fmt +
                f" component=serve deployment={self.deployment_name} "
                f"replica={self.replica_tag}"))
def __init__(self, _callable: Callable, backend_config: BackendConfig,
             is_function: bool, controller_handle: ActorHandle) -> None:
    """Initialize the backend replica: metrics, long-poll client, logging.

    Args:
        _callable: The user's backend callable this replica wraps.
        backend_config: Config for this backend, stored on self.config.
        is_function: Whether the callable is a plain function.
        controller_handle: Handle to the Serve controller actor.
    """
    # Identity comes from the replica context set up by the runtime.
    self.backend_tag = ray.serve.api.get_replica_context().deployment
    self.replica_tag = ray.serve.api.get_replica_context().replica_tag
    self.callable = _callable
    self.is_function = is_function
    self.config = backend_config

    self.num_ongoing_requests = 0

    # Per-replica metrics, all tagged with (deployment, replica).
    self.request_counter = metrics.Counter(
        "serve_deployment_request_counter",
        description=("The number of queries that have been "
                     "processed in this replica."),
        tag_keys=("deployment", "replica"))
    self.request_counter.set_default_tags({
        "deployment": self.backend_tag,
        "replica": self.replica_tag
    })

    # Receive backend-config updates from the controller via long polling.
    self.loop = asyncio.get_event_loop()
    self.long_poll_client = LongPollClient(
        controller_handle,
        {
            (LongPollNamespace.BACKEND_CONFIGS, self.backend_tag):
            self._update_backend_configs,
        },
        call_in_event_loop=self.loop,
    )

    self.error_counter = metrics.Counter(
        "serve_deployment_error_counter",
        description=("The number of exceptions that have "
                     "occurred in this replica."),
        tag_keys=("deployment", "replica"))
    self.error_counter.set_default_tags({
        "deployment": self.backend_tag,
        "replica": self.replica_tag
    })

    self.restart_counter = metrics.Counter(
        "serve_deployment_replica_starts",
        description=("The number of times this replica "
                     "has been restarted due to failure."),
        tag_keys=("deployment", "replica"))
    self.restart_counter.set_default_tags({
        "deployment": self.backend_tag,
        "replica": self.replica_tag
    })

    self.processing_latency_tracker = metrics.Histogram(
        "serve_deployment_processing_latency_ms",
        description="The latency for queries to be processed.",
        boundaries=DEFAULT_LATENCY_BUCKET_MS,
        tag_keys=("deployment", "replica"))
    self.processing_latency_tracker.set_default_tags({
        "deployment": self.backend_tag,
        "replica": self.replica_tag
    })

    self.num_processing_items = metrics.Gauge(
        "serve_replica_processing_queries",
        description="The current number of queries being processed.",
        tag_keys=("deployment", "replica"))
    self.num_processing_items.set_default_tags({
        "deployment": self.backend_tag,
        "replica": self.replica_tag
    })

    # Record this (re)start.
    self.restart_counter.inc()

    # Tag the shared "ray" logger's output with this replica's identity.
    ray_logger = logging.getLogger("ray")
    for handler in ray_logger.handlers:
        handler.setFormatter(
            logging.Formatter(
                handler.formatter._fmt +
                f" component=serve deployment={self.backend_tag} "
                f"replica={self.replica_tag}"))