def set_traffic(endpoint_name, traffic_policy_dictionary):
    """Associate a registered endpoint with a traffic-splitting policy.

    Example:
    >>> serve.set_traffic("service-name", {
        "backend:v1": 0.5,
        "backend:v2": 0.5
    })

    Args:
        endpoint_name (str): A registered service endpoint.
        traffic_policy_dictionary (dict): maps backend names to their
            traffic weights. The weights must sum to 1.
    """
    # Forward the policy to the master actor, retrying across actor failures.
    retry_actor_failures(
        master_actor.set_traffic,
        endpoint_name,
        traffic_policy_dictionary,
    )
def create_backend(func_or_class,
                   backend_tag,
                   *actor_init_args,
                   backend_config=None):
    """Create a backend using func_or_class and assign backend_tag.

    Args:
        func_or_class (callable, class): a function or a class implementing
            __call__.
        backend_tag (str): a unique tag assign to this backend. It will be
            used to associate services in traffic policy.
        *actor_init_args (optional): the arguments to pass to the class
            initialization method. Not supported for function backends.
        backend_config (BackendConfig): An object defining backend properties
            for starting a backend. Defaults to a fresh BackendConfig.

    Raises:
        TypeError: if backend_config is not a BackendConfig instance.
        ValueError: if actor_init_args are given for a function backend, or
            func_or_class is neither a function nor a class.
    """
    # Configure backend_config.
    if backend_config is None:
        backend_config = BackendConfig()
    # Raise instead of assert: asserts are stripped under `python -O`, which
    # would silently skip this validation.
    if not isinstance(backend_config, BackendConfig):
        raise TypeError("backend_config must be"
                        " of instance BackendConfig")

    # Validate that func_or_class is a function or class.
    if inspect.isfunction(func_or_class):
        if len(actor_init_args) != 0:
            raise ValueError(
                "actor_init_args not supported for function backend.")
    elif not inspect.isclass(func_or_class):
        raise ValueError(
            "Backend must be a function or class, it is {}.".format(
                type(func_or_class)))

    # Inspect the batch annotation once instead of twice.
    accepts_batch = _backend_accept_batch(func_or_class)

    # Make sure the batch size is correct: a max_batch_size only makes sense
    # when the backend is annotated to accept batches.
    if backend_config.max_batch_size is not None and not accepts_batch:
        raise batch_annotation_not_found
    if accepts_batch:
        backend_config.has_accept_batch_annotation = True

    retry_actor_failures(master_actor.create_backend, backend_tag,
                         backend_config, func_or_class, actor_init_args)
def stat(percentiles=None, agg_windows_seconds=None):
    """Retrieve metric statistics about ray serve system.

    Args:
        percentiles (List[int]): The percentiles for aggregation operations.
            Default is 50th, 90th, 95th percentile.
        agg_windows_seconds (List[int]): The aggregation windows in seconds.
            The longest aggregation window must be shorter or equal to the
            gc_window_seconds. Default is [10, 60, 300, 600, 3600].
    """
    # Use None sentinels instead of mutable list defaults: default lists are
    # created once at definition time and shared across all calls.
    if percentiles is None:
        percentiles = [50, 90, 95]
    if agg_windows_seconds is None:
        agg_windows_seconds = [10, 60, 300, 600, 3600]
    # get_metric_monitor returns a single-element list; unpack it.
    [monitor] = retry_actor_failures(master_actor.get_metric_monitor)
    return ray.get(monitor.collect.remote(percentiles, agg_windows_seconds))
def __init__(self, backend_tag, replica_tag, init_args, instance_name=None):
    """Set up a serve worker replica for the given backend.

    Args:
        backend_tag: tag of the backend this replica belongs to; also used
            as the "backend" label on metrics.
        replica_tag: tag identifying this particular replica.
        init_args: arguments passed to func_or_class when it is a class.
        instance_name: optional serve instance name forwarded to serve.init.
    """
    # Connect this process to the (possibly named) serve instance so the
    # master actor lookup below can succeed.
    serve.init(name=instance_name)
    # NOTE(review): `is_function` and `func_or_class` are not parameters —
    # they appear to be captured from an enclosing scope (e.g. a closure
    # defining this worker class per backend); confirm against the full file.
    if is_function:
        _callable = func_or_class
    else:
        # Class backends are instantiated here with the provided init args.
        _callable = func_or_class(*init_args)

    master = serve.api._get_master_actor()
    # get_metric_exporter returns a single-element list; unpack it.
    [metric_exporter] = retry_actor_failures(master.get_metric_exporter)
    metric_client = MetricClient(
        metric_exporter, default_labels={"backend": backend_tag})
    self.backend = RayServeWorker(backend_tag, replica_tag, _callable,
                                  is_function, metric_client)
def delete_backend(backend_tag):
    """Remove the backend identified by backend_tag.

    The backend must not currently be used by any endpoints.
    """
    # Delegate deletion to the master actor, retrying on actor failure.
    retry_actor_failures(master_actor.delete_backend, backend_tag)
def delete_endpoint(endpoint):
    """Remove the given endpoint from the serve instance.

    Any backends associated with it are left untouched.
    """
    # Delegate deletion to the master actor, retrying on actor failure.
    retry_actor_failures(master_actor.delete_endpoint, endpoint)
async def __init__(self):
    """Initialize the router's queues and pull state from the master actor."""
    # Note: Several queues are used in the router
    # - When a request comes in, it's placed inside its corresponding
    #   endpoint_queue.
    # - The endpoint_queue is dequeued during flush operation, which moves
    #   the queries to backend buffer_queue. Here we match a request
    #   for an endpoint to a backend given some policy.
    # - The worker_queue is used to collect idle actor handles. These
    #   handles are dequeued during the second stage of flush operation,
    #   which assigns queries in buffer_queue to actor handles.

    # -- Queues -- #

    # endpoint_name -> request queue
    self.endpoint_queues: DefaultDict[asyncio.Queue[Query]] = defaultdict(
        asyncio.Queue)
    # backend_name -> worker request queue
    self.worker_queues: DefaultDict[asyncio.Queue[
        ray.actor.ActorHandle]] = defaultdict(asyncio.Queue)
    # backend_name -> worker payload queue
    # NOTE(review): blist.sortedlist keeps queued queries ordered; the sort
    # key is not visible from this block — confirm in Query's comparison.
    self.backend_queues = defaultdict(blist.sortedlist)

    # -- Metadata -- #

    # endpoint_name -> traffic_policy
    self.traffic = dict()
    # backend_name -> backend_config
    self.backend_info = dict()
    # replica tag -> worker_handle
    self.replicas = dict()

    # -- Synchronization -- #

    # This lock guarantees that only one flush operation can happen at a
    # time. Without the lock, multiple flush operations can pop from the
    # same buffer_queue and worker_queue and create deadlock. For example,
    # one operation holding the only query and the other flush operation
    # holding the only idle replica. Additionally, allowing only one flush
    # operation at a time simplifies design overhead for custom queuing and
    # batching policies.
    self.flush_lock = asyncio.Lock()

    # Fetch the worker handles, traffic policies, and backend configs from
    # the master actor. We use a "pull-based" approach instead of pushing
    # them from the master so that the router can transparently recover
    # from failure.
    ray.serve.init()
    master_actor = ray.serve.api._get_master_actor()

    # Replay every known traffic policy into this router.
    traffic_policies = retry_actor_failures(
        master_actor.get_traffic_policies)
    for endpoint, traffic_policy in traffic_policies.items():
        await self.set_traffic(endpoint, traffic_policy)

    # Register every live worker replica, per backend.
    backend_dict = retry_actor_failures(
        master_actor.get_all_worker_handles)
    for backend_tag, replica_dict in backend_dict.items():
        for replica_tag, worker in replica_dict.items():
            await self.add_new_worker(backend_tag, replica_tag, worker)

    # Apply the stored config for each backend.
    backend_configs = retry_actor_failures(
        master_actor.get_backend_configs)
    for backend, backend_config in backend_configs.items():
        await self.set_backend_config(backend, backend_config)

    # Counters exported through the serve metric pipeline.
    self.metric_client = MetricClient.connect_from_serve()
    self.num_router_requests = self.metric_client.new_counter(
        "num_router_requests",
        description="Number of requests processed by the router.",
        label_names=("endpoint", ))
    self.num_error_endpoint_request = self.metric_client.new_counter(
        "num_error_endpoint_requests",
        description=("Number of requests errored when getting result "
                     "for endpoint."),
        label_names=("endpoint", ))
    self.num_error_backend_request = self.metric_client.new_counter(
        "num_error_backend_requests",
        description=("Number of requests errored when getting result "
                     "from backend."),
        label_names=("backend", ))
def list_backends():
    """Return a dictionary of all registered backends.

    Dictionary maps backend tags to backend configs.
    """
    # The master actor owns the authoritative backend registry.
    return retry_actor_failures(master_actor.get_all_backends)