Exemplo n.º 1
0
def set_traffic(endpoint_name, traffic_policy_dictionary):
    """Associate a service endpoint with traffic policy.

    Example:

    >>> serve.set_traffic("service-name", {
        "backend:v1": 0.5,
        "backend:v2": 0.5
    })

    Args:
        endpoint_name (str): A registered service endpoint.
        traffic_policy_dictionary (dict): a dictionary maps backend names
            to their traffic weights. The weights must sum to 1.
    """
    retry_actor_failures(master_actor.set_traffic, endpoint_name,
                         traffic_policy_dictionary)
Exemplo n.º 2
0
def create_backend(func_or_class,
                   backend_tag,
                   *actor_init_args,
                   backend_config=None):
    """Create a backend using func_or_class and assign backend_tag.

    Args:
        func_or_class (callable, class): a function or a class implementing
            __call__.
        backend_tag (str): a unique tag assign to this backend. It will be used
            to associate services in traffic policy.
        backend_config (BackendConfig): An object defining backend properties
        for starting a backend.
        *actor_init_args (optional): the argument to pass to the class
            initialization method.
    """
    # Configure backend_config
    if backend_config is None:
        backend_config = BackendConfig()
    assert isinstance(backend_config,
                      BackendConfig), ("backend_config must be"
                                       " of instance BackendConfig")

    # Validate that func_or_class is a function or class.
    if inspect.isfunction(func_or_class):
        if len(actor_init_args) != 0:
            raise ValueError(
                "actor_init_args not supported for function backend.")
    elif not inspect.isclass(func_or_class):
        raise ValueError(
            "Backend must be a function or class, it is {}.".format(
                type(func_or_class)))

    # Make sure the batch size is correct.
    should_accept_batch = backend_config.max_batch_size is not None
    if should_accept_batch and not _backend_accept_batch(func_or_class):
        raise batch_annotation_not_found
    if _backend_accept_batch(func_or_class):
        backend_config.has_accept_batch_annotation = True

    retry_actor_failures(master_actor.create_backend, backend_tag,
                         backend_config, func_or_class, actor_init_args)
Exemplo n.º 3
0
def stat(percentiles=[50, 90, 95],
         agg_windows_seconds=[10, 60, 300, 600, 3600]):
    """Retrieve metric statistics about ray serve system.

    Args:
        percentiles(List[int]): The percentiles for aggregation operations.
            Default is 50th, 90th, 95th percentile.
        agg_windows_seconds(List[int]): The aggregation windows in seconds.
            The longest aggregation window must be shorter or equal to the
            gc_window_seconds.
    """
    [monitor] = retry_actor_failures(master_actor.get_metric_monitor)
    return ray.get(monitor.collect.remote(percentiles, agg_windows_seconds))
Exemplo n.º 4
0
        def __init__(self,
                     backend_tag,
                     replica_tag,
                     init_args,
                     instance_name=None):
            serve.init(name=instance_name)
            if is_function:
                _callable = func_or_class
            else:
                _callable = func_or_class(*init_args)

            master = serve.api._get_master_actor()
            [metric_exporter
             ] = retry_actor_failures(master.get_metric_exporter)
            metric_client = MetricClient(
                metric_exporter, default_labels={"backend": backend_tag})
            self.backend = RayServeWorker(backend_tag, replica_tag, _callable,
                                          is_function, metric_client)
Exemplo n.º 5
0
def delete_backend(backend_tag):
    """Delete the given backend.

    The backend must not currently be used by any endpoints.
    """
    retry_actor_failures(master_actor.delete_backend, backend_tag)
Exemplo n.º 6
0
def delete_endpoint(endpoint):
    """Delete the given endpoint.

    Does not delete any associated backends.
    """
    retry_actor_failures(master_actor.delete_endpoint, endpoint)
Exemplo n.º 7
0
    async def __init__(self):
        # Note: Several queues are used in the router
        # - When a request come in, it's placed inside its corresponding
        #   endpoint_queue.
        # - The endpoint_queue is dequeued during flush operation, which moves
        #   the queries to backend buffer_queue. Here we match a request
        #   for an endpoint to a backend given some policy.
        # - The worker_queue is used to collect idle actor handle. These
        #   handles are dequed during the second stage of flush operation,
        #   which assign queries in buffer_queue to actor handle.

        # -- Queues -- #

        # endpoint_name -> request queue
        self.endpoint_queues: DefaultDict[asyncio.Queue[Query]] = defaultdict(
            asyncio.Queue)
        # backend_name -> worker request queue
        self.worker_queues: DefaultDict[asyncio.Queue[
            ray.actor.ActorHandle]] = defaultdict(asyncio.Queue)
        # backend_name -> worker payload queue
        self.backend_queues = defaultdict(blist.sortedlist)

        # -- Metadata -- #

        # endpoint_name -> traffic_policy
        self.traffic = dict()
        # backend_name -> backend_config
        self.backend_info = dict()
        # replica tag -> worker_handle
        self.replicas = dict()

        # -- Synchronization -- #

        # This lock guarantee that only one flush operation can happen at a
        # time. Without the lock, multiple flush operation can pop from the
        # same buffer_queue and worker_queue and create deadlock. For example,
        # an operation holding the only query and the other flush operation
        # holding the only idle replica. Additionally, allowing only one flush
        # operation at a time simplifies design overhead for custom queuing and
        # batching polcies.
        self.flush_lock = asyncio.Lock()

        # Fetch the worker handles, traffic policies, and backend configs from
        # the master actor. We use a "pull-based" approach instead of pushing
        # them from the master so that the router can transparently recover
        # from failure.
        ray.serve.init()
        master_actor = ray.serve.api._get_master_actor()

        traffic_policies = retry_actor_failures(
            master_actor.get_traffic_policies)
        for endpoint, traffic_policy in traffic_policies.items():
            await self.set_traffic(endpoint, traffic_policy)

        backend_dict = retry_actor_failures(
            master_actor.get_all_worker_handles)
        for backend_tag, replica_dict in backend_dict.items():
            for replica_tag, worker in replica_dict.items():
                await self.add_new_worker(backend_tag, replica_tag, worker)

        backend_configs = retry_actor_failures(
            master_actor.get_backend_configs)
        for backend, backend_config in backend_configs.items():
            await self.set_backend_config(backend, backend_config)

        self.metric_client = MetricClient.connect_from_serve()
        self.num_router_requests = self.metric_client.new_counter(
            "num_router_requests",
            description="Number of requests processed by the router.",
            label_names=("endpoint", ))
        self.num_error_endpoint_request = self.metric_client.new_counter(
            "num_error_endpoint_requests",
            description=("Number of requests errored when getting result "
                         "for endpoint."),
            label_names=("endpoint", ))
        self.num_error_backend_request = self.metric_client.new_counter(
            "num_error_backend_requests",
            description=("Number of requests errored when getting result "
                         "from backend."),
            label_names=("backend", ))
Exemplo n.º 8
0
def list_backends():
    """Returns a dictionary of all registered backends.

    Dictionary maps backend tags to backend configs.
    """
    return retry_actor_failures(master_actor.get_all_backends)