Example #1
class HTTPProxy:
    def __init__(self):
        assert ray.is_initialized()

        # Delayed import because GlobalState depends on the HTTP actor
        from ray.experimental.serve.global_state import GlobalState
        self.serve_global_state = GlobalState()
        self.route_table_cache = dict()

        self.route_checker_should_shutdown = False
Example #2
File: api.py  Project: pangfd/ray-1
def init(kv_store_connector=None,
         kv_store_path="/tmp/ray_serve.db",
         blocking=False,
         http_host=DEFAULT_HTTP_HOST,
         http_port=DEFAULT_HTTP_PORT,
         ray_init_kwargs={"object_store_memory": int(1e8)},
         gc_window_seconds=3600):
    """Initialize a serve cluster.

    If the serve cluster has already been initialized, this function just
    returns.

    Calling `ray.init` before `serve.init` is optional. If no Ray cluster has
    been initialized yet, serve will call `ray.init` with the given
    `object_store_memory` requirement.

    Args:
        kv_store_connector (callable): Function of (namespace) => TableObject.
            We will use a SQLite connector that stores to /tmp by default.
        kv_store_path (str, path): Path to the SQLite table.
        blocking (bool): If true, the function will wait for the HTTP server
            to be healthy and for other components to be ready before
            returning.
        http_host (str): Host for the HTTP server. Defaults to "0.0.0.0".
        http_port (int): Port for the HTTP server. Defaults to 8000.
        ray_init_kwargs (dict): Arguments passed to `ray.init` if there is no
            existing Ray connection. Defaults to
            {"object_store_memory": int(1e8)} for performance stability
            reasons.
        gc_window_seconds (int): How long metric data is kept in memory. Data
            older than the gc_window is deleted. The default is 3600 seconds
            (1 hour).
    """
    global global_state

    # No-op if global_state has already been set
    if global_state is not None:
        return

    # Initialize ray if needed.
    if not ray.is_initialized():
        ray.init(**ray_init_kwargs)

    # Try to get the serve nursery actor if it already exists
    try:
        ray.experimental.get_actor(SERVE_NURSERY_NAME)
        global_state = GlobalState()
        return
    except ValueError:
        pass

    # Serve has not been initialized, perform init sequence
    # TODO: move the db to session_dir
    #    ray.worker._global_node.address_info["session_dir"]
    def kv_store_connector(namespace):
        return SQLiteKVStore(namespace, db_path=kv_store_path)

    nursery = start_initial_state(kv_store_connector)

    global_state = GlobalState(nursery)
    global_state.init_or_get_http_server(host=http_host, port=http_port)
    global_state.init_or_get_router()
    global_state.init_or_get_metric_monitor(
        gc_window_seconds=gc_window_seconds)

    if blocking:
        block_until_http_ready("http://{}:{}".format(http_host, http_port))
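
A hedged usage sketch for the `init` variant above (not part of the original
example): it assumes this api.py is exposed as `ray.experimental.serve`, as
the imports in example #3 suggest, and that /tmp is writable for the SQLite
file.

from ray.experimental import serve

# Starts Ray (if needed), the serve nursery, the router, the metric monitor,
# and the HTTP server, then blocks until http://0.0.0.0:8000 responds.
serve.init(kv_store_path="/tmp/my_serve.db", blocking=True)

# A second call is a no-op because global_state is already set.
serve.init()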
Example #3
File: api.py  Project: zongzhimin/ray
import inspect

import numpy as np

import ray
from ray.experimental.serve.task_runner import RayServeMixin, TaskRunnerActor
from ray.experimental.serve.utils import pformat_color_json, logger
from ray.experimental.serve.global_state import GlobalState

global_state = GlobalState()


def init(blocking=False, object_store_memory=int(1e8)):
    """Initialize a serve cluster.

    Calling `ray.init` before `serve.init` is optional. If no Ray cluster has
    been initialized yet, serve will call `ray.init` with the given
    `object_store_memory` requirement.

    Args:
        blocking (bool): If true, the function will wait for the HTTP server
            to be healthy before returning.
        object_store_memory (int): Allocated shared memory size in bytes. The
            default is 100 MB, kept deliberately low for latency stability
            reasons.
    """
    if not ray.is_initialized():
        ray.init(object_store_memory=object_store_memory)

    # NOTE(simon): Currently the initialization order is fixed.
    # HTTP server depends on the API server.
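
A short, hedged sketch of the "Ray already initialized" path described in the
docstring above (the `ray.experimental.serve` module path is assumed, matching
the example's own imports):

import ray
from ray.experimental import serve

# If ray.init has already been called, serve.init reuses that connection and
# the existing object store size instead of calling ray.init itself.
ray.init(object_store_memory=int(2e8))
serve.init(blocking=True)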
Example #4
class HTTPProxy:
    """
    This class should be instantiated and ran by ASGI server.

    >>> import uvicorn
    >>> uvicorn.run(HTTPProxy(kv_store_actor_handle, router_handle))
    # blocks forever
    """
    def __init__(self):
        assert ray.is_initialized()

        # Delayed import because GlobalState depends on the HTTP actor
        from ray.experimental.serve.global_state import GlobalState
        self.serve_global_state = GlobalState()
        self.route_table_cache = dict()

        self.route_checker_should_shutdown = False

    async def route_checker(self, interval):
        while True:
            if self.route_checker_should_shutdown:
                return

            self.route_table_cache = (
                self.serve_global_state.route_table.list_service())

            await asyncio.sleep(interval)

    async def handle_lifespan_message(self, scope, receive, send):
        assert scope["type"] == "lifespan"

        message = await receive()
        if message["type"] == "lifespan.startup":
            await _async_init()
            asyncio.ensure_future(
                self.route_checker(interval=HTTP_ROUTER_CHECKER_INTERVAL_S))
            await send({"type": "lifespan.startup.complete"})
        elif message["type"] == "lifespan.shutdown":
            self.route_checker_should_shutdown = True
            await send({"type": "lifespan.shutdown.complete"})

    async def receive_http_body(self, scope, receive, send):
        body_buffer = []
        more_body = True
        while more_body:
            message = await receive()
            assert message["type"] == "http.request"

            # "more_body" may be omitted by the ASGI server; default to False.
            more_body = message.get("more_body", False)
            body_buffer.append(message["body"])

        return b"".join(body_buffer)

    async def __call__(self, scope, receive, send):
        # NOTE: This implements ASGI protocol specified in
        #       https://asgi.readthedocs.io/en/latest/specs/index.html

        if scope["type"] == "lifespan":
            await self.handle_lifespan_message(scope, receive, send)
            return

        assert scope["type"] == "http"
        current_path = scope["path"]
        if current_path == "/":
            await JSONResponse(self.route_table_cache)(scope, receive, send)
            return

        # TODO(simon): Use werkzeug route mapper to support variable path
        if current_path not in self.route_table_cache:
            error_message = ("Path {} not found. "
                             "Please ping http://.../ for routing table"
                             ).format(current_path)
            await JSONResponse({"error": error_message},
                               status_code=404)(scope, receive, send)
            return

        endpoint_name = self.route_table_cache[current_path]
        http_body_bytes = await self.receive_http_body(scope, receive, send)

        # get slo_ms before enqueuing the query
        query_string = scope["query_string"].decode("ascii")
        query_kwargs = parse_qs(query_string)
        request_slo_ms = query_kwargs.pop("slo_ms", None)
        if request_slo_ms is not None:
            try:
                if len(request_slo_ms) != 1:
                    raise ValueError(
                        "Multiple SLO specified, please specific only one.")
                request_slo_ms = request_slo_ms[0]
                request_slo_ms = float(request_slo_ms)
                if request_slo_ms < 0:
                    raise ValueError(
                        "Request SLO must be non-negative, it is {}".format(
                            request_slo_ms))
            except ValueError as e:
                await JSONResponse({"error": str(e)})(scope, receive, send)
                return

        result_object_id_bytes = await (
            self.serve_global_state.init_or_get_router(
            ).enqueue_request.remote(service=endpoint_name,
                                     request_args=(scope, http_body_bytes),
                                     request_kwargs=dict(),
                                     request_context=TaskContext.Web,
                                     request_slo_ms=request_slo_ms))

        result = await ray.ObjectID(result_object_id_bytes)

        if isinstance(result, ray.exceptions.RayTaskError):
            await JSONResponse({
                "error":
                "internal error, please use python API to debug"
            })(scope, receive, send)
        else:
            await JSONResponse({"result": result})(scope, receive, send)
Example #5
File: api.py  Project: kseager/ray
def init(kv_store_connector=None,
         kv_store_path=None,
         blocking=False,
         start_server=True,
         http_host=DEFAULT_HTTP_HOST,
         http_port=DEFAULT_HTTP_PORT,
         ray_init_kwargs={
             "object_store_memory": int(1e8),
             "num_cpus": max(cpu_count(), 8)
         },
         gc_window_seconds=3600,
         queueing_policy=RoutePolicy.Random,
         policy_kwargs={}):
    """Initialize a serve cluster.

    If the serve cluster has already been initialized, this function just
    returns.

    Calling `ray.init` before `serve.init` is optional. If no Ray cluster has
    been initialized yet, serve will call `ray.init` with the given
    `object_store_memory` requirement.

    Args:
        kv_store_connector (callable): Function of (namespace) => TableObject.
            We will use a SQLite connector that stores to /tmp by default.
        kv_store_path (str, path): Path to the SQLite table.
        blocking (bool): If true, the function will wait for the HTTP server
            to be healthy and for other components to be ready before
            returning.
        start_server (bool): If true, `serve.init` starts the HTTP server.
            (Default: True)
        http_host (str): Host for the HTTP server. Defaults to "0.0.0.0".
        http_port (int): Port for the HTTP server. Defaults to 8000.
        ray_init_kwargs (dict): Arguments passed to `ray.init` if there is no
            existing Ray connection. Defaults to
            {"object_store_memory": int(1e8),
            "num_cpus": max(cpu_count(), 8)} for performance stability
            reasons.
        gc_window_seconds (int): How long metric data is kept in memory. Data
            older than the gc_window is deleted. The default is 3600 seconds
            (1 hour).
        queueing_policy (RoutePolicy): The queueing policy used to select a
            backend for a service. (Default: RoutePolicy.Random)
        policy_kwargs (dict): Arguments used to instantiate the queueing
            policy.
    """
    global global_state
    # No-op if global_state has already been set
    if global_state is not None:
        return

    # Initialize ray if needed.
    if not ray.is_initialized():
        ray.init(**ray_init_kwargs)

    # Try to get the serve nursery actor if it already exists
    try:
        ray.experimental.get_actor(SERVE_NURSERY_NAME)
        global_state = GlobalState()
        return
    except ValueError:
        pass

    # Register serialization context once
    ray.register_custom_serializer(Query, Query.ray_serialize,
                                   Query.ray_deserialize)

    if kv_store_path is None:
        _, kv_store_path = mkstemp()

    # Serve has not been initialized, perform init sequence
    # TODO: move the db to session_dir
    #    ray.worker._global_node.address_info["session_dir"]
    def kv_store_connector(namespace):
        return SQLiteKVStore(namespace, db_path=kv_store_path)

    nursery = start_initial_state(kv_store_connector)

    global_state = GlobalState(nursery)
    if start_server:
        global_state.init_or_get_http_server(host=http_host, port=http_port)
    global_state.init_or_get_router(queueing_policy=queueing_policy,
                                    policy_kwargs=policy_kwargs)
    global_state.init_or_get_metric_monitor(
        gc_window_seconds=gc_window_seconds)

    if start_server and blocking:
        block_until_http_ready("http://{}:{}".format(http_host, http_port))
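
A hedged sketch of the extra knobs in this variant, a router-only setup with
an explicit queueing policy (the RoutePolicy import path is an assumption):

from ray.experimental import serve
from ray.experimental.serve import RoutePolicy  # assumed export location

# Skip the HTTP server; the nursery, router, and metric monitor still start.
serve.init(start_server=False,
           queueing_policy=RoutePolicy.Random,
           policy_kwargs={})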