Exemplo n.º 1
0
def test_worker_restart(serve_instance):
    """Check that a killed backend worker is restarted as a new process.

    Kills the single worker replica (with restart allowed) and then polls
    the endpoint until a response comes back from a different PID, which
    indicates a fresh worker process took over.
    """
    serve.init()
    serve.create_endpoint("worker_failure", "/worker_failure")

    class Worker1:
        def __call__(self):
            # Reply with the worker process PID so restarts are observable.
            return os.getpid()

    serve.create_backend("worker_failure:v1", Worker1)
    serve.set_traffic("worker_failure", {"worker_failure:v1": 1.0})

    # Get the PID of the worker.
    old_pid = request_with_retries("/worker_failure", timeout=1).text

    # Kill the worker.
    handles = _get_worker_handles("worker_failure:v1")
    assert len(handles) == 1
    ray.kill(handles[0], no_restart=False)

    # Wait until the worker is killed and a new one is started.
    start = time.time()
    while time.time() - start < 30:
        response = request_with_retries("/worker_failure", timeout=30)
        if response.text != old_pid:
            break
    else:
        assert False, "Timed out waiting for worker to die."
Exemplo n.º 2
0
def test_e2e(serve_instance):
    """End-to-end check: route-table exposure plus GET/POST dispatch.

    First polls the proxy's /-/routes debug endpoint (with exponential
    backoff, up to 5 attempts) until the new endpoint appears, then
    verifies that a function backend sees the correct HTTP method for
    both GET and POST requests.
    """
    serve.init()
    serve.create_endpoint("endpoint", "/api", methods=["GET", "POST"])

    retry_count = 5
    timeout_sleep = 0.5
    while True:
        try:
            resp = requests.get(
                "http://127.0.0.1:8000/-/routes", timeout=0.5).json()
            assert resp == {"/api": ["endpoint", ["GET", "POST"]]}
            break
        except Exception as e:
            # The route table is updated asynchronously, so back off
            # exponentially and retry a bounded number of times.
            time.sleep(timeout_sleep)
            timeout_sleep *= 2
            retry_count -= 1
            if retry_count == 0:
                # BUG FIX: the message previously claimed "3 tries" even
                # though 5 retries are attempted, and was missing the
                # space between the two sentences.
                assert False, ("Route table hasn't been updated after 5 "
                               "tries. The latest error was {}").format(e)

    def function(flask_request):
        return {"method": flask_request.method}

    serve.create_backend("echo:v1", function)
    serve.set_traffic("endpoint", {"echo:v1": 1.0})

    resp = requests.get("http://127.0.0.1:8000/api").json()["method"]
    assert resp == "GET"

    resp = requests.post("http://127.0.0.1:8000/api").json()["method"]
    assert resp == "POST"
Exemplo n.º 3
0
def test_router_failure(serve_instance):
    """The router actor can be killed without dropping endpoint traffic."""
    serve.init()
    serve.create_endpoint("router_failure", "/router_failure")

    def function():
        return "hello1"

    serve.create_backend("router_failure:v1", function)
    serve.set_traffic("router_failure", {"router_failure:v1": 1.0})

    assert request_with_retries("/router_failure", timeout=5).text == "hello1"

    # Traffic flows normally before the router is killed.
    for _ in range(10):
        assert request_with_retries(
            "/router_failure", timeout=30).text == "hello1"

    _kill_router()

    # The restarted router keeps serving the existing backend.
    for _ in range(10):
        assert request_with_retries(
            "/router_failure", timeout=30).text == "hello1"

    def function():
        return "hello2"

    serve.create_backend("router_failure:v2", function)
    serve.set_traffic("router_failure", {"router_failure:v2": 1.0})

    # Backends created after the failure also receive traffic.
    for _ in range(10):
        assert request_with_retries(
            "/router_failure", timeout=30).text == "hello2"
Exemplo n.º 4
0
def test_delete_endpoint(serve_instance, route):
    """Deleting an endpoint frees its name/route and keeps backends alive."""
    endpoint_name = "delete_endpoint" + str(route)
    serve.create_endpoint(endpoint_name, route=route)
    serve.delete_endpoint(endpoint_name)

    # Check that we can reuse a deleted endpoint name and route.
    serve.create_endpoint(endpoint_name, route=route)

    def function():
        return "hello"

    serve.create_backend("delete-endpoint:v1", function)
    serve.set_traffic(endpoint_name, {"delete-endpoint:v1": 1.0})

    def check_response():
        # Routed endpoints are checked over HTTP, unrouted ones via handle.
        if route is None:
            handle = serve.get_handle(endpoint_name)
            assert ray.get(handle.remote()) == "hello"
        else:
            assert requests.get(
                "http://127.0.0.1:8000/delete-endpoint").text == "hello"

    check_response()

    # Check that deleting the endpoint doesn't delete the backend.
    serve.delete_endpoint(endpoint_name)
    serve.create_endpoint(endpoint_name, route=route)
    serve.set_traffic(endpoint_name, {"delete-endpoint:v1": 1.0})

    check_response()
Exemplo n.º 5
0
def test_batching(serve_instance):
    """Check that @serve.accept_batch backends receive batched queries.

    Sends 20 handle requests at a backend whose max_batch_size is 5; if
    any __call__ invocation saw a batch larger than 1, the per-batch
    counter must stay below 20.
    """
    class BatchingExample:
        def __init__(self):
            # Incremented once per *batch*, not once per query.
            self.count = 0

        @serve.accept_batch
        def __call__(self, flask_request, temp=None):
            self.count += 1
            batch_size = serve.context.batch_size
            # One result per query in the batch, all carrying the same
            # batch counter value.
            return [self.count] * batch_size

    serve.create_endpoint("counter1", "/increment2")

    # Keep checking the routing table until /increment2 is populated
    while "/increment2" not in requests.get(
            "http://127.0.0.1:8000/-/routes").json():
        time.sleep(0.2)

    # set the max batch size
    serve.create_backend(
        "counter:v11", BatchingExample, config={"max_batch_size": 5})
    serve.set_traffic("counter1", {"counter:v11": 1.0})

    future_list = []
    handle = serve.get_handle("counter1")
    for _ in range(20):
        f = handle.remote(temp=1)
        future_list.append(f)

    counter_result = ray.get(future_list)
    # Since count is only updated per batch of queries, if there was at
    # least one __call__ invocation with batch size greater than 1 the
    # counter result will always be less than 20.
    assert max(counter_result) < 20
def serve_new_model(model_dir, checkpoint, config, metrics, day, gpu=False):
    """Deploy a freshly trained checkpoint behind the "mnist" endpoint.

    Moves the checkpoint into the model directory, creates a day-stamped
    backend for it, routes all "mnist" traffic to that backend, and tears
    down any older day-stamped backends. Returns True on completion.
    """
    print("Serving checkpoint: {}".format(checkpoint))

    checkpoint_path = _move_checkpoint_to_model_dir(model_dir, checkpoint,
                                                    config, metrics)

    serve.init()
    backend_name = "mnist:day_{}".format(day)

    serve.create_backend(backend_name, MNISTBackend, checkpoint_path, config,
                         metrics, gpu)

    if "mnist" in serve.list_endpoints():
        # The endpoint already exists, route all traffic to the new model
        # Here you could also implement an incremental rollout, where only
        # a part of the traffic is sent to the new backend and the
        # rest is sent to the existing backends.
        serve.set_traffic("mnist", {backend_name: 1.0})
    else:
        # First time we serve a model - create endpoint
        serve.create_endpoint("mnist",
                              backend=backend_name,
                              route="/mnist",
                              methods=["POST"])

    # Delete previous existing backends
    stale_backends = [
        name for name in serve.list_backends()
        if name.startswith("mnist:day") and name != backend_name
    ]
    for name in stale_backends:
        serve.delete_backend(name)

    return True
Exemplo n.º 7
0
def test_http_proxy_failure(serve_instance):
    """The HTTP proxy can be killed and restarted without losing routes."""
    serve.init()
    serve.create_endpoint("proxy_failure", "/proxy_failure", methods=["GET"])

    def function():
        return "hello1"

    serve.create_backend(function, "proxy_failure:v1")
    serve.set_traffic("proxy_failure", {"proxy_failure:v1": 1.0})

    assert request_with_retries("/proxy_failure", timeout=0.1).text == "hello1"

    # The original backend serves traffic before the proxy is killed.
    for _ in range(10):
        assert request_with_retries(
            "/proxy_failure", timeout=30).text == "hello1"

    _kill_http_proxy()

    def function():
        return "hello2"

    serve.create_backend(function, "proxy_failure:v2")
    serve.set_traffic("proxy_failure", {"proxy_failure:v2": 1.0})

    # After restart, traffic reaches the newly created backend.
    for _ in range(10):
        assert request_with_retries(
            "/proxy_failure", timeout=30).text == "hello2"
Exemplo n.º 8
0
def test_worker_replica_failure(serve_instance):
    """One replica hanging/dying must not stop the other from serving.

    A two-replica backend is created whose workers coordinate through a
    temp file: the third worker to be constructed (i.e. any restarted
    replica) hangs forever in its constructor. One replica is then killed
    and the surviving replica must keep answering requests.
    """
    serve.http_proxy.MAX_ACTOR_DEAD_RETRIES = 0
    serve.init()
    serve.create_endpoint(
        "replica_failure", "/replica_failure", methods=["GET"])

    class Worker:
        # Assumes that two replicas are started. Will hang forever in the
        # constructor for any workers that are restarted.
        def __init__(self, path):
            self.should_hang = False
            if not os.path.exists(path):
                # First construction: create the file with a count of 1.
                with open(path, "w") as f:
                    f.write("1")
            else:
                with open(path, "r") as f:
                    num = int(f.read())

                with open(path, "w") as f:
                    if num == 2:
                        # Third construction: a restarted worker -> hang.
                        self.should_hang = True
                    else:
                        f.write(str(num + 1))

            if self.should_hang:
                # Busy-wait forever to simulate a stuck restarting worker.
                while True:
                    pass

        def __call__(self):
            pass

    temp_path = tempfile.gettempdir() + "/" + serve.utils.get_random_letters()
    serve.create_backend(Worker, "replica_failure", temp_path)
    backend_config = serve.get_backend_config("replica_failure")
    backend_config.num_replicas = 2
    serve.set_backend_config("replica_failure", backend_config)
    serve.set_traffic("replica_failure", {"replica_failure": 1.0})

    # Wait until both replicas have been started.
    # NOTE(review): `responses` starts empty, so this loop body never
    # executes (len is 0, not 1). Also, __call__ returns None for every
    # replica, so response texts could never become distinguishable.
    # Verify the intended wait condition against the upstream test before
    # changing it -- a naive `< 2` fix would loop forever.
    responses = set()
    while len(responses) == 1:
        responses.add(
            request_with_retries("/replica_failure", timeout=0.1).text)
        time.sleep(0.1)

    # Kill one of the replicas.
    handles = _get_worker_handles("replica_failure")
    assert len(handles) == 2
    ray.kill(handles[0])

    # Check that the other replica still serves requests.
    for _ in range(10):
        while True:
            try:
                # The timeout needs to be small here because the request to
                # the restarting worker will hang.
                request_with_retries("/replica_failure", timeout=0.1)
                break
            except TimeoutError:
                time.sleep(0.1)
Exemplo n.º 9
0
def test_list_endpoints(serve_instance):
    """list_endpoints reports route, methods, and traffic per endpoint."""
    serve.init()

    def f():
        pass

    serve.create_endpoint("endpoint", "/api", methods=["GET", "POST"])
    serve.create_endpoint("endpoint2", methods=["POST"])
    serve.create_backend("backend", f)
    serve.set_traffic("endpoint2", {"backend": 1.0})

    endpoints = serve.list_endpoints()

    # Routed endpoint with no traffic assigned yet.
    assert "endpoint" in endpoints
    expected = {"route": "/api", "methods": ["GET", "POST"], "traffic": {}}
    assert endpoints["endpoint"] == expected

    # Unrouted endpoint with all traffic on "backend".
    assert "endpoint2" in endpoints
    expected2 = {
        "route": None,
        "methods": ["POST"],
        "traffic": {"backend": 1.0},
    }
    assert endpoints["endpoint2"] == expected2

    # Deleting one endpoint leaves the other intact.
    serve.delete_endpoint("endpoint")
    assert "endpoint2" in serve.list_endpoints()

    # Deleting both empties the listing.
    serve.delete_endpoint("endpoint2")
    assert len(serve.list_endpoints()) == 0
Exemplo n.º 10
0
def test_http_proxy_failure(serve_instance):
    """Routes survive a router kill; new backends work after restart."""
    serve.init()

    def function():
        return "hello1"

    serve.create_backend("proxy_failure:v1", function)
    serve.create_endpoint("proxy_failure",
                          backend="proxy_failure:v1",
                          route="/proxy_failure")

    assert request_with_retries("/proxy_failure", timeout=1.0).text == "hello1"

    # Requests are served normally before the routers are killed.
    for _ in range(10):
        assert request_with_retries(
            "/proxy_failure", timeout=30).text == "hello1"

    _kill_routers()

    def function():
        return "hello2"

    serve.create_backend("proxy_failure:v2", function)
    serve.set_traffic("proxy_failure", {"proxy_failure:v2": 1.0})

    # The restarted routers pick up the new traffic assignment.
    for _ in range(10):
        assert request_with_retries(
            "/proxy_failure", timeout=30).text == "hello2"
Exemplo n.º 11
0
def test_http_proxy_failure(serve_instance):
    """Killing the HTTP proxies must not lose routes or new backends."""
    def function(_):
        return "hello1"

    serve.create_backend("proxy_failure:v1", function)
    serve.create_endpoint("proxy_failure",
                          backend="proxy_failure:v1",
                          route="/proxy_failure")

    assert request_with_retries("/proxy_failure", timeout=1.0).text == "hello1"

    # Baseline: the original backend answers every request.
    for _ in range(10):
        assert request_with_retries(
            "/proxy_failure", timeout=30).text == "hello1"

    _kill_http_proxies()

    def function(_):
        return "hello2"

    serve.create_backend("proxy_failure:v2", function)
    serve.set_traffic("proxy_failure", {"proxy_failure:v2": 1.0})

    def check_new():
        # All ten responses must come from the new backend.
        return all(
            request_with_retries("/proxy_failure", timeout=30).text ==
            "hello2" for _ in range(10))

    wait_for_condition(check_new)
Exemplo n.º 12
0
def test_set_traffic_missing_data(serve_instance):
    """set_traffic rejects unknown backends and unknown endpoints."""
    endpoint = "foobar"
    backend = "foo_backend"
    serve.create_backend(backend, lambda: 5)
    serve.create_endpoint(endpoint, backend=backend)

    # Referencing a backend that was never created must fail.
    with pytest.raises(ValueError):
        serve.set_traffic(endpoint, {"nonexistent_backend": 1.0})

    # Targeting an endpoint that was never created must fail too.
    with pytest.raises(ValueError):
        serve.set_traffic("nonexistent_endpoint_name", {backend: 1.0})
Exemplo n.º 13
0
async def test_system_metric_endpoints(serve_instance):
    """Serve's /-/metrics endpoint exposes the expected system metrics.

    Drives one (failing) request through a backend that always raises,
    then checks that the HTTP-request, router-request, and backend-error
    counters all appear in the in-memory metric dump. Metric values are
    stripped before comparing because the check is retried.
    """
    def test_error_counter(flask_request):
        # Deliberately raise so backend_error_counter is incremented.
        1 / 0

    serve.create_backend("m:v1", test_error_counter)
    serve.create_endpoint("test_metrics", backend="m:v1", route="/measure")
    serve.set_traffic("test_metrics", {"m:v1": 1})

    # Check metrics are exposed under http endpoint
    def test_metric_endpoint():
        requests.get("http://127.0.0.1:8000/measure", timeout=5)
        in_memory_metric = requests.get(
            "http://127.0.0.1:8000/-/metrics", timeout=5).json()

        # We don't want to check the values since this check might be retried.
        in_memory_metric_without_values = []
        for m in in_memory_metric:
            m.pop("value")
            in_memory_metric_without_values.append(m)

        # Each expected metric identified only by its "info" block.
        target_metrics = [{
            "info": {
                "name": "num_http_requests",
                "type": "MetricType.COUNTER",
                "route": "/measure"
            },
        }, {
            "info": {
                "name": "num_router_requests",
                "type": "MetricType.COUNTER",
                "endpoint": "test_metrics"
            },
        }, {
            "info": {
                "name": "backend_error_counter",
                "type": "MetricType.COUNTER",
                "backend": "m:v1"
            },
        }]

        for target in target_metrics:
            assert target in in_memory_metric_without_values

    success = False
    for _ in range(3):
        try:
            test_metric_endpoint()
            success = True
            break
        except (AssertionError, requests.ReadTimeout):
            # Metrics may not have been propagated yet
            time.sleep(2)
            print("Metric not correct, retrying...")
    if not success:
        # Final attempt outside the retry loop so a failure surfaces the
        # real assertion error instead of being swallowed.
        test_metric_endpoint()
Exemplo n.º 14
0
def test_controller_failure(serve_instance):
    """Serve state survives repeated controller kills.

    The controller actor is killed (with restart allowed) at several
    points; existing routes must keep serving, and create_backend /
    create_endpoint / set_traffic calls issued between kills must still
    take effect.
    """
    serve.init()

    def function():
        return "hello1"

    serve.create_backend("controller_failure:v1", function)
    serve.create_endpoint(
        "controller_failure",
        backend="controller_failure:v1",
        route="/controller_failure")

    assert request_with_retries(
        "/controller_failure", timeout=1).text == "hello1"

    for _ in range(10):
        response = request_with_retries("/controller_failure", timeout=30)
        assert response.text == "hello1"

    # Kill the controller; no_restart=False lets Ray restart it.
    ray.kill(serve.api._get_controller(), no_restart=False)

    # Existing traffic must keep flowing while the controller recovers.
    for _ in range(10):
        response = request_with_retries("/controller_failure", timeout=30)
        assert response.text == "hello1"

    def function():
        return "hello2"

    ray.kill(serve.api._get_controller(), no_restart=False)

    # API calls made right after a kill must still be applied.
    serve.create_backend("controller_failure:v2", function)
    serve.set_traffic("controller_failure", {"controller_failure:v2": 1.0})

    for _ in range(10):
        response = request_with_retries("/controller_failure", timeout=30)
        assert response.text == "hello2"

    def function():
        return "hello3"

    # Interleave a kill between every setup call for a second endpoint.
    ray.kill(serve.api._get_controller(), no_restart=False)
    serve.create_backend("controller_failure_2", function)
    ray.kill(serve.api._get_controller(), no_restart=False)
    serve.create_endpoint(
        "controller_failure_2",
        backend="controller_failure_2",
        route="/controller_failure_2")
    ray.kill(serve.api._get_controller(), no_restart=False)

    # Both endpoints must serve their latest backends afterwards.
    for _ in range(10):
        response = request_with_retries("/controller_failure", timeout=30)
        assert response.text == "hello2"
        response = request_with_retries("/controller_failure_2", timeout=30)
        assert response.text == "hello3"
Exemplo n.º 15
0
def test_no_route(serve_instance):
    """An endpoint without a route is reachable through a serve handle."""
    serve.create_endpoint("noroute-endpoint")

    def func(_, i=1):
        return 1

    serve.create_backend("backend:1", func)
    serve.set_traffic("noroute-endpoint", {"backend:1": 1.0})

    # No HTTP route exists, so the handle is the only way in.
    handle = serve.get_handle("noroute-endpoint")
    assert ray.get(handle.remote(i=1)) == 1
Exemplo n.º 16
0
def test_controller_failure(serve_instance):
    """Serve state survives repeated controller kills (client API variant).

    Kills the controller actor (restart allowed) at several points and
    checks that existing routes keep serving and that API calls issued
    between kills still take effect.
    """
    def function(_):
        return "hello1"

    serve.create_backend("controller_failure:v1", function)
    serve.create_endpoint(
        "controller_failure",
        backend="controller_failure:v1",
        route="/controller_failure")

    assert request_with_retries(
        "/controller_failure", timeout=1).text == "hello1"

    for _ in range(10):
        response = request_with_retries("/controller_failure", timeout=30)
        assert response.text == "hello1"

    # Kill the controller; no_restart=False lets Ray restart it.
    ray.kill(serve.api._global_client._controller, no_restart=False)

    # Existing traffic must keep flowing while the controller recovers.
    for _ in range(10):
        response = request_with_retries("/controller_failure", timeout=30)
        assert response.text == "hello1"

    def function(_):
        return "hello2"

    ray.kill(serve.api._global_client._controller, no_restart=False)

    # API calls made right after a kill must still be applied.
    serve.create_backend("controller_failure:v2", function)
    serve.set_traffic("controller_failure", {"controller_failure:v2": 1.0})

    def check_controller_failure():
        # Poll until the new traffic assignment is observable.
        response = request_with_retries("/controller_failure", timeout=30)
        return response.text == "hello2"

    wait_for_condition(check_controller_failure)

    def function(_):
        return "hello3"

    # Interleave a kill between every setup call for a second endpoint.
    ray.kill(serve.api._global_client._controller, no_restart=False)
    serve.create_backend("controller_failure_2", function)
    ray.kill(serve.api._global_client._controller, no_restart=False)
    serve.create_endpoint(
        "controller_failure_2",
        backend="controller_failure_2",
        route="/controller_failure_2")
    ray.kill(serve.api._global_client._controller, no_restart=False)

    # Both endpoints must serve their latest backends afterwards.
    for _ in range(10):
        response = request_with_retries("/controller_failure", timeout=30)
        assert response.text == "hello2"
        response = request_with_retries("/controller_failure_2", timeout=30)
        assert response.text == "hello3"
Exemplo n.º 17
0
def test_nonblocking():
    """A GET against a freshly wired endpoint is answered immediately."""
    serve.init()
    serve.create_endpoint("nonblocking", "/nonblocking")

    def function(flask_request):
        return {"method": flask_request.method}

    serve.create_backend(function, "nonblocking:v1")
    serve.set_traffic("nonblocking", {"nonblocking:v1": 1.0})

    # The echoed method confirms the request reached the backend.
    result = requests.get("http://127.0.0.1:8000/nonblocking").json()
    assert result["method"] == "GET"
Exemplo n.º 18
0
    def create_endpoint(self):
        """Create a randomly named endpoint+backend pair, evicting the
        most recently tracked pair once max_endpoints is reached."""
        if len(self.endpoints) == self.max_endpoints:
            # Evict one endpoint (and its same-named backend) to make room.
            victim = self.endpoints.pop()
            serve.delete_endpoint(victim)
            serve.delete_backend(victim)

        new_endpoint = "".join(
            random.choice(string.ascii_letters) for _ in range(10))

        def handler(flask_request, *args):
            # Echo the endpoint name so responses are distinguishable.
            return new_endpoint

        serve.create_backend(new_endpoint, handler)
        serve.create_endpoint(new_endpoint, "/" + new_endpoint)
        serve.set_traffic(new_endpoint, {new_endpoint: 1.0})

        self.endpoints.append(new_endpoint)
Exemplo n.º 19
0
def test_batching_exception(serve_instance):
    """A batched backend returning a non-list raises on the caller side."""
    class NoListReturned:
        def __init__(self):
            self.count = 0

        @serve.accept_batch
        def __call__(self, flask_request, temp=None):
            # Returning the bare batch size (an int, not a list of
            # per-query results) violates the accept_batch contract.
            return serve.context.batch_size

    serve.create_endpoint("exception-test", "/noListReturned")
    # set the max batch size
    serve.create_backend(
        "exception:v1", NoListReturned, config={"max_batch_size": 5})
    serve.set_traffic("exception-test", {"exception:v1": 1.0})

    handle = serve.get_handle("exception-test")
    with pytest.raises(ray.exceptions.RayTaskError):
        assert ray.get(handle.remote(temp=1))
Exemplo n.º 20
0
def test_call_method(serve_instance):
    """The X-SERVE-CALL-METHOD header and handle.options() both dispatch
    to a named method instead of __call__."""
    serve.create_endpoint("endpoint", "/api")

    class CallMethod:
        def method(self, request):
            return "hello"

    serve.create_backend("backend", CallMethod)
    serve.set_traffic("endpoint", {"backend": 1.0})

    # Test HTTP path.
    response = requests.get("http://127.0.0.1:8000/api",
                            timeout=1,
                            headers={"X-SERVE-CALL-METHOD": "method"})
    assert response.text == "hello"

    # Test serve handle path.
    handle = serve.get_handle("endpoint")
    assert ray.get(handle.options("method").remote()) == "hello"
Exemplo n.º 21
0
def test_cluster_name():
    """Two named Serve clusters on different ports are fully isolated.

    Identically named endpoints/backends are created in "cluster1" (port
    8001) and "cluster2" (port 8002); each must serve its own backend,
    and deletions in one cluster must not affect the other.
    """
    with pytest.raises(TypeError):
        # cluster_name must be a string.
        serve.init(cluster_name=1)

    route = "/api"
    backend = "backend"
    endpoint = "endpoint"

    serve.init(cluster_name="cluster1", blocking=True, http_port=8001)
    serve.create_endpoint(endpoint, route=route)

    def function():
        return "hello1"

    serve.create_backend(backend, function)
    serve.set_traffic(endpoint, {backend: 1.0})

    assert requests.get("http://127.0.0.1:8001" + route).text == "hello1"

    # Create a second cluster on port 8002. Create an endpoint and backend with
    # the same names and check that they don't collide.
    serve.init(cluster_name="cluster2", blocking=True, http_port=8002)
    serve.create_endpoint(endpoint, route=route)

    def function():
        return "hello2"

    serve.create_backend(backend, function)
    serve.set_traffic(endpoint, {backend: 1.0})

    assert requests.get("http://127.0.0.1:8001" + route).text == "hello1"
    assert requests.get("http://127.0.0.1:8002" + route).text == "hello2"

    # Check that deleting the backend in the current cluster doesn't
    # affect the other cluster.
    serve.delete_endpoint(endpoint)
    serve.delete_backend(backend)
    assert requests.get("http://127.0.0.1:8001" + route).text == "hello1"

    # Check that we can re-connect to the first cluster.
    serve.init(cluster_name="cluster1")
    serve.delete_endpoint(endpoint)
    serve.delete_backend(backend)
Exemplo n.º 22
0
def test_scaling_replicas(serve_instance):
    """Scaling a backend up spreads load; scaling down concentrates it."""
    class Counter:
        def __init__(self):
            self.count = 0

        def __call__(self, _):
            self.count += 1
            return self.count

    serve.create_endpoint("counter", "/increment")

    # Keep checking the routing table until /increment is populated
    while "/increment" not in requests.get(
            "http://127.0.0.1:8000/-/routes").json():
        time.sleep(0.2)

    b_config = BackendConfig(num_replicas=2)
    serve.create_backend(Counter, "counter:v1", backend_config=b_config)
    serve.set_traffic("counter", {"counter:v1": 1.0})

    results = [
        requests.get("http://127.0.0.1:8000/increment").json()
        for _ in range(10)
    ]
    # If the load is shared among two replicas, the max result cannot be 10.
    assert max(results) < 10

    updated_config = serve.get_backend_config("counter:v1")
    updated_config.num_replicas = 1
    serve.set_backend_config("counter:v1", updated_config)

    results = [
        requests.get("http://127.0.0.1:8000/increment").json()
        for _ in range(10)
    ]
    # Give some time for a replica to spin down. But majority of the request
    # should be served by the only remaining replica.
    assert max(results) - min(results) > 6
Exemplo n.º 23
0
def test_handle_in_endpoint(serve_instance):
    """A backend may hold a serve handle to another endpoint and call it."""
    serve.init()

    class Endpoint1:
        def __call__(self, flask_request):
            return "hello"

    class Endpoint2:
        def __init__(self):
            # missing_ok lets us grab the handle before endpoint1 exists.
            self.handle = serve.get_handle("endpoint1", missing_ok=True)

        def __call__(self):
            # Proxy the request through to endpoint1.
            return ray.get(self.handle.remote())

    for name, servable in (("endpoint1", Endpoint1), ("endpoint2", Endpoint2)):
        serve.create_endpoint(name, "/" + name, methods=["GET", "POST"])
        serve.create_backend(servable, name + ":v0")
        serve.set_traffic(name, {name + ":v0": 1.0})

    assert requests.get("http://127.0.0.1:8000/endpoint2").text == "hello"
Exemplo n.º 24
0
def test_parallel_start(serve_instance):
    # Test the ability to start multiple replicas in parallel. In the past,
    # when Serve scaled up a backend it did so one replica at a time,
    # waiting for each to initialize. This test guards against that by
    # preventing the first replica from finishing initialization until the
    # second replica has also started.
    @ray.remote
    class Barrier:
        """Actor that blocks waiters until `release_on` of them arrive."""

        def __init__(self, release_on):
            self.release_on = release_on
            self.current_waiters = 0
            self.event = asyncio.Event()

        async def wait(self):
            self.current_waiters += 1
            if self.current_waiters == self.release_on:
                # Last arrival releases everyone at once.
                self.event.set()
            else:
                await self.event.wait()

    barrier = Barrier.remote(release_on=2)

    class LongStartingServable:
        def __init__(self):
            # Blocks in the constructor until both replicas reach the
            # barrier; serial startup would deadlock and hit the timeout.
            ray.get(barrier.wait.remote(), timeout=10)

        def __call__(self, _):
            return "Ready"

    serve.create_endpoint("test-parallel")
    serve.create_backend("p:v0",
                         LongStartingServable,
                         config={"num_replicas": 2})
    serve.set_traffic("test-parallel", {"p:v0": 1})
    handle = serve.get_handle("test-parallel")

    ray.get(handle.remote(), timeout=10)
Exemplo n.º 25
0
def test_shard_key(serve_instance, route):
    """Requests with the same shard key must map to the same backend.

    Creates five backends that each return a distinct integer, splits
    traffic evenly among them, then checks that repeating a request with
    a given shard key always lands on the backend seen the first time.
    """
    num_backends = 5
    traffic_dict = {}

    def make_function(value):
        # Factory avoids the late-binding closure pitfall: each backend
        # captures its own integer instead of the shared loop variable.
        def function():
            return value

        return function

    for i in range(num_backends):
        backend_name = "backend-split-" + str(i)
        traffic_dict[backend_name] = 1.0 / num_backends
        serve.create_backend(backend_name, make_function(i))

    serve.create_endpoint("endpoint",
                          backend=list(traffic_dict.keys())[0],
                          route=route)
    serve.set_traffic("endpoint", traffic_dict)

    def do_request(shard_key):
        """Issue one request with the shard key, over HTTP or via handle."""
        if route is not None:
            url = "http://127.0.0.1:8000" + route
            headers = {"X-SERVE-SHARD-KEY": shard_key}
            result = requests.get(url, headers=headers).text
        else:
            # BUG FIX: .options(shard_key=...) was previously applied twice
            # on the same handle; once is sufficient.
            handle = serve.get_handle("endpoint")
            result = ray.get(handle.options(shard_key=shard_key).remote())
        return result

    # Send requests with different shard keys and log the backends they go to.
    shard_keys = [get_random_letters() for _ in range(20)]
    results = {}
    for shard_key in shard_keys:
        results[shard_key] = do_request(shard_key)

    # Check that the shard keys are mapped to the same backends.
    for shard_key in shard_keys:
        assert do_request(shard_key) == results[shard_key]
Exemplo n.º 26
0
def test_delete_backend(serve_instance):
    """Backends can only be deleted when no traffic references them."""
    serve.create_endpoint("delete_backend", "/delete-backend")

    def function():
        return "hello"

    serve.create_backend("delete:v1", function)
    serve.set_traffic("delete_backend", {"delete:v1": 1.0})

    url = "http://127.0.0.1:8000/delete-backend"
    assert requests.get(url).text == "hello"

    # Check that we can't delete the backend while it's in use.
    with pytest.raises(ValueError):
        serve.delete_backend("delete:v1")

    serve.create_backend("delete:v2", function)
    serve.set_traffic("delete_backend", {"delete:v1": 0.5, "delete:v2": 0.5})

    # Still in use: it holds half of the traffic.
    with pytest.raises(ValueError):
        serve.delete_backend("delete:v1")

    # Check that the backend can be deleted once it's no longer in use.
    serve.set_traffic("delete_backend", {"delete:v2": 1.0})
    serve.delete_backend("delete:v1")

    # Check that we can no longer use the previously deleted backend.
    with pytest.raises(ValueError):
        serve.set_traffic("delete_backend", {"delete:v1": 1.0})

    def function2():
        return "olleh"

    # Check that we can now reuse the previously deleted backend's tag.
    serve.create_backend("delete:v1", function2)
    serve.set_traffic("delete_backend", {"delete:v1": 1.0})

    assert requests.get(url).text == "olleh"
Exemplo n.º 27
0
def test_delete_backend(serve_instance):
    """Backend deletion is blocked while referenced and reusable after."""
    def function(_):
        return "hello"

    serve.create_backend("delete:v1", function)
    serve.create_endpoint("delete_backend",
                          backend="delete:v1",
                          route="/delete-backend")

    assert requests.get("http://127.0.0.1:8000/delete-backend").text == "hello"

    # Check that we can't delete the backend while it's in use.
    with pytest.raises(ValueError):
        serve.delete_backend("delete:v1")

    serve.create_backend("delete:v2", function)
    serve.set_traffic("delete_backend", {"delete:v1": 0.5, "delete:v2": 0.5})

    # Still referenced by half of the traffic, so deletion must fail.
    with pytest.raises(ValueError):
        serve.delete_backend("delete:v1")

    # Check that the backend can be deleted once it's no longer in use.
    serve.set_traffic("delete_backend", {"delete:v2": 1.0})
    serve.delete_backend("delete:v1")

    # Check that we can no longer use the previously deleted backend.
    with pytest.raises(ValueError):
        serve.set_traffic("delete_backend", {"delete:v1": 1.0})

    def function2(_):
        return "olleh"

    # Check that we can now reuse the previously deleted backend's tag.
    serve.create_backend("delete:v1", function2)
    serve.set_traffic("delete_backend", {"delete:v1": 1.0})

    for _ in range(10):
        try:
            assert requests.get(
                "http://127.0.0.1:8000/delete-backend").text == "olleh"
            break
        except AssertionError:
            time.sleep(0.5)  # wait for the traffic policy to propagate
    else:
        # Final, non-swallowed check after the retries are exhausted.
        assert requests.get(
            "http://127.0.0.1:8000/delete-backend").text == "olleh"
Exemplo n.º 28
0
def test_master_failure(serve_instance):
    serve.init()
    serve.create_endpoint("master_failure", "/master_failure")

    def function():
        return "hello1"

    serve.create_backend("master_failure:v1", function)
    serve.set_traffic("master_failure", {"master_failure:v1": 1.0})

    assert request_with_retries("/master_failure", timeout=1).text == "hello1"

    for _ in range(10):
        response = request_with_retries("/master_failure", timeout=30)
        assert response.text == "hello1"

    ray.kill(serve.api._get_master_actor())

    for _ in range(10):
        response = request_with_retries("/master_failure", timeout=30)
        assert response.text == "hello1"

    def function():
        return "hello2"

    ray.kill(serve.api._get_master_actor())

    serve.create_backend("master_failure:v2", function)
    serve.set_traffic("master_failure", {"master_failure:v2": 1.0})

    for _ in range(10):
        response = request_with_retries("/master_failure", timeout=30)
        assert response.text == "hello2"

    def function():
        return "hello3"

    ray.kill(serve.api._get_master_actor())
    serve.create_endpoint("master_failure_2", "/master_failure_2")
    ray.kill(serve.api._get_master_actor())
    serve.create_backend("master_failure_2", function)
    ray.kill(serve.api._get_master_actor())
    serve.set_traffic("master_failure_2", {"master_failure_2": 1.0})

    for _ in range(10):
        response = request_with_retries("/master_failure", timeout=30)
        assert response.text == "hello2"
        response = request_with_retries("/master_failure_2", timeout=30)
        assert response.text == "hello3"
Exemplo n.º 29
0

def echo_v1(_):
    """Version-1 backend: ignore the request and always answer "v1"."""
    return "v1"


def echo_v2(_):
    """Version-2 backend: ignore the request and always answer "v2"."""
    return "v2"


# Start Serve and wire a single endpoint to the v1 backend.
serve.start()

serve.create_backend("echo:v1", echo_v1)
serve.create_endpoint("my_endpoint", backend="echo:v1", route="/echo")

# Show a few responses while only v1 is serving.
for _ in range(3):
    resp = requests.get("http://127.0.0.1:8000/echo").json()
    print(pformat_color_json(resp))

    print("...Sleeping for 2 seconds...")
    time.sleep(2)

# Add the v2 backend and split traffic 50/50, then poll forever so the
# mix of "v1"/"v2" responses can be observed (Ctrl-C to stop the demo).
serve.create_backend("echo:v2", echo_v2)
serve.set_traffic("my_endpoint", {"echo:v1": 0.5, "echo:v2": 0.5})
while True:
    resp = requests.get("http://127.0.0.1:8000/echo").json()
    print(pformat_color_json(resp))

    print("...Sleeping for 2 seconds...")
    time.sleep(2)
Exemplo n.º 30
0
import ray
from ray import serve


class ComposedModel:
    """Compose two Serve models: object detection, then plate recognition.

    Forwards the request image to the "object" detector; only if a car is
    detected with sufficient confidence is the image also sent to the
    "alpr" license-plate reader.
    """

    def __init__(self):
        # Handles are resolved once, at replica construction time.
        self.detector_handle = serve.get_handle("object")
        self.alpr_handle = serve.get_handle("alpr")

    async def __call__(self, flask_request):
        image_data = flask_request.data

        object_found = await self.detector_handle.remote(data=image_data)

        # Early-out when no car is present or confidence is too low.
        # BUG FIX: the low-score branch previously returned the misspelled
        # key "contain_car", inconsistent with every other response.
        if object_found["label"] != "car":
            return {"contains_car": False}
        if object_found["score"] < 0.4:
            return {"contains_car": False}

        license_plate = await self.alpr_handle.remote(data=image_data)
        return {"contains_car": True, "license_plate": license_plate}


# Connect to an already-running Ray cluster and initialize Serve on it.
ray.init(address="auto")
serve.init()

# Expose the composition pipeline over POST /composed with two replicas.
serve.create_endpoint("composed", "/composed", methods=["POST"])
serve.create_backend("composed:v1", ComposedModel, config={"num_replicas": 2})
serve.set_traffic("composed", {"composed:v1": 1})