async def test_changing_backend(ray_instance, mock_controller,
                                task_runner_mock_actor):
    """Requests should follow traffic-policy updates to the new backend."""
    router = ray.remote(EndpointRouter).remote(mock_controller, "svc")

    # Point all "svc" traffic at the first backend and send one request.
    await mock_controller.set_traffic.remote(
        "svc", TrafficPolicy({"backend-alter": 1}))
    await mock_controller.add_new_replica.remote("backend-alter",
                                                 task_runner_mock_actor)
    await (await router.assign_request.remote(
        RequestMetadata(get_random_letters(10), "svc"), 1))
    call = await task_runner_mock_actor.get_recent_call.remote()
    assert call.args[0] == 1

    # Switch the policy to a second backend; the next request must land there.
    await mock_controller.set_traffic.remote(
        "svc", TrafficPolicy({"backend-alter-2": 1}))
    await mock_controller.add_new_replica.remote("backend-alter-2",
                                                 task_runner_mock_actor)
    await (await router.assign_request.remote(
        RequestMetadata(get_random_letters(10), "svc"), 2))
    call = await task_runner_mock_actor.get_recent_call.remote()
    assert call.args[0] == 2
async def test_split_traffic_random(ray_instance, mock_controller,
                                    task_runner_mock_actor):
    """A 50/50 traffic split should route requests to both backends."""
    q = ray.remote(Router).remote(mock_controller)
    await q.setup_in_async_loop.remote()
    await mock_controller.set_traffic.remote(
        "svc", TrafficPolicy({
            "backend-split": 0.5,
            "backend-split-2": 0.5
        }))
    runner_1, runner_2 = [mock_task_runner() for _ in range(2)]
    await mock_controller.add_new_replica.remote("backend-split", runner_1)
    await mock_controller.add_new_replica.remote("backend-split-2", runner_2)

    # Assuming a 50% split, the probability that all 20 requests go to a
    # single backend is 0.5^20 ~ 1e-6, so both runners should see traffic.
    for _ in range(20):
        await q.assign_request.remote(
            RequestMetadata(get_random_letters(10), "svc", None), 1)

    # Both runners must have received at least one request carrying arg 1.
    got_work = [
        await runner.get_recent_call.remote()
        for runner in (runner_1, runner_2)
    ]
    assert [g.args[0] for g in got_work] == [1, 1]
def remote(self, request_data: Optional[Union[Dict, Any]] = None, **kwargs):
    """Issue an asynchronous request to the endpoint.

    Returns a Ray ObjectRef whose results can be waited for or retrieved
    using ray.wait or ray.get, respectively.

    Args:
        request_data(dict, Any): If it's a dictionary, the data will be
            available in ``request.json()`` or ``request.form()``.
            Otherwise, it will be available in ``request.data``.
        ``**kwargs``: All keyword arguments will be available in
            ``request.args``.

    Returns:
        ray.ObjectRef
    """
    request_metadata = RequestMetadata(
        get_random_letters(10),  # Used for debugging.
        self.endpoint_name,
        TaskContext.Python,
        call_method=self.method_name or "__call__",
        shard_key=self.shard_key,
        http_method=self.http_method or "GET",
        http_headers=self.http_headers or dict(),
    )
    return self.router_handle.enqueue_request.remote(
        request_metadata, request_data, **kwargs)
async def test_alter_backend(serve_instance, task_runner_mock_actor):
    """Changing the traffic policy should redirect subsequent requests."""
    router = ray.remote(Router).remote()
    await router.setup.remote("", serve_instance._controller_name)

    # Route everything to the first backend; the request should arrive there.
    await router.set_traffic.remote("svc",
                                    TrafficPolicy({"backend-alter": 1}))
    await router.add_new_worker.remote("backend-alter", "replica-1",
                                       task_runner_mock_actor)
    await router.enqueue_request.remote(RequestMetadata("svc", None), 1)
    call = await task_runner_mock_actor.get_recent_call.remote()
    assert call.args[0] == 1

    # Re-point the policy at a second backend and verify the switch.
    await router.set_traffic.remote("svc",
                                    TrafficPolicy({"backend-alter-2": 1}))
    await router.add_new_worker.remote("backend-alter-2", "replica-1",
                                       task_runner_mock_actor)
    await router.enqueue_request.remote(RequestMetadata("svc", None), 2)
    call = await task_runner_mock_actor.get_recent_call.remote()
    assert call.args[0] == 2
async def __call__(self, scope, receive, send):
    """Implements the ASGI protocol.

    Routes an incoming HTTP request to the matching Serve endpoint and
    sends back the result (or an error response). See details at:
        https://asgi.readthedocs.io/en/latest/specs/index.html.
    """
    error_sender = self._make_error_sender(scope, receive, send)

    assert self.route_table is not None, (
        "Route table must be set via set_route_table.")
    assert scope["type"] == "http"
    current_path = scope["path"]

    # Record one request per call, tagged by route, for metrics.
    self.request_counter.record(1, tags={"route": current_path})

    # Paths under "/-/" are internal system endpoints (e.g. /-/routes).
    if current_path.startswith("/-/"):
        await self._handle_system_request(scope, receive, send)
        return

    try:
        endpoint_name, methods_allowed = self.route_table[current_path]
    except KeyError:
        error_message = (
            "Path {} not found. "
            "Please ping http://.../-/routes for routing table"
        ).format(current_path)
        await error_sender(error_message, 404)
        return

    if scope["method"] not in methods_allowed:
        error_message = ("Methods {} not allowed. "
                         "Available HTTP methods are {}.").format(
                             scope["method"], methods_allowed)
        await error_sender(error_message, 405)
        return

    http_body_bytes = await self.receive_http_body(scope, receive, send)

    # ASGI delivers headers as (bytes, bytes) pairs; decode to str for lookup.
    headers = {k.decode(): v.decode() for k, v in scope["headers"]}
    request_metadata = RequestMetadata(
        get_random_letters(10),  # Used for debugging.
        endpoint_name,
        TaskContext.Web,
        http_method=scope["method"].upper(),
        # Serve-specific headers select the replica method and shard key.
        call_method=headers.get("X-SERVE-CALL-METHOD".lower(), "__call__"),
        shard_key=headers.get("X-SERVE-SHARD-KEY".lower(), None),
    )

    # Assign the request to a replica, then await its result.
    ref = await self.router.assign_request(request_metadata, scope,
                                           http_body_bytes)
    result = await ref

    if isinstance(result, RayTaskError):
        error_message = "Task Error. Traceback: {}.".format(result)
        await error_sender(error_message, 500)
    else:
        await Response(result).send(scope, receive, send)
async def test_shard_key(ray_instance, mock_controller,
                         task_runner_mock_actor):
    """Requests with the same shard key should stick to the same backend."""
    router = ray.remote(Router).remote(mock_controller)
    await router.setup_in_async_loop.remote()

    # Register five backends with an even traffic split.
    num_backends = 5
    traffic_dict = {}
    runners = [mock_task_runner() for _ in range(num_backends)]
    for idx, runner in enumerate(runners):
        backend_name = "backend-split-" + str(idx)
        traffic_dict[backend_name] = 1.0 / num_backends
        await mock_controller.add_new_replica.remote(backend_name, runner)
    await mock_controller.set_traffic.remote("svc",
                                             TrafficPolicy(traffic_dict))

    # Generate random shard keys and send one request for each.
    shard_keys = [get_random_letters() for _ in range(100)]
    for key in shard_keys:
        await router.assign_request.remote(
            RequestMetadata(
                get_random_letters(10), "svc", None, shard_key=key), key)

    # Record which shard keys each backend received, then reset the runners.
    runner_shard_keys = defaultdict(set)
    for idx, runner in enumerate(runners):
        for call in await runner.get_all_calls.remote():
            runner_shard_keys[idx].add(call.args[0])
        await runner.clear_calls.remote()

    # Send queries with the same shard keys a second time.
    for key in shard_keys:
        await router.assign_request.remote(
            RequestMetadata(
                get_random_letters(10), "svc", None, shard_key=key), key)

    # Every request must have mapped to the same backend as the first round.
    for idx, runner in enumerate(runners):
        for call in await runner.get_all_calls.remote():
            assert call.args[0] in runner_shard_keys[idx]
def _remote(self, deployment_name, handle_options, args,
            kwargs) -> Coroutine:
    """Build request metadata and dispatch the call to the router."""
    metadata = RequestMetadata(
        get_random_letters(10),  # Random request id, used for debugging.
        deployment_name,
        call_method=handle_options.method_name,
        http_arg_is_pickled=self._pickled_http_request,
    )
    return self.router.assign_request(metadata, *args, **kwargs)
def _remote(self, request_data, kwargs) -> Coroutine:
    """Build request metadata from this handle's settings and dispatch it."""
    metadata = RequestMetadata(
        get_random_letters(10),  # Random request id, used for debugging.
        self.endpoint_name,
        TaskContext.Python,
        call_method=self.method_name or "__call__",
        shard_key=self.shard_key,
        http_method=self.http_method or "GET",
        http_headers=self.http_headers or dict(),
    )
    return self.router.assign_request(metadata, request_data, **kwargs)
def _remote(self, endpoint_name, handle_options, request_data,
            kwargs) -> Coroutine:
    """Build request metadata from the handle options and dispatch it."""
    metadata = RequestMetadata(
        get_random_letters(10),  # Random request id, used for debugging.
        endpoint_name,
        call_method=handle_options.method_name,
        shard_key=handle_options.shard_key,
        http_method=handle_options.http_method,
        http_headers=handle_options.http_headers,
    )
    return self.router.assign_request(metadata, request_data, **kwargs)
async def test_single_prod_cons_queue(serve_instance, task_runner_mock_actor):
    """A single backend should receive the request and return its result."""
    q = ray.remote(Router).remote()
    await q.setup.remote("", serve_instance._controller_name)
    # Await the setup calls so the traffic policy and worker are registered
    # before any request is enqueued. Previously these were fire-and-forget
    # remote calls, which could race with enqueue_request below (the sibling
    # tests test_alter_backend/test_split_traffic_random await them).
    await q.set_traffic.remote("svc",
                               TrafficPolicy({"backend-single-prod": 1.0}))
    await q.add_new_worker.remote("backend-single-prod", "replica-1",
                                  task_runner_mock_actor)

    # Make sure we get the request result back
    result = await q.enqueue_request.remote(RequestMetadata("svc", None), 1)
    assert result == "DONE"

    # Make sure it's the right request
    got_work = await task_runner_mock_actor.get_recent_call.remote()
    assert got_work.args[0] == 1
    assert got_work.kwargs == {}
async def test_simple_endpoint_backend_pair(ray_instance, mock_controller, task_runner_mock_actor): q = ray.remote(Router).remote(mock_controller, "svc") # Propogate configs await mock_controller.set_traffic.remote( "svc", TrafficPolicy({"backend-single-prod": 1.0})) await mock_controller.add_new_replica.remote("backend-single-prod", task_runner_mock_actor) # Make sure we get the request result back ref = await q.assign_request.remote( RequestMetadata(get_random_letters(10), "svc"), 1) result = await ref assert result == "DONE" # Make sure it's the right request got_work = await task_runner_mock_actor.get_recent_call.remote() assert got_work.args[0] == 1 assert got_work.kwargs == {}
async def test_split_traffic_random(serve_instance, task_runner_mock_actor):
    """A 50/50 traffic split should route requests to both backends."""
    q = ray.remote(Router).remote()
    await q.setup.remote("", serve_instance._controller_name)
    await q.set_traffic.remote(
        "svc", TrafficPolicy({
            "backend-split": 0.5,
            "backend-split-2": 0.5
        }))
    runner_1, runner_2 = [mock_task_runner() for _ in range(2)]
    await q.add_new_worker.remote("backend-split", "replica-1", runner_1)
    await q.add_new_worker.remote("backend-split-2", "replica-1", runner_2)

    # Assuming a 50% split, the probability that all 20 requests go to a
    # single backend is 0.5^20 ~ 1e-6, so both runners should see traffic.
    for _ in range(20):
        await q.enqueue_request.remote(RequestMetadata("svc", None), 1)

    # Both runners must have received at least one request carrying arg 1.
    got_work = [
        await runner.get_recent_call.remote()
        for runner in (runner_1, runner_2)
    ]
    assert [g.args[0] for g in got_work] == [1, 1]
async def test_replica_set(ray_instance):
    """ReplicaSet should cap in-flight queries per replica at the limit."""
    signal = SignalActor.remote()

    @ray.remote(num_cpus=0)
    class MockWorker:
        _num_queries = 0

        async def handle_request(self, request):
            self._num_queries += 1
            # Block until the test releases the signal.
            await signal.wait.remote()
            return "DONE"

        async def num_queries(self):
            return self._num_queries

    # We will test a scenario with two replicas in the replica set.
    rs = ReplicaSet()
    workers = [MockWorker.remote() for _ in range(2)]
    rs.set_max_concurrent_queries(1)
    rs.update_worker_replicas(workers)

    # Send two queries. They should go through the router but be blocked by
    # the signal actor.
    query = Query([], {}, TaskContext.Python,
                  RequestMetadata("request-id", "endpoint",
                                  TaskContext.Python))
    first_ref = await rs.assign_replica(query)
    second_ref = await rs.assign_replica(query)

    # These should be blocked by signal actor.
    with pytest.raises(ray.exceptions.GetTimeoutError):
        ray.get([first_ref, second_ref], timeout=1)

    # Each replica should have exactly one inflight query. Let's make sure
    # the queries arrived there.
    for worker in workers:
        while await worker.num_queries.remote() != 1:
            await asyncio.sleep(1)

    # Let's try to send another query.
    third_ref_pending_task = asyncio.get_event_loop().create_task(
        rs.assign_replica(query))
    # We should fail to assign a replica, so this coroutine should still be
    # pending after some time.
    await asyncio.sleep(0.2)
    assert not third_ref_pending_task.done()

    # Let's unblock the two workers
    await signal.send.remote()
    assert await first_ref == "DONE"
    assert await second_ref == "DONE"

    # The third request should be unblocked and sent to the first worker.
    # This means we should be able to get the object ref.
    third_ref = await third_ref_pending_task

    # Now we got the object ref, let's get its result.
    await signal.send.remote()
    assert await third_ref == "DONE"

    # Finally, make sure that one of the replicas processed the third query.
    num_queries_set = {(await worker.num_queries.remote())
                       for worker in workers}
    assert num_queries_set == {2, 1}
def make_request_param(call_method="__call__"):
    """Build RequestMetadata for the test endpoint with a random request id."""
    request_id = get_random_letters(10)
    return RequestMetadata(
        request_id,
        "endpoint",
        context.TaskContext.Python,
        call_method=call_method)
def make_request_param(call_method="__call__"):
    """Build RequestMetadata targeting the test endpoint."""
    return RequestMetadata(
        "endpoint",
        context.TaskContext.Python,
        call_method=call_method)
async def test_router_use_max_concurrency(serve_instance):
    """With max_concurrent_queries=1, a second query should be buffered."""
    signal = SignalActor.remote()

    @ray.remote
    class MockWorker:
        async def handle_request(self, request):
            # Block until the test releases the signal.
            await signal.wait.remote()
            return "DONE"

        def ready(self):
            pass

    class VisibleRouter(Router):
        # Expose internal router state so the test can inspect the queues.
        def get_queues(self):
            return self.queries_counter, self.backend_queues

    worker = MockWorker.remote()
    q = ray.remote(VisibleRouter).remote()
    await q.setup.remote("", serve_instance._controller_name)
    backend_name = "max-concurrent-test"
    config = BackendConfig(max_concurrent_queries=1)
    await q.set_traffic.remote("svc", TrafficPolicy({backend_name: 1.0}))
    await q.add_new_worker.remote(backend_name, "replica-tag", worker)
    await q.set_backend_config.remote(backend_name, config)

    # We send over two queries
    first_query = q.enqueue_request.remote(
        RequestMetadata(get_random_letters(10), "svc", None), 1)
    second_query = q.enqueue_request.remote(
        RequestMetadata(get_random_letters(10), "svc", None), 1)

    # Neither query should be available (both blocked on the signal).
    with pytest.raises(ray.exceptions.GetTimeoutError):
        ray.get([first_query, second_query], timeout=0.2)

    # Let's retrieve the router internal state
    queries_counter, backend_queues = await q.get_queues.remote()
    # There should be just one inflight request
    assert queries_counter[backend_name][
        "max-concurrent-test:replica-tag"] == 1
    # The second query is buffered
    assert len(backend_queues["max-concurrent-test"]) == 1

    # Let's unblock the first query
    await signal.send.remote(clear=True)
    assert await first_query == "DONE"

    # The internal state of the router should have changed: the buffered
    # query is now inflight and the queue is drained.
    queries_counter, backend_queues = await q.get_queues.remote()
    # There should still be one inflight request
    assert queries_counter[backend_name][
        "max-concurrent-test:replica-tag"] == 1
    # But there shouldn't be any queries in the queue
    assert len(backend_queues["max-concurrent-test"]) == 0

    # Unblocking the second query
    await signal.send.remote(clear=True)
    assert await second_query == "DONE"

    # Checking the internal state of the router one more time
    queries_counter, backend_queues = await q.get_queues.remote()
    assert queries_counter[backend_name][
        "max-concurrent-test:replica-tag"] == 0
    assert len(backend_queues["max-concurrent-test"]) == 0