def test_snapshot_always_written_to_internal_kv(
    ray_start_with_dashboard, ray_shutdown  # noqa: F811
):
    # https://github.com/ray-project/ray/issues/19752
    _, tmp_path = mkstemp()

    @serve.deployment()
    def hello(_):
        return "hello"

    def check():
        try:
            resp = requests.get("http://localhost:8000/hello")
            assert resp.text == "hello"
            return True
        except Exception:
            return False

    serve.start(detached=True, _checkpoint_path=f"file://{tmp_path}")
    hello.deploy()
    # Wait until the deployment is actually serving traffic before
    # inspecting the snapshot.
    wait_for_condition(check)

    webui_url = ray_start_with_dashboard["webui_url"]

    def get_deployment_snapshot():
        snapshot = requests.get(f"http://{webui_url}/api/snapshot").json()["data"][
            "snapshot"
        ]
        return snapshot["deployments"]

    # Make sure /api/snapshot returns a non-empty deployment status.
    def verify_snapshot():
        return get_deployment_snapshot() != {}

    wait_for_condition(verify_snapshot)

    # Sanity-check that the snapshot is correct.
    snapshot = get_deployment_snapshot()
    assert len(snapshot) == 1
    hello_deployment = list(snapshot.values())[0]
    assert hello_deployment["name"] == "hello"
    assert hello_deployment["status"] == "RUNNING"


def _shared_serve_instance():
    # Uncomment the line below to turn on debug log for tests.
    # os.environ["SERVE_LOG_DEBUG"] = "1"
    # Overriding task_retry_delay_ms to relaunch actors more quickly
    ray.init(
        num_cpus=36,
        _metrics_export_port=9999,
        _system_config={
            "metrics_report_interval_ms": 1000,
            "task_retry_delay_ms": 50,
        },
    )
    yield serve.start(detached=True)


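# A sketch of how a per-test fixture might layer on top of the shared
# instance above. This is an assumption, not part of the original snippet:
# it presumes `_shared_serve_instance` is registered as a (session-scoped)
# pytest fixture, and it uses the public `Deployment.delete()` API to keep
# tests isolated without tearing down the long-lived Serve instance.
import pytest

from ray import serve


@pytest.fixture
def serve_instance(_shared_serve_instance):
    yield _shared_serve_instance
    # Assumed cleanup: remove every deployment after each test.
    for deployment in serve.list_deployments().values():
        deployment.delete()

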
def test_start_idempotent(serve_instance):
    @serve.deployment(name="start")
    def func(*args):
        pass

    func.deploy()

    assert "start" in serve.list_backends()
    serve.start(detached=True)
    serve.start()
    serve.start(detached=True)
    serve.start()
    assert "start" in serve.list_backends()


def test_update_num_replicas_anonymous_namespace(shutdown_ray, detached):
    """Test updating num_replicas with anonymous namespace."""

    ray.init()
    serve.start(detached=detached)

    @serve.deployment(num_replicas=1)
    def f(*args):
        return "got f"

    f.deploy()

    num_actors = len(ray.util.list_named_actors(all_namespaces=True))

    for _ in range(5):
        f.deploy()
        assert num_actors == len(ray.util.list_named_actors(all_namespaces=True))

    serve.shutdown()


def start(
    address,
    http_host,
    http_port,
    http_location,
    checkpoint_path,
):
    ray.init(
        address=address,
        namespace=SERVE_NAMESPACE,
    )
    serve.start(
        detached=True,
        http_options=dict(
            host=http_host,
            port=http_port,
            location=http_location,
        ),
        _checkpoint_path=checkpoint_path,
    )


async def startup_event():
    ray.init(address="auto")  # Connect to the running Ray cluster.
    serve.start(http_host=None)  # Start the Ray Serve instance.

    # Define a callable class to use for our Ray Serve backend.
    class GPT2:
        def __init__(self):
            self.nlp_model = pipeline("text-generation", model="gpt2")

        async def __call__(self, request):
            return self.nlp_model(await request.body(), max_length=50)

    # Set up a Ray Serve backend with the desired number of replicas.
    backend_config = serve.BackendConfig(num_replicas=2)
    serve.create_backend("gpt-2", GPT2, config=backend_config)
    serve.create_endpoint("generate", backend="gpt-2")

    # Get a handle to our Ray Serve endpoint so we can query it in Python.
    global serve_handle
    serve_handle = serve.get_handle("generate")


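# For context, the global `serve_handle` set above would typically be
# consumed by a FastAPI route. A minimal sketch, assuming `app` is the
# surrounding FastAPI instance and that `startup_event` is registered via
# `@app.on_event("startup")` (neither is shown in the snippet above):
from fastapi import FastAPI

app = FastAPI()


@app.get("/generate")
async def generate(query: str):
    # Forward the query to the Serve endpoint through the handle, awaiting
    # the result instead of blocking the event loop with ray.get().
    return await serve_handle.remote(query)

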
def test_serve_shutdown(ray_shutdown):
    serve.start(detached=True)

    @serve.deployment
    class A:
        def __call__(self, *args):
            return "hi"

    A.deploy()

    assert len(serve.list_deployments()) == 1

    serve.shutdown()
    serve.start(detached=True)

    assert len(serve.list_deployments()) == 0

    A.deploy()

    assert len(serve.list_deployments()) == 1


def test_deploy_with_overridden_namespace(shutdown_ray, detached):
    """Test deployments with an overridden namespace."""

    ray_namespace = "ray_namespace"
    controller_namespace = "controller_namespace"

    ray.init(namespace=ray_namespace)
    serve.start(detached=detached, _override_controller_namespace=controller_namespace)

    for iteration in range(2):

        @serve.deployment
        def f(*args):
            return f"{iteration}"

        f.deploy()
        assert requests.get("http://localhost:8000/f").text == f"{iteration}"

    serve.shutdown()


def test_http_head_only(ray_cluster):
    cluster = ray_cluster
    head_node = cluster.add_node(num_cpus=4)
    cluster.add_node(num_cpus=4)

    ray.init(head_node.address)
    node_ids = ray.state.node_ids()
    assert len(node_ids) == 2

    serve.start(http_options={"port": new_port(), "location": "HeadOnly"})

    # Only the controller and head node actor should be started.
    assert len(ray.state.actors()) == 2

    # They should all be placed on the head node without consuming any CPUs,
    # so both nodes should still have all 4 CPUs available.
    cpu_per_nodes = {
        r["CPU"] for r in ray.state.state._available_resources_per_node().values()
    }
    assert cpu_per_nodes == {4}


def test_shutdown(ray_shutdown):
    def f():
        pass

    ray.init(num_cpus=16)
    serve.start(http_port=8003)
    serve.create_backend("backend", f)
    serve.create_endpoint("endpoint", backend="backend")

    actor_names = [
        serve.api._global_client._controller_name,
        format_actor_name(
            SERVE_PROXY_NAME,
            serve.api._global_client._controller_name,
            get_all_node_ids()[0][0],
        ),
    ]

    def check_alive():
        alive = True
        for actor_name in actor_names:
            try:
                ray.get_actor(actor_name)
            except ValueError:
                alive = False
        return alive

    wait_for_condition(check_alive)

    serve.shutdown()
    with pytest.raises(RayServeException):
        serve.list_backends()

    def check_dead():
        for actor_name in actor_names:
            try:
                ray.get_actor(actor_name)
                return False
            except ValueError:
                pass
        return True

    wait_for_condition(check_dead)


def test_no_http(serve_instance):
    client = serve.start(http_host=None)
    assert len(ray.get(client._controller.get_http_proxies.remote())) == 0

    def hello(*args):
        return "hello"

    client.create_backend("backend", hello)
    client.create_endpoint("endpoint", backend="backend")
    assert ray.get(client.get_handle("endpoint").remote()) == "hello"


def test_controller_starts_java_replica(shutdown_only):  # noqa: F811
    ray.init(
        num_cpus=8,
        namespace="default_test_namespace",
        # A dummy code search path to enable cross language.
        job_config=JobConfig(code_search_path=["."]),
    )
    client = serve.start(detached=True)
    controller = client._controller

    config = DeploymentConfig()
    config.deployment_language = JAVA
    config.is_cross_language = True

    replica_config = ReplicaConfig.create(
        "io.ray.serve.util.ExampleEchoDeployment",
        init_args=["my_prefix "],
    )

    # Deploy it.
    deployment_name = "my_java"
    updating = ray.get(
        controller.deploy.remote(
            name=deployment_name,
            deployment_config_proto_bytes=config.to_proto_bytes(),
            replica_config_proto_bytes=replica_config.to_proto_bytes(),
            route_prefix=None,
            deployer_job_id=ray.get_runtime_context().job_id,
        )
    )
    assert updating
    client._wait_for_deployment_healthy(deployment_name)

    # Let's try to call it!
    all_handles = ray.get(controller._all_running_replicas.remote())
    backend_handle = all_handles["my_java"][0].actor_handle
    out = backend_handle.handleRequest.remote(
        RequestMetadata(
            request_id="id-1",
            endpoint="endpoint",
            call_method="call",
        ).SerializeToString(),
        RequestWrapper(body=msgpack_serialize("hello")).SerializeToString(),
    )
    assert ray.get(out) == "my_prefix hello"

    handle = serve.get_deployment("my_java").get_handle()
    handle_out = handle.remote("hello handle")
    assert ray.get(handle_out) == "my_prefix hello handle"

    ray.get(controller.delete_deployment.remote(deployment_name))
    client._wait_for_deployment_deleted(deployment_name)


def test_deploy_function_no_params_call_with_param(serve_instance, use_async):
    serve.start()

    if use_async:
        expected_output = "async!"
        deployment_cls = async_d
    else:
        expected_output = "sync!"
        deployment_cls = sync_d

    handle = serve.run(deployment_cls.bind())

    assert (
        requests.get(f"http://localhost:8000/{deployment_cls.name}").text
        == expected_output
    )
    with pytest.raises(
        TypeError, match=r"\(\) takes 0 positional arguments but 1 was given"
    ):
        assert ray.get(handle.remote(1)) == expected_output

    with pytest.raises(TypeError, match=r"\(\) got an unexpected keyword argument"):
        assert ray.get(handle.remote(key=1)) == expected_output


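# The snippet above relies on module-level `sync_d`/`async_d` deployments
# and a `use_async` parametrization that are not shown. A hypothetical
# reconstruction consistent with the asserted outputs:
import pytest

from ray import serve


@serve.deployment
def sync_d():
    return "sync!"


@serve.deployment
async def async_d():
    return "async!"


# Reusable marker that would supply `use_async` to the test, e.g. applied
# as `@use_async` above the test definition:
use_async = pytest.mark.parametrize("use_async", [False, True])

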
def test_serve_namespace(ray_start_stop):
    """
    Check that the Dashboard's Serve API can interact with the Python API
    when they both start in the "serve" namespace.
    """

    one = dict(
        name="one",
        num_replicas=1,
        route_prefix="/one",
        ray_actor_options={"runtime_env": {"py_modules": [test_module_uri]}},
        import_path="test_module.test.one",
    )

    put_response = requests.put(GET_OR_PUT_URL, json={"deployments": [one]}, timeout=30)
    assert put_response.status_code == 200

    ray.init(address="auto", namespace="serve")
    serve.start()

    deployments = serve.list_deployments()
    assert len(deployments) == 1
    assert "one" in deployments

    serve.shutdown()


def test_http_root_url(ray_shutdown):
    @serve.deployment
    def f(_):
        pass

    root_url = "https://my.domain.dev/prefix"
    port = new_port()
    os.environ[SERVE_ROOT_URL_ENV_KEY] = root_url
    serve.start(http_options=dict(port=port))
    f.deploy()
    assert f.url == root_url + "/f"
    serve.shutdown()

    del os.environ[SERVE_ROOT_URL_ENV_KEY]
    port = new_port()
    serve.start(http_options=dict(port=port))
    f.deploy()
    assert f.url != root_url + "/f"
    assert f.url == f"http://127.0.0.1:{port}/f"
    serve.shutdown()


def test_serve_shutdown(ray_shutdown):
    ray.init(namespace="serve")
    serve.start(detached=True)

    @serve.deployment
    class A:
        def __call__(self, *args):
            return "hi"

    serve.run(A.bind())

    assert len(serve.list_deployments()) == 1

    serve.shutdown()
    serve.start(detached=True)

    assert len(serve.list_deployments()) == 0

    serve.run(A.bind())

    assert len(serve.list_deployments()) == 1


def test_quickstart_counter(serve_with_client):
    serve.start()

    @serve.deployment
    class Counter:
        def __init__(self):
            self.count = 0

        def __call__(self, *args):
            self.count += 1
            return {"count": self.count}

    # Deploy our class.
    Counter.deploy()
    print("deploy finished")

    # Query our endpoint in two different ways: from HTTP and from Python.
    assert requests.get("http://127.0.0.1:8000/Counter").json() == {"count": 1}
    print("query 1 finished")
    assert ray.get(Counter.get_handle().remote()) == {"count": 2}
    print("query 2 finished")


def test_serve_controller_namespace(
    ray_shutdown, namespace: Optional[str], detached: bool
):
    """
    Tests that the Serve controller is started in the given namespace if one
    is specified, in the "serve" namespace if started detached without a
    namespace, and in the current (anonymous) namespace otherwise. When the
    controller is in the "serve" namespace, this also tests that it can be
    retrieved from another namespace.
    """

    ray.init(namespace=namespace)
    serve.start(detached=detached)
    client = serve.api._global_client
    if namespace:
        controller_namespace = namespace
    elif detached:
        controller_namespace = "serve"
    else:
        controller_namespace = ray.get_runtime_context().namespace

    assert ray.get_actor(client._controller_name, namespace=controller_namespace)


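# The `namespace` and `detached` arguments above come from pytest
# parametrization that is not shown. A sketch of the assumed setup; the
# concrete values are illustrative:
import pytest


@pytest.mark.parametrize("namespace", [None, "test_namespace"])
@pytest.mark.parametrize("detached", [True, False])
def test_namespace_detached_combinations(namespace, detached):
    # Each (namespace, detached) pair exercises a different branch of the
    # controller-namespace resolution logic in the test above.
    assert detached in (True, False)

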
def test_scale_up(ray_cluster):
    cluster = ray_cluster
    head_node = cluster.add_node(num_cpus=3)

    @serve.deployment("D", version="1", num_replicas=1)
    def D(*args):
        return os.getpid()

    def get_pids(expected, timeout=30):
        pids = set()
        start = time.time()
        while len(pids) < expected:
            pids.add(requests.get("http://localhost:8000/D").text)
            if time.time() - start >= timeout:
                raise TimeoutError("Timed out waiting for pids.")
        return pids

    ray.init(head_node.address)
    serve.start(detached=True)
    client = serve.connect()

    D.deploy()
    pids1 = get_pids(1)

    goal_ref = D.options(num_replicas=3).deploy(_blocking=False)
    assert not client._wait_for_goal(goal_ref, timeout=0.1)
    assert get_pids(1) == pids1

    # Add a node with another CPU, another replica should get placed.
    cluster.add_node(num_cpus=1)
    assert not client._wait_for_goal(goal_ref, timeout=0.1)
    pids2 = get_pids(2)
    assert pids1.issubset(pids2)

    # Add a node with another CPU, the final replica should get placed
    # and the deploy goal should be done.
    cluster.add_node(num_cpus=1)
    assert client._wait_for_goal(goal_ref)
    pids3 = get_pids(3)
    assert pids2.issubset(pids3)


def test_fixed_number_proxies(ray_cluster):
    cluster = ray_cluster
    head_node = cluster.add_node(num_cpus=4)
    cluster.add_node(num_cpus=4)
    cluster.add_node(num_cpus=4)

    ray.init(head_node.address)
    node_ids = ray._private.state.node_ids()
    assert len(node_ids) == 3

    with pytest.raises(
        pydantic.ValidationError,
        match="you must specify the `fixed_number_replicas` parameter.",
    ):
        serve.start(
            http_options={
                "location": "FixedNumber",
            }
        )

    serve.start(
        http_options={
            "port": new_port(),
            "location": "FixedNumber",
            "fixed_number_replicas": 2,
        }
    )

    # Only the controller and two HTTP proxies should be started.
    controller_handle = get_global_client()._controller
    node_to_http_actors = ray.get(controller_handle.get_http_proxies.remote())
    assert len(node_to_http_actors) == 2

    proxy_names_bytes = ray.get(controller_handle.get_http_proxy_names.remote())
    proxy_names = ActorNameList.FromString(proxy_names_bytes)
    assert len(proxy_names.names) == 2

    serve.shutdown()
    ray.shutdown()
    cluster.shutdown()


def test_http_root_url(ray_shutdown):
    @serve.deployment
    def f(_):
        pass

    root_url = "https://my.domain.dev/prefix"
    port = new_port()
    os.environ[SERVE_ROOT_URL_ENV_KEY] = root_url
    serve.start(http_options=dict(port=port))
    serve.run(f.bind())
    assert f.url == root_url + "/f"
    serve.shutdown()
    ray.shutdown()

    del os.environ[SERVE_ROOT_URL_ENV_KEY]
    port = new_port()
    serve.start(http_options=dict(port=port))
    serve.run(f.bind())
    assert f.url != root_url + "/f"
    assert f.url == f"http://127.0.0.1:{port}/f"
    serve.shutdown()
    ray.shutdown()

    ray.init(runtime_env={"env_vars": {SERVE_ROOT_URL_ENV_KEY: root_url}})
    port = new_port()
    serve.start(http_options=dict(port=port))
    serve.run(f.bind())
    assert f.url == root_url + "/f"
    serve.shutdown()
    ray.shutdown()


def test_deployment_to_schema_to_deployment():
    @serve.deployment(
        num_replicas=3,
        route_prefix="/hello",
        ray_actor_options={
            "runtime_env": {
                "working_dir": (
                    "https://github.com/shrekris-anyscale/"
                    "test_module/archive/HEAD.zip"
                ),
                "py_modules": [
                    (
                        "https://github.com/shrekris-anyscale/"
                        "test_deploy_group/archive/HEAD.zip"
                    ),
                ],
            }
        },
    )
    def f():
        # The body of this function doesn't matter. It gets replaced by
        # global_f() when the import path in f._func_or_class is overwritten.
        # This function is used as a convenience to apply the @serve.deployment
        # decorator without converting global_f() into a Deployment object.
        pass

    f._func_or_class = "ray.dashboard.modules.serve.tests.test_schema.global_f"

    deployment = schema_to_deployment(deployment_to_schema(f))

    assert deployment.num_replicas == 3
    assert deployment.route_prefix == "/hello"
    assert deployment.ray_actor_options["runtime_env"]["working_dir"] == (
        "https://github.com/shrekris-anyscale/test_module/archive/HEAD.zip"
    )
    assert deployment.ray_actor_options["runtime_env"]["py_modules"] == [
        "https://github.com/shrekris-anyscale/test_deploy_group/archive/HEAD.zip",
        "https://github.com/shrekris-anyscale/test_module/archive/HEAD.zip",
    ]

    serve.start()
    deployment.deploy()
    assert ray.get(deployment.get_handle().remote()) == "Hello world!"
    assert requests.get("http://localhost:8000/hello").text == "Hello world!"
    serve.shutdown()


def run(
    config_or_import_path: str,
    runtime_env: str,
    runtime_env_json: str,
    working_dir: str,
    app_dir: str,
    address: str,
    host: str,
    port: int,
    blocking: bool,
):
    sys.path.insert(0, app_dir)

    final_runtime_env = parse_runtime_env_args(
        runtime_env=runtime_env,
        runtime_env_json=runtime_env_json,
        working_dir=working_dir,
    )

    if pathlib.Path(config_or_import_path).is_file():
        config_path = config_or_import_path
        cli_logger.print(f'Deploying from config file: "{config_path}".')

        with open(config_path, "r") as config_file:
            config = ServeApplicationSchema.parse_obj(yaml.safe_load(config_file))
        is_config = True
    else:
        import_path = config_or_import_path
        cli_logger.print(f'Deploying from import path: "{import_path}".')
        node = import_attr(import_path)
        is_config = False

    # Setting the runtime_env here will set defaults for the deployments.
    ray.init(address=address, namespace=SERVE_NAMESPACE, runtime_env=final_runtime_env)
    client = serve.start(detached=True)

    try:
        if is_config:
            client.deploy_app(config)
        else:
            serve.run(node, host=host, port=port)
        cli_logger.success("Deployed successfully.")

        if blocking:
            while True:
                # Block, letting Ray print logs to the terminal.
                time.sleep(10)

    except KeyboardInterrupt:
        cli_logger.info("Got KeyboardInterrupt, shutting down...")
        serve.shutdown()
        sys.exit()


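# For reference, the config-file branch above expects a YAML document
# matching ServeApplicationSchema. A minimal sketch that validates an
# in-memory config the same way; the import path and deployment shown are
# hypothetical, and the schema import location assumes recent Ray versions:
import yaml

from ray.serve.schema import ServeApplicationSchema

config_yaml = """
import_path: my_module:my_dag_node
deployments:
  - name: my_deployment
    num_replicas: 2
"""

config = ServeApplicationSchema.parse_obj(yaml.safe_load(config_yaml))

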
def test_fastapi_serialization(shutdown_ray):
    # https://github.com/ray-project/ray/issues/15511
    app = FastAPI()

    @serve.deployment(name="custom_service")
    @serve.ingress(app)
    class CustomService:
        def deduplicate(self, data):
            data.drop_duplicates(inplace=True)
            return data

        @app.post("/deduplicate")
        def _deduplicate(self, request):
            data = request["data"]
            columns = request["columns"]
            import pandas as pd

            data = pd.DataFrame(data, columns=columns)
            data.drop_duplicates(inplace=True)
            return data.values.tolist()

    serve.start()
    CustomService.deploy()


def test_http_root_path(ray_shutdown):
    @serve.deployment
    def hello():
        return "hello"

    port = new_port()
    root_path = "/serve"
    serve.start(http_options=dict(root_path=root_path, port=port))
    hello.deploy()

    # Check that the URL is prefixed correctly.
    assert hello.url == f"http://127.0.0.1:{port}{root_path}/hello"

    # Check that routing works as expected.
    resp = requests.get(hello.url)
    assert resp.status_code == 200
    assert resp.text == "hello"

    # Check that advertised routes are prefixed correctly.
    resp = requests.get(f"http://127.0.0.1:{port}{root_path}/-/routes")
    assert resp.status_code == 200
    assert resp.json() == {"/hello": "hello"}


def main(
    num_replicas: int,
    num_queries: Optional[int],
    max_concurrent_queries: Optional[int],
    blocking: bool,
):
    serve.start()

    print(f"num_replicas={num_replicas}")
    print(f"max_concurrent_queries={max_concurrent_queries}")

    @serve.deployment(
        num_replicas=num_replicas, max_concurrent_queries=max_concurrent_queries
    )
    def noop(_):
        return "hello world"

    noop.deploy()
    url = "{}/noop".format(DEFAULT_HTTP_ADDRESS)

    if num_queries:
        run_http_benchmark(url, num_queries)
    if blocking:
        print("Endpoint {} is ready.".format(url))
        while True:
            time.sleep(5)


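# run_http_benchmark is referenced above but not shown. A hypothetical
# sketch of such a helper, assuming sequential requests and simple mean
# latency reporting:
import time

import requests


def run_http_benchmark(url: str, num_queries: int) -> None:
    latencies = []
    for _ in range(num_queries):
        start = time.time()
        requests.get(url)
        latencies.append(time.time() - start)
    print(
        f"Mean latency over {num_queries} queries: "
        f"{sum(latencies) / len(latencies):.4f}s"
    )

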
def serve_ha(external_redis, monkeypatch):  # noqa: F811
    monkeypatch.setenv("RAY_SERVE_KV_TIMEOUT_S", "1")
    address_info = ray.init(
        num_cpus=36,
        namespace="default_test_namespace",
        _metrics_export_port=9999,
        _system_config={
            "metrics_report_interval_ms": 1000,
            "task_retry_delay_ms": 50,
        },
    )
    yield (address_info, serve.start(detached=True))
    ray.shutdown()


def test_deploy_async_class_no_params(serve_instance):
    @serve.deployment
    class AsyncCounter:
        async def __init__(self):
            await asyncio.sleep(5)
            self.count = 0

        async def __call__(self):
            self.count += 1
            await asyncio.sleep(5)
            return {"count": self.count}

    serve.start()
    AsyncCounter.deploy()

    assert requests.get("http://127.0.0.1:8000/AsyncCounter").json() == {"count": 1}
    assert requests.get("http://127.0.0.1:8000/AsyncCounter").json() == {"count": 2}
    assert ray.get(AsyncCounter.get_handle().remote()) == {"count": 3}


def test_replica_spread(ray_cluster):
    cluster = ray_cluster
    cluster.add_node(num_cpus=2)

    # NOTE(edoakes): we need to start serve before adding the worker node to
    # guarantee that the controller is placed on the head node (we should be
    # able to tolerate being placed on workers, but there's currently a bug).
    # We should add an explicit test for that in the future when it's fixed.
    cluster.connect(namespace=SERVE_NAMESPACE)
    serve.start(detached=True)

    worker_node = cluster.add_node(num_cpus=2)

    @serve.deployment(num_replicas=2)
    def get_node_id():
        return os.getpid(), ray.get_runtime_context().node_id.hex()

    h = serve.run(get_node_id.bind())

    def get_num_nodes():
        pids = set()
        node_ids = set()
        while len(pids) < 2:
            pid, node = ray.get(h.remote())
            pids.add(pid)
            node_ids.add(node)

        return len(node_ids)

    # Check that the two replicas are spread across the two nodes.
    wait_for_condition(lambda: get_num_nodes() == 2)

    # Kill the worker node. The second replica should get rescheduled on
    # the head node.
    cluster.remove_node(worker_node)

    # Check that the replica on the dead node can be rescheduled.
    wait_for_condition(lambda: get_num_nodes() == 1)


def setup_anyscale_cluster():
    """Set up a Ray cluster on Anyscale via ray.client().

    Note this is large-scale by default and should be kicked off
    less frequently.
    """
    # TODO: Ray client doesn't work with the releaser script yet because
    # we cannot connect to the anyscale cluster from its headnode.
    # ray.client().env({}).connect()
    ray.init(address="auto")
    serve_client = serve.start()

    return serve_client