示例#1
0
 def creator(num_clusters: int) -> List[Cluster]:
     """Build ``num_clusters`` fake ``Cluster`` objects of type ON_PREMISE.

     Each cluster gets faker-generated ``id``, ``name``, ``owner`` and
     ``endpoint`` values, plus one authentication object picked with
     ``choice`` out of the four candidates built for that cluster.
     """

     def _pick_authentication():
         # every candidate is instantiated before one is picked, so the faker
         # calls below run once per cluster whatever the outcome
         candidates = [
             NoAuthentication(),
             SimpleAuthentication(
                 username=faker.user_name(),
                 password=faker.password(),
             ),
             KerberosAuthentication(),
             JupyterHubTokenAuthentication(api_token=faker.uuid4()),
         ]
         return choice(candidates)

     return [
         Cluster.parse_obj(
             dict(
                 id=faker.pyint(),
                 name=faker.name(),
                 type=ClusterType.ON_PREMISE,
                 owner=faker.pyint(),
                 endpoint=faker.uri(),
                 authentication=_pick_authentication(),
             )
         )
         for _ in range(num_clusters)
     ]
示例#2
0
    async def factory() -> DaskClient:
        """Create a ``DaskClient``, sanity-check it and record it.

        The client is created with the address and password exposed by
        ``local_dask_gateway_server`` and the scheduler settings of
        ``minimal_app``. It is appended to ``created_clients`` before being
        returned.
        """
        scheduler_settings = minimal_app.state.settings.DASK_SCHEDULER
        gateway_endpoint = parse_obj_as(AnyUrl, local_dask_gateway_server.address)
        gateway_auth = SimpleAuthentication(
            username="******", password=local_dask_gateway_server.password
        )
        client = await DaskClient.create(
            app=minimal_app,
            settings=scheduler_settings,
            endpoint=gateway_endpoint,
            authentication=gateway_auth,
        )

        # the new client must be bound to the app/settings it was created with
        # and must not hold any subscribed task yet
        assert client
        assert client.app == minimal_app
        assert client.settings == scheduler_settings
        assert not client._subscribed_tasks

        # all three handles of the dask subsystem must be populated
        subsystem = client.dask_subsystem
        assert subsystem.client
        assert subsystem.gateway
        assert subsystem.gateway_cluster

        # NOTE(review): the returned value is not used afterwards; presumably
        # the call is kept to check the scheduler answers -- confirm
        scheduler_infos = subsystem.client.scheduler_info()  # type: ignore

        # the f-strings below print their expression text, keep them verbatim
        print(f"--> Connected to gateway {client.dask_subsystem.gateway=}")
        print(f"--> Cluster {client.dask_subsystem.gateway_cluster=}")
        print(f"--> Client {client=}")
        print(
            f"--> Cluster dashboard link {client.dask_subsystem.gateway_cluster.dashboard_link}"
        )

        created_clients.append(client)
        return client
示例#3
0
 def creator() -> Dict[str, Any]:
     """Return a faker-generated payload describing a "simple" authentication.

     The payload is checked to parse as a ``SimpleAuthentication`` before it
     is handed back as a plain dictionary.
     """
     payload: Dict[str, Any] = dict(
         type="simple",
         username=faker.user_name(),
         password=faker.password(),
     )
     assert SimpleAuthentication.parse_obj(payload)
     return payload
示例#4
0
def cluster_create(faker: Faker) -> ClusterCreate:
    """Build a ``ClusterCreate`` filled with fake data.

    Name, endpoint and credentials come from ``faker``; the cluster type is
    drawn with ``random.choice`` among all ``ClusterType`` members.
    """
    # values are generated in the same order as the fields are declared
    cluster_name = faker.name()
    cluster_endpoint = faker.uri()
    cluster_type = random.choice(list(ClusterType))
    credentials = SimpleAuthentication(
        username=faker.user_name(),
        password=faker.password(),
    )

    new_cluster = ClusterCreate(
        name=cluster_name,
        endpoint=cluster_endpoint,
        type=cluster_type,
        authentication=credentials,
    )
    assert new_cluster
    return new_cluster
示例#5
0
async def test_ping_cluster(
    clusters_config: None,
    async_client: httpx.AsyncClient,
    local_dask_gateway_server: DaskGatewayServer,
):
    """POSTing a ping for the local gateway must succeed with 204 No Content.

    The ping payload uses the address and password exposed by
    ``local_dask_gateway_server``.
    """
    gateway_endpoint = parse_obj_as(AnyHttpUrl, local_dask_gateway_server.address)
    gateway_auth = SimpleAuthentication(
        username="******",
        password=local_dask_gateway_server.password,
    )
    valid_cluster = ClusterPing(
        endpoint=gateway_endpoint,
        authentication=gateway_auth,
    )

    ping_payload = valid_cluster.dict(by_alias=True)
    response = await async_client.post("/v2/clusters:ping", json=ping_payload)

    # any 4xx/5xx raises here, before the exact status code is compared
    response.raise_for_status()
    assert response.status_code == status.HTTP_204_NO_CONTENT
示例#6
0
async def test_get_cluster_entrypoint(
    loop: AbstractEventLoop,
    clusters_config: None,
    async_client: httpx.AsyncClient,
    local_dask_gateway_server: DaskGatewayServer,
    cluster: Callable[..., Cluster],
):
    """GET ``/v2/clusters/{id}`` must answer 200 with a ``ClusterOut`` body.

    The cluster is produced by the ``cluster`` factory, using the address and
    password exposed by ``local_dask_gateway_server``.
    """
    gateway_address = local_dask_gateway_server.address
    gateway_auth = SimpleAuthentication(
        username="******",
        password=local_dask_gateway_server.password,
    )
    some_cluster = cluster(
        endpoint=gateway_address,
        authentication=gateway_auth.dict(by_alias=True),
    )

    response = await async_client.get(f"/v2/clusters/{some_cluster.id}")
    assert response.status_code == status.HTTP_200_OK
    print(f"<-- received cluster details response {response=}")

    # the body must validate against the output model
    cluster_out = ClusterOut.parse_obj(response.json())
    assert cluster_out
    print(f"<-- received cluster details {cluster_out=}")
示例#7
0
async def test_ping_cluster_with_error(
    enable_dev_features: None,
    mocked_director_v2_with_error,
    client: TestClient,
    logged_user: Dict[str, Any],
    faker: Faker,
    expected_http_error,
):
    """Pinging a cluster must answer ``expected_http_error`` with an error and no data.

    The ping payload is built from faker-generated endpoint and credentials
    and posted to the route registered as ``ping_cluster_handler``.
    """
    # generated in the same order as the ClusterPing fields are declared
    ping_endpoint = faker.uri()
    credentials = SimpleAuthentication(
        username=faker.user_name(),
        password=faker.password(),
    )
    cluster_ping = ClusterPing(endpoint=ping_endpoint, authentication=credentials)

    assert client.app
    url = client.app.router["ping_cluster_handler"].url_for()

    # round-trip through the model's JSON export to get a JSON-compatible dict
    ping_body = json.loads(cluster_ping.json(by_alias=True))
    rsp = await client.post(f"{url}", json=ping_body)

    data, error = await assert_status(rsp, expected_http_error)
    assert not data
    assert error
async def test_get_cluster_details(
    clusters_config: None,
    registered_user: Callable[..., Dict[str, Any]],
    async_client: httpx.AsyncClient,
    local_dask_gateway_server: DaskGatewayServer,
    cluster: Callable[..., Cluster],
    dask_gateway_cluster: GatewayCluster,
    dask_gateway_cluster_client: DaskClient,
):
    """Follow the reported cluster details while a worker is added and runs a task.

    Steps checked, in order:
      1. the freshly defined cluster reports no workers,
      2. after scaling, exactly ``_NUM_WORKERS`` workers are reported,
      3. a submitted sleeping task shows up as ``executing == 1``,
      4. once the task is done the worker reports ``executing == 0``,
         ``in_memory == 1`` and ``cpu == 0``.
    """

    def _first_worker_metrics(details):
        # metrics of the first worker listed in the cluster details
        return next(iter(details.scheduler.workers.values())).metrics

    user_1 = registered_user()
    # define the cluster in the DB
    some_cluster = cluster(
        user_1,
        endpoint=local_dask_gateway_server.address,
        authentication=SimpleAuthentication(
            username="******",
            password=local_dask_gateway_server.password,
        ).dict(by_alias=True),
    )
    # in its present state, the cluster should have no workers
    cluster_out = await _get_cluster_details(
        async_client, user_1["id"], some_cluster.id
    )
    assert not cluster_out.scheduler.workers, "the cluster should not have any worker!"

    # now let's scale the cluster
    _NUM_WORKERS = 1
    await dask_gateway_cluster.scale(_NUM_WORKERS)
    # poll once per second, for up to 60 seconds, until the worker is reported
    async for attempt in AsyncRetrying(
        reraise=True, stop=stop_after_delay(60), wait=wait_fixed(1)
    ):
        with attempt:
            cluster_out = await _get_cluster_details(
                async_client, user_1["id"], some_cluster.id
            )
            assert cluster_out.scheduler.workers, "the cluster has no workers!"
            assert (
                len(cluster_out.scheduler.workers) == _NUM_WORKERS
            ), f"the cluster is missing {_NUM_WORKERS}, currently has {len(cluster_out.scheduler.workers)}"
            print(
                f"cluster now has its {_NUM_WORKERS}, after {json.dumps(attempt.retry_state.retry_object.statistics)}"
            )
    print(f"!!> cluster dashboard link: {dask_gateway_cluster.dashboard_link}")

    # let's start some computation
    _TASK_SLEEP_TIME = 5

    def do_some_work(x: int):
        import time

        time.sleep(x)
        return True

    task = dask_gateway_cluster_client.submit(do_some_work, _TASK_SLEEP_TIME)
    # wait for the computation to start, we should see this in the cluster infos
    async for attempt in AsyncRetrying(
        reraise=True, stop=stop_after_delay(10), wait=wait_fixed(1)
    ):
        with attempt:
            cluster_out = await _get_cluster_details(
                async_client, user_1["id"], some_cluster.id
            )
            worker_metrics = _first_worker_metrics(cluster_out)
            assert worker_metrics.executing == 1, "worker is not executing the task"
            # the f-string prints its expression text, keep it verbatim
            print(
                f"!!> cluster metrics: {next(iter(cluster_out.scheduler.workers.values())).metrics=}"
            )

    # let's wait for the result
    result = task.result(timeout=_TASK_SLEEP_TIME + 5)
    # `result` is awaited just below, so this only checks that a truthy
    # awaitable was handed back
    assert result
    # do_some_work returns the literal True: compare by identity, not `== True`
    assert await result is True

    # wait for the computation to effectively stop
    async for attempt in AsyncRetrying(
        reraise=True, stop=stop_after_delay(60), wait=wait_fixed(1)
    ):
        with attempt:
            cluster_out = await _get_cluster_details(
                async_client, user_1["id"], some_cluster.id
            )
            print(
                f"!!> cluster metrics: {next(iter(cluster_out.scheduler.workers.values())).metrics=}"
            )
            # look the worker up once and check all its metrics together
            worker_metrics = _first_worker_metrics(cluster_out)
            assert worker_metrics.executing == 0, "worker is still executing the task"
            assert (
                worker_metrics.in_memory == 1
            ), "worker did not keep the result in memory"
            assert worker_metrics.cpu == 0, "worker did not update the cpu metrics"

    # since the task is completed the worker should have stopped executing
    cluster_out = await _get_cluster_details(
        async_client, user_1["id"], some_cluster.id
    )
    worker_data = next(iter(cluster_out.scheduler.workers.values()))
    assert worker_data.metrics.executing == 0
    # in dask, the task remains in memory until the result is deleted
    assert worker_data.metrics.in_memory == 1