Пример #1
0
async def test_local_cuda_cluster():
    """Exercise a default ``LocalCUDACluster`` through an asynchronous client.

    Checks, in order: the number of workers equals ``utils.get_n_gpus()``;
    every worker's ``CUDA_VISIBLE_DEVICES`` lists that many devices and each
    position covers every device index; the summed worker memory limits
    equal ``TOTAL_MEMORY``; each worker's name equals the first device in
    its ``CUDA_VISIBLE_DEVICES``; and ``cluster.scale(1000)`` raises
    ``ValueError``.
    """
    async with LocalCUDACluster(asynchronous=True) as cluster:
        async with Client(cluster, asynchronous=True) as client:
            assert len(cluster.workers) == utils.get_n_gpus()

            # CUDA_VISIBLE_DEVICES cycles properly
            def get_visible_devices():
                return os.environ["CUDA_VISIBLE_DEVICES"]

            result = await client.run(get_visible_devices)

            assert all(
                len(v.split(",")) == utils.get_n_gpus()
                for v in result.values())
            for i in range(utils.get_n_gpus()):
                assert {int(v.split(",")[i])
                        for v in result.values()
                        } == set(range(utils.get_n_gpus()))

            # Use full memory
            assert sum(w.memory_limit
                       for w in cluster.workers.values()) == TOTAL_MEMORY

            for w, devices in result.items():
                # Take the first comma-separated entry rather than the first
                # character, so multi-digit device ids (10, 11, ...) are
                # compared correctly.
                ident = devices.split(",")[0]
                assert int(ident) == cluster.scheduler.workers[w].name

            with pytest.raises(ValueError):
                cluster.scale(1000)
Пример #2
0
async def test_local_cuda_cluster():
    """Exercise ``LocalCUDACluster`` started with ``device_memory_limit=1``.

    Verifies the worker count, the per-worker ``CUDA_VISIBLE_DEVICES``
    layout, that the summed memory limit is within 1024 of ``MEMORY_LIMIT``,
    that each worker is named after its first visible device, and that
    scaling to 1000 workers raises ``ValueError``.
    """
    cluster_options = dict(
        scheduler_port=0,
        asynchronous=True,
        device_memory_limit=1,
    )
    async with LocalCUDACluster(**cluster_options) as cluster:
        async with Client(cluster, asynchronous=True) as client:
            assert len(cluster.workers) == utils.get_n_gpus()

            # Ask every worker which devices it sees.
            def get_visible_devices():
                return os.environ["CUDA_VISIBLE_DEVICES"]

            visible = await client.run(get_visible_devices)

            # Each worker must list one entry per GPU ...
            for entry in visible.values():
                assert len(entry.split(",")) == utils.get_n_gpus()

            # ... and every position in the list must cover all GPU indices
            # when taken across workers.
            for position in range(utils.get_n_gpus()):
                seen_at_position = set()
                for entry in visible.values():
                    seen_at_position.add(int(entry.split(",")[position]))
                assert seen_at_position == set(range(utils.get_n_gpus()))

            # Use full memory, checked with some buffer to ignore rounding
            # difference
            total_limit = sum(
                worker.memory_limit for worker in cluster.workers.values()
            )
            assert MEMORY_LIMIT - 1024 <= total_limit < MEMORY_LIMIT + 1024

            # The scheduler-side worker name is the first visible device.
            for address, device_list in visible.items():
                first_device = device_list.split(",")[0]
                assert int(first_device) == cluster.scheduler.workers[address].name

            with pytest.raises(ValueError):
                cluster.scale(1000)
Пример #3
0
def test_get_n_gpus():
    """Check that ``get_n_gpus`` returns an int and honours CUDA_VISIBLE_DEVICES.

    With ``CUDA_VISIBLE_DEVICES`` set to three comma-separated entries the
    function is expected to report 3.
    """
    from unittest import mock

    assert isinstance(get_n_gpus(), int)

    # patch.dict restores os.environ on exit, so a CUDA_VISIBLE_DEVICES value
    # that was set before the test is put back instead of being deleted.
    with mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1,2"}):
        assert get_n_gpus() == 3
Пример #4
0
def test_cpu_affinity_and_cuda_visible_devices():
    """Compare CPU affinity looked up via a remapped index with a direct lookup.

    For each GPU index the device is resolved through
    ``nvml_device_index(0, cuda_visible_devices(i))`` and its affinity
    recorded; a second pass asserts ``get_cpu_affinity(i)`` matches the
    recorded entry for every index.
    """
    recorded = {}
    for gpu in range(get_n_gpus()):
        # The negative here would be `device = 0` as required for CUDA runtime
        # calls.
        visible = cuda_visible_devices(gpu)
        resolved = nvml_device_index(0, visible)
        recorded[resolved] = get_cpu_affinity(resolved)

    for gpu in range(get_n_gpus()):
        expected = recorded[gpu]
        assert get_cpu_affinity(gpu) == expected
Пример #5
0
def test_get_device_total_memory():
    """Compare ``get_device_total_memory`` with the current CUDA context.

    For every GPU index, the value returned by ``get_device_total_memory``
    must equal element 1 of ``cuda.current_context().get_memory_info()``
    while that GPU is selected.
    """
    for device_id in range(get_n_gpus()):
        with cuda.gpus[device_id]:
            reported = get_device_total_memory(device_id)
            context_info = cuda.current_context().get_memory_info()
            assert reported == context_info[1]
Пример #6
0
def test_get_ucx_net_devices_auto():
    """Smoke-test ``get_ucx_net_devices`` in ``"auto"`` mode for every GPU.

    Skipped when the ``ucp`` module cannot be imported.
    """
    pytest.importorskip("ucp")

    # The selected device depends on the host, so no return value is
    # asserted: the call only has to complete without raising. Per the
    # original note, it yields an InfiniBand device when one is available
    # and an empty string otherwise.
    for gpu_index in range(get_n_gpus()):
        get_ucx_net_devices(gpu_index, "auto")
Пример #7
0
def setup_rmm_pool(request, pytestconfig):
    """Reinitialize RMM with a pool allocator when ``--use-rmm-pool`` is set.

    Does nothing when the option is falsy. Otherwise requires RMM to be
    available and calls ``rmm.reinitialize`` with a 1024**3 initial pool
    size for device indices ``0 .. get_n_gpus() - 1``.

    Raises:
        ImportError: if the option is set but ``has_rmm()`` is falsy.
    """
    if not pytestconfig.getoption('--use-rmm-pool'):
        return
    if not has_rmm():
        raise ImportError('The --use-rmm-pool option requires the RMM package')

    # Imported lazily so RMM stays optional unless the flag is given.
    import rmm
    from dask_cuda.utils import get_n_gpus

    pool_bytes = 1024 ** 3
    device_ids = list(range(get_n_gpus()))
    rmm.reinitialize(
        pool_allocator=True,
        initial_pool_size=pool_bytes,
        devices=device_ids,
    )
Пример #8
0
async def test_local_cuda_cluster():
    """Exercise ``LocalCUDACluster`` started with ``diagnostics_port=None``.

    Verifies the worker count, the per-worker ``CUDA_VISIBLE_DEVICES``
    layout, and that the summed worker memory limits equal ``TOTAL_MEMORY``.
    """
    cluster_options = dict(
        scheduler_port=0,
        asynchronous=True,
        diagnostics_port=None,
    )
    async with LocalCUDACluster(**cluster_options) as cluster:
        async with Client(cluster, asynchronous=True) as client:
            assert len(cluster.workers) == utils.get_n_gpus()

            # Ask every worker which devices it sees.
            def get_visible_devices():
                return os.environ["CUDA_VISIBLE_DEVICES"]

            visible = await client.run(get_visible_devices)

            # Each worker must list one entry per GPU ...
            for entry in visible.values():
                assert len(entry.split(",")) == utils.get_n_gpus()

            # ... and every position must cover all GPU indices across
            # workers.
            for position in range(utils.get_n_gpus()):
                seen_at_position = set()
                for entry in visible.values():
                    seen_at_position.add(int(entry.split(",")[position]))
                assert seen_at_position == set(range(utils.get_n_gpus()))

            # Use full memory
            # NOTE(review): this iterates cluster.workers directly; if it is a
            # mapping this yields keys rather than worker objects - confirm
            # against the distributed version in use.
            total_limit = sum(worker.memory_limit for worker in cluster.workers)
            assert total_limit == TOTAL_MEMORY
Пример #9
0
def test_dashboard_address(loop):  # noqa: F811
    """Check that ``--dashboard-address`` is propagated to the workers.

    Starts ``dask-scheduler`` on port 9369 and a ``dask-cuda-worker`` with
    ``--dashboard-address 127.0.0.1:9370``, waits for the workers, then
    asserts that every worker reports that dashboard address.

    Args:
        loop: event loop passed through to ``Client``.
    """
    from unittest import mock

    # Scope the override with patch.dict so os.environ is restored on exit
    # and the setting does not leak into tests that run afterwards. The
    # subprocesses are spawned inside the block and so still inherit it.
    with mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0"}):
        with popen(["dask-scheduler", "--port", "9369", "--no-dashboard"]):
            with popen([
                    "dask-cuda-worker",
                    "127.0.0.1:9369",
                    "--dashboard-address",
                    "127.0.0.1:9370",
            ]):
                with Client("127.0.0.1:9369", loop=loop) as client:
                    assert wait_workers(client, n_gpus=get_n_gpus())

                    dashboard_addresses = client.run(
                        lambda dask_worker: dask_worker._dashboard_address)
                    for v in dashboard_addresses.values():
                        assert v == "127.0.0.1:9370"
Пример #10
0
def test_rmm_managed(loop):  # noqa: F811
    """Check that ``--rmm-managed-memory`` selects ``ManagedMemoryResource``.

    Skipped when ``rmm`` cannot be imported. Starts a scheduler and a
    ``dask-cuda-worker`` as subprocesses, waits for the workers, then asserts
    the current device resource type on every worker.

    Args:
        loop: event loop passed through to ``Client``.
    """
    rmm = pytest.importorskip("rmm")

    scheduler_cmd = ["dask-scheduler", "--port", "9369", "--no-dashboard"]
    worker_cmd = [
        "dask-cuda-worker",
        "127.0.0.1:9369",
        "--host",
        "127.0.0.1",
        "--rmm-managed-memory",
        "--no-dashboard",
    ]

    with popen(scheduler_cmd):
        with popen(worker_cmd):
            with Client("127.0.0.1:9369", loop=loop) as client:
                assert wait_workers(client, n_gpus=get_n_gpus())

                resource_types = client.run(
                    rmm.mr.get_current_device_resource_type
                )
                for resource_type in resource_types.values():
                    assert resource_type is rmm.mr.ManagedMemoryResource
Пример #11
0
def test_rmm_logging(loop):  # noqa: F811
    """Check that ``--rmm-log-directory`` selects ``LoggingResourceAdaptor``.

    Skipped when ``rmm`` cannot be imported. Starts a scheduler and a
    ``dask-cuda-worker`` (with ``--rmm-pool-size "2 GB"`` and the log
    directory set to ``"."``) as subprocesses, waits for the workers, then
    asserts the current device resource type on every worker.

    Args:
        loop: event loop passed through to ``Client``.
    """
    rmm = pytest.importorskip("rmm")

    scheduler_cmd = ["dask-scheduler", "--port", "9369", "--no-dashboard"]
    worker_cmd = [
        "dask-cuda-worker",
        "127.0.0.1:9369",
        "--host",
        "127.0.0.1",
        "--rmm-pool-size",
        "2 GB",
        "--rmm-log-directory",
        ".",
        "--no-dashboard",
    ]

    with popen(scheduler_cmd):
        with popen(worker_cmd):
            with Client("127.0.0.1:9369", loop=loop) as client:
                assert wait_workers(client, n_gpus=get_n_gpus())

                resource_types = client.run(
                    rmm.mr.get_current_device_resource_type
                )
                for resource_type in resource_types.values():
                    assert resource_type is rmm.mr.LoggingResourceAdaptor
Пример #12
0
def initialize_cluster(use_gpu=True, n_cpu=None, n_gpu=-1):
    """Start a local cluster and return a ``Client`` connected to it.

    Args:
        use_gpu: when true, create a ``LocalCUDACluster`` using the ``ucx``
            protocol; otherwise create a ``LocalCluster``.
        n_cpu: passed as ``n_workers`` to ``LocalCluster`` (CPU path only).
        n_gpu: number of GPUs to use on the GPU path; ``-1`` means use
            ``get_n_gpus()``.

    Returns:
        The ``Client`` created for the new cluster.
    """
    enable_tcp_over_ucx = True
    enable_nvlink = True
    enable_infiniband = True

    logger.info('Starting dash cluster...')
    if use_gpu:
        initialize.initialize(create_cuda_context=True,
                              enable_tcp_over_ucx=enable_tcp_over_ucx,
                              enable_nvlink=enable_nvlink,
                              enable_infiniband=enable_infiniband)
        if n_gpu == -1:
            n_gpu = get_n_gpus()

        device_list = cuda_visible_devices(1, range(n_gpu)).split(',')
        CUDA_VISIBLE_DEVICES = []
        for device in device_list:
            try:
                CUDA_VISIBLE_DEVICES.append(int(device))
            except ValueError as vex:
                # Entries that are not integers are skipped; ``warning`` is
                # used because ``Logger.warn`` is deprecated.
                logger.warning(vex)

        logger.info('Using GPUs {} ...'.format(CUDA_VISIBLE_DEVICES))

        cluster = LocalCUDACluster(protocol="ucx",
                                   dashboard_address=':8787',
                                   CUDA_VISIBLE_DEVICES=CUDA_VISIBLE_DEVICES,
                                   enable_tcp_over_ucx=enable_tcp_over_ucx,
                                   enable_nvlink=enable_nvlink,
                                   enable_infiniband=enable_infiniband)
    else:
        logger.info('Using {} CPUs ...'.format(n_cpu))
        cluster = LocalCluster(dashboard_address=':8787',
                               n_workers=n_cpu,
                               threads_per_worker=4)

    client = Client(cluster)
    # NOTE(review): this runs on the workers in the CPU branch as well -
    # confirm that is intended.
    client.run(cupy.cuda.set_allocator)
    return client
Пример #13
0
def test_get_device_total_memory():
    """Check ``get_device_total_memory`` returns a positive plain ``int``.

    The check runs once per GPU index, with that GPU selected through
    ``cuda.gpus``.
    """
    for device_id in range(get_n_gpus()):
        with cuda.gpus[device_id]:
            reported = get_device_total_memory(device_id)
            # Exact type check on purpose: subclasses of int are rejected.
            assert type(reported) is int
            assert reported > 0
Пример #14
0
def test_cpu_affinity():
    """Check that each GPU's CPU affinity can be applied to this process.

    For every GPU index, the affinity returned by ``get_cpu_affinity`` is
    set on the current process and read back for comparison.
    """
    # Remember the starting affinity so the test process is not left pinned
    # to the last GPU's CPU set once the test finishes or fails.
    original_affinity = os.sched_getaffinity(0)
    try:
        for i in range(get_n_gpus()):
            affinity = get_cpu_affinity(i)
            os.sched_setaffinity(0, affinity)
            assert os.sched_getaffinity(0) == set(affinity)
    finally:
        os.sched_setaffinity(0, original_affinity)
Пример #15
0
def test_get_n_gpus():
    """Check that ``get_n_gpus`` returns an int and reports 3 for three devices.

    The count of 3 is asserted with ``CUDA_VISIBLE_DEVICES`` pinned to three
    entries, so the result does not depend on the host's hardware.
    NOTE(review): presumes ``get_n_gpus`` derives its count from
    ``CUDA_VISIBLE_DEVICES`` when that variable is set - confirm.
    """
    # Imported locally so the block does not rely on file-level imports.
    import os
    from unittest import mock

    assert isinstance(get_n_gpus(), int)

    # patch.dict restores os.environ on exit.
    with mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1,2"}):
        assert get_n_gpus() == 3