Exemplo n.º 1
0
def event_loop(scope="session"):
    """Yield a fresh asyncio event loop bracketed by ``ucp.reset()`` calls.

    The loop gets ``handle_exception`` installed as its exception handler.
    After the consumer is done, UCX is reset again, the loop is run for one
    more iteration and then closed.

    NOTE(review): ``scope`` is never read in the body; it looks like a
    leftover of a fixture decorator argument -- confirm.
    """
    loop = asyncio.new_event_loop()
    loop.set_exception_handler(handle_exception)
    ucp.reset()
    yield loop
    ucp.reset()
    # One extra pass through the loop lets already-scheduled callbacks run.
    loop.run_until_complete(asyncio.sleep(0))
    # The loop was created here, so it is closed here to release its
    # resources instead of leaving it open.
    loop.close()
Exemplo n.º 2
0
def test_get_ucx_version():
    """Check that ``ucp.get_ucx_version()`` returns a 3-tuple.

    Also verifies that asking for the version leaves ``ucp.core._ctx`` unset.
    """
    ucp.reset()
    ucx_version = ucp.get_ucx_version()
    assert isinstance(ucx_version, tuple)
    assert len(ucx_version) == 3
    # Querying the version must not have created a context.
    assert ucp.core._ctx is None
Exemplo n.º 3
0
def test_init_options():
    """Check that an option handed to ``ucp.init`` shows up in the config."""
    ucp.reset()
    requested = {"SEG_SIZE": "3M"}
    # Any environment specification is expected to be ignored here.
    ucp.init(requested)
    assert ucp.get_config()["SEG_SIZE"] == requested["SEG_SIZE"]
Exemplo n.º 4
0
def test_init_options():
    """Check that explicit options win over a ``UCX_SEG_SIZE`` env variable.

    The environment override is applied through ``patch.dict`` so that
    ``os.environ`` is restored when the test ends and the value cannot leak
    into tests that run afterwards.
    """
    from unittest.mock import patch

    ucp.reset()
    options = {"SEG_SIZE": "3M"}
    with patch.dict(os.environ, {"UCX_SEG_SIZE": "2M"}):  # Should be ignored
        ucp.init(options)
        config = ucp.get_config()
        assert config["SEG_SIZE"] == options["SEG_SIZE"]
Exemplo n.º 5
0
def test_init_options_and_env():
    """Check that the environment wins when ``env_takes_precedence=True``.

    ``UCX_SEG_SIZE`` is set to "4M" in the environment while the options
    dict asks for "3M".  Because the environment is requested to take
    precedence, the resulting config is compared against the environment
    value, not against the options value.

    The variable is set via ``patch.dict`` so ``os.environ`` is restored
    afterwards.
    """
    from unittest.mock import patch

    ucp.reset()
    options = {"SEG_SIZE": "3M"}  # Should be ignored
    with patch.dict(os.environ, {"UCX_SEG_SIZE": "4M"}):
        ucp.init(options, env_takes_precedence=True)
        config = ucp.get_config()
        assert config["SEG_SIZE"] == os.environ["UCX_SEG_SIZE"]
Exemplo n.º 6
0
def test_init_options_and_env():
    """Check that the environment overrides options without mutating them.

    NOTE(review): the test reads ``os.environ["UCX_SEG_SIZE"]`` but nothing
    in this function sets it -- presumably a decorator or fixture outside
    this view provides it; confirm.
    """
    ucp.reset()
    pristine = {"SEG_SIZE": "3M"}
    options = dict(pristine)  # Should be ignored
    ucp.init(options, env_takes_precedence=True)
    assert ucp.get_config()["SEG_SIZE"] == os.environ["UCX_SEG_SIZE"]
    # The dict handed to init must come back untouched.
    assert options == pristine
Exemplo n.º 7
0
def test_get_config():
    """Check that ``ucp.get_config()`` reports ``TLS == "all"`` by default.

    Runs inside ``patch.dict(os.environ)`` so the removal of ``UCX_TLS`` is
    undone when the block exits.
    """
    with patch.dict(os.environ):
        # Drop any value supplied by the environment so the default is used.
        os.environ.pop("UCX_TLS", None)
        ucp.reset()
        cfg = ucp.get_config()
        assert isinstance(cfg, dict)
        assert cfg["TLS"] == "all"
Exemplo n.º 8
0
def test_check_transport(transports):
    """Check which transports are active after initialising with ``TLS``.

    ``transports`` is a comma-separated string of transport names.  Every
    requested name must be the prefix of at least one active transport, and
    each of "posix"/"tcp" that was *not* requested must not be the prefix
    of any active transport.
    """
    transports_list = transports.split(",")
    inactive_transports = {"posix", "tcp"} - set(transports_list)

    ucp.reset()
    options = {"TLS": transports, "NET_DEVICES": "all"}
    ucp.init(options)

    active_transports = ucp.get_active_transports()
    for t in transports_list:
        assert any(at.startswith(t) for at in active_transports)
    for it in inactive_transports:
        # No active transport may match an unrequested name.  The previous
        # `any(not ...)` form passed as soon as a single active transport
        # differed, which made the check nearly vacuous.
        assert not any(at.startswith(it) for at in active_transports)
Exemplo n.º 9
0
async def test_ep_still_in_scope_error():
    """Check that ``ucp.reset()`` raises while an endpoint is still bound.

    A listener and a connected endpoint are created, the listener name is
    deleted, and ``ucp.reset()`` is expected to raise ``UCXError`` whose
    message matches "_ucp_endpoint".  After ``ep.abort()`` the reset is
    called again without expecting an error.
    """
    # ResetAfterN is defined outside this block.
    # NOTE(review): presumably it triggers a reset on its 2nd call -- confirm.
    reset = ResetAfterN(2)

    def server(ep):
        # Listener callback: abort the server-side endpoint, then tick `reset`.
        ep.abort()
        reset()

    lt = ucp.create_listener(server)
    ep = await ucp.create_endpoint(ucp.get_address(), lt.port)
    # Unbind the listener so that only `ep` remains bound in this scope.
    del lt
    with pytest.raises(ucp.exceptions.UCXError, match="_ucp_endpoint"):
        ucp.reset()
    ep.abort()
    ucp.reset()
Exemplo n.º 10
0
async def test_lt_still_in_scope_error():
    """Check that ``ucp.reset()`` raises while a listener is still bound.

    A listener and a connected endpoint are created, the endpoint name is
    deleted, and ``ucp.reset()`` is expected to raise ``UCXError`` whose
    message matches "ucp._libs.core._Listener".  After ``lt.close()`` the
    reset is called again without expecting an error.
    """
    # ResetAfterN is defined outside this block.
    # NOTE(review): presumably it triggers a reset on its 2nd call -- confirm.
    reset = ResetAfterN(2)

    def server(ep):
        # Listener callback: abort the server-side endpoint, then tick `reset`.
        ep.abort()
        reset()

    lt = ucp.create_listener(server)
    ep = await ucp.create_endpoint(ucp.get_address(), lt.port)
    # Unbind the client endpoint so that only `lt` remains bound in this scope.
    del ep
    with pytest.raises(ucp.exceptions.UCXError, match="ucp._libs.core._Listener"):
        ucp.reset()

    lt.close()
    ucp.reset()
Exemplo n.º 11
0
async def test_send_recv_bytes(size, blocking_progress_mode):
    """Send a ``bytearray`` of ``size`` bytes and expect it back unchanged.

    The server side is whatever ``make_echo_server`` (defined elsewhere)
    builds from the buffer allocator passed to it.
    """
    asyncio.get_event_loop().set_exception_handler(handle_exception)
    ucp.reset()
    ucp.init(blocking_progress_mode=blocking_progress_mode)

    def allocate(n):
        return bytearray(n)

    payload = bytearray(b"m" * size)
    payload_len = np.array([len(payload)], dtype=np.uint64)

    listener = ucp.create_listener(make_echo_server(allocate))
    client = await ucp.create_endpoint(ucp.get_address(), listener.port)
    # The length is sent first, then the payload itself.
    await client.send(payload_len)
    await client.send(payload)
    reply = bytearray(size)
    await client.recv(reply)
    assert reply == payload
Exemplo n.º 12
0
def initialize(
    create_cuda_context=True,
    enable_tcp_over_ucx=False,
    enable_infiniband=False,
    enable_nvlink=False,
    net_devices="",
):
    """Optionally create a CUDA context and configure UCX.

    Parameters
    ----------
    create_cuda_context : bool
        When true, ``numba.cuda.current_context()`` is called; a failure is
        logged and otherwise ignored.
    enable_tcp_over_ucx, enable_infiniband, enable_nvlink : bool
        If any of them is true, ``ucp`` is imported, reset and initialised
        with a ``TLS`` list built from these flags.  InfiniBand prepends
        "rc", NVLink appends "cuda_ipc".
    net_devices : str or None
        Passed as the ``NET_DEVICES`` option unless it is ``None`` or "".

    After initialisation every string-valued entry of ``ucp.get_config()``
    is exported to ``os.environ`` with a ``UCX_`` prefix.  Returns ``None``.
    """
    if create_cuda_context:
        try:
            numba.cuda.current_context()
        except Exception:
            logger.error("Unable to start CUDA Context", exc_info=True)

    if not (enable_tcp_over_ucx or enable_infiniband or enable_nvlink):
        # No UCX feature requested: nothing else to do.
        return

    try:
        import ucp
    except ImportError:
        logger.error(
            "UCX protocol requested but ucp module is not available",
            exc_info=True)
        return

    tls = "tcp,sockcm,cuda_copy"
    if enable_infiniband:
        tls = "rc," + tls
    if enable_nvlink:
        tls = tls + ",cuda_ipc"

    options = {"TLS": tls, "SOCKADDR_TLS_PRIORITY": "sockcm"}
    if net_devices is not None and net_devices != "":
        options["NET_DEVICES"] = net_devices

    ucp.reset()
    ucp.init(options=options)

    # Skip values that aren't actual environment variables (i.e., not strings)
    ucx_env = {
        "UCX_" + k: v for k, v in ucp.get_config().items() if isinstance(v, str)
    }

    # Set also UCX environment variables: required by Dask client. It may be
    # best to have the client ask the scheduler for the proper variables.
    os.environ.update(ucx_env)
Exemplo n.º 13
0
async def test_send_recv_numpy(size, dtype, blocking_progress_mode):
    """Send a NumPy ``arange`` of the given size/dtype and expect it back.

    The server side is whatever ``make_echo_server`` (defined elsewhere)
    builds from the buffer allocator passed to it.
    """
    asyncio.get_event_loop().set_exception_handler(handle_exception)
    ucp.reset()
    ucp.init(blocking_progress_mode=blocking_progress_mode)

    def allocate(n):
        return np.empty(n, dtype=np.uint8)

    payload = np.arange(size, dtype=dtype)
    payload_nbytes = np.array([payload.nbytes], dtype=np.uint64)

    listener = ucp.create_listener(make_echo_server(allocate))
    client = await ucp.create_endpoint(ucp.get_address(), listener.port)
    # The byte count is sent first, then the array itself.
    await client.send(payload_nbytes)
    await client.send(payload)
    reply = np.empty_like(payload)
    await client.recv(reply)
    np.testing.assert_array_equal(reply, payload)
Exemplo n.º 14
0
async def test_send_recv_numba(size, dtype, blocking_progress_mode):
    """Send a Numba device array and expect an equal array back.

    The test is skipped when ``numba.cuda`` cannot be imported.  The server
    side is whatever ``make_echo_server`` (defined elsewhere) builds from
    the buffer allocator passed to it.
    """
    asyncio.get_event_loop().set_exception_handler(handle_exception)
    ucp.reset()
    ucp.init(blocking_progress_mode=blocking_progress_mode)
    cuda = pytest.importorskip("numba.cuda")

    def allocate(n):
        return cuda.device_array((n, ), dtype=np.uint8)

    host_values = np.arange(size, dtype=dtype)
    device_msg = cuda.to_device(host_values)
    device_msg_nbytes = np.array([device_msg.nbytes], dtype=np.uint64)
    listener = ucp.create_listener(make_echo_server(allocate))
    client = await ucp.create_endpoint(ucp.get_address(), listener.port)
    # The byte count is sent first, then the device array itself.
    await client.send(device_msg_nbytes)
    await client.send(device_msg)
    reply = cuda.device_array_like(device_msg)
    await client.recv(reply)
    np.testing.assert_array_equal(np.array(reply), np.array(device_msg))
Exemplo n.º 15
0
async def test_ep_still_in_scope_error():
    """Check that ``ucp.reset()`` raises while an endpoint is still bound.

    A listener and a connected endpoint are created, the listener name is
    deleted, and ``ucp.reset()`` is expected to raise ``UCXError`` with the
    "not all Endpoints and/or Listeners are closed" message.  After
    ``ep.abort()`` the reset is called again without expecting an error.
    """
    # ResetAfterN is defined outside this block.
    # NOTE(review): presumably it triggers a reset on its 2nd call -- confirm.
    reset = ResetAfterN(2)

    def server(ep):
        # Listener callback: abort the server-side endpoint, then tick `reset`.
        ep.abort()
        reset()

    lt = ucp.create_listener(server)
    ep = await ucp.create_endpoint(ucp.get_address(), lt.port)
    # Unbind the listener so that only `ep` remains bound in this scope.
    del lt
    with pytest.raises(
        ucp.exceptions.UCXError,
        # `match` is a regular expression: the trailing "()" is an empty
        # group, so the pattern matches up to and including "closed".
        match="Trying to reset UCX but not all Endpoints and/or Listeners are closed()",
    ):
        ucp.reset()
    ep.abort()
    ucp.reset()
    async def run():
        # Nested coroutine; `error_type`, `q1` and `q2` come from an enclosing
        # scope that is not visible here.
        # NOTE(review): q1/q2 are presumably multiprocessing.Queue objects
        # shared with a client process -- confirm.
        address = bytearray(ucp.get_worker_address())

        if error_type == "unreachable":
            # Shutdown worker, then send its address to client process via
            # multiprocessing.Queue
            ucp.reset()
            q1.put(address)
        else:
            # Send worker address to client process via multiprocessing.Queue,
            # wait for client to connect, then shutdown worker.
            q1.put(address)

            # Blocks until the other side reports that its endpoint is ready.
            ep_ready = q2.get()
            assert ep_ready == "ready"

            ucp.reset()

            # Tell the other side that the worker is gone.
            q1.put("disconnected")
Exemplo n.º 17
0
def test_logging():
    """
    Test default logging configuration.

    Re-initialising UCX must emit something on the "ucx" logger when it is
    captured at INFO level and nothing when captured at ERROR level.
    """
    import logging

    ucx_logger = logging.getLogger("ucx")

    def reinitialize():
        ucp.reset()
        ucp.init({"SEG_SIZE": "3M"})

    # ucp.init will only print INFO LINES
    with captured_logger(ucx_logger, level=logging.INFO) as info_output:
        reinitialize()
    assert len(info_output.getvalue()) > 0

    with captured_logger(ucx_logger, level=logging.ERROR) as error_output:
        reinitialize()

    assert len(error_output.getvalue()) == 0
Exemplo n.º 18
0
async def test_send_recv_error(blocking_progress_mode):
    """Check that receiving into a wrongly sized buffer raises ``UCXError``.

    The server sends the 3-byte message b"Hey" while the client posts a
    100-byte receive buffer; the error message must report the mismatch.
    Afterwards the endpoint and listener are closed and unbound.
    """
    asyncio.get_event_loop().set_exception_handler(handle_exception)
    ucp.reset()
    ucp.init(blocking_progress_mode=blocking_progress_mode)

    async def say_hey_server(ep):
        await ep.send(bytearray(b"Hey"))

    listener = ucp.create_listener(say_hey_server)
    client = await ucp.create_endpoint(ucp.get_address(), listener.port)

    oversized_buffer = bytearray(100)
    expected_error = r"length mismatch: 3 \(got\) != 100 \(expected\)"
    with pytest.raises(ucp.exceptions.UCXError, match=expected_error):
        await client.recv(oversized_buffer)
    await client.close()
    listener.close()
    del client
    assert listener.closed() is True
    del listener
Exemplo n.º 19
0
def test_worker_address(blocking_progress_mode):
    """Check that ``ucp.get_worker_address()`` returns something after init.

    UCX is reset in a ``finally`` clause so the teardown also happens when
    the assertion fails.
    """
    ucp.init(blocking_progress_mode=blocking_progress_mode)
    try:
        addr = ucp.get_worker_address()
        assert addr is not None
    finally:
        ucp.reset()
Exemplo n.º 20
0
def test_get_config():
    """Check that ``ucp.get_config()`` is a dict with ``MEMTYPE_CACHE == "n"``."""
    ucp.reset()
    cfg = ucp.get_config()
    assert isinstance(cfg, dict)
    assert cfg["MEMTYPE_CACHE"] == "n"
Exemplo n.º 21
0
 def __call__(self):
     """Count one invocation; call ``ucp.reset()`` when the count hits ``self.n``."""
     self.count += 1
     if self.count != self.n:
         return
     ucp.reset()
Exemplo n.º 22
0
def test_init_unknown_option():
    """Check that an unrecognised option name raises ``UCXConfigError``."""
    ucp.reset()
    bad_options = {"UNKNOWN_OPTION": "3M"}
    with pytest.raises(ucp.exceptions.UCXConfigError):
        ucp.init(bad_options)
Exemplo n.º 23
0
def test_init_invalid_option():
    """Check that an unparsable ``SEG_SIZE`` value raises ``UCXConfigError``."""
    ucp.reset()
    bad_options = {"SEG_SIZE": "invalid-size"}
    with pytest.raises(ucp.exceptions.UCXConfigError):
        ucp.init(bad_options)
Exemplo n.º 24
0
async def test_send_recv_cudf(event_loop, g):
    """Send a cuDF object between two endpoints and check it round-trips.

    ``g`` is called with the ``cudf`` module and returns the object to send.
    ``event_loop`` is not referenced in the body.  The object is serialized
    into a pickled header plus frames, sent over a client endpoint, read
    back on the listener side, deserialized and compared with the original.
    The test is skipped when ``cudf`` cannot be imported.
    """
    # requires numba=0.45 (.nbytes)
    # or fix nbytes in distributed
    cudf = pytest.importorskip("cudf")

    class UCX:
        """Frame-based reader/writer on top of a single endpoint."""

        def __init__(self, ep):
            self.ep = ep
            # asyncio.Queue lost its ``loop`` argument in Python 3.10, so it
            # is no longer passed (doing so raises TypeError there).
            self.queue = asyncio.Queue()

        async def write(self, cdf):
            """Serialize ``cdf`` and send metadata followed by every frame."""
            header, _frames = cdf.serialize()
            frames = [pickle.dumps(header)] + _frames

            # Send meta data
            await self.ep.send(np.array([len(frames)], dtype=np.uint64))
            await self.ep.send(
                np.array(
                    [hasattr(f, "__cuda_array_interface__") for f in frames],
                    # np.bool was removed in NumPy 1.24; np.bool_ works on
                    # every NumPy version.
                    dtype=np.bool_,
                )
            )
            await self.ep.send(np.array([nbytes(f) for f in frames], dtype=np.uint64))
            # Send frames
            # NOTE(review): empty frames are sent here as zero-length arrays
            # while read() posts no receive for them -- confirm that this
            # asymmetry is intended.
            for frame in frames:
                if nbytes(frame) > 0:
                    await self.ep.send(frame)
                else:
                    await self.ep.send(np.empty(0, dtype=np.uint8))

        async def read(self):
            """Receive metadata, then every non-empty frame; return the frames."""
            try:
                # Recv meta data
                nframes = np.empty(1, dtype=np.uint64)
                await self.ep.recv(nframes)
                is_cudas = np.empty(nframes[0], dtype=np.bool_)
                await self.ep.recv(is_cudas)
                sizes = np.empty(nframes[0], dtype=np.uint64)
                await self.ep.recv(sizes)
            except (ucp.exceptions.UCXCanceled, ucp.exceptions.UCXCloseError) as e:
                msg = "SOMETHING TERRIBLE HAS HAPPENED IN THE TEST"
                # ``e`` is an instance, so calling it would raise TypeError;
                # build a new exception of the same class instead.
                raise type(e)(msg) from e
            else:
                # Recv frames
                frames = []
                for is_cuda, size in zip(is_cudas.tolist(), sizes.tolist()):
                    if size > 0:
                        if is_cuda:
                            frame = cuda.device_array((size,), dtype=np.uint8)
                        else:
                            frame = np.empty(size, dtype=np.uint8)
                        await self.ep.recv(frame)
                        frames.append(frame)
                    else:
                        if is_cuda:
                            frames.append(cuda.device_array((0,), dtype=np.uint8))
                        else:
                            frames.append(b"")
                return frames

    class UCXListener:
        """Holds the listener and the ``UCX`` wrapper of the accepted endpoint."""

        def __init__(self):
            self.comm = None

        def start(self):
            async def serve_forever(ep):
                ucx = UCX(ep)
                self.comm = ucx

            self.ucp_server = ucp.create_listener(serve_forever, port=14339)

    uu = UCXListener()
    uu.start()
    uu.address = ucp.get_address()
    uu.client = await ucp.create_endpoint(uu.address, uu.ucp_server.port)
    ucx = UCX(uu.client)
    # NOTE(review): presumably gives the listener callback time to set
    # uu.comm before it is used below -- confirm.
    await asyncio.sleep(0.2)
    msg = g(cudf)
    frames, _ = await asyncio.gather(uu.comm.read(), ucx.write(msg))
    ucx_header = pickle.loads(frames[0])
    cudf_buffer = frames[1:]
    typ = type(msg)
    res = typ.deserialize(ucx_header, cudf_buffer)

    from dask.dataframe.utils import assert_eq

    assert_eq(res, msg)
    await uu.comm.ep.close()
    await uu.client.close()

    assert uu.client.closed()
    assert uu.comm.ep.closed()
    del uu.ucp_server
    ucp.reset()
Exemplo n.º 25
0
def reset():
    """Generator that calls ``ucp.reset()`` before and after its single yield.

    NOTE(review): presumably used as a pytest fixture whose decorator is
    not visible here -- confirm.
    """
    ucp.reset()
    yield
    ucp.reset()
Exemplo n.º 26
0
def test_set_env():
    """Check that ``UCX_SEG_SIZE`` from the environment shows up in the config.

    The variable is set through ``patch.dict`` so that ``os.environ`` is
    restored when the test ends and the value cannot leak into tests that
    run afterwards.
    """
    from unittest.mock import patch

    ucp.reset()
    with patch.dict(os.environ, {"UCX_SEG_SIZE": "2M"}):
        config = ucp.get_config()
        assert config["SEG_SIZE"] == os.environ["UCX_SEG_SIZE"]
Exemplo n.º 27
0
def server(env, port, func):
    """Run a UCX listener that sends a CUDA object ``ITERATIONS`` times.

    Parameters
    ----------
    env : mapping
        Merged into ``os.environ`` before ``ucp.init()``.
    port : int
        Port handed to ``ucp.create_listener``.
    func : bytes
        cloudpickle payload; loading it yields a zero-argument callable that
        creates the object to send.

    The function blocks until the listener has been closed, which happens
    after the peer has sent the b"shutdown listener" message.
    """
    # create listener receiver
    # write cudf object
    # confirm message is sent correctly
    ucp.reset()
    os.environ.update(env)
    ucp.init()

    async def f(listener_port):
        # coroutine shows up when the client asks
        # to connect
        async def write(ep):
            import cupy

            cupy.cuda.set_allocator(None)

            print("CREATING CUDA OBJECT IN SERVER...")
            cuda_obj_generator = cloudpickle.loads(func)
            cuda_obj = cuda_obj_generator()
            msg = {"data": to_serialize(cuda_obj)}
            frames = await to_frames(msg,
                                     serializers=("cuda", "dask", "pickle"))
            for _ in range(ITERATIONS):
                # Send meta data
                await ep.send(np.array([len(frames)], dtype=np.uint64))
                await ep.send(
                    np.array(
                        [
                            hasattr(f, "__cuda_array_interface__")
                            for f in frames
                        ],
                        # np.bool was removed in NumPy 1.24; np.bool_ works
                        # on every NumPy version.
                        dtype=np.bool_,
                    ))
                await ep.send(
                    np.array([nbytes(f) for f in frames], dtype=np.uint64))
                # Send frames (empty frames are skipped)
                for frame in frames:
                    if nbytes(frame) > 0:
                        await ep.send(frame)

            print("CONFIRM RECEIPT")
            close_msg = b"shutdown listener"
            msg_size = np.empty(1, dtype=np.uint64)
            await ep.recv(msg_size)

            msg = np.empty(msg_size[0], dtype=np.uint8)
            await ep.recv(msg)
            recv_msg = msg.tobytes()
            assert recv_msg == close_msg
            print("Shutting Down Server...")
            await ep.close()
            # `lf` is bound below, before any connection can invoke write().
            lf.close()

        lf = ucp.create_listener(write, port=listener_port)
        try:
            # Poll until write() has closed the listener.
            while not lf.closed():
                await asyncio.sleep(0.1)
        except ucp.UCXCloseError:
            pass

    loop = asyncio.get_event_loop()
    loop.run_until_complete(f(port))
Exemplo n.º 28
0
async def test_flush(blocking_progress_mode):
    """Check that ``ucp.flush()`` can be awaited after initialisation.

    UCX is reset in a ``finally`` clause so the teardown also happens when
    the flush raises.
    """
    ucp.init(blocking_progress_mode=blocking_progress_mode)
    try:
        await ucp.flush()
    finally:
        ucp.reset()
Exemplo n.º 29
0
def test_fence(blocking_progress_mode):
    """Check that ``ucp.fence()`` can be called after initialisation.

    UCX is reset in a ``finally`` clause so the teardown also happens when
    the fence raises.
    """
    ucp.init(blocking_progress_mode=blocking_progress_mode)
    try:
        # this should always succeed
        ucp.fence()
    finally:
        ucp.reset()
Exemplo n.º 30
0
def client(env, port, func):
    """Receive a CUDA object ``ITERATIONS`` times and verify it.

    Parameters
    ----------
    env : mapping
        Merged into ``os.environ`` before ``ucp.init()``.
    port : int
        Passed to ``get_ep`` to obtain the endpoint.
    func : bytes
        cloudpickle payload; loading it yields a zero-argument callable that
        creates the reference object to compare against.

    After the transfer the b"shutdown listener" message is sent to the
    peer.  For payloads above 90000 bytes the NVLink RX counter must have
    grown.  The received object is finally compared with a locally
    generated one.
    """
    # wait for server to come up
    # receive cudf object
    # deserialize
    # assert deserialized msg is cdf
    # send receipt

    ucp.reset()
    os.environ.update(env)
    ucp.init()

    # must create context before importing
    # cudf/cupy/etc
    before_rx, before_tx = total_nvlink_transfer()

    async def read():
        await asyncio.sleep(1)
        ep = await get_ep("client", port)
        msg = None
        import cupy

        cupy.cuda.set_allocator(None)
        for _ in range(ITERATIONS):
            # `msg` is overwritten each iteration, so only the most recent
            # object stays referenced.
            try:
                # Recv meta data
                nframes = np.empty(1, dtype=np.uint64)
                await ep.recv(nframes)
                # np.bool was removed in NumPy 1.24; np.bool_ works on every
                # NumPy version.
                is_cudas = np.empty(nframes[0], dtype=np.bool_)
                await ep.recv(is_cudas)
                sizes = np.empty(nframes[0], dtype=np.uint64)
                await ep.recv(sizes)
            except (ucp.exceptions.UCXCanceled,
                    ucp.exceptions.UCXCloseError) as e:
                error_text = "SOMETHING TERRIBLE HAS HAPPENED IN THE TEST"
                # ``e`` is an instance, so calling it would raise TypeError;
                # build a new exception of the same class instead.
                raise type(e)(error_text) from e
            else:
                # Recv frames
                frames = []
                for is_cuda, size in zip(is_cudas.tolist(), sizes.tolist()):
                    if size > 0:
                        if is_cuda:
                            frame = cuda_array(size)
                        else:
                            frame = np.empty(size, dtype=np.uint8)
                        await ep.recv(frame)
                        frames.append(frame)
                    else:
                        if is_cuda:
                            frames.append(cuda_array(size))
                        else:
                            frames.append(b"")

            msg = await from_frames(frames)

        close_msg = b"shutdown listener"
        close_msg_size = np.array([len(close_msg)], dtype=np.uint64)

        await ep.send(close_msg_size)
        await ep.send(close_msg)

        print("Shutting Down Client...")
        return msg["data"]

    rx_cuda_obj = asyncio.get_event_loop().run_until_complete(read())
    # Use the received object in an operation; the result is discarded.
    rx_cuda_obj + rx_cuda_obj
    num_bytes = nbytes(rx_cuda_obj)
    print(f"TOTAL DATA RECEIVED: {num_bytes}")
    # nvlink only measures in KBs
    if num_bytes > 90000:
        rx, tx = total_nvlink_transfer()
        msg = f"RX BEFORE SEND: {before_rx} -- RX AFTER SEND: {rx} \
               -- TOTAL DATA: {num_bytes}"

        print(msg)
        assert rx > before_rx

    cuda_obj_generator = cloudpickle.loads(func)
    pure_cuda_obj = cuda_obj_generator()

    # The only visible `import cupy` is local to read(), so the name is
    # imported here as well; by now UCX has been initialised.
    import cupy

    # NOTE(review): `cudf` is not imported anywhere in this function --
    # presumably a module-level import exists; confirm.
    if isinstance(rx_cuda_obj, cupy.ndarray):
        cupy.testing.assert_allclose(rx_cuda_obj, pure_cuda_obj)
    else:
        cudf.tests.utils.assert_eq(rx_cuda_obj, pure_cuda_obj)