Пример #1
0
async def talk_to_server(ip, port):
    """Run the windowed send/recv exchange against the endpoint at ip:port.

    For every exponent below ``max_msg_log`` the message length is
    ``2 ** exponent``. Each iteration posts ``window_size`` send/recv pairs
    and awaits the whole window with ``asyncio.wait``. A warm-up phase of
    10% of ``max_iters`` precedes the timed phase of ``max_iters``
    iterations. The buffers are allocated in CUDA or host memory depending
    on ``args.mem_type`` and released before the endpoint is destroyed.
    """
    global args

    log_limit = max_msg_log
    n_iters = max_iters
    buf_bytes = 1 << log_limit

    endpoint = ucp.get_endpoint(ip, port)

    tx_region = ucp.buffer_region()
    rx_region = ucp.buffer_region()

    if args.mem_type == 'cuda':
        tx_region.alloc_cuda(buf_bytes)
        rx_region.alloc_cuda(buf_bytes)
    else:
        tx_region.alloc_host(buf_bytes)
        rx_region.alloc_host(buf_bytes)

    async def exchange_window(size):
        # Post one full window of send/recv pairs, then wait for all of them.
        in_flight = []
        for _ in range(window_size):
            tx_msg = ucp.ucp_msg(tx_region)
            rx_msg = ucp.ucp_msg(rx_region)
            in_flight.append(endpoint.send(tx_msg, size))
            in_flight.append(endpoint.recv(rx_msg, size))
        await asyncio.wait(in_flight)

    for exponent in range(log_limit):
        size = 1 << exponent

        n_warmup = int(0.1 * n_iters)
        for _ in range(n_warmup):
            await exchange_window(size)

        began = time.time()
        for _ in range(n_iters):
            await exchange_window(size)
        elapsed = time.time() - began
        # Half the elapsed time per iteration, in microseconds. The value is
        # computed here but never printed or returned by this function.
        lat = ((elapsed / 2) / n_iters) * 1000000

    if args.mem_type == 'cuda':
        tx_region.free_cuda()
        rx_region.free_cuda()
    else:
        tx_region.free_host()
        rx_region.free_host()

    ucp.destroy_ep(endpoint)
Пример #2
0
def talk_to_server(ip, port):
    """Run the blocking recv-then-send exchange against ip:port.

    For every exponent below ``max_msg_log`` the message length is
    ``2 ** exponent``. Each iteration posts a receive, waits for it with
    ``.result()``, then posts a send and waits for that. The warm-up phase
    (10% of ``max_iters``) builds its message objects on the fly; the timed
    phase uses message objects created up front. Buffers live in CUDA or
    host memory depending on ``args.mem_type``.
    """
    global args

    log_limit = max_msg_log
    n_iters = max_iters
    buf_bytes = 1 << log_limit

    endpoint = ucp.get_endpoint(ip, port)

    tx_region = ucp.buffer_region()
    rx_region = ucp.buffer_region()

    if args.mem_type == 'cuda':
        tx_region.alloc_cuda(buf_bytes)
        rx_region.alloc_cuda(buf_bytes)
    else:
        tx_region.alloc_host(buf_bytes)
        rx_region.alloc_host(buf_bytes)

    for exponent in range(log_limit):
        size = 1 << exponent

        n_warmup = int(0.1 * n_iters)
        for _ in range(n_warmup):
            tx_msg = ucp.ucp_msg(tx_region)
            rx_msg = ucp.ucp_msg(rx_region)
            endpoint.recv(rx_msg, size).result()
            endpoint.send(tx_msg, size).result()

        # Build every (send, recv) message pair before the clock starts so
        # that their construction is excluded from the timed loop.
        msg_pairs = [
            (ucp.ucp_msg(tx_region), ucp.ucp_msg(rx_region))
            for _ in range(n_iters)
        ]

        began = time.time()
        for tx_msg, rx_msg in msg_pairs:
            endpoint.recv(rx_msg, size).result()
            endpoint.send(tx_msg, size).result()
        elapsed = time.time() - began
        # Half the elapsed time per iteration, in microseconds. The value is
        # computed here but never printed or returned by this function.
        lat = ((elapsed / 2) / n_iters) * 1000000

    if args.mem_type == 'cuda':
        tx_region.free_cuda()
        rx_region.free_cuda()
    else:
        tx_region.free_host()
        rx_region.free_host()

    ucp.destroy_ep(endpoint)
Пример #3
0
async def talk_to_server(ip, port):
    """Exchange one ``1 << max_msg_log`` byte host message with ip:port.

    Receives first, then sends. With ``args.blind_recv`` set, no receive
    buffer is allocated and ``ep.recv_future()`` is awaited instead of
    ``ep.recv``. With ``args.check_data`` set, the buffers are filled with 1
    via ``set_mem`` beforehand and the received object is checked against 0
    via ``check_mem``, with the error count printed.
    """
    global max_msg_log
    global args

    blind = args.blind_recv
    validate = args.check_data

    banner_parts = ["in talk_to_server"]
    if blind:
        banner_parts.append(" + blind recv")
    if validate:
        banner_parts.append(" + data validity check")
    print("".join(banner_parts))

    nbytes = 1 << max_msg_log

    ep = ucp.get_endpoint(ip, port)

    tx_region = ucp.buffer_region()
    tx_region.alloc_host(nbytes)
    tx_msg = ucp.ucp_msg(tx_region)

    # The receive side is only materialised when the receive is not blind.
    rx_region = None
    rx_msg = None
    if not blind:
        rx_region = ucp.buffer_region()
        rx_region.alloc_host(nbytes)
        rx_msg = ucp.ucp_msg(rx_region)

    if validate:
        tx_msg.set_mem(1, nbytes)
        if not blind:
            rx_msg.set_mem(1, nbytes)

    if blind:
        received = await ep.recv_future()
    else:
        received = await ep.recv(rx_msg, nbytes)

    sent = await ep.send(tx_msg, nbytes)

    if validate:
        errs = received.check_mem(0, nbytes)
        print("num errs: " + str(errs))

    tx_region.free_host()
    if not blind:
        rx_region.free_host()

    ucp.destroy_ep(ep)
    print("done with talk_to_server")
Пример #4
0
async def talk_to_server(ip, port):
    """Exchange one ``1 << max_msg_log`` byte CUDA message with ip:port.

    Receives first, then sends. ``args.use_fast`` selects the
    ``recv_fast``/``send_fast`` endpoint methods over ``recv``/``send``.
    With ``args.blind_recv`` set, no receive buffer is allocated and
    ``ep.recv_future()`` is awaited instead.
    """
    global args
    global max_msg_log

    nbytes = 1 << max_msg_log
    blind = args.blind_recv
    fast = args.use_fast

    banner_parts = ["in talk_to_server"]
    if blind:
        banner_parts.append(" + blind recv")
    if fast:
        banner_parts.append(" + using fast ops")
    print("".join(banner_parts))

    ep = ucp.get_endpoint(ip, port)
    print("got endpoint")

    tx_region = ucp.buffer_region()
    tx_region.alloc_cuda(nbytes)

    # The receive side is only materialised when the receive is not blind.
    rx_region = None
    rx_msg = None
    if not blind:
        rx_region = ucp.buffer_region()
        rx_region.alloc_cuda(nbytes)
        rx_msg = ucp.ucp_msg(rx_region)

    tx_msg = ucp.ucp_msg(tx_region)

    if blind:
        received = await ep.recv_future()
    else:
        receive = ep.recv_fast if fast else ep.recv
        received = await receive(rx_msg, nbytes)

    transmit = ep.send_fast if fast else ep.send
    sent = await transmit(tx_msg, nbytes)

    tx_region.free_cuda()
    if not blind:
        rx_region.free_cuda()
    ucp.destroy_ep(ep)

    print("passed talk_to_server")
Пример #5
0
async def talk_to_client(client_ep):
    """Measure and print send/recv latency over ``client_ep`` using fast ops.

    For every exponent below ``max_msg_log`` the message length is
    ``2 ** exponent``. After a warm-up of 10% of ``max_iters`` iterations,
    ``max_iters`` iterations of ``send_fast`` followed by ``recv_fast`` are
    timed. The printed latency is half the elapsed time per iteration, in
    microseconds. Buffers live in CUDA or host memory depending on
    ``args.mem_type``; afterwards the endpoint is destroyed and the server
    stopped.

    Args:
        client_ep: endpoint object providing ``send_fast`` and ``recv_fast``.
    """
    msg_log = max_msg_log
    iters = max_iters
    buf_bytes = 1 << msg_log
    use_cuda = args.mem_type == 'cuda'

    send_buffer_region = ucp.buffer_region()
    recv_buffer_region = ucp.buffer_region()

    if use_cuda:
        send_buffer_region.alloc_cuda(buf_bytes)
        recv_buffer_region.alloc_cuda(buf_bytes)
    else:
        send_buffer_region.alloc_host(buf_bytes)
        recv_buffer_region.alloc_host(buf_bytes)

    send_msg = ucp.ucp_msg(send_buffer_region)
    recv_msg = ucp.ucp_msg(recv_buffer_region)

    print("{}\t\t{}".format("Size (bytes)", "Latency (us)"))

    warmup_iters = int(0.1 * iters)

    for i in range(msg_log):
        msg_len = 2**i

        for _ in range(warmup_iters):
            await client_ep.send_fast(send_msg, msg_len)
            await client_ep.recv_fast(recv_msg, msg_len)

        # perf_counter is monotonic and high resolution; time.time() can jump
        # when the wall clock is adjusted, which would corrupt the interval.
        start = time.perf_counter()
        for _ in range(iters):
            await client_ep.send_fast(send_msg, msg_len)
            await client_ep.recv_fast(recv_msg, msg_len)
        elapsed = time.perf_counter() - start

        lat = ((elapsed / 2) / iters) * 1000000
        print("{}\t\t{}".format(msg_len, lat))

    if use_cuda:
        send_buffer_region.free_cuda()
        recv_buffer_region.free_cuda()
    else:
        send_buffer_region.free_host()
        recv_buffer_region.free_host()

    ucp.destroy_ep(client_ep)
    ucp.stop_server()
Пример #6
0
def talk_to_client(client_ep):
    """Measure and print one-way send bandwidth over ``client_ep``.

    For every exponent below ``max_msg_log`` the message length is
    ``2 ** exponent``. Each iteration posts ``window_size`` sends and polls
    their futures until all report done. After a warm-up of 10% of
    ``max_iters`` iterations, ``max_iters`` iterations are timed and the
    bandwidth ``iters * window_size * msg_len / elapsed`` is printed in GB/s.
    Afterwards the endpoint is destroyed, the module-level ``cb_not_done``
    flag is set to ``False`` and the server is stopped.

    Args:
        client_ep: endpoint object whose ``send`` returns an object with a
            ``done()`` method.
    """
    global cb_not_done

    msg_log = max_msg_log
    iters = max_iters
    use_cuda = args.mem_type == 'cuda'

    send_buffer_region = ucp.buffer_region()

    if use_cuda:
        send_buffer_region.alloc_cuda(1 << msg_log)
    else:
        send_buffer_region.alloc_host(1 << msg_log)

    def send_window(msg_len):
        # Post one window of sends, then busy-poll until every one is done.
        pending = []
        for _ in range(window_size):
            send_msg = ucp.ucp_msg(send_buffer_region)
            pending.append(client_ep.send(send_msg, msg_len))
        while pending:
            # Rebuild the list instead of calling remove() while iterating
            # it: removing during iteration skips the element that follows
            # each removed one.
            pending = [ft for ft in pending if not ft.done()]

    print("{}\t\t{}".format("Size (bytes)", "Uni-Bandwidth (GB/s)"))

    warmup_iters = int(0.1 * iters)

    for i in range(msg_log):
        msg_len = 2**i

        for _ in range(warmup_iters):
            send_window(msg_len)

        # perf_counter is monotonic and high resolution; time.time() can jump
        # when the wall clock is adjusted, which would corrupt the interval.
        start = time.perf_counter()
        for _ in range(iters):
            send_window(msg_len)
        elapsed = time.perf_counter() - start

        bw = (iters * window_size * msg_len) / elapsed
        bw = bw / 1e9  # GB/s
        print("{}\t\t{}".format(msg_len, bw))

    if use_cuda:
        send_buffer_region.free_cuda()
    else:
        send_buffer_region.free_host()

    ucp.destroy_ep(client_ep)
    cb_not_done = False
    ucp.stop_server()
Пример #7
0
def test_cupy(dtype):
    """A region populated from a CuPy array converts back to an equal array.

    Skipped when CuPy cannot be imported.
    """
    cupy = pytest.importorskip('cupy')
    expected = cupy.ones(10, dtype)

    region = ucp.buffer_region()
    region.populate_cuda_ptr(expected)

    round_tripped = cupy.asarray(region)
    cupy.testing.assert_array_equal(round_tripped, expected)
Пример #8
0
def test_set_read():
    """A region populated from a 2-byte memoryview reads back the same bytes.

    Also checks the region's own ``is_cuda`` and ``shape`` attributes.
    """
    payload = memoryview(b'hi')

    region = ucp.buffer_region()
    region.populate_ptr(payload)

    view = memoryview(region)
    assert view == payload
    assert view.tobytes() == payload.tobytes()

    # Attributes exposed by the buffer region itself.
    assert region.is_cuda == 0
    assert region.shape[0] == 2
Пример #9
0
def test_numpy(dtype, data):
    """A region populated from a NumPy array converts back to an equal array.

    Skipped when NumPy cannot be imported.

    Args:
        dtype: dtype used to build the ten-element array of ones.
        data: when truthy the region is populated from ``arr.data`` (the
            array's memoryview); otherwise from the array object itself.
    """
    np = pytest.importorskip("numpy")
    arr = np.ones(10, dtype)

    buffer_region = ucp.buffer_region()

    # Both branches previously passed ``arr.data``, so ``data`` had no effect
    # and only one input form was ever exercised.
    # NOTE(review): assumes populate_ptr accepts any buffer-protocol object,
    # not only a memoryview -- confirm against the ucp implementation.
    source = arr.data if data else arr
    buffer_region.populate_ptr(source)

    result = np.asarray(buffer_region)
    np.testing.assert_array_equal(result, arr)
Пример #10
0
async def talk_to_client(client_ep):
    """Measure and print send bandwidth over ``client_ep`` using asyncio.

    For every exponent below ``max_msg_log`` the message length is
    ``2 ** exponent``. Each iteration posts ``window_size`` sends and awaits
    the whole window with ``asyncio.wait``. After a warm-up of 10% of
    ``max_iters`` iterations, ``max_iters`` iterations are timed and
    ``iters * window_size * msg_len / elapsed`` is printed in GB/s.
    Afterwards the endpoint is destroyed and the server stopped.

    Args:
        client_ep: endpoint object whose ``send`` returns something
            ``asyncio.wait`` can wait on.
    """
    msg_log = max_msg_log
    iters = max_iters
    use_cuda = args.mem_type == 'cuda'

    send_buffer_region = ucp.buffer_region()

    if use_cuda:
        send_buffer_region.alloc_cuda(1 << msg_log)
    else:
        send_buffer_region.alloc_host(1 << msg_log)

    async def send_window(msg_len):
        # Post one window of sends, then wait for all of them to complete.
        pending = []
        for _ in range(window_size):
            send_msg = ucp.ucp_msg(send_buffer_region)
            pending.append(client_ep.send(send_msg, msg_len))
        await asyncio.wait(pending)

    print("{}\t\t{}".format("Size (bytes)", "Bi-Bandwidth (GB/s)"))

    warmup_iters = int(0.1 * iters)

    for i in range(msg_log):
        msg_len = 2**i

        for _ in range(warmup_iters):
            await send_window(msg_len)

        # perf_counter is monotonic and high resolution; time.time() can jump
        # when the wall clock is adjusted, which would corrupt the interval.
        start = time.perf_counter()
        for _ in range(iters):
            await send_window(msg_len)
        elapsed = time.perf_counter() - start

        bw = (iters * window_size * msg_len) / elapsed
        bw = bw / 1e9  # GB/s
        print("{}\t\t{}".format(msg_len, bw))

    if use_cuda:
        send_buffer_region.free_cuda()
    else:
        send_buffer_region.free_host()

    ucp.destroy_ep(client_ep)
    ucp.stop_server()
Пример #11
0
def talk_to_server(ip, port):
    """Post windows of receives against the endpoint at ip:port.

    For every exponent below ``max_msg_log`` the message length is
    ``2 ** exponent``. Each iteration posts ``window_size`` receives and
    polls their futures until all report done. This is repeated for a
    warm-up of 10% of ``max_iters`` iterations and then for ``max_iters``
    iterations. Nothing is timed or printed here. The receive buffer lives
    in CUDA or host memory depending on ``args.mem_type``.
    """
    msg_log = max_msg_log
    iters = max_iters
    use_cuda = args.mem_type == 'cuda'

    server_ep = ucp.get_endpoint(ip, port)

    recv_buffer_region = ucp.buffer_region()

    if use_cuda:
        recv_buffer_region.alloc_cuda(1 << msg_log)
    else:
        recv_buffer_region.alloc_host(1 << msg_log)

    def recv_window(msg_len):
        # Post one window of receives, then busy-poll until every one is done.
        pending = []
        for _ in range(window_size):
            recv_msg = ucp.ucp_msg(recv_buffer_region)
            pending.append(server_ep.recv(recv_msg, msg_len))
        while pending:
            # Rebuild the list instead of calling remove() while iterating
            # it: removing during iteration skips the element that follows
            # each removed one.
            pending = [ft for ft in pending if not ft.done()]

    warmup_iters = int(0.1 * iters)

    for i in range(msg_log):
        msg_len = 2**i

        # The warm-up and main phases run the same window; the total is kept
        # equal to the original two loops.
        for _ in range(warmup_iters + iters):
            recv_window(msg_len)

    if use_cuda:
        recv_buffer_region.free_cuda()
    else:
        recv_buffer_region.free_host()

    ucp.destroy_ep(server_ep)
Пример #12
0
async def talk_to_server(ip, port):
    """Run the recv_future-then-send exchange against the endpoint at ip:port.

    For every exponent below ``max_msg_log`` the message length is
    ``2 ** exponent``. Each iteration awaits ``recv_future()`` and then
    awaits a send of that length from a single reusable message. A warm-up
    of 10% of ``max_iters`` iterations precedes the timed ``max_iters``
    iterations. The send buffer lives in CUDA or host memory depending on
    ``args.mem_type``.
    """
    global args

    log_limit = max_msg_log
    n_iters = max_iters
    buf_bytes = 1 << log_limit

    endpoint = ucp.get_endpoint(ip, port)

    tx_region = ucp.buffer_region()

    if args.mem_type == 'cuda':
        tx_region.alloc_cuda(buf_bytes)
    else:
        tx_region.alloc_host(buf_bytes)

    tx_msg = ucp.ucp_msg(tx_region)

    async def round_trip(size):
        # Wait for an incoming message, then answer with one of `size` bytes.
        incoming = endpoint.recv_future()
        await incoming
        await endpoint.send(tx_msg, size)

    for exponent in range(log_limit):
        size = 1 << exponent

        n_warmup = int(0.1 * n_iters)
        for _ in range(n_warmup):
            await round_trip(size)

        began = time.time()
        for _ in range(n_iters):
            await round_trip(size)
        elapsed = time.time() - began
        # Half the elapsed time per iteration, in microseconds. The value is
        # computed here but never printed or returned by this function.
        lat = ((elapsed / 2) / n_iters) * 1000000

    if args.mem_type == 'cuda':
        tx_region.free_cuda()
    else:
        tx_region.free_host()

    ucp.destroy_ep(endpoint)
Пример #13
0
def test_alloc_cuda_raises():
    """``alloc_cuda`` on a fresh region raises ``ValueError`` matching ``msg``."""
    region = ucp.buffer_region()
    expect_error = pytest.raises(ValueError, match=msg)
    with expect_error:
        region.alloc_cuda(10)
Пример #14
0
def test_free_cuda_raises():
    """``free_cuda`` on a fresh region raises ``ValueError`` matching ``msg``."""
    region = ucp.buffer_region()
    expect_error = pytest.raises(ValueError, match=msg)
    with expect_error:
        region.free_cuda()
Пример #15
0
def talk_to_client(client_ep):
    """Measure and print blocking send/recv latency over ``client_ep``.

    For every exponent below ``max_msg_log`` the message length is
    ``2 ** exponent``. After a warm-up of 10% of ``max_iters`` iterations,
    ``max_iters`` iterations of send-wait-recv-wait are timed. Three values
    are printed per size, each as half the accumulated time per iteration in
    microseconds: the overall latency, the time spent issuing ``send``/
    ``recv`` ("Issue") and the time spent in ``.result()`` ("Progress").
    Afterwards the endpoint is destroyed and the module-level
    ``cb_not_done`` flag is set to ``False``.

    Args:
        client_ep: endpoint object whose ``send``/``recv`` return an object
            with a ``result()`` method.
    """
    global cb_not_done

    msg_log = max_msg_log
    iters = max_iters
    buf_bytes = 1 << msg_log
    use_cuda = args.mem_type == 'cuda'

    send_buffer_region = ucp.buffer_region()
    recv_buffer_region = ucp.buffer_region()

    if use_cuda:
        send_buffer_region.alloc_cuda(buf_bytes)
        recv_buffer_region.alloc_cuda(buf_bytes)
    else:
        send_buffer_region.alloc_host(buf_bytes)
        recv_buffer_region.alloc_host(buf_bytes)

    print("{}\t\t{}\t\t{}\t\t{}".format("Size (bytes)", "Latency (us)",
                                        "Issue (us)", "Progress (us)"))

    warmup_iters = int(0.1 * iters)
    # perf_counter is monotonic and high resolution; time.time() can jump
    # when the wall clock is adjusted, which would corrupt the intervals.
    clock = time.perf_counter

    for i in range(msg_log):
        msg_len = 2 ** i

        for _ in range(warmup_iters):
            send_msg = ucp.ucp_msg(send_buffer_region)
            recv_msg = ucp.ucp_msg(recv_buffer_region)
            send_req = client_ep.send(send_msg, msg_len)
            recv_req = client_ep.recv(recv_msg, msg_len)
            send_req.result()
            recv_req.result()

        # Create every message pair before the clock starts so that their
        # construction is excluded from the measurement.
        msg_pairs = [
            (ucp.ucp_msg(send_buffer_region), ucp.ucp_msg(recv_buffer_region))
            for _ in range(iters)
        ]

        issue_lat = 0.0
        progress_lat = 0.0
        start = clock()

        for send_msg, recv_msg in msg_pairs:
            # Five contiguous timestamps delimit the four phases, so each
            # phase boundary needs a single clock read.
            t0 = clock()
            send_req = client_ep.send(send_msg, msg_len)
            t1 = clock()
            send_req.result()
            t2 = clock()
            recv_req = client_ep.recv(recv_msg, msg_len)
            t3 = clock()
            recv_req.result()
            t4 = clock()

            issue_lat += (t1 - t0) + (t3 - t2)
            progress_lat += (t2 - t1) + (t4 - t3)

        lat = clock() - start

        lat = ((lat / 2) / iters) * 1000000
        issue_lat = ((issue_lat / 2) / iters) * 1000000
        progress_lat = ((progress_lat / 2) / iters) * 1000000
        print("{}\t\t{}\t\t{}\t\t{}".format(msg_len, lat, issue_lat,
                                            progress_lat))

    if use_cuda:
        send_buffer_region.free_cuda()
        recv_buffer_region.free_cuda()
    else:
        send_buffer_region.free_host()
        recv_buffer_region.free_host()

    ucp.destroy_ep(client_ep)
    cb_not_done = False