示例#1
0
    def cb():
        nonlocal last_time
        nonlocal nvlink_state
        now = time.time()
        src_dict = {"time": [now * 1000]}

        nvlink_state["tx-ref"] = nvlink_state["tx"].copy()
        nvlink_state["rx-ref"] = nvlink_state["rx"].copy()
        nvlink_state["tx"] = [
            sum([
                pynvml.nvmlDeviceGetNvLinkUtilizationCounter(
                    gpu_handles[i], j, counter)["tx"] for j in range(nlinks)
            ]) for i in range(ngpus)
        ]
        nvlink_state["rx"] = [
            sum([
                pynvml.nvmlDeviceGetNvLinkUtilizationCounter(
                    gpu_handles[i], j, counter)["rx"] for j in range(nlinks)
            ]) for i in range(ngpus)
        ]
        tx_diff = [
            max(a - b, 0.0) * 5.0
            for (a, b) in zip(nvlink_state["tx"], nvlink_state["tx-ref"])
        ]

        rx_diff = [
            max(a - b, 0.0) * 5.0
            for (a, b) in zip(nvlink_state["rx"], nvlink_state["rx-ref"])
        ]

        for i in range(ngpus):
            src_dict["nvlink-tx-" + str(i)] = [tx_diff[i]]
            src_dict["nvlink-rx-" + str(i)] = [rx_diff[i]]
        source.stream(src_dict, 1000)
        last_time = now
示例#2
0
    def cb():
        """Refresh the per-GPU ``count-tx`` / ``count-rx`` columns of ``source``.

        The previous per-GPU totals are saved as references, fresh totals are
        read (counter summed over all links of each GPU), and the clamped
        growth multiplied by 5.0 is written into the data source.
        """
        directions = ("tx", "rx")

        # Snapshot the old totals first; they are the baseline for the delta.
        for direction in directions:
            nvlink_state[direction + "-ref"] = nvlink_state[direction].copy()

        # Re-read every link of every GPU, all TX values before all RX values.
        for direction in directions:
            per_gpu = []
            for gpu in range(ngpus):
                link_total = 0
                for link in range(nlinks):
                    reading = pynvml.nvmlDeviceGetNvLinkUtilizationCounter(
                        gpu_handles[gpu], link, counter)
                    link_total += reading[direction]
                per_gpu.append(link_total)
            nvlink_state[direction] = per_gpu

        # NOTE(review): the fixed factor 5.0 presumably converts a per-0.2 s
        # delta into a per-second rate -- confirm against the callback period.
        updated_columns = {}
        for direction in directions:
            newest = nvlink_state[direction]
            baseline = nvlink_state[direction + "-ref"]
            updated_columns["count-" + direction] = [
                max(new - old, 0.0) * 5.0 for new, old in zip(newest, baseline)
            ]

        source.data.update(updated_columns)
示例#3
0
def test_nvml_nvlink_counters(ngpus, handles, counter, control, driver):
    """Exercise the NVLink utilization-counter calls on every link of every GPU.

    When ``driver`` is greater than 450.0 the test is reported as an expected
    failure with ``XFAIL_LEGACY_NVLINK_MSG``. Otherwise, for each of the first
    ``ngpus`` handles and each link index below ``NVML_NVLINK_MAX_LINKS``, it
    resets the counter, sets the control, reads both back, and freezes then
    unfreezes the counter.
    """
    if driver > 450.0:
        pytest.xfail(XFAIL_LEGACY_NVLINK_MSG)

    success = pynvml.NVML_SUCCESS
    reset = 0
    for gpu in range(ngpus):
        handle = handles[gpu]
        for link in range(pynvml.NVML_NVLINK_MAX_LINKS):
            status = pynvml.nvmlDeviceResetNvLinkUtilizationCounter(
                handle, link, counter
            )
            assert status == success

            pynvml.nvmlDeviceSetNvLinkUtilizationControl(
                handle, link, counter, control, reset
            )

            # Read the counter and the control back before asserting on either.
            readings = pynvml.nvmlDeviceGetNvLinkUtilizationCounter(
                handle, link, counter
            )
            applied = pynvml.nvmlDeviceGetNvLinkUtilizationControl(
                handle, link, counter
            )
            assert readings["rx"] >= 0
            assert readings["tx"] >= 0
            assert applied == control

            # Freeze (1) and then unfreeze (0); both calls must succeed.
            for freeze in (1, 0):
                status = pynvml.nvmlDeviceFreezeNvLinkUtilizationCounter(
                    handle, link, counter, freeze
                )
                assert status == success
示例#4
0
def test_nvml_nvlink_counters(ngpus, handles, counter, control):
    """Check reset, control, read-back and freeze of NVLink counters.

    Runs over each of the first ``ngpus`` handles and each link index below
    ``NVML_NVLINK_MAX_LINKS``.
    """
    reset = 0

    def _check_link(handle, link):
        # Reset must report success before the control is configured.
        outcome = pynvml.nvmlDeviceResetNvLinkUtilizationCounter(
            handle, link, counter)
        assert outcome == pynvml.NVML_SUCCESS

        pynvml.nvmlDeviceSetNvLinkUtilizationControl(
            handle, link, counter, control, reset)

        # Both read-backs happen before any of their assertions.
        countdict = pynvml.nvmlDeviceGetNvLinkUtilizationCounter(
            handle, link, counter)
        ctl = pynvml.nvmlDeviceGetNvLinkUtilizationControl(
            handle, link, counter)
        assert countdict["rx"] >= 0
        assert countdict["tx"] >= 0
        assert ctl == control

        # Freeze, then unfreeze.
        frozen = pynvml.nvmlDeviceFreezeNvLinkUtilizationCounter(
            handle, link, counter, 1)
        assert frozen == pynvml.NVML_SUCCESS
        thawed = pynvml.nvmlDeviceFreezeNvLinkUtilizationCounter(
            handle, link, counter, 0)
        assert thawed == pynvml.NVML_SUCCESS

    for gpu in range(ngpus):
        for link in range(pynvml.NVML_NVLINK_MAX_LINKS):
            _check_link(handles[gpu], link)
示例#5
0
def total_nvlink_transfer():
    """Return the summed NVLink RX/TX counters of one GPU.

    NVML is shut down and initialized again, the device index is taken from
    the first entry of ``CUDA_VISIBLE_DEVICES`` (0 when that is unavailable),
    and counter 0 is read for every link index below
    ``NVML_NVLINK_MAX_LINKS``.

    Returns:
        tuple: ``(rx, tx)`` -- the two counter values summed over all links.
    """
    import pynvml

    pynvml.nvmlShutdown()

    pynvml.nvmlInit()

    # NOTE(review): the CUDA_VISIBLE_DEVICES entry is used directly as an NVML
    # device index -- confirm both enumerations agree on the target machines.
    try:
        cuda_dev_id = int(os.environ["CUDA_VISIBLE_DEVICES"].split(",")[0])
    except (KeyError, ValueError) as e:
        # KeyError: the variable is not set. ValueError: its first entry is
        # not an integer (e.g. empty). Anything else is a real bug and should
        # surface instead of silently selecting device 0.
        print(e)
        cuda_dev_id = 0

    handle = pynvml.nvmlDeviceGetHandleByIndex(cuda_dev_id)
    rx = 0
    tx = 0
    for link in range(pynvml.NVML_NVLINK_MAX_LINKS):
        transfer = pynvml.nvmlDeviceGetNvLinkUtilizationCounter(handle, link, 0)
        rx += transfer["rx"]
        tx += transfer["tx"]
    return rx, tx
示例#6
0
def nvlink_timeline(doc):
    """Populate ``doc`` with the "NVLink Throughput Timeline" dashboard.

    Two stacked line charts (TX and RX) with one line per GPU are added to the
    document, and a periodic callback appends one throughput sample per GPU on
    every tick.

    Args:
        doc: Document to populate; ``add_root`` and ``add_periodic_callback``
            are called on it and its ``title`` is set.
    """
    update_ms = 200  # period of the refresh callback, in milliseconds
    counter = 1
    nlinks = pynvml.NVML_NVLINK_MAX_LINKS
    directions = ("tx", "rx")

    def _get_color(ind):
        # Cycle through a fixed palette so any number of GPUs gets a color.
        color_list = [
            "blue",
            "red",
            "green",
            "black",
            "brown",
            "cyan",
            "orange",
            "pink",
            "purple",
            "gold",
        ]
        return color_list[ind % len(color_list)]

    def _read_totals(direction):
        # Per-GPU total of one direction: the counter summed over all links.
        return [
            sum(
                pynvml.nvmlDeviceGetNvLinkUtilizationCounter(
                    gpu_handles[i], j, counter)[direction]
                for j in range(nlinks)
            )
            for i in range(ngpus)
        ]

    # X Range
    x_range = DataRange1d(follow="end", follow_interval=20000, range_padding=0)
    tools = "reset,xpan,xwheel_zoom"

    def _make_figure(title):
        # Both charts share the same x range so they pan and zoom together.
        return figure(
            title=title,
            sizing_mode="stretch_both",
            x_axis_type="datetime",
            x_range=x_range,
            tools=tools,
        )

    item_dict = {"time": []}
    for i in range(ngpus):
        item_dict["nvlink-tx-" + str(i)] = []
        item_dict["nvlink-rx-" + str(i)] = []

    source = ColumnDataSource(item_dict)

    tx_fig = _make_figure("TX NVLink (per Device) [B/s]")
    rx_fig = _make_figure("RX NVLink (per Device) [B/s]")
    for i in range(ngpus):
        tx_fig.line(source=source,
                    x="time",
                    y="nvlink-tx-" + str(i),
                    color=_get_color(i))
        rx_fig.line(source=source,
                    x="time",
                    y="nvlink-rx-" + str(i),
                    color=_get_color(i))
    tx_fig.yaxis.formatter = NumeralTickFormatter(format="0.0 b")
    rx_fig.yaxis.formatter = NumeralTickFormatter(format="0.0 b")

    doc.title = "NVLink Throughput Timeline"
    doc.add_root(column(tx_fig, rx_fig, sizing_mode="stretch_both"))

    # Baseline: timestamp and per-GPU totals that the first tick is compared to.
    last_time = time.time()
    totals = {direction: _read_totals(direction) for direction in directions}

    def cb():
        nonlocal last_time
        now = time.time()
        # Scale by the time that actually passed since the previous sample.
        # A fixed factor tied to the nominal period over-reports the rate
        # whenever a tick is delayed. If the wall clock did not advance, fall
        # back to the nominal period.
        elapsed = now - last_time
        per_second = 1.0 / elapsed if elapsed > 0 else 1000.0 / update_ms
        src_dict = {"time": [now * 1000]}  # seconds -> milliseconds

        current = {direction: _read_totals(direction) for direction in directions}
        for i in range(ngpus):
            for direction in directions:
                grown = current[direction][i] - totals[direction][i]
                # Negative growth is clamped to zero so the plotted rate is
                # never negative.
                src_dict["nvlink-" + direction + "-" + str(i)] = [
                    max(grown, 0.0) * per_second
                ]

        # Advance the baseline together with the timestamp so the next delta
        # and the next interval always describe the same window.
        totals.update(current)
        last_time = now
        source.stream(src_dict, 1000)

    doc.add_periodic_callback(cb, update_ms)
示例#7
0
def nvlink(doc):
    """Populate ``doc`` with the "NVLink Utilization Counters" dashboard.

    Two bar charts (TX and RX) with one bar per GPU are added to the document.
    The y axis of both is capped at ``nlinks`` times the per-link bandwidth
    looked up for the NVLink version reported for device 0 / link 0 (25 GB/s
    per link when the version is not in the table). A periodic callback
    refreshes the bars with the counter growth per second.

    Args:
        doc: Document to populate; ``add_root`` and ``add_periodic_callback``
            are called on it and its ``title`` is set.
    """
    import subprocess as sp
    import time

    update_ms = 200  # period of the refresh callback, in milliseconds
    directions = ("tx", "rx")

    # Use device-0/link-0 to get "upper bound"
    counter = 1
    nlinks = pynvml.NVML_NVLINK_MAX_LINKS
    nvlink_ver = pynvml.nvmlDeviceGetNvLinkVersion(gpu_handles[0], 0)
    nvlink_link_bw = {
        # Keys = NVLink Version, Values = Max Link BW (per direction)
        # [Note: Using specs at https://en.wikichip.org/wiki/nvidia/nvlink]
        1: 20.0 * GB,  # GB/s
        2: 25.0 * GB,  # GB/s
    }
    # Max NVLink Throughput = BW-per-link * nlinks
    max_bw = nlinks * nvlink_link_bw.get(nvlink_ver, 25.0 * GB)

    # nvmlDeviceSetNvLinkUtilizationControl seems limited, using smi:
    sp.call([
        "nvidia-smi",
        "nvlink",
        "--setcontrol",
        str(counter) + "bz",  # Get output in bytes
    ])

    def _read_totals(direction):
        # Per-GPU total of one direction: the counter summed over all links.
        return [
            sum(
                pynvml.nvmlDeviceGetNvLinkUtilizationCounter(
                    gpu_handles[i], j, counter)[direction]
                for j in range(nlinks)
            )
            for i in range(ngpus)
        ]

    # Baseline totals that the first tick is compared against.
    totals = {direction: _read_totals(direction) for direction in directions}

    # One bar per GPU, each 0.8 wide and starting at the GPU index.
    left = list(range(ngpus))
    right = [edge + 0.8 for edge in left]
    source = ColumnDataSource({
        "left": left,
        "right": right,
        "count-tx": [0.0 for i in range(ngpus)],
        "count-rx": [0.0 for i in range(ngpus)],
    })
    mapper = LinearColorMapper(palette=all_palettes["RdYlBu"][4],
                               low=0,
                               high=max_bw)

    def _bar_figure(title, column_name):
        # Bar height and bar color are both driven by the same column.
        fig = figure(title=title,
                     sizing_mode="stretch_both",
                     y_range=[0, max_bw])
        fig.yaxis.formatter = NumeralTickFormatter(format="0.0 b")
        fig.quad(
            source=source,
            left="left",
            right="right",
            bottom=0,
            top=column_name,
            color={
                "field": column_name,
                "transform": mapper
            },
        )
        fig.toolbar_location = None
        return fig

    tx_fig = _bar_figure("TX NVLink [B/s]", "count-tx")
    rx_fig = _bar_figure("RX NVLink [B/s]", "count-rx")

    doc.title = "NVLink Utilization Counters"
    doc.add_root(column(tx_fig, rx_fig, sizing_mode="stretch_both"))

    last_tick = time.monotonic()

    def cb():
        nonlocal last_tick
        now = time.monotonic()
        # Scale by the time that actually passed since the previous sample.
        # A fixed factor tied to the nominal period over-reports the rate
        # whenever a tick is delayed. If the clock did not advance, fall back
        # to the nominal period.
        elapsed = now - last_tick
        per_second = 1.0 / elapsed if elapsed > 0 else 1000.0 / update_ms

        current = {direction: _read_totals(direction) for direction in directions}
        src_dict = {
            # Negative growth is clamped to zero so bars are never negative.
            "count-" + direction: [
                max(new - old, 0.0) * per_second
                for new, old in zip(current[direction], totals[direction])
            ]
            for direction in directions
        }

        # Advance the baseline together with the timestamp so the next delta
        # and the next interval always describe the same window.
        totals.update(current)
        last_tick = now
        source.data.update(src_dict)

    doc.add_periodic_callback(cb, update_ms)