Example #1
def test_ctx():
    def test_ctx_func(ctx):
        assert tvm.gpu(7) == ctx
        return tvm.cpu(0)
    x = test_ctx_func(tvm.gpu(7))
    assert x == tvm.cpu(0)
    x = tvm.opencl(10)
    x = tvm._api_internal._context_test(x, x.device_type, x.device_id)
    assert x == tvm.opencl(10)
Example #2
def test_ctx():
    def test_ctx_func(ctx):
        assert tvm.gpu(7) == ctx
        return tvm.cpu(0)
    x = test_ctx_func(tvm.gpu(7))
    assert x == tvm.cpu(0)
    x = tvm.opencl(10)
    x = tvm.testing.context_test(x, x.device_type, x.device_id)
    assert x == tvm.opencl(10)
Example #3
def test_device():
    def test_device_func(dev):
        assert tvm.gpu(7) == dev
        return tvm.cpu(0)

    x = test_device_func(tvm.gpu(7))
    assert x == tvm.cpu(0)
    x = tvm.opencl(10)
    x = tvm.testing.device_test(x, x.device_type, x.device_id)
    assert x == tvm.opencl(10)
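
The three tests above track TVM's rename of "context" to "device": TVMContext became Device around the 0.8 release, and tvm.gpu / tvm.context were deprecated in favor of tvm.cuda / tvm.device. A minimal sketch of the newer spelling, assuming TVM 0.8 or later:

import tvm

dev = tvm.cuda(0)                    # replaces tvm.gpu(0)
assert dev == tvm.device("cuda", 0)  # replaces tvm.context("cuda", 0)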
Example #4
import os

import numpy as np

import tvm
from tvm import relay
from tvm.contrib import graph_runtime


def build_run_compare(
    tvm_mod,
    params1,
    input_shape,
    dtype="float32",
    target="llvm",
):

    # Fall back to running on the host when no RPC tracker is configured;
    # os.environ.get avoids a KeyError when the variables are unset.
    rpc_tracker_host = os.environ.get("TVM_TRACKER_HOST")
    rpc_tracker_port = os.environ.get("TVM_TRACKER_PORT")
    if rpc_tracker_host:
        run_on_host = 0
        target_host = "llvm -mtriple=arm64-linux-android"
        rpc_tracker_port = int(rpc_tracker_port)
    else:
        run_on_host = 1
        target_host = "llvm"

    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(
            tvm_mod, target_host=target_host, target=target, params=params1
        )
    if run_on_host:
        ctx = tvm.opencl()
        m = graph_runtime.create(graph, lib, ctx)
    else:
        from tvm import rpc
        from tvm.contrib import utils, ndk
        rpc_key = "android"
        tracker = rpc.connect_tracker(rpc_tracker_host, rpc_tracker_port)
        remote = tracker.request(
            rpc_key, priority=0, session_timeout=600
        )
        temp = utils.tempdir()
        dso_binary = "dev_lib_cl.so"
        dso_binary_path = temp.relpath(dso_binary)
        ctx = remote.cl(0)
        lib.export_library(dso_binary_path, ndk.create_shared)
        remote.upload(dso_binary_path)
        rlib = remote.load_module(dso_binary)
        m = graph_runtime.create(graph, rlib, ctx)
    m.set_input(**params)
    inputs = []
    if isinstance(input_shape, dict):
        for key in input_shape:
            inputs.append(np.random.normal(size=input_shape[key]).astype(dtype))
            m.set_input(key, inputs[-1])
    else:
        inputs.append(np.random.normal(size=input_shape).astype(dtype))
        m.set_input("data", inputs[-1])
    m.run()

    # get_reference is a sibling helper (defined elsewhere in the test file)
    # that executes the same model on the CPU for comparison.
    ref_outputs = get_reference(tvm_mod, params1, input_shape, inputs)
    for i, ref_output in enumerate(ref_outputs):
        tvm_output = m.get_output(i)
        output = tvm_output.asnumpy()
        # Elementwise mismatch dump, kept commented out for debugging:
        # for index, x in np.ndenumerate(ref_output):
        #     if abs(output[index] - x) > 0.01:
        #         print(index, output[index], x)

        np.testing.assert_allclose(output, ref_output, rtol=1e-2, atol=1e-2)
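
A sketch of how this helper might be invoked; the Relay model, shapes, and parameter names below are illustrative assumptions, not taken from the source:

import numpy as np
import tvm
from tvm import relay

# Hypothetical 3x3 convolution workload.
dshape = (1, 32, 40, 40)
wshape = (32, 32, 3, 3)
data = relay.var("data", shape=dshape, dtype="float32")
weight = relay.var("weight", shape=wshape, dtype="float32")
conv = relay.nn.conv2d(data, weight, padding=(1, 1), channels=32, kernel_size=(3, 3))
mod = tvm.IRModule.from_expr(relay.Function([data, weight], conv))
params = {"weight": np.random.normal(size=wshape).astype("float32")}

# Runs locally on OpenCL, or on a remote Android device when
# TVM_TRACKER_HOST / TVM_TRACKER_PORT are set.
build_run_compare(mod, params, {"data": dshape}, target="opencl")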
Example #5
def enabled_ctx_list():
    ctx_list = [('cpu', tvm.cpu(0)), ('gpu', tvm.gpu(0)),
                ('cl', tvm.opencl(0)), ('metal', tvm.metal(0)),
                ('rocm', tvm.rocm(0)), ('vpi', tvm.vpi(0))]
    for k, v in ctx_list:
        assert tvm.context(k, 0) == v
    ctx_list = [x[1] for x in ctx_list if x[1].exist]
    return ctx_list
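
A short usage sketch, assuming a pre-0.8 TVM where the ctx API is current:

import numpy as np
import tvm

for ctx in enabled_ctx_list():
    # Round-trip an array through every device enabled in this build.
    x = tvm.nd.array(np.arange(10, dtype="float32"), ctx)
    np.testing.assert_equal(x.asnumpy(), np.arange(10, dtype="float32"))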
Example #6
def enabled_ctx_list():
    ctx_list = [('cpu', tvm.cpu(0)),
                ('gpu', tvm.gpu(0)),
                ('cl', tvm.opencl(0)),
                ('metal', tvm.metal(0)),
                ('rocm', tvm.rocm(0)),
                ('vulkan', tvm.vulkan(0)),
                ('vpi', tvm.vpi(0))]
    for k, v in ctx_list:
        assert tvm.context(k, 0) == v
    ctx_list = [x[1] for x in ctx_list if x[1].exist]
    return ctx_list
Example #7
def requires_gpu(*args):
    """Mark a test as requiring a GPU to run.

    Tests with this mark will not be run unless a gpu is present.

    Parameters
    ----------
    f : function
        Function to mark
    """
    _requires_gpu = [
        pytest.mark.skipif(
            not tvm.cuda().exist and not tvm.rocm().exist
            and not tvm.opencl().exist and not tvm.metal().exist
            and not tvm.vulkan().exist,
            reason="No GPU present",
        ),
        *uses_gpu(),
    ]
    return _compose(args, _requires_gpu)
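
In TVM's test suite this helper is applied as a plain pytest decorator; the test body below is an illustrative assumption:

import tvm.testing

@tvm.testing.requires_gpu
def test_needs_gpu():
    # Skipped unless at least one GPU runtime (CUDA, ROCm, OpenCL,
    # Metal, or Vulkan) is present.
    ...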
Example #8
import json
import os

import numpy as np

import tvm
from tvm import autotvm, relay
from tvm.contrib import graph_runtime


def build_run_compare(
    tvm_mod,
    params1,
    input_shape,
    dtype="float32",
    target="llvm",
    static_mem_scopes=[],
    gpu_preprocess=None,
    stat_file=None,
):

    if "TVM_TRACKER_HOST" in os.environ and "TVM_TRACKER_PORT" in os.environ:
        rpc_tracker_host = os.environ["TVM_TRACKER_HOST"]
        rpc_tracker_port = os.environ["TVM_TRACKER_PORT"]
        run_on_host = 0
        target_host = "llvm -mtriple=arm64-linux-android"
        rpc_tracker_port = int(rpc_tracker_port)
    else:
        run_on_host = 1
        target_host = "llvm"

    if gpu_preprocess:
        tvm_mod_nchwc = gpu_preprocess(tvm_mod)
    else:
        tvm_mod_nchwc = tvm_mod

    if stat_file is not None:
        with autotvm.apply_history_best(stat_file):
            with tvm.transform.PassContext(opt_level=3):
                graph, lib, params = relay.build(
                    tvm_mod_nchwc, target_host=target_host, target=target, params=params1
                )
    else:
        with tvm.transform.PassContext(opt_level=3):
            graph, lib, params = relay.build(
                tvm_mod_nchwc, target_host=target_host, target=target, params=params1
            )

    # verify that storage_scope contains the expected texture scopes
    graph_json = json.loads(graph)
    if "storage_scope" in graph_json["attrs"]:
        assert (
            len(static_mem_scopes) == len(graph_json["attrs"]["storage_scope"][1])
            or len(static_mem_scopes) == 0
        )
    else:
        assert len(static_mem_scopes) == 0

    for i in range(len(static_mem_scopes)):
        assert static_mem_scopes[i] == graph_json["attrs"]["storage_scope"][1][i]

    if run_on_host:
        ctx = tvm.opencl()
        m = graph_runtime.create(graph, lib, ctx)
    else:
        from tvm import rpc
        from tvm.contrib import utils, ndk

        rpc_key = "android"
        tracker = rpc.connect_tracker(rpc_tracker_host, rpc_tracker_port)
        remote = tracker.request(rpc_key, priority=0, session_timeout=600)
        temp = utils.tempdir()
        dso_binary = "dev_lib_cl.so"
        dso_binary_path = temp.relpath(dso_binary)
        ctx = remote.cl(0)
        lib.export_library(dso_binary_path, ndk.create_shared)
        remote.upload(dso_binary_path)
        rlib = remote.load_module(dso_binary)
        m = graph_runtime.create(graph, rlib, ctx)
    m.set_input(**params)
    inputs = []
    if isinstance(input_shape, dict):
        for key in input_shape:
            inputs.append(np.random.normal(size=input_shape[key]).astype(dtype))
            m.set_input(key, inputs[-1])
    else:
        inputs.append(np.random.normal(size=input_shape).astype(dtype))
        m.set_input("data", inputs[-1])
    m.run()

    # get_cpu_reference is a sibling helper (defined elsewhere in the test
    # file) that executes the same model on the CPU for comparison.
    ref_outputs = get_cpu_reference(tvm_mod, params1, input_shape, inputs)
    for i, ref_output in enumerate(ref_outputs):
        tvm_output = m.get_output(i)
        output = tvm_output.asnumpy()
        # Elementwise mismatch dump, kept commented out for debugging:
        # for index, x in np.ndenumerate(ref_output):
        #     if abs(output[index] - x) > 0.01:
        #         print(index, output[index], x)

        np.testing.assert_allclose(output, ref_output, rtol=1e-1, atol=1e-1)
    return graph
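
Compared with Example #4, this version can also validate texture memory scopes in the graph JSON and replay an autotvm tuning log. A sketch of a call exercising those extras; the scope strings and log-file name are illustrative assumptions:

# Assumes `mod` and `params` built as in the sketch after Example #4.
graph = build_run_compare(
    mod,
    params,
    {"data": (1, 32, 40, 40)},
    target="opencl",
    static_mem_scopes=["global", "global.texture", "global"],
    stat_file="adreno_conv2d.log",  # hypothetical tuning log
)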
Example #9
# Inspect the best config found by the tuner; dispatch_context and task
# come from the preceding tuning step (not shown).
best_config = dispatch_context.query(task.target, task.workload)
print("\nBest config:")
print(best_config)

# apply history best from log file
with autotvm.apply_history_best("conv2d-lenet-1.log"):
    with tvm.target.Target("opencl"):
        s, arg_bufs = conv2d_no_batching(N, H, W, CO, CI, KH, KW, strides,
                                         padding)
        func = tvm.build(s, arg_bufs)

# check correctness
a_np = np.random.uniform(size=(N, CI, H, W)).astype(np.float32)
w_np = np.random.uniform(size=(CO, CI, KH, KW)).astype(np.float32)
c_np = conv2d_nchw_python(a_np, w_np, strides, padding)

ctx = tvm.opencl()
a_tvm = tvm.nd.array(a_np, ctx=ctx)
w_tvm = tvm.nd.array(w_np, ctx=ctx)
c_tvm = tvm.nd.empty(c_np.shape, ctx=ctx)
func(a_tvm, w_tvm, c_tvm)

tvm.testing.assert_allclose(c_np, c_tvm.asnumpy(), rtol=1e-2)

# Evaluate the running time. We use a large run count (number=400) to reduce
# noise and kernel-launch overhead; a profiler such as nvprof can validate the result.
evaluator = func.time_evaluator(func.entry_name, ctx, number=400)
print("Time cost of this operator: %f" % evaluator(a_tvm, w_tvm, c_tvm).mean)
with open("conv2d-lenet-1.cl", "w") as fout:
    print(func.imported_modules[0].get_source(), file=fout)