import numpy as np
import tvm
import tvm.testing
from tvm import te, topi
from tvm.contrib.nvcc import have_fp16  # imports assumed by this excerpt


def check_cuda(dtype, m=32, n=32):
    if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
        print("Skip because cuda is not enabled")
        return
    if dtype == "float16" and not have_fp16(tvm.gpu(0).compute_version):
        print("Skip because gpu does not have fp16 support")
        return

    # Build an element-wise pipeline ending in a reduction:
    # e = (a + b) + (a * b), g = sum(e)
    a = te.placeholder((m, n), name="a", dtype=dtype)
    b = te.placeholder((m, n), name="b", dtype=dtype)
    c = a + b
    d = a * b
    e = topi.elemwise_sum([c, d])
    g = topi.sum(e)
    with tvm.target.cuda():
        sg = topi.cuda.schedule_reduce(g)
        ctx = tvm.gpu(0)
        func = tvm.build(sg, [a, b, g], 'cuda')
        # Compare the GPU result against the equivalent numpy computation.
        a_np = np.random.uniform(size=(m, n)).astype(a.dtype)
        b_np = np.random.uniform(size=(m, n)).astype(b.dtype)
        g_np = np.sum(np.add(a_np * b_np, a_np + b_np))
        a_nd = tvm.nd.array(a_np, ctx)
        b_nd = tvm.nd.array(b_np, ctx)
        g_nd = tvm.nd.array(np.zeros(g_np.shape, dtype=g_np.dtype), ctx)
        func(a_nd, b_nd, g_nd)
        tvm.testing.assert_allclose(g_nd.asnumpy(), g_np, rtol=1e-3)
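
# A hypothetical driver (not part of the original excerpt) would simply loop
# over the dtypes of interest:
for dtype in ["float32", "float16"]:
    check_cuda(dtype)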
Example #2
import numpy as np
import tvm
import topi
from tvm.contrib.pickle_memoize import memoize  # imports assumed by this excerpt


def verify_elemwise_sum(num_args, dtype):
    shape = (3, 5, 4)

    tvm_placeholders = []
    for i in range(num_args):
        tvm_placeholders.append(
            tvm.placeholder(shape, name="data"+str(i), dtype=dtype))
    esum = topi.elemwise_sum(tvm_placeholders)
    s = tvm.create_schedule([esum.op])

    @memoize("topi.tests.test_topi_elemwise_sum")
    def get_ref_data():
        np_nd = [np.random.uniform(0, 10, size=shape).astype(dtype)
                 for i in range(num_args)]
        return np_nd
    np_nd = get_ref_data()

    def check_device(device):
        if not tvm.runtime.enabled(device):
            print("Skip because %s is not enabled" % device)
            return

        ctx = tvm.context(device, 0)
        out = tvm.nd.array(np.zeros(shape, dtype=dtype), ctx)
        f = tvm.build(s, tvm_placeholders + [esum], device, name="elemwise_sum")
        tvm_nd = [tvm.nd.array(nd, ctx) for nd in np_nd] + [out]
        f(*tvm_nd)
        np_out = np.sum(np.array(np_nd), axis=0)
        tvm.testing.assert_allclose(out.asnumpy(), np_out, rtol=1e-5)

    for device in ["llvm"]:
        check_device(device)
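
# A small driver (argument values here are illustrative) might exercise the
# helper like this:
def test_elemwise_sum():
    verify_elemwise_sum(1, "float32")
    verify_elemwise_sum(5, "float32")


if __name__ == "__main__":
    test_elemwise_sum()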
Example #3
import numpy as np
import tvm
import topi
from tvm.contrib.pickle_memoize import memoize  # imports assumed by this excerpt


def verify_elemwise_sum(num_args, dtype):
    shape = (3, 5, 4)

    tvm_placeholders = []
    for i in range(num_args):
        tvm_placeholders.append(
            tvm.placeholder(shape, name="data"+str(i), dtype=dtype))
    esum = topi.elemwise_sum(tvm_placeholders)
    s = tvm.create_schedule([esum.op])

    @memoize("topi.tests.test_topi_elemwise_sum")
    def get_ref_data():
        np_nd = [np.random.uniform(0, 10, size=shape).astype(dtype)
                 for i in range(num_args)]
        return np_nd
    np_nd = get_ref_data()

    def check_device(device):
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return

        ctx = tvm.context(device, 0)
        out = tvm.nd.array(np.zeros(shape, dtype=dtype), ctx)
        f = tvm.build(s, tvm_placeholders + [esum], device, name="elemwise_sum")
        tvm_nd = [tvm.nd.array(nd, ctx) for nd in np_nd] + [out]
        f(*tvm_nd)
        np_out = np.sum(np.array(np_nd), axis=0)
        tvm.testing.assert_allclose(out.asnumpy(), np_out, rtol=1e-5)

    for device in ["llvm"]:
        check_device(device)
Example #4
from __future__ import absolute_import, print_function
import tvm
import topi
import numpy as np

if __name__ == '__main__':
    x, y = 100, 10
    a = tvm.placeholder((x, y, y), name='a')
    b = tvm.placeholder((y, y), name='b')
    c = a + b  # same as topi.broadcast_add; b is broadcast over a's leading axis
    d = a * b  # same as topi.broadcast_mul

    e = topi.elemwise_sum([c, d])
    f = e / 2.0
    g = topi.sum(f)
    with tvm.target.cuda():
        sg = topi.generic.schedule_reduce(g)
        print(tvm.lower(sg, [a, b], simple_mode=True))
Example #5
c = a + b  # same as topi.broadcast_add
d = a * b  # same as topi.broadcast_mul

######################################################################
# With the same overloaded syntax, TOPI handles broadcasting a primitive
# (`int`, `float`) against a tensor, as in :code:`d - 3.14`.
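#
# For instance (a hypothetical line reusing the tensors above), subtracting a
# Python float broadcasts the scalar over every element of the tensor:
h = d - 3.14  # the Python float 3.14 is broadcast over every element of d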

######################################################################
# Generic schedules and fusing operations
# ---------------------------------------
# Up to now, we have seen how TOPI can save us from writing explicit computations
# in the lower-level API, but the scheduling was still done by hand, as before.
# TOPI also provides higher-level scheduling recipes that depend on the given context.
# For CUDA, for example, we can schedule the following series of operations, ending
# with :code:`topi.sum`, using only :code:`topi.generic.schedule_reduce`:
#
e = topi.elemwise_sum([c, d], num_args=2)
f = e / 2.0
g = topi.sum(f)
with tvm.target.cuda():
    sg = topi.generic.schedule_reduce(g)
    print(tvm.lower(sg, [a, b], simple_mode=True))

######################################################################
# As you can see, the scheduled stages of the computation have been accumulated,
# and we can examine them by
#
print(sg.stages)

######################################################################
# We can test the correctness by comparing against the :code:`numpy` result as follows
#
func = tvm.build(sg, [a, b, g], 'cuda')
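
######################################################################
# A minimal sketch of that check (mirroring the CUDA test at the top of this page;
# it assumes a CUDA-capable GPU and the placeholder shapes ``(x, y, y)`` and
# ``(y, y)`` defined earlier in this tutorial):
ctx = tvm.gpu(0)
a_np = np.random.uniform(size=(x, y, y)).astype(a.dtype)
b_np = np.random.uniform(size=(y, y)).astype(b.dtype)
g_np = np.sum(np.add(a_np + b_np, a_np * b_np) / 2.0)
a_nd = tvm.nd.array(a_np, ctx)
b_nd = tvm.nd.array(b_np, ctx)
g_nd = tvm.nd.array(np.zeros(g_np.shape, dtype=g_np.dtype), ctx)
func(a_nd, b_nd, g_nd)
tvm.testing.assert_allclose(g_nd.asnumpy(), g_np, rtol=1e-5)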
Example #6
File: intro_topi.py  Project: bddppq/tvm
c = a + b  # same as topi.broadcast_add
d = a * b  # same as topi.broadcast_mul

######################################################################
# With the same overloaded syntax, TOPI handles broadcasting a primitive
# (`int`, `float`) against a tensor, as in :code:`d - 3.14`.

######################################################################
# Generic schedules and fusing operations
# ---------------------------------------
# Up to now, we have seen how TOPI can save us from writing explicit computations
# in the lower-level API, but the scheduling was still done by hand, as before.
# TOPI also provides higher-level scheduling recipes that depend on the given context.
# For CUDA, for example, we can schedule the following series of operations, ending
# with :code:`topi.sum`, using only :code:`topi.generic.schedule_reduce`:
#
e = topi.elemwise_sum([c, d])
f = e / 2.0
g = topi.sum(f)
with tvm.target.cuda():
    sg = topi.generic.schedule_reduce(g)
    print(tvm.lower(sg, [a, b], simple_mode=True))

######################################################################
# As you can see, the scheduled stages of the computation have been accumulated,
# and we can examine them by
#
print(sg.stages)

######################################################################
# We can test the correctness by comparing against the :code:`numpy` result as follows
#
func = tvm.build(sg, [a, b, g], 'cuda')