示例#1
0
def test_matmul_add():
    """Check ``mps.matmul`` followed by a scalar bias add against NumPy.

    Two variants are built from the same placeholders ``A`` (n x l) and
    ``B`` (l x m): ``mps.matmul(A, B)`` and ``mps.matmul(B, A, True, True)``.
    A symbolic ``bias`` is added element-wise to each result and the output
    is compared with ``np.dot`` on random inputs.
    """
    n = 1024
    l = 128
    m = 235
    bias = tvm.var('bias', dtype=tvm.float32)
    A = tvm.placeholder((n, l), name='A')
    B = tvm.placeholder((l, m), name='B')
    C1 = mps.matmul(A, B)
    # Assuming the two boolean flags transpose the respective operands
    # (TODO confirm against mps.matmul): B is (l, m) and A is (n, l), so the
    # only conformable product is (m, l) . (l, n) -> (m, n), i.e. the
    # transpose of C1.
    C2 = mps.matmul(B, A, True, True)
    D1 = tvm.compute(C1.shape, lambda i, j: C1[i, j] + bias, name="D1")
    D2 = tvm.compute(C2.shape, lambda i, j: C2[i, j] + bias, name="D2")
    s1 = tvm.create_schedule(D1.op)
    s2 = tvm.create_schedule(D2.op)

    def verify(A, B, D, s, bias, target="llvm", transposed=False):
        """Build schedule ``s``, run it on random data and compare with NumPy.

        Parameters
        ----------
        A, B : placeholders fed with random (n, l) and (l, m) arrays.
        D : output tensor of the schedule.
        s : schedule to build.
        bias : symbolic scalar passed as the last build argument.
        target : build target; the check is skipped when it is not enabled.
        transposed : when True, ``D`` is expected to hold the transpose of
            ``A . B + bias`` (shape (m, n)) instead of the (n, m) product.
        """
        if not tvm.module.enabled(target):
            print("skip because %s is not enabled..." % target)
            return
        if not tvm.get_global_func("tvm.contrib.mps.matmul", True):
            print("skip because extern function is not available")
            return
        ctx = tvm.cpu(0)
        f = tvm.build(s, [A, B, D, bias], target)
        a = tvm.nd.array(np.random.uniform(size=(n, l)).astype(A.dtype), ctx)
        b = tvm.nd.array(np.random.uniform(size=(l, m)).astype(B.dtype), ctx)
        bb = 10.0
        # Reference result; (A . B)^T == B^T . A^T covers the transposed case.
        expected = np.dot(a.asnumpy(), b.asnumpy()) + bb
        if transposed:
            expected = expected.T
        # Size the output buffer from the reference so it matches D's layout.
        d = tvm.nd.array(np.zeros(expected.shape, dtype=D.dtype), ctx)
        f(a, b, d, bb)
        np.testing.assert_allclose(d.asnumpy(), expected, rtol=1e-5)

    verify(A, B, D1, s1, bias)
    verify(A, B, D2, s2, bias, transposed=True)
示例#2
0
def test_matmul():
    """Check ``mps.matmul`` plus a constant offset against NumPy.

    Builds ``D = mps.matmul(A, B) + 1.0`` for a (1024 x 128) by (128 x 256)
    product, binds the two axes of ``D`` to block/thread indices in tiles of
    16, runs the kernel on random inputs and compares with ``np.dot(...) + 1``.
    """
    n = 1024
    l = 128
    m = 256
    A = te.placeholder((n, l), name="A")
    B = te.placeholder((l, m), name="B")
    C = mps.matmul(A, B)
    D = te.compute(C.shape, lambda *i: C(*i) + 1.0)
    s = te.create_schedule(D.op)

    # Split each output axis by 16 and map outer/inner parts to block/thread ids.
    yo, xo = D.op.axis
    block_y = te.thread_axis("blockIdx.y")
    block_x = te.thread_axis("blockIdx.x")
    thread_y = te.thread_axis("threadIdx.y")
    thread_x = te.thread_axis("threadIdx.x")
    by, ty = s[D].split(yo, factor=16)
    bx, tx = s[D].split(xo, factor=16)
    s[D].bind(by, block_y)
    s[D].bind(bx, block_x)
    s[D].bind(ty, thread_y)
    s[D].bind(tx, thread_x)

    def verify(A, B, D, s, target="metal"):
        """Build ``s`` for ``target``, run it and compare ``D`` with NumPy.

        The check is skipped (with a message) when the
        ``tvm.contrib.mps.matmul`` extern function is not registered.
        """
        if not tvm.get_global_func("tvm.contrib.mps.matmul", True):
            print("skip because extern function is not available")
            return
        # Derive both the device and the build target from ``target`` so the
        # parameter is actually honoured; the default still means Metal dev 0.
        ctx = tvm.context(target, 0)
        f = tvm.build(s, [A, B, D], target)
        a = tvm.nd.array(np.random.uniform(size=(n, l)).astype(A.dtype), ctx)
        b = tvm.nd.array(np.random.uniform(size=(l, m)).astype(B.dtype), ctx)
        # The output buffer is bound to D, so take its dtype from D.
        d = tvm.nd.array(np.zeros((n, m), dtype=D.dtype), ctx)
        f(a, b, d)
        tvm.testing.assert_allclose(d.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()) + 1, rtol=1e-5)

    verify(A, B, D, s)
示例#3
0
def test_matmul():
    """Check ``mps.matmul`` plus a constant offset against NumPy (legacy API).

    Returns early with a message when the "metal" target is not enabled.
    Otherwise builds ``D = mps.matmul(A, B) + 1.`` for a (1024 x 128) by
    (128 x 256) product, binds the two axes of ``D`` to block/thread indices
    in tiles of 16, and compares the result with ``np.dot(...) + 1``.
    """
    if not tvm.module.enabled("metal"):
        print("skip because %s is not enabled..." % "metal")
        return
    n = 1024
    l = 128
    m = 256
    A = tvm.placeholder((n, l), name='A')
    B = tvm.placeholder((l, m), name='B')
    C = mps.matmul(A, B)
    D = tvm.compute(
        C.shape,
        lambda *i: C(*i) + 1.
    )
    s = tvm.create_schedule(D.op)

    # Split each output axis by 16 and map outer/inner parts to block/thread ids.
    yo, xo = D.op.axis
    block_y = tvm.thread_axis("blockIdx.y")
    block_x = tvm.thread_axis("blockIdx.x")
    thread_y = tvm.thread_axis("threadIdx.y")
    thread_x = tvm.thread_axis("threadIdx.x")
    by, ty = s[D].split(yo, factor=16)
    bx, tx = s[D].split(xo, factor=16)
    s[D].bind(by, block_y)
    s[D].bind(bx, block_x)
    s[D].bind(ty, thread_y)
    s[D].bind(tx, thread_x)

    def verify(A, B, D, s, target="metal"):
        """Build ``s`` for ``target``, run it and compare ``D`` with NumPy.

        The check is skipped (with a message) when the
        ``tvm.contrib.mps.matmul`` extern function is not registered.
        """
        if not tvm.get_global_func("tvm.contrib.mps.matmul", True):
            print("skip because extern function is not available")
            return
        # Derive both the device and the build target from ``target`` so the
        # parameter is actually honoured; the default still means Metal dev 0.
        ctx = tvm.context(target, 0)
        f = tvm.build(s, [A, B, D], target)
        a = tvm.nd.array(np.random.uniform(size=(n, l)).astype(A.dtype), ctx)
        b = tvm.nd.array(np.random.uniform(size=(l, m)).astype(B.dtype), ctx)
        # The output buffer is bound to D, so take its dtype from D.
        d = tvm.nd.array(np.zeros((n, m), dtype=D.dtype), ctx)
        f(a, b, d)
        tvm.testing.assert_allclose(
            d.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()) + 1, rtol=1e-5)

    verify(A, B, D, s)