示例#1
0
def test_move():
    """A plain element copy performs zero floating-point operations,
    so the FLOP estimator is expected to reject it with a RuntimeError."""
    size = 1024

    src = te.placeholder((size,))
    dst = te.compute((size,), lambda i: src[i])
    sched = te.create_schedule([dst.op])

    raised = False
    try:
        compute_flop(sched)
    except RuntimeError:
        raised = True
    assert raised
def test_move():
    """A simple move kernel has no float arithmetic; compute_flop must
    raise RuntimeError instead of returning a count."""
    N = 1024

    A = tvm.placeholder((N,))
    C = tvm.compute((N,), lambda i: A[i])
    s = tvm.create_schedule([C.op])

    try:
        compute_flop(s)
    except RuntimeError:
        return  # expected: estimator refuses a zero-FLOP schedule
    assert False
示例#3
0
def test_pack_gemm():
    """Packed GEMM: the pack/unpack layout stages contribute no float
    ops, so the estimate must equal the inner matmul's 2 * N * L * M."""
    for _ in range(5):
        N, L, M = [np.random.randint(10, 128) * 4 for _ in range(3)]
        input_dtype, acc_dtype = random_dtypes()
        A = te.placeholder((N, L), dtype=input_dtype)
        B = te.placeholder((M, L), dtype=input_dtype)
        k = te.reduce_axis((0, L))

        bn = 4  # packing block width
        idxd, idxm = tvm.tir.indexdiv, tvm.tir.indexmod

        # NOTE: the pack lambdas' own `k` parameter shadows the reduce axis.
        A_pack = te.compute((N // bn, L, bn), lambda i, j, k: A[i * bn + k][j])
        B_pack = te.compute((M // bn, L, bn), lambda i, j, k: B[i * bn + k][j])
        C_pack = te.compute(
            (N // bn, M // bn, bn, bn),
            lambda i, j, ii, jj: te.sum(
                A_pack[i, k, ii].astype(acc_dtype)
                * B_pack[j, k, jj].astype(acc_dtype),
                axis=[k],
            ),
        )
        C = te.compute(
            (N, M),
            lambda i, j: C_pack[idxd(i, bn)][idxd(j, bn)][idxm(i, bn)][idxm(j, bn)],
        )

        s = te.create_schedule([C.op])
        assert compute_flop(s) == 2 * N * L * M
示例#4
0
def test_conv():
    """Convolution-shaped reduction: one multiply-accumulate per point of
    the (N, CO, OH, OW) x (CI, KH, KW) iteration space, i.e. 2 FLOPs each."""
    for _ in range(5):
        N, H, W, CO, CI, KH, KW = [np.random.randint(10, 32) for _ in range(7)]
        input_dtype, acc_dtype = random_dtypes()
        D = te.placeholder((N, CI, H, W), dtype=input_dtype)
        K = te.placeholder((CO, CI, KH, KW), dtype=input_dtype)

        # Clamp the kernel so the output extent below stays positive.
        KH = min(H, KH)
        KW = min(W, KW)

        ci = te.reduce_axis((0, CI))
        kh = te.reduce_axis((0, KH))
        kw = te.reduce_axis((0, KW))

        OH = H - KH + 1
        OW = W - KW + 1

        # The data indexing need not be a real convolution: only the
        # iteration-space size matters to the FLOP estimator.
        C = te.compute(
            (N, CO, OH, OW),
            lambda n, co, h, w: te.sum(
                D[n][ci][h][w].astype(acc_dtype) * K[co][ci][h][w].astype(acc_dtype),
                axis=[ci, kh, kw],
            ),
        )

        s = te.create_schedule([C.op])

        assert compute_flop(s) == 2 * N * CO * OH * OW * CI * KH * KW
示例#5
0
def test_outer_dot():
    """Outer product: exactly one multiplication per output element,
    hence a FLOP count of N * M."""
    for _ in range(5):
        N, M = [np.random.randint(10, 128) * 4 for _ in range(2)]
        A = tvm.placeholder((N,))
        B = tvm.placeholder((M,))

        C = tvm.compute((N, M), lambda i, j: A[i] * B[j])

        s = tvm.create_schedule([C.op])
        assert compute_flop(s) == N * M
def test_outer_dot():
    """One multiply per (i, j) entry of the outer product -> N * M FLOPs."""
    for _ in range(5):
        sizes = [np.random.randint(10, 128) * 4 for _ in range(2)]
        rows, cols = sizes

        vec_a = tvm.placeholder((rows,))
        vec_b = tvm.placeholder((cols,))
        outer = tvm.compute((rows, cols), lambda i, j: vec_a[i] * vec_b[j])

        sched = tvm.create_schedule([outer.op])
        assert compute_flop(sched) == rows * cols
示例#7
0
def test_outer_dot():
    """Outer product with dtype casts: the casts add no FLOPs, so the
    count is still one multiply per output element (N * M)."""
    for _ in range(5):
        N, M = [np.random.randint(10, 128) * 4 for _ in range(2)]
        input_dtype, acc_dtype = random_dtypes()

        A = te.placeholder((N,), dtype=input_dtype)
        B = te.placeholder((M,), dtype=input_dtype)
        C = te.compute(
            (N, M),
            lambda i, j: A[i].astype(acc_dtype) * B[j].astype(acc_dtype),
        )

        s = te.create_schedule([C.op])
        assert compute_flop(s) == N * M
def test_outer_dot():
    """Casting inputs to the accumulator dtype must not change the FLOP
    estimate: still N * M multiplies."""
    for _ in range(5):
        dims = [np.random.randint(10, 128) * 4 for _ in range(2)]
        N, M = dims[0], dims[1]
        input_dtype, acc_dtype = random_dtypes()

        A = tvm.placeholder((N,), dtype=input_dtype)
        B = tvm.placeholder((M,), dtype=input_dtype)

        C = tvm.compute(
            (N, M),
            lambda i, j: A[i].astype(acc_dtype) * B[j].astype(acc_dtype),
        )

        s = tvm.create_schedule([C.op])
        assert compute_flop(s) == N * M
def test_pack_gemm():
    """Blocked (packed) GEMM: layout packing and unpacking add no float
    ops, so the estimator must report exactly 2 * N * L * M."""
    for _ in range(5):
        N, L, M = [np.random.randint(10, 128) * 4 for _ in range(3)]
        A = tvm.placeholder((N, L))
        B = tvm.placeholder((M, L))
        k = tvm.reduce_axis((0, L))

        bn = 4  # block (tile) width
        # NOTE: each pack lambda's own `k` parameter shadows the reduce axis.
        A_pack = tvm.compute((N // bn, L, bn),
                             lambda i, j, k: A[i * bn + k][j])
        B_pack = tvm.compute((M // bn, L, bn),
                             lambda i, j, k: B[i * bn + k][j])
        C_pack = tvm.compute(
            (N // bn, M // bn, bn, bn),
            lambda i, j, ii, jj: tvm.sum(A_pack[i, k, ii] * B_pack[j, k, jj],
                                         axis=[k]),
        )
        C = tvm.compute((N, M),
                        lambda i, j: C_pack[i // bn][j // bn][i % bn][j % bn])

        s = tvm.create_schedule([C.op])
        assert compute_flop(s) == 2 * N * L * M
示例#10
0
def test_pack_gemm():
    """FLOP count of a tiled GEMM equals that of the plain matmul
    (2 * N * L * M); the pack/unpack stages are pure data movement."""
    tile = 4
    for _ in range(5):
        N, L, M = [np.random.randint(10, 128) * 4 for _ in range(3)]

        mat_a = tvm.placeholder((N, L))
        mat_b = tvm.placeholder((M, L))
        red = tvm.reduce_axis((0, L))

        packed_a = tvm.compute(
            (N // tile, L, tile), lambda i, j, k: mat_a[i * tile + k][j]
        )
        packed_b = tvm.compute(
            (M // tile, L, tile), lambda i, j, k: mat_b[i * tile + k][j]
        )
        packed_c = tvm.compute(
            (N // tile, M // tile, tile, tile),
            lambda i, j, ii, jj: tvm.sum(
                packed_a[i, red, ii] * packed_b[j, red, jj], axis=[red]
            ),
        )
        out = tvm.compute(
            (N, M),
            lambda i, j: packed_c[i // tile][j // tile][i % tile][j % tile],
        )

        sched = tvm.create_schedule([out.op])
        assert compute_flop(sched) == 2 * N * L * M
示例#11
0
def test_max_pool():
    """Max pooling: each output element performs KH * KW comparisons,
    which the estimator counts as one op each."""
    for _ in range(5):
        N, H, W, CO, CI, KH, KW = [np.random.randint(10, 32) for _ in range(7)]
        input_dtype, _ = random_dtypes()
        D = te.placeholder((N, CI, H, W), dtype=input_dtype)

        # Keep the window no larger than the input so OH/OW stay positive.
        KH = min(H, KH)
        KW = min(W, KW)

        kh = te.reduce_axis((0, KH))
        kw = te.reduce_axis((0, KW))

        OH = H - KH + 1
        OW = W - KW + 1

        C = te.compute(
            (N, CO, OH, OW),
            lambda n, co, h, w: tvm.te.max(D[n][co][h + kh][w + kw], axis=[kh, kw]),
        )

        s = te.create_schedule([C.op])

        assert compute_flop(s) == N * CO * OH * OW * KH * KW
def test_conv():
    """Conv-shaped sum-reduction: 2 FLOPs (multiply + add) for every
    point of the output x reduction iteration space."""
    for _ in range(5):
        N, H, W, CO, CI, KH, KW = [np.random.randint(10, 32) for _ in range(7)]

        D = tvm.placeholder((N, CI, H, W))
        K = tvm.placeholder((CO, CI, KH, KW))

        # Clamp the window so the output extents remain positive.
        KH = min(H, KH)
        KW = min(W, KW)

        ci = tvm.reduce_axis((0, CI))
        kh = tvm.reduce_axis((0, KH))
        kw = tvm.reduce_axis((0, KW))

        OH = H - KH + 1
        OW = W - KW + 1

        # Only the iteration-space size matters for FLOP counting, not
        # whether the indexing is a genuine convolution.
        C = tvm.compute(
            (N, CO, OH, OW),
            lambda n, co, h, w: tvm.sum(
                D[n][ci][h][w] * K[co][ci][h][w], axis=[ci, kh, kw]
            ),
        )

        s = tvm.create_schedule([C.op])

        assert compute_flop(s) == 2 * N * CO * OH * OW * CI * KH * KW
示例#13
0
def test_max_pool():
    """Pooling counts one op per comparison: KH * KW per output element."""
    for _ in range(5):
        N, H, W, CO, CI, KH, KW = [np.random.randint(10, 32) for _ in range(7)]
        input_dtype, _ = random_dtypes()

        D = tvm.placeholder((N, CI, H, W), dtype=input_dtype)

        KH = min(H, KH)  # window must fit inside the input
        KW = min(W, KW)

        kh = tvm.reduce_axis((0, KH))
        kw = tvm.reduce_axis((0, KW))

        OH = H - KH + 1
        OW = W - KW + 1

        C = tvm.compute(
            (N, CO, OH, OW),
            lambda n, co, h, w: tvm.max(D[n][co][h + kh][w + kw], axis=[kh, kw]),
        )

        s = tvm.create_schedule([C.op])

        assert compute_flop(s) == N * CO * OH * OW * KH * KW