示例#1
0
def verify_batch_matmul(in_dtype, out_dtype, rtol=1e-5):
    """Check ``cublas.batch_matmul`` on the "cuda" target against ``np.matmul``.

    Operands of fixed shape (16, 1024, 128) and (16, 128, 236) are filled
    with uniform random data, multiplied through the built function and
    compared with NumPy's result computed in the output dtype.  The check
    is skipped with a printed message when the target is not enabled or
    the ``tvm.contrib.cublas.matmul`` global function is not available.

    Parameters
    ----------
    in_dtype
        dtype used for both input placeholders.
    out_dtype
        dtype passed to ``cublas.batch_matmul``.
    rtol
        Relative tolerance forwarded to ``tvm.testing.assert_allclose``.
    """
    batch, rows, inner, cols = 16, 1024, 128, 236
    lhs_shape = (batch, rows, inner)
    rhs_shape = (batch, inner, cols)
    out_shape = (batch, rows, cols)

    A = tvm.placeholder(lhs_shape, name='A', dtype=in_dtype)
    B = tvm.placeholder(rhs_shape, name='B', dtype=in_dtype)
    C = cublas.batch_matmul(A, B, dtype=out_dtype)
    sched = tvm.create_schedule(C.op)

    target = "cuda"
    if not tvm.module.enabled(target):
        print("skip because %s is not enabled..." % target)
        return
    if not tvm.get_global_func("tvm.contrib.cublas.matmul", True):
        print("skip because extern function is not available")
        return

    ctx = tvm.gpu(0)
    built = tvm.build(sched, [A, B, C], target)

    # Draw the left operand before the right one so the random stream is
    # consumed in a fixed order.
    lhs_dev = tvm.nd.array(
        np.random.uniform(size=lhs_shape).astype(A.dtype), ctx)
    rhs_dev = tvm.nd.array(
        np.random.uniform(size=rhs_shape).astype(B.dtype), ctx)
    out_dev = tvm.nd.array(np.zeros(out_shape, dtype=C.dtype), ctx)
    built(lhs_dev, rhs_dev, out_dev)

    actual = out_dev.asnumpy()
    # Reference result: cast both inputs to the output dtype, multiply,
    # then cast the product to the output dtype again.
    lhs_ref = lhs_dev.asnumpy().astype(C.dtype)
    rhs_ref = rhs_dev.asnumpy().astype(C.dtype)
    expected = np.matmul(lhs_ref, rhs_ref).astype(C.dtype)
    tvm.testing.assert_allclose(actual, expected, rtol=rtol)
示例#2
0
def batch_matmul_cublas(cfg, x, y, out_shape=None):
    """Batch matrix multiplication of `x` and `y` via cuBLAS.

    Parameters
    ----------
    cfg : object
        Receives the FLOP count through ``cfg.add_flop`` when every
        dimension is a Python ``int``.

    x : tvm.te.Tensor
        3-D with shape [batch, M, K]

    y : tvm.te.Tensor
        3-D with shape [batch, N, K]

    out_shape : None
        The output shape. Not referenced by this implementation.

    Returns
    -------
    output : tvm.te.Tensor
        3-D with shape [batch, M, N]
    """
    # Only M is taken from `x`; batch and K come from `y`.
    _, m, _ = get_const_tuple(x.shape)
    b, n, k = get_const_tuple(y.shape)

    dims_are_static = all(isinstance(dim, int) for dim in (b, m, n, k))
    if dims_are_static:
        flops = b * m * k * n * 2
        cfg.add_flop(flops)

    # Positional flags: first operand not transposed, second transposed.
    return cublas.batch_matmul(x, y, False, True)
示例#3
0
def verify_batch_matmul(Ashape,
                        Bshape,
                        Cshape,
                        in_dtype,
                        out_dtype,
                        rtol=1e-5):
    """Build ``cublas.batch_matmul`` for "cuda" and compare with ``np.matmul``.

    Parameters
    ----------
    Ashape, Bshape
        Shapes of the two input placeholders and of the random input data.
    Cshape
        Shape of the zero-initialised output buffer.
    in_dtype
        dtype of the input placeholders. When it contains the substring
        "int" the inputs are drawn from ``uniform(1, 10)`` instead of the
        default ``uniform`` range.
    out_dtype
        dtype passed to ``cublas.batch_matmul``.
    rtol
        Relative tolerance forwarded to ``tvm.testing.assert_allclose``.
    """
    A = te.placeholder(Ashape, name="A", dtype=in_dtype)
    B = te.placeholder(Bshape, name="B", dtype=in_dtype)
    C = cublas.batch_matmul(A, B, dtype=out_dtype)
    sched = te.create_schedule(C.op)

    dev = tvm.cuda(0)
    built = tvm.build(sched, [A, B, C], "cuda")

    # Generate host data first (left operand, then right operand), then
    # upload both to the device.
    if "int" not in in_dtype:
        lhs_host = np.random.uniform(size=Ashape).astype(A.dtype)
        rhs_host = np.random.uniform(size=Bshape).astype(B.dtype)
    else:
        lhs_host = np.random.uniform(1, 10, size=Ashape).astype(in_dtype)
        rhs_host = np.random.uniform(1, 10, size=Bshape).astype(in_dtype)
    lhs_dev = tvm.nd.array(lhs_host, dev)
    rhs_dev = tvm.nd.array(rhs_host, dev)

    out_dev = tvm.nd.array(np.zeros(Cshape, dtype=C.dtype), dev)
    built(lhs_dev, rhs_dev, out_dev)

    actual = out_dev.numpy()
    # Reference result computed on the host in the output dtype.
    lhs_ref = lhs_dev.numpy().astype(C.dtype)
    rhs_ref = rhs_dev.numpy().astype(C.dtype)
    expected = np.matmul(lhs_ref, rhs_ref).astype(C.dtype)
    tvm.testing.assert_allclose(actual, expected, rtol=rtol)
示例#4
0
def _lower_batch_matmul(op: relay.Call, inputs: List[te.Tensor]) -> te.Tensor:
    """Lower a batch_matmul call to ``cublas.batch_matmul``.

    The first two entries of `inputs` are the operands; the transpose
    flags are read from ``op.attrs`` and the output dtype from
    ``op.checked_type``.
    """
    lhs, rhs = inputs[0], inputs[1]
    attrs = op.attrs
    transpose_lhs = attrs["transpose_a"]
    transpose_rhs = attrs["transpose_b"]
    result_dtype = op.checked_type.dtype
    return cublas.batch_matmul(
        lhs, rhs, transa=transpose_lhs, transb=transpose_rhs, dtype=result_dtype
    )
示例#5
0
def batch_matmul_cublas(cfg,
                        x,
                        y,
                        out_shape=None,
                        out_dtype=None,
                        transpose_a=False,
                        transpose_b=True):
    """Batch matrix multiplication of `x` and `y` via cuBLAS.

    Either operand may be supplied in transposed layout. The defaults
    (transpose_a=False, transpose_b=True) select the NT format, kept for
    legacy reasons.

    Parameters
    ----------
    cfg : ConfigSpace
        Autotvm tuning space config file. Receives the FLOP count through
        ``cfg.add_flop`` when every dimension is a Python ``int``.

    x : tvm.te.Tensor
        3-D with shape [batch, M, K] or [batch, K, M].

    y : tvm.te.Tensor
        3-D with shape [batch, K, N] or [batch, N, K].

    out_shape : List[Optional]
        Explicit intended output shape of the computation. Not referenced
        by this implementation.

    out_dtype : Optional[str]
        Output data type for mixed precision batch matmul; forwarded as
        ``dtype``.

    transpose_a : Optional[bool] = False
        Whether the first tensor is in transposed format.

    transpose_b : Optional[bool] = True
        Whether the second tensor is in transposed format.

    Returns
    -------
    output : tvm.te.Tensor
        3-D with shape [batch, M, N]
    """
    # Unpack each shape once, then pick dimensions according to layout.
    # Only M is taken from `x`; batch and K come from `y`.
    _, x_mid, x_last = get_const_tuple(x.shape)
    b, y_mid, y_last = get_const_tuple(y.shape)

    # x is [batch, K, M] when transposed, [batch, M, K] otherwise.
    m = x_last if transpose_a else x_mid
    # y is [batch, N, K] when transposed, [batch, K, N] otherwise.
    if transpose_b:
        n, k = y_mid, y_last
    else:
        k, n = y_mid, y_last

    if all(isinstance(dim, int) for dim in (b, m, n, k)):
        flops = b * m * k * n * 2
        cfg.add_flop(flops)

    return cublas.batch_matmul(
        x, y, transa=transpose_a, transb=transpose_b, dtype=out_dtype)
示例#6
0
def batch_matmul_cublas(x, y):
    """Batch matrix multiplication of `x` and `y` via cuBLAS.

    Parameters
    ----------
    x : tvm.te.Tensor
        3-D with shape [batch, M, K]

    y : tvm.te.Tensor
        3-D with shape [batch, N, K]

    Returns
    -------
    output : tvm.te.Tensor
        3-D with shape [batch, M, N]
    """
    # Flags are passed positionally: first operand as-is, second transposed.
    transpose_x, transpose_y = False, True
    return cublas.batch_matmul(x, y, transpose_x, transpose_y)
示例#7
0
def batch_matmul_cuda(x, y):
    """Batch matrix multiplication of `x` and `y` for the current target.

    Dispatches to ``cublas.batch_matmul`` when the current target is named
    "cuda" and lists "cublas" among its libs; otherwise falls back to
    ``batch_matmul_default``.

    Parameters
    ----------
    x : tvm.Tensor
        3-D with shape [batch, M, K]

    y : tvm.Tensor
        3-D with shape [batch, N, K]

    Returns
    -------
    output : tvm.Tensor
        3-D with shape [batch, M, N]
    """
    current = tvm.target.current_target()
    wants_cublas = current.target_name == "cuda" and "cublas" in current.libs
    if not wants_cublas:
        return batch_matmul_default(x, y)
    # Positional flags: first operand not transposed, second transposed.
    return cublas.batch_matmul(x, y, False, True)
示例#8
0
def batch_matmul_cublas(cfg, x, y, out_shape=None):
    """Batch matrix multiplication of `x` and `y` via cuBLAS.

    Parameters
    ----------
    cfg : object
        Receives the FLOP count through ``cfg.add_flop``.

    x : tvm.te.Tensor
        3-D with shape [batch, M, K]

    y : tvm.te.Tensor
        3-D with shape [batch, N, K]

    out_shape : None
        The output shape. Not referenced by this implementation.

    Returns
    -------
    output : tvm.te.Tensor
        3-D with shape [batch, M, N]
    """
    # Only M is taken from `x`; batch and K come from `y`.
    _, m, _ = x.shape
    b, n, k = y.shape

    # Keep this exact multiplication order: the shape entries are used
    # as-is and may not be plain integers.
    flops = b * m * k * n * 2
    cfg.add_flop(flops)

    # Positional flags: first operand not transposed, second transposed.
    return cublas.batch_matmul(x, y, False, True)