示例#1
0
def dense_int8(cfg, data, weight, bias=None, out_dtype=None):
    """Dense operator for int8 on CUDA"""
    if out_dtype is None:
        out_dtype = data.dtype

    batch, in_dim = get_const_tuple(data.shape)
    out_dim, _ = get_const_tuple(weight.shape)

    target = tvm.target.current_target()
    if "cublas" in target.libs:
        matmul = cublas.matmul(data, weight, False, True, out_dtype)
        if bias is not None:
            matmul = tvm.compute((batch, out_dim), \
                                 lambda i, j: matmul[i, j] + bias[j].astype(out_dtype), \
                                 tag=tag.BROADCAST)
        return matmul

    k = tvm.reduce_axis((0, in_dim), name='k')

    matmul = tvm.compute((batch, out_dim),
                         lambda i, j: tvm.sum(data[i, k].astype(out_dtype) *
                                              weight[j, k].astype(out_dtype),
                                              axis=[k]),
                         tag="dense_int8")

    cfg.add_flop(batch * in_dim * out_dim * 2)

    if bias is not None:
        matmul = tvm.compute(
            (batch, out_dim),
            lambda i, j: matmul[i, j] + bias[j].astype(out_dtype),
            tag=tag.BROADCAST)
        cfg.add_flop(batch * out_dim)

    return matmul
示例#2
0
def verify_matmul_add(in_dtype, out_dtype, rtol=1e-5):
    n = 1024
    l = 128
    m = 236
    A = tvm.placeholder((n, l), name='A', dtype=in_dtype)
    B = tvm.placeholder((l, m), name='B', dtype=in_dtype)
    C = cublas.matmul(A, B, dtype=out_dtype)
    s = tvm.create_schedule(C.op)

    def verify(target="cuda"):
        if not tvm.module.enabled(target):
            print("skip because %s is not enabled..." % target)
            return
        if not tvm.get_global_func("tvm.contrib.cublas.matmul", True):
            print("skip because extern function is not available")
            return
        ctx = tvm.gpu(0)
        f = tvm.build(s, [A, B, C], target)
        a = tvm.nd.array(np.random.uniform(0, 128, size=(n, l)).astype(A.dtype), ctx)
        b = tvm.nd.array(np.random.uniform(0, 128, size=(l, m)).astype(B.dtype), ctx)
        c = tvm.nd.array(np.zeros((n, m), dtype=C.dtype), ctx)
        f(a, b, c)
        tvm.testing.assert_allclose(
            c.asnumpy(), np.dot(a.asnumpy().astype(C.dtype), b.asnumpy().astype(C.dtype)), rtol=rtol)
    verify()
示例#3
0
文件: dense.py 项目: gwli/tvm
def dense_cuda(data, weight, bias=None):
    """Dense operator for cuda backend.

    Parameters
    ----------
    data : tvm.Tensor
        2-D with shape [batch, in_dim]

    weight : tvm.Tensor
        2-D with shape [out_dim, in_dim]

    bias : tvm.Tensor, optional
        1-D with shape [out_dim]

    Returns
    -------
    output : tvm.Tensor
        2-D with shape [batch, out_dim]
    """
    assert len(data.shape) == 2 and len(weight.shape) == 2, \
        "only support 2-dim dense"
    if bias is not None:
        assert len(bias.shape) == 1
    batch, in_dim = data.shape
    out_dim, _ = weight.shape
    target = tvm.target.current_target()
    if "cublas" in target.libs:
        matmul = cublas.matmul(data, weight, False, True)
        if bias is not None:
            matmul = tvm.compute((batch, out_dim), \
                                 lambda i, j: matmul[i, j] + bias[j], \
                                 tag=tag.BROADCAST)
        return matmul
    return dense_default(data, weight, bias)
示例#4
0
def _matmul_cublas_common(
    cfg,
    tensor_a,
    tensor_b,
    bias=None,
    out_dtype=None,
    transpose_a=False,
    transpose_b=False,
):
    assert len(tensor_a.shape) == 2 and len(
        tensor_b.shape) == 2, "only support 2-dim matmul"
    if bias is not None:
        assert len(bias.shape) == 1
    if out_dtype is None:
        out_dtype = tensor_a.dtype
    assert out_dtype == tensor_a.dtype, "Mixed precision not supported."
    batch, in_dim = get_const_tuple(tensor_a.shape)
    out_dim, _ = get_const_tuple(tensor_b.shape)
    matmul = cublas.matmul(tensor_a, tensor_b, transpose_a, transpose_b)
    if all(isinstance(d, int) for d in [batch, in_dim, out_dim]):
        cfg.add_flop(batch * in_dim * out_dim * 2)
    if bias is not None:
        matmul = te.compute((batch, out_dim),
                            lambda i, j: matmul[i, j] + bias[j],
                            tag=tag.BROADCAST)
    return matmul
示例#5
0
def dense_cuda(data, weight, bias=None):
    """Dense operator for cuda backend.

    Parameters
    ----------
    data : tvm.Tensor
        2-D with shape [batch, in_dim]

    weight : tvm.Tensor
        2-D with shape [out_dim, in_dim]

    bias : tvm.Tensor, optional
        1-D with shape [out_dim]

    Returns
    -------
    output : tvm.Tensor
        2-D with shape [batch, out_dim]
    """
    assert len(data.shape) == 2 and len(weight.shape) == 2, \
        "only support 2-dim dense"
    if bias is not None:
        assert len(bias.shape) == 1
    batch, in_dim = data.shape
    out_dim, _ = weight.shape
    target = tvm.target.current_target()
    if "cublas" in target.libs:
        matmul = cublas.matmul(data, weight, False, True)
        if bias is not None:
            matmul = tvm.compute((batch, out_dim), \
                                 lambda i, j: matmul[i, j] + bias[j], \
                                 tag=tag.BROADCAST)
        return matmul
    return dense_default(data, weight, bias)
示例#6
0
def test_matmul_add():
    n = 1024
    l = 128
    m = 235
    A = tvm.placeholder((n, l), name='A')
    B = tvm.placeholder((l, m), name='B')
    C = cublas.matmul(A, B)
    s = tvm.create_schedule(C.op)

    def verify(target="cuda"):
        if not tvm.module.enabled(target):
            print("skip because %s is not enabled..." % target)
            return
        if not tvm.get_global_func("tvm.contrib.cublas.matmul", True):
            print("skip because extern function is not available")
            return
        ctx = tvm.gpu(0)
        f = tvm.build(s, [A, B, C], target)
        a = tvm.nd.array(np.random.uniform(size=(n, l)).astype(A.dtype), ctx)
        b = tvm.nd.array(np.random.uniform(size=(l, m)).astype(B.dtype), ctx)
        c = tvm.nd.array(np.zeros((n, m), dtype=C.dtype), ctx)
        f(a, b, c)
        tvm.testing.assert_allclose(
            c.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()), rtol=1e-5)
    verify()
示例#7
0
def _lower_matmul(op: relay.Call, inputs: List[te.Tensor]) -> te.Tensor:
    """Lower a matmul using cuBLAS."""
    return cublas.matmul(
        inputs[0],
        inputs[1],
        transa=op.attrs["transpose_a"],
        transb=op.attrs["transpose_b"],
        dtype=op.checked_type.dtype,
    )
示例#8
0
def dense_cublas(cfg, data, weight, bias=None, out_dtype=None):
    """Dense operator on CUDA with CUBLAS"""
    assert len(data.shape) == 2 and len(weight.shape) == 2, "only support 2-dim dense"
    if bias is not None:
        assert len(bias.shape) == 1
    if out_dtype is None:
        out_dtype = data.dtype
    assert out_dtype == data.dtype, "Mixed precision not supported."
    batch, in_dim = data.shape
    out_dim, _ = weight.shape
    matmul = cublas.matmul(data, weight, False, True)
    cfg.add_flop(batch * in_dim * out_dim * 2)
    if bias is not None:
        matmul = te.compute(
            (batch, out_dim), lambda i, j: matmul[i, j] + bias[j], tag=tag.BROADCAST
        )
    return matmul
示例#9
0
def dense_cuda(cfg, data, weight, bias=None, out_dtype=None):
    """Dense operator for cuda backend.

    Parameters
    ----------
    data : tvm.Tensor
        2-D with shape [batch, in_dim]

    weight : tvm.Tensor
        2-D with shape [out_dim, in_dim]

    bias : tvm.Tensor, optional
        1-D with shape [out_dim]

    Returns
    -------
    output : tvm.Tensor
        2-D with shape [batch, out_dim]
    """
    # pylint: disable=unused-argument
    assert len(data.shape) == 2 and len(weight.shape) == 2, \
        "only support 2-dim dense"
    if bias is not None:
        assert len(bias.shape) == 1
    if out_dtype is None:
        out_dtype = data.dtype
    batch, in_dim = data.shape
    out_dim, _ = weight.shape
    target = tvm.target.current_target()
    if "cublas" in target.libs:
        assert out_dtype == data.dtype, "Mixed precision not supported."
        matmul = cublas.matmul(data, weight, False, True)
        if bias is not None:
            matmul = tvm.compute((batch, out_dim), \
                                 lambda i, j: matmul[i, j] + bias[j], \
                                 tag=tag.BROADCAST)
        return matmul
    return dense_default(data, weight, bias, out_dtype)
示例#10
0
文件: dense.py 项目: bddppq/tvm
def dense_cuda(cfg, data, weight, bias=None, out_dtype=None):
    """Dense operator for cuda backend.

    Parameters
    ----------
    data : tvm.Tensor
        2-D with shape [batch, in_dim]

    weight : tvm.Tensor
        2-D with shape [out_dim, in_dim]

    bias : tvm.Tensor, optional
        1-D with shape [out_dim]

    Returns
    -------
    output : tvm.Tensor
        2-D with shape [batch, out_dim]
    """
    # pylint: disable=unused-argument
    assert len(data.shape) == 2 and len(weight.shape) == 2, \
        "only support 2-dim dense"
    if bias is not None:
        assert len(bias.shape) == 1
    if out_dtype is None:
        out_dtype = data.dtype
    batch, in_dim = data.shape
    out_dim, _ = weight.shape
    target = tvm.target.current_target()
    if "cublas" in target.libs:
        assert out_dtype == data.dtype, "Mixed precision not supported."
        matmul = cublas.matmul(data, weight, False, True)
        if bias is not None:
            matmul = tvm.compute((batch, out_dim), \
                                 lambda i, j: matmul[i, j] + bias[j], \
                                 tag=tag.BROADCAST)
        return matmul
    return dense_default(data, weight, bias, out_dtype)
示例#11
0
def _lower_dense(op: relay.Call, inputs: List[te.Tensor]) -> te.Tensor:
    """Lower a dense using cuBLAS."""
    return cublas.matmul(
        inputs[0], inputs[1], transa=False, transb=True, dtype=op.checked_type.dtype
    )