def verify_batch_matmul(in_dtype, out_dtype, rtol=1e-5):
    """Build a cuBLAS batched matmul and compare its result with numpy.matmul.

    Skips silently when the CUDA backend or the cuBLAS extern function is
    not available in this TVM build.

    Parameters
    ----------
    in_dtype : str
        Data type of the two input placeholders.
    out_dtype : str
        Data type of the cuBLAS output tensor.
    rtol : float
        Relative tolerance passed to assert_allclose.
    """
    batch, rows, inner, cols = 16, 1024, 128, 236
    A = tvm.placeholder((batch, rows, inner), name='A', dtype=in_dtype)
    B = tvm.placeholder((batch, inner, cols), name='B', dtype=in_dtype)
    C = cublas.batch_matmul(A, B, dtype=out_dtype)
    s = tvm.create_schedule(C.op)

    def verify(target="cuda"):
        # Both the target backend and the packed cuBLAS function must exist.
        if not tvm.module.enabled(target):
            print("skip because %s is not enabled..." % target)
            return
        if not tvm.get_global_func("tvm.contrib.cublas.matmul", True):
            print("skip because extern function is not available")
            return
        ctx = tvm.gpu(0)
        f = tvm.build(s, [A, B, C], target)
        a = tvm.nd.array(np.random.uniform(size=(batch, rows, inner)).astype(A.dtype), ctx)
        b = tvm.nd.array(np.random.uniform(size=(batch, inner, cols)).astype(B.dtype), ctx)
        c = tvm.nd.array(np.zeros((batch, rows, cols), dtype=C.dtype), ctx)
        f(a, b, c)
        # Reference result: promote both inputs to the output dtype first,
        # mirroring what the mixed-precision cuBLAS path produces.
        expected = np.matmul(
            a.asnumpy().astype(C.dtype), b.asnumpy().astype(C.dtype)
        ).astype(C.dtype)
        tvm.testing.assert_allclose(c.asnumpy(), expected, rtol=rtol)

    verify()
def batch_matmul_cublas(cfg, x, y, out_shape=None):
    """Computes batch matrix multiplication of `x` and `y` when `x` and `y`
    are data in batch.

    Parameters
    ----------
    cfg : ConfigSpace
        Autotvm tuning space config.
    x : tvm.te.Tensor
        3-D with shape [batch, M, K]
    y : tvm.te.Tensor
        3-D with shape [batch, N, K]
    out_shape : None
        The output shape

    Returns
    -------
    output : tvm.te.Tensor
        3-D with shape [batch, M, N]
    """
    b, m, k = get_const_tuple(x.shape)
    b, n, k = get_const_tuple(y.shape)
    # Record the FLOP count for autotvm only when every dim is static.
    if all(isinstance(dim, int) for dim in (b, m, n, k)):
        cfg.add_flop(2 * b * m * n * k)
    # NT layout: y is [batch, N, K], so transpose the second operand.
    return cublas.batch_matmul(x, y, False, True)
def verify_batch_matmul(Ashape, Bshape, Cshape, in_dtype, out_dtype, rtol=1e-5):
    """Run cuBLAS batch_matmul on CUDA and check it against numpy.matmul.

    Parameters
    ----------
    Ashape, Bshape, Cshape : tuple
        Shapes of the two inputs and the expected output.
    in_dtype, out_dtype : str
        Input and output data types.
    rtol : float
        Relative tolerance for the comparison.
    """
    A = te.placeholder(Ashape, name="A", dtype=in_dtype)
    B = te.placeholder(Bshape, name="B", dtype=in_dtype)
    C = cublas.batch_matmul(A, B, dtype=out_dtype)
    s = te.create_schedule(C.op)
    dev = tvm.cuda(0)
    f = tvm.build(s, [A, B, C], "cuda")
    # Integer inputs are drawn from [1, 10) so accumulated products stay
    # comfortably inside the integer dtype's range.
    if "int" in in_dtype:
        a_np = np.random.uniform(1, 10, size=Ashape).astype(in_dtype)
        b_np = np.random.uniform(1, 10, size=Bshape).astype(in_dtype)
    else:
        a_np = np.random.uniform(size=Ashape).astype(A.dtype)
        b_np = np.random.uniform(size=Bshape).astype(B.dtype)
    a = tvm.nd.array(a_np, dev)
    b = tvm.nd.array(b_np, dev)
    c = tvm.nd.array(np.zeros(Cshape, dtype=C.dtype), dev)
    f(a, b, c)
    # Reference: cast both operands up to the output dtype before matmul.
    ref = np.matmul(
        a.numpy().astype(C.dtype), b.numpy().astype(C.dtype)
    ).astype(C.dtype)
    tvm.testing.assert_allclose(c.numpy(), ref, rtol=rtol)
def _lower_batch_matmul(op: relay.Call, inputs: List[te.Tensor]) -> te.Tensor:
    """Lower a relay batch_matmul call to the cuBLAS extern op."""
    attrs = op.attrs
    # Forward the transpose flags and the checked output dtype verbatim.
    return cublas.batch_matmul(
        inputs[0],
        inputs[1],
        transa=attrs["transpose_a"],
        transb=attrs["transpose_b"],
        dtype=op.checked_type.dtype,
    )
def batch_matmul_cublas(
    cfg, x, y, out_shape=None, out_dtype=None, transpose_a=False, transpose_b=True
):
    """Compute batch matrix multiplication of `x` and `y`.

    Both `x` and `y` can be transposed. For legacy reason, we use NT format
    (transpose_a=False, transpose_b=True) by default.

    Parameters
    ----------
    cfg : ConfigSpace
        Autotvm tuning space config file.
    x : tvm.te.Tensor
        3-D with shape [batch, M, K] or [batch, K, M].
    y : tvm.te.Tensor
        3-D with shape [batch, K, N] or [batch, N, K].
    out_shape : List[Optional]
        Explicit intended output shape of the computation. Can be useful in
        cases with dynamic input shapes.
    out_dtype : Optional[str]
        Specifies the output data type for mixed precision batch matmul.
    transpose_a : Optional[bool] = False
        Whether the first tensor is in transposed format.
    transpose_b : Optional[bool] = True
        Whether the second tensor is in transposed format.

    Returns
    -------
    output : tvm.te.Tensor
        3-D with shape [batch, M, N]
    """
    # Unpack each operand's dims according to its declared layout.
    if transpose_a:
        b, k, m = get_const_tuple(x.shape)
    else:
        b, m, k = get_const_tuple(x.shape)
    if transpose_b:
        b, n, k = get_const_tuple(y.shape)
    else:
        b, k, n = get_const_tuple(y.shape)
    # FLOPs are only recorded for fully static shapes.
    if all(isinstance(dim, int) for dim in (b, m, n, k)):
        cfg.add_flop(2 * b * m * n * k)
    return cublas.batch_matmul(
        x, y, transa=transpose_a, transb=transpose_b, dtype=out_dtype
    )
def batch_matmul_cublas(x, y):
    """Computes batch matrix multiplication of `x` and `y` when `x` and `y`
    are data in batch.

    Parameters
    ----------
    x : tvm.te.Tensor
        3-D with shape [batch, M, K]
    y : tvm.te.Tensor
        3-D with shape [batch, N, K]

    Returns
    -------
    output : tvm.te.Tensor
        3-D with shape [batch, M, N]
    """
    # NT layout: transpose only the second operand.
    return cublas.batch_matmul(x, y, False, True)
def batch_matmul_cuda(x, y):
    """Computes batch matrix multiplication of `x` and `y` when `x` and `y`
    are data in batch.

    Parameters
    ----------
    x : tvm.Tensor
        3-D with shape [batch, M, K]
    y : tvm.Tensor
        3-D with shape [batch, N, K]

    Returns
    -------
    output : tvm.Tensor
        3-D with shape [batch, M, N]
    """
    target = tvm.target.current_target()
    # Dispatch to the cuBLAS extern op only when it was enabled for this target.
    use_cublas = target.target_name == "cuda" and "cublas" in target.libs
    if use_cublas:
        return cublas.batch_matmul(x, y, False, True)
    return batch_matmul_default(x, y)
def batch_matmul_cublas(cfg, x, y, out_shape=None):
    """Computes batch matrix multiplication of `x` and `y` when `x` and `y`
    are data in batch.

    Parameters
    ----------
    cfg : ConfigSpace
        Autotvm tuning space config.
    x : tvm.te.Tensor
        3-D with shape [batch, M, K]
    y : tvm.te.Tensor
        3-D with shape [batch, N, K]
    out_shape : None
        The output shape

    Returns
    -------
    output : tvm.te.Tensor
        3-D with shape [batch, M, N]
    """
    # BUG FIX: the raw `x.shape` entries are TVM expression objects, not
    # Python ints, so computing and registering the FLOP count
    # unconditionally fails (or records garbage) for symbolic/dynamic
    # shapes. Convert static dims to ints and guard, matching the sibling
    # batch_matmul_cublas implementation above.
    b, m, k = get_const_tuple(x.shape)
    b, n, k = get_const_tuple(y.shape)
    if all(isinstance(dim, int) for dim in (b, m, n, k)):
        cfg.add_flop(b * m * k * n * 2)
    return cublas.batch_matmul(x, y, False, True)