def dense_int8(cfg, data, weight, bias=None, out_dtype=None):
    """Dense operator for int8 on CUDA"""
    if out_dtype is None:
        out_dtype = data.dtype
    batch, in_dim = get_const_tuple(data.shape)
    out_dim, _ = get_const_tuple(weight.shape)

    target = tvm.target.current_target()
    if "cublas" in target.libs:
        matmul = cublas.matmul(data, weight, False, True, out_dtype)
        if bias is not None:
            matmul = tvm.compute((batch, out_dim),
                                 lambda i, j: matmul[i, j] + bias[j].astype(out_dtype),
                                 tag=tag.BROADCAST)
        return matmul

    k = tvm.reduce_axis((0, in_dim), name='k')
    matmul = tvm.compute((batch, out_dim),
                         lambda i, j: tvm.sum(data[i, k].astype(out_dtype) *
                                              weight[j, k].astype(out_dtype), axis=[k]),
                         tag="dense_int8")
    cfg.add_flop(batch * in_dim * out_dim * 2)
    if bias is not None:
        matmul = tvm.compute(
            (batch, out_dim),
            lambda i, j: matmul[i, j] + bias[j].astype(out_dtype),
            tag=tag.BROADCAST)
        cfg.add_flop(batch * out_dim)
    return matmul

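# Hedged NumPy-only illustration (not part of the TVM snippet above) of why the
# fallback compute casts both int8 operands to out_dtype (typically "int32")
# before multiplying: accumulating the products in int8 would overflow.
import numpy as np

data_i8 = np.random.randint(-128, 128, size=(2, 64), dtype="int8")
weight_i8 = np.random.randint(-128, 128, size=(3, 64), dtype="int8")

# Cast first, then contract over in_dim; the accumulator stays in int32.
acc = data_i8.astype("int32") @ weight_i8.astype("int32").T
assert acc.dtype == np.int32
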
def verify_matmul_add(in_dtype, out_dtype, rtol=1e-5):
    n = 1024
    l = 128
    m = 236
    A = tvm.placeholder((n, l), name='A', dtype=in_dtype)
    B = tvm.placeholder((l, m), name='B', dtype=in_dtype)
    C = cublas.matmul(A, B, dtype=out_dtype)
    s = tvm.create_schedule(C.op)

    def verify(target="cuda"):
        if not tvm.module.enabled(target):
            print("skip because %s is not enabled..." % target)
            return
        if not tvm.get_global_func("tvm.contrib.cublas.matmul", True):
            print("skip because extern function is not available")
            return
        ctx = tvm.gpu(0)
        f = tvm.build(s, [A, B, C], target)
        a = tvm.nd.array(np.random.uniform(0, 128, size=(n, l)).astype(A.dtype), ctx)
        b = tvm.nd.array(np.random.uniform(0, 128, size=(l, m)).astype(B.dtype), ctx)
        c = tvm.nd.array(np.zeros((n, m), dtype=C.dtype), ctx)
        f(a, b, c)
        tvm.testing.assert_allclose(
            c.asnumpy(),
            np.dot(a.asnumpy().astype(C.dtype), b.asnumpy().astype(C.dtype)),
            rtol=rtol)
    verify()

def dense_cuda(data, weight, bias=None):
    """Dense operator for cuda backend.

    Parameters
    ----------
    data : tvm.Tensor
        2-D with shape [batch, in_dim]

    weight : tvm.Tensor
        2-D with shape [out_dim, in_dim]

    bias : tvm.Tensor, optional
        1-D with shape [out_dim]

    Returns
    -------
    output : tvm.Tensor
        2-D with shape [batch, out_dim]
    """
    assert len(data.shape) == 2 and len(weight.shape) == 2, \
        "only support 2-dim dense"
    if bias is not None:
        assert len(bias.shape) == 1
    batch, in_dim = data.shape
    out_dim, _ = weight.shape
    target = tvm.target.current_target()
    if "cublas" in target.libs:
        matmul = cublas.matmul(data, weight, False, True)
        if bias is not None:
            matmul = tvm.compute((batch, out_dim),
                                 lambda i, j: matmul[i, j] + bias[j],
                                 tag=tag.BROADCAST)
        return matmul
    return dense_default(data, weight, bias)

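# Hedged aside (plain NumPy, not the cuBLAS binding): the dense paths call
# cublas.matmul(data, weight, False, True) because the weight is stored as
# [out_dim, in_dim], so dense is data @ weight^T. A small check of that
# contraction:
import numpy as np

batch, in_dim, out_dim = 4, 8, 3
data_np = np.random.rand(batch, in_dim).astype("float32")
weight_np = np.random.rand(out_dim, in_dim).astype("float32")

dense_out = data_np @ weight_np.T                        # shape (batch, out_dim)
reference = np.einsum("bk,ok->bo", data_np, weight_np)   # same sum over in_dim
assert np.allclose(dense_out, reference)
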
def _matmul_cublas_common(
    cfg,
    tensor_a,
    tensor_b,
    bias=None,
    out_dtype=None,
    transpose_a=False,
    transpose_b=False,
):
    assert len(tensor_a.shape) == 2 and len(tensor_b.shape) == 2, "only support 2-dim matmul"
    if bias is not None:
        assert len(bias.shape) == 1
    if out_dtype is None:
        out_dtype = tensor_a.dtype
    assert out_dtype == tensor_a.dtype, "Mixed precision not supported."
    batch, in_dim = get_const_tuple(tensor_a.shape)
    out_dim, _ = get_const_tuple(tensor_b.shape)
    matmul = cublas.matmul(tensor_a, tensor_b, transpose_a, transpose_b)
    if all(isinstance(d, int) for d in [batch, in_dim, out_dim]):
        cfg.add_flop(batch * in_dim * out_dim * 2)
    if bias is not None:
        matmul = te.compute(
            (batch, out_dim), lambda i, j: matmul[i, j] + bias[j], tag=tag.BROADCAST
        )
    return matmul

def test_matmul_add():
    n = 1024
    l = 128
    m = 235
    A = tvm.placeholder((n, l), name='A')
    B = tvm.placeholder((l, m), name='B')
    C = cublas.matmul(A, B)
    s = tvm.create_schedule(C.op)

    def verify(target="cuda"):
        if not tvm.module.enabled(target):
            print("skip because %s is not enabled..." % target)
            return
        if not tvm.get_global_func("tvm.contrib.cublas.matmul", True):
            print("skip because extern function is not available")
            return
        ctx = tvm.gpu(0)
        f = tvm.build(s, [A, B, C], target)
        a = tvm.nd.array(np.random.uniform(size=(n, l)).astype(A.dtype), ctx)
        b = tvm.nd.array(np.random.uniform(size=(l, m)).astype(B.dtype), ctx)
        c = tvm.nd.array(np.zeros((n, m), dtype=C.dtype), ctx)
        f(a, b, c)
        tvm.testing.assert_allclose(
            c.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()), rtol=1e-5)
    verify()

def _lower_matmul(op: relay.Call, inputs: List[te.Tensor]) -> te.Tensor:
    """Lower a matmul using cuBLAS."""
    return cublas.matmul(
        inputs[0],
        inputs[1],
        transa=op.attrs["transpose_a"],
        transb=op.attrs["transpose_b"],
        dtype=op.checked_type.dtype,
    )

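# Hedged NumPy model (an illustration, not the cuBLAS binding itself) of the
# transpose_a/transpose_b semantics that _lower_matmul forwards to cublas.matmul:
import numpy as np

def matmul_reference(a, b, transpose_a=False, transpose_b=False):
    """Plain NumPy equivalent of a 2-D matmul with optional transposes."""
    a = a.T if transpose_a else a
    b = b.T if transpose_b else b
    return a @ b

a = np.random.rand(5, 3).astype("float32")
b = np.random.rand(4, 3).astype("float32")
out = matmul_reference(a, b, transpose_a=False, transpose_b=True)  # shape (5, 4)
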
def dense_cublas(cfg, data, weight, bias=None, out_dtype=None):
    """Dense operator on CUDA with CUBLAS"""
    assert len(data.shape) == 2 and len(weight.shape) == 2, "only support 2-dim dense"
    if bias is not None:
        assert len(bias.shape) == 1
    if out_dtype is None:
        out_dtype = data.dtype
    assert out_dtype == data.dtype, "Mixed precision not supported."
    batch, in_dim = data.shape
    out_dim, _ = weight.shape
    matmul = cublas.matmul(data, weight, False, True)
    cfg.add_flop(batch * in_dim * out_dim * 2)
    if bias is not None:
        matmul = te.compute(
            (batch, out_dim), lambda i, j: matmul[i, j] + bias[j], tag=tag.BROADCAST
        )
    return matmul

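# Hedged sketch of how the cuBLAS-backed paths above are usually selected: the
# operators check `target.libs` for "cublas", so the CUDA target has to request
# the library explicitly. The exact spelling of the target API differs between
# TVM releases; which form matches the surrounding snippets is an assumption.
import tvm

target = tvm.target.create("cuda -libs=cublas")      # older releases
# target = tvm.target.Target("cuda -libs=cublas")    # newer releases

with target:
    # Inside this scope tvm.target.current_target() (older API) reports the
    # cuBLAS-enabled target, so dense_cuda / dense_int8 take the cublas path.
    pass
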
def dense_cuda(cfg, data, weight, bias=None, out_dtype=None):
    """Dense operator for cuda backend.

    Parameters
    ----------
    data : tvm.Tensor
        2-D with shape [batch, in_dim]

    weight : tvm.Tensor
        2-D with shape [out_dim, in_dim]

    bias : tvm.Tensor, optional
        1-D with shape [out_dim]

    Returns
    -------
    output : tvm.Tensor
        2-D with shape [batch, out_dim]
    """
    # pylint: disable=unused-argument
    assert len(data.shape) == 2 and len(weight.shape) == 2, \
        "only support 2-dim dense"
    if bias is not None:
        assert len(bias.shape) == 1
    if out_dtype is None:
        out_dtype = data.dtype
    batch, in_dim = data.shape
    out_dim, _ = weight.shape
    target = tvm.target.current_target()
    if "cublas" in target.libs:
        assert out_dtype == data.dtype, "Mixed precision not supported."
        matmul = cublas.matmul(data, weight, False, True)
        if bias is not None:
            matmul = tvm.compute((batch, out_dim),
                                 lambda i, j: matmul[i, j] + bias[j],
                                 tag=tag.BROADCAST)
        return matmul
    return dense_default(data, weight, bias, out_dtype)

def _lower_dense(op: relay.Call, inputs: List[te.Tensor]) -> te.Tensor:
    """Lower a dense using cuBLAS."""
    return cublas.matmul(
        inputs[0], inputs[1], transa=False, transb=True, dtype=op.checked_type.dtype
    )