# Excerpt (pre-0.7 TVM API); imports needed by this snippet:
from tvm.contrib import cblas
from topi.util import get_const_tuple


def batch_matmul_cblas(cfg, x, y):
    """Compute batch matrix multiplication of `x` and `y` via CBLAS, where
    `x` and `y` are batches of matrices.

    Parameters
    ----------
    cfg : ConfigSpace
        AutoTVM tuning space config
    x : tvm.Tensor
        3-D with shape [batch, M, K]
    y : tvm.Tensor
        3-D with shape [batch, N, K]

    Returns
    -------
    output : tvm.Tensor
        3-D with shape [batch, M, N]
    """
    assert len(x.shape) == 3 and len(y.shape) == 3, "only support 3-dim batch_matmul"
    XB, M, XK = get_const_tuple(x.shape)
    YB, N, YK = get_const_tuple(y.shape)
    assert XB == YB, "batch dimension doesn't match"
    assert XK == YK, "shapes of x and y are inconsistent"
    # Record the FLOP count (2 * batch * M * N * K) for AutoTVM statistics.
    cfg.add_flop(XB * M * N * XK * 2)
    # y is stored [batch, N, K], so the second operand is passed transposed.
    return cblas.batch_matmul(x, y, False, True)
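# A minimal sketch of how this declaration might be driven from an AutoTVM
# template (pre-0.7 API; the template name and shapes below are illustrative
# assumptions, not from the source):
import tvm
from tvm import autotvm

@autotvm.template
def batch_matmul_cblas_template(batch, M, K, N):
    # Declare the two batched operands; y is stored [batch, N, K].
    x = tvm.placeholder((batch, M, K), name="x")
    y = tvm.placeholder((batch, N, K), name="y")
    cfg = autotvm.get_config()           # ConfigSpace handed to the compute
    out = batch_matmul_cblas(cfg, x, y)  # [batch, M, N]
    s = tvm.create_schedule(out.op)      # extern op: nothing to tune here
    return s, [x, y, out]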
# Excerpt from the pre-0.7 test suite; imports needed by this snippet:
import numpy as np
import tvm
import tvm.testing
import topi.testing
from tvm.contrib import cblas


def verify_batch_matmul(batch, m, l, n, transa=False, transb=False, iterative=False, dtype=tvm.float32):
    ashape = (batch, l, n) if transa else (batch, n, l)
    bshape = (batch, m, l) if transb else (batch, l, m)
    A = tvm.placeholder(ashape, name='A', dtype=dtype)
    B = tvm.placeholder(bshape, name='B', dtype=dtype)
    C = cblas.batch_matmul(A, B, transa, transb)
    D = tvm.compute(C.shape, lambda k, i, j: C[k, i, j], name="D")
    s = tvm.create_schedule(D.op)

    def get_numpy(a, b, transa, transb):
        # Normalize both operands to the [batch, M, K] / [batch, N, K]
        # layout expected by the reference implementation.
        if transa:
            a = a.transpose(0, 2, 1)
        if not transb:
            b = b.transpose(0, 2, 1)
        return topi.testing.batch_matmul(a, b)

    def verify(target="llvm"):
        if not tvm.module.enabled(target):
            print("skip because %s is not enabled..." % target)
            return
        if not tvm.get_global_func("tvm.contrib.cblas.matmul", True):
            print("skip because extern function is not available")
            return
        ctx = tvm.cpu(0)
        f = tvm.build(s, [A, B, D], target)
        a = tvm.nd.array(np.random.uniform(size=ashape).astype(A.dtype), ctx)
        b = tvm.nd.array(np.random.uniform(size=bshape).astype(B.dtype), ctx)
        d = tvm.nd.array(np.zeros((batch, n, m), dtype=D.dtype), ctx)
        f(a, b, d)
        tvm.testing.assert_allclose(
            d.asnumpy(), get_numpy(a.asnumpy(), b.asnumpy(), transa, transb), rtol=1e-5)

    verify()
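# Illustrative driver covering all combinations of the transpose flags
# (sizes are arbitrary examples, not from the source):
verify_batch_matmul(16, 235, 128, 1024)
verify_batch_matmul(16, 235, 128, 1024, transa=True)
verify_batch_matmul(16, 235, 128, 1024, transb=True)
verify_batch_matmul(16, 235, 128, 1024, transa=True, transb=True)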
# Excerpt from the 0.7+ test suite; imports needed by this snippet:
import numpy as np
import tvm
import tvm.testing
import tvm.topi.testing
from tvm import te


def verify_batch_matmul(batch, m, l, n, lib, transa=False, transb=False, iterative=False, dtype="float32"):
    ashape = (batch, l, n) if transa else (batch, n, l)
    bshape = (batch, m, l) if transb else (batch, l, m)
    A = te.placeholder(ashape, name="A", dtype=dtype)
    B = te.placeholder(bshape, name="B", dtype=dtype)
    # Use the `lib` argument so different BLAS bindings can be exercised
    # uniformly (the original called cblas.batch_matmul here, bypassing `lib`).
    C = lib.batch_matmul(A, B, transa, transb)
    D = te.compute(C.shape, lambda k, i, j: C[k, i, j], name="D")
    s = te.create_schedule(D.op)

    def get_numpy(a, b, transa, transb):
        if transa:
            a = a.transpose(0, 2, 1)
        if not transb:
            b = b.transpose(0, 2, 1)
        return tvm.topi.testing.batch_matmul(a, b)

    def compile(f, name="test_batch_matmul", ext=".so"):
        # For the "c" target, export and reload the module to get a callable.
        path = name + ext
        f.export_library(path)
        mod = tvm.runtime.load_module(path)
        f = mod[name]
        return f

    def verify(target="llvm"):
        if not tvm.testing.device_enabled(target):
            print("skip because %s is not enabled..." % target)
            return
        if not tvm.get_global_func(lib.__name__ + ".matmul", True):
            print("skip because extern function is not available")
            return
        dev = tvm.cpu(0)
        name = "test_batch_matmul"
        f = tvm.build(s, [A, B, D], target, name=name)
        if target == "c":
            f = compile(f, name)
        a = tvm.nd.array(np.random.uniform(size=ashape).astype(A.dtype), dev)
        b = tvm.nd.array(np.random.uniform(size=bshape).astype(B.dtype), dev)
        d = tvm.nd.array(np.zeros((batch, n, m), dtype=D.dtype), dev)
        f(a, b, d)
        tvm.testing.assert_allclose(
            d.asnumpy(), get_numpy(a.asnumpy(), b.asnumpy(), transa, transb), rtol=1e-5)

    verify("llvm")
    verify("c")
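# Illustrative driver for the `lib`-parameterized variant: any contrib module
# exposing batch_matmul with a registered "<module>.matmul" packed function
# (e.g. tvm.contrib.mkl, when TVM is built with MKL) could be passed as `lib`.
from tvm.contrib import cblas

verify_batch_matmul(16, 235, 128, 1024, cblas)
verify_batch_matmul(16, 235, 128, 1024, cblas, transa=True, transb=True)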
# Excerpt (pre-0.7 TVM API); imports needed by this snippet:
import tvm
from tvm.contrib import cblas


def batch_matmul_x86(x, y):
    """Compute batch matrix multiplication of `x` and `y`, where `x` and `y`
    are batches of matrices.

    Parameters
    ----------
    x : tvm.Tensor
        3-D with shape [batch, M, K]
    y : tvm.Tensor
        3-D with shape [batch, N, K]

    Returns
    -------
    output : tvm.Tensor
        3-D with shape [batch, M, N]
    """
    target = tvm.target.current_target()
    # Offload to CBLAS when the target was created with -libs=cblas.
    if "cblas" in target.libs:
        return cblas.batch_matmul(x, y, False, True)
    # batch_matmul_default is defined elsewhere in this module.
    return batch_matmul_default(x, y)
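# Dispatch is driven entirely by the current target's `libs`; a minimal
# sketch (pre-0.7 target API; the shapes are illustrative assumptions):
import tvm

x = tvm.placeholder((8, 64, 32), name="x")   # [batch, M, K]
y = tvm.placeholder((8, 128, 32), name="y")  # [batch, N, K]

with tvm.target.create("llvm -libs=cblas"):
    out = batch_matmul_x86(x, y)  # offloaded to cblas.batch_matmul
with tvm.target.create("llvm"):
    out = batch_matmul_x86(x, y)  # falls back to batch_matmul_default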
# Excerpt (pre-0.7 TVM API); imports needed by this snippet:
import tvm
from tvm.contrib import cblas
from topi.util import get_const_tuple


def _declaration_batch_matmul_nopack(cfg, x, y):
    """Compute batch matrix multiplication of `x` and `y`, where `x` and `y`
    are batches of matrices.

    Parameters
    ----------
    cfg : ConfigSpace
        AutoTVM tuning space config
    x : tvm.Tensor
        3-D with shape [batch, M, K]
    y : tvm.Tensor
        3-D with shape [batch, N, K]

    Returns
    -------
    output : tvm.Tensor
        3-D with shape [batch, M, N]
    """
    target = tvm.target.current_target()
    if "cblas" in target.libs:
        return cblas.batch_matmul(x, y, False, True)
    assert len(x.shape) == 3 and len(y.shape) == 3, "only support 3-dim batch_matmul"
    XB, M, XK = get_const_tuple(x.shape)
    YB, N, YK = get_const_tuple(y.shape)
    assert XB == YB, "batch dimension doesn't match"
    assert XK == YK, "shapes of x and y are inconsistent"
    B = XB
    K = XK
    if cfg.is_fallback:
        # _default_batch_matmul_nopack_config is defined elsewhere in this module.
        _default_batch_matmul_nopack_config(cfg, M, N, K)
    k = tvm.reduce_axis((0, K), name='k')
    # Contract over K; y is stored [batch, N, K], so it is indexed [b, j, k].
    C = tvm.compute(
        (B, M, N),
        lambda b, i, j: tvm.sum(x[b, i, k] * y[b, j, k], axis=k),
        tag='batch_matmul')
    return C
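# NumPy reference for the compute above, making the [batch, N, K] layout of
# `y` explicit (a sketch for checking semantics, not part of the source):
import numpy as np

def batch_matmul_ref(x, y):
    # x: [batch, M, K], y: [batch, N, K] -> out: [batch, M, N]
    return np.einsum("bmk,bnk->bmn", x, y)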