def test_log_pow_llvm():
    """Build B = log(A) ** 2 with TE, compile for LLVM, and verify numerically."""
    length_var = te.size_var("n")
    tensor_a = te.placeholder((length_var, ), name="A")
    tensor_b = te.compute(tensor_a.shape,
                          lambda *idx: te.power(te.log(tensor_a(*idx)), 2.0),
                          name="B")
    sched = te.create_schedule(tensor_b.op)
    # Split the single axis so the schedule has an outer/inner structure.
    sched[tensor_b].split(tensor_b.op.axis[0], factor=32)
    # Skip silently when this TVM build has no LLVM backend.
    if not tvm.testing.device_enabled("llvm"):
        return
    flog = tvm.build(sched, [tensor_a, tensor_b], "llvm", name="mylog")
    ctx = tvm.cpu(0)
    # Launch the kernel with a concrete size and random inputs.
    length = 1028
    arr_a = tvm.nd.array(np.random.uniform(size=length).astype(tensor_a.dtype), ctx)
    arr_b = tvm.nd.array(np.zeros(length, dtype=tensor_b.dtype), ctx)
    repeat = 10
    timer = flog.time_evaluator(flog.entry_name, ctx, number=1, repeat=repeat)
    timing = timer(arr_a, arr_b)
    assert len(timing.results) == repeat
    tvm.testing.assert_allclose(arr_b.asnumpy(),
                                np.power(np.log(arr_a.asnumpy()), 2.0),
                                rtol=1e-5)
def log_softmax(x, axis=-1):
    """Perform log softmax activation on the data.

    Parameters
    ----------
    x : tvm.te.Tensor
        2-D input data.

    axis : int, optional
        Axis to apply log softmax over; only the last axis
        (``-1`` or ``ndim - 1``) is supported.

    Returns
    -------
    output : tvm.te.Tensor
        2-D output with same shape.
    """
    assert len(x.shape) == 2, "only support 2-dim log softmax"
    # pylint: disable=R1714
    assert axis == -1 or axis == len(
        x.shape) - 1, "only support last axis log softmax"
    m, n = x.shape
    # Row-wise maximum, subtracted before exp for numerical stability.
    k = te.reduce_axis((0, n), name="k")
    max_elem = te.compute((m, ), lambda i: tvm.te.max(x[i, k], axis=k))
    # Rebind k: each te.compute needs its own fresh reduction variable.
    k = te.reduce_axis((0, n), name="k")
    expsum = te.compute(
        (m, ), lambda i: te.sum(te.exp(x[i, k] - max_elem[i]), axis=k))
    # log_softmax(x)[i, j] = x[i, j] - max_i - log(sum_j exp(x[i, j] - max_i))
    return te.compute(x.shape,
                      lambda i, j: x[i, j] - max_elem[i] - te.log(expsum[i]))
def test_log_pow_llvm():
    """Test log pow using llvm to lower."""
    # Stage the graph: out = log(in) ** 2, elementwise over a symbolic length.
    dim = te.size_var("n")
    a_in = te.placeholder((dim, ), name="A")
    b_out = te.compute(a_in.shape,
                       lambda *idx: te.power(te.log(a_in(*idx)), 2.0),
                       name="B")
    sch = te.create_schedule(b_out.op)
    # Split the axis; the resulting iter vars are not bound further.
    sch[b_out].split(b_out.op.axis[0], factor=32)
    # Nothing to do when the LLVM backend is unavailable.
    if not tvm.testing.device_enabled("llvm"):
        return
    flog = tvm.build(sch, [a_in, b_out], "llvm", name="mylog")
    dev = tvm.cpu(0)
    # Launch the kernel on concrete data.
    n_elems = 1028
    data_a = tvm.nd.array(
        np.random.uniform(size=n_elems).astype(a_in.dtype), dev)
    data_b = tvm.nd.array(np.zeros(n_elems, dtype=b_out.dtype), dev)
    repeat = 10
    timer = flog.time_evaluator(flog.entry_name, dev, number=1, repeat=repeat)
    prof = timer(data_a, data_b)
    assert len(prof.results) == repeat
    tvm.testing.assert_allclose(data_b.numpy(),
                                np.power(np.log(data_a.numpy()), 2.0),
                                rtol=1e-5)
def log(x):
    """Take logarithm of input x.

    Parameters
    ----------
    x : tvm.te.Tensor
        Input argument.

    Returns
    -------
    y : tvm.te.Tensor
        The result.
    """
    def _elementwise(*indices):
        # Apply te.log to each element of x.
        return te.log(x(*indices))

    return te.compute(x.shape, _elementwise)
def _normalize(max_elem, expsum, *indices):
    # Indices over the non-reduced dimensions; max_elem/expsum are indexed
    # by these positions only. (`x` and `get_non_reduce_indices` come from
    # the enclosing scope.)
    outer = get_non_reduce_indices(indices)
    # log_softmax = x - max - log(sum(exp(x - max)))
    shifted = x[indices] - max_elem[outer]
    return shifted - te.log(expsum[outer])
def test_basic_operation():
    """Exercise te gradient checking over a battery of elementwise,
    reduction, and custom-reducer expressions.

    Fix: removed the unused locals ``x = te.var("x")`` and
    ``l = te.reduce_axis(...)`` — neither was referenced anywhere below.
    """
    np.random.seed(0)
    shape = (10, 10)
    k = te.reduce_axis((0, 10), name="k")
    A0 = te.placeholder(shape, name='A0')
    A1 = te.placeholder(shape, name='A1')
    # Expected gradient for piecewise-constant ops (floor/ceil/trunc/round).
    zeros = np.zeros(shape)

    # Identity and simple arithmetic.
    B = te.compute(shape, lambda i, j: A0[i, j], name='B')
    check_grad(B, [A0])
    B = te.compute(shape, lambda i, j: A0[i, j] + A1[i, j], name='B')
    check_grad(B, [A0, A1])
    B = te.compute(shape, lambda i, j: A0[i, j] + A0[j, i], name='B')
    check_grad(B, A0)

    # Piecewise-constant ops: gradient is identically zero.
    B = te.compute(shape, lambda i, j: te.floor(A0[i, j]), name='B')
    check_grad(B, A0, desired_grads=[zeros])
    B = te.compute(shape, lambda i, j: te.ceil(A0[i, j]), name='B')
    check_grad(B, A0, desired_grads=[zeros])
    B = te.compute(shape, lambda i, j: te.trunc(A0[i, j]), name='B')
    check_grad(B, A0, desired_grads=[zeros])
    B = te.compute(shape, lambda i, j: te.round(A0[i, j]), name='B')
    check_grad(B, A0, desired_grads=[zeros])

    # Smooth nonlinear compositions.
    B = te.compute(shape, lambda i, j: A0[i, j] + te.exp(A0[j, i]), name='B')
    check_grad(B, A0)
    B = te.compute(
        shape,
        lambda i, j: te.log(0.1 + te.abs(A0[i, j] + te.exp(A0[j, i]))),
        name='B')
    check_grad(B, A0)
    B = te.compute(shape,
                   lambda i, j: te.sigmoid(A0[i, j] * A0[i, j] * A0[j, i]),
                   name='B')
    check_grad(B, A0)
    B = te.compute(shape,
                   lambda i, j: te.tanh(A0[i, j] * A0[i, j] * A0[j, i]),
                   name='B')
    check_grad(B, A0)
    # sqrt needs positive inputs; power(abs, y) needs a bounded range.
    B = te.compute(shape,
                   lambda i, j: te.sqrt(A0[i, j] * A0[i, j] * A0[j, i]),
                   name='B')
    check_grad(B, A0, data_range=(0.1, 10))
    B = te.compute(shape,
                   lambda i, j: te.power(te.abs(A0[i, j]), A0[j, i]),
                   name='B')
    check_grad(B, A0, data_range=(-4, 4))
    B = te.compute(shape, lambda i, j: A0[i, j] * A0[j, i], name='B')
    check_grad(B, A0)

    # Reductions (sum and max) over the shared axis k.
    B = te.compute((10, ),
                   lambda i: te.sum(A0[i, k] * A0[k, i], axis=k),
                   name='B')
    check_grad(B, A0)
    B = te.compute(shape,
                   lambda i, j: te.sum(A0[i, k] * A0[k, i] + 5, axis=k),
                   name='B')
    check_grad(B, A0)
    B = te.compute(shape,
                   lambda i, j: te.max(A0[i, k] * A0[k, j] + 5, axis=k),
                   name='B')
    check_grad(B, A0)

    B = te.compute(shape,
                   lambda i, j: A0[i, j] * (A1[j, i] + A0[j, i]),
                   name='B')
    check_grad(B, [A0, A1])
    # Reduction mixing clamped indexing with the reduce axis.
    B = te.compute(shape,
                   lambda i, j: te.sum(
                       A0[k, k] - A0[te.min(j + k, 9), j] * A0[i, k], axis=k),
                   name='B')
    check_grad(B, A0)

    # Custom commutative reducer: product over k.
    def fcombine(x, y):
        return x * y

    def fidentity(t0):
        return tvm.tir.const(1, t0)

    prod = te.comm_reducer(fcombine, fidentity, name='prod')
    B = te.compute((10, 10),
                   lambda i, j: prod(A0[i, k] + A0[k, i], axis=k),
                   name='B')
    check_grad(B, A0)

    # Gradient through topi.tensordot of two derived tensors.
    X = te.placeholder((10, ), name='X')
    A = te.compute((10, ), lambda i: X[i] + X[9 - i])
    B = te.compute((10, ), lambda i: X[i] * X[9 - i])
    Y = topi.tensordot(A, B, 1)
    check_grad(Y, X)
def make_matrix_softmax_cross_entropy(shape, tgt, tgt_host, func_name,
                                      dtype="float32"):
    """Build a compiled TVM function computing mean softmax cross-entropy.

    Parameters
    ----------
    shape : tuple of int
        (batch, num_classes) shape of both the logits and the one-hot labels.
    tgt : str
        Target backend, e.g. "llvm" or "cuda".
    tgt_host : str
        Host target passed to tvm.build.
    func_name : str
        Name of the generated function.
    dtype : str, optional
        Element type of the inputs.

    Returns
    -------
    f : tvm.runtime.Module
        Compiled function taking (logits, labels, out); out has shape (1,).

    Fixes
    -----
    * ``A_logsoftmax_sum`` was declared with output shape ``(shape[0], 0)``
      (an empty 2-D tensor) while its lambda takes a single index; the
      correct shape is ``(shape[0],)``.
    * Removed a duplicate ``te.create_schedule(B1.op)`` inside the cuda
      branch and a leftover debug ``print(tvm.lower(...))``.
    """
    A_ = te.placeholder(shape, dtype=dtype, name="A_")  # one-hot labels
    A = te.placeholder(shape, dtype=dtype, name="A")    # logits

    # Numerically-stable log-softmax: subtract the row max before exp.
    k = te.reduce_axis((0, A.shape[1]), name="k")
    A_max = te.compute((A.shape[0],), lambda i: te.max(A[i, k], axis=k))
    A_ex = te.compute(shape, lambda i, j: te.exp(A[i, j] - A_max[i]))
    k1 = te.reduce_axis((0, A.shape[1]), name="k1")
    A_ex_sum = te.compute((A.shape[0],),
                          lambda i: te.sum(A_ex[i, k1], axis=k1))
    A_logsoftmax = te.compute(shape,
                              lambda i, j: te.log(A_ex[i, j] / A_ex_sum[i]))

    # Per-row cross entropy: sum_j labels[i, j] * log_softmax[i, j].
    k2 = te.reduce_axis((0, shape[1]), name="k2")
    A_logsoftmax_sum = te.compute(
        (shape[0],),
        lambda i: te.sum(A_logsoftmax[i, k2] * A_[i, k2], axis=k2))

    # Negate, sum over the batch, then take the mean.
    k3 = te.reduce_axis((0, shape[0]), name="k3")
    B = te.compute((1,), lambda i: te.sum(-A_logsoftmax_sum[k3], axis=k3))
    B1 = te.compute((1,), lambda i: B[i] / shape[0])

    s = te.create_schedule(B1.op)
    if tgt == "cuda":
        num_thread = 64
        block_x = te.thread_axis("blockIdx.x")
        thread_x = te.thread_axis((0, num_thread), "threadIdx.x")
        s[A_ex].bind(A_ex.op.axis[0], block_x)
        s[A_max].bind(A_max.op.axis[0], block_x)

        # Cross-thread reduction of the exp-sum via rfactor; only thread 0
        # stores the final value.
        ko, ki = s[A_ex_sum].split(A_ex_sum.op.reduce_axis[0],
                                   factor=num_thread)
        EF = s.rfactor(A_ex_sum, ki)
        s[A_ex_sum].bind(s[A_ex_sum].op.axis[0], block_x)
        s[A_ex_sum].bind(s[A_ex_sum].op.reduce_axis[0], thread_x)
        s[EF].compute_at(s[A_ex_sum], s[A_ex_sum].op.reduce_axis[0])
        s[A_ex_sum].set_store_predicate(thread_x.var.equal(0))

        tx, xi = s[A_logsoftmax].split(A_logsoftmax.op.axis[1],
                                       nparts=num_thread)
        s[A_logsoftmax].bind(A_logsoftmax.op.axis[0], block_x)
        s[A_logsoftmax].bind(tx, thread_x)

        # Same rfactor pattern for the per-row cross-entropy reduction.
        klso, klsi = s[A_logsoftmax_sum].split(
            A_logsoftmax_sum.op.reduce_axis[0], factor=num_thread)
        lsEF = s.rfactor(A_logsoftmax_sum, klsi)
        s[A_logsoftmax_sum].bind(s[A_logsoftmax_sum].op.axis[0], block_x)
        s[A_logsoftmax_sum].bind(s[A_logsoftmax_sum].op.reduce_axis[0],
                                 thread_x)
        s[lsEF].compute_at(s[A_logsoftmax_sum],
                           s[A_logsoftmax_sum].op.reduce_axis[0])
        s[A_logsoftmax_sum].set_store_predicate(thread_x.var.equal(0))

        # Final batch reduction down to a scalar.
        kbo, kbi = s[B].split(B.op.reduce_axis[0], factor=num_thread)
        bEF = s.rfactor(B, kbi)
        s[B].bind(s[B].op.reduce_axis[0], thread_x)
        s[bEF].compute_at(s[B], s[B].op.reduce_axis[0])
        s[B].set_store_predicate(block_x.var.equal(0))
        s[B1].set_store_predicate(block_x.var.equal(0))

    f = tvm.build(s, [A, A_, B1], tgt, tgt_host, name=func_name)
    return f