def verify_matmul(sa, sb, transp_a, transp_b):
    """Compare topi.matmul (run through with_tvm) against np.matmul.

    Both operands are sampled uniformly from [-1, 1) and cast to float32.

    Args:
        sa: shape passed as ``size`` when sampling the left operand.
        sb: shape passed as ``size`` when sampling the right operand.
        transp_a: if truthy, the left operand is transposed before the
            reference multiplication; also forwarded to topi.matmul.
        transp_b: if truthy, the right operand is transposed before the
            reference multiplication; also forwarded to topi.matmul.

    Raises:
        AssertionError: if the two results differ beyond the tolerances.
    """
    a = np.random.uniform(low=-1.0, high=1.0, size=sa).astype(np.float32)
    b = np.random.uniform(low=-1.0, high=1.0, size=sb).astype(np.float32)
    c1 = np.matmul(np.transpose(a) if transp_a else a,
                   np.transpose(b) if transp_b else b)
    c2 = with_tvm(lambda A, B: topi.matmul(A, B, transp_a, transp_b), a, b)
    # The inputs are symmetric around zero, so entries of the product can be
    # arbitrarily close to zero. A relative-only check (atol defaults to 0)
    # rejects float32 rounding noise on such entries, hence the absolute
    # tolerance.
    np.testing.assert_allclose(c1, c2, rtol=1e-5, atol=1e-5)
def verify_matmul(sa, sb, transp_a, transp_b):
    """Check that topi.matmul agrees with np.matmul on random float32 data.

    Args:
        sa: shape used as ``size`` when sampling the first operand.
        sb: shape used as ``size`` when sampling the second operand.
        transp_a: truthy to transpose the first operand in the NumPy
            reference; forwarded as-is to topi.matmul.
        transp_b: truthy to transpose the second operand in the NumPy
            reference; forwarded as-is to topi.matmul.

    Raises:
        AssertionError: when tvm.testing.assert_allclose rejects the pair
            of results (rtol=1e-5, atol=1e-5).
    """
    lhs = np.random.uniform(low=-1.0, high=1.0, size=sa).astype(np.float32)
    rhs = np.random.uniform(low=-1.0, high=1.0, size=sb).astype(np.float32)

    # NumPy reference result.
    lhs_ref = lhs
    if transp_a:
        lhs_ref = np.transpose(lhs)
    rhs_ref = rhs
    if transp_b:
        rhs_ref = np.transpose(rhs)
    expected = np.matmul(lhs_ref, rhs_ref)

    def _build_matmul(A, B):
        # Same call the test exercises, with the transpose flags bound.
        return topi.matmul(A, B, transp_a, transp_b)

    actual = with_tvm(_build_matmul, lhs, rhs)
    tvm.testing.assert_allclose(expected, actual, rtol=1e-5, atol=1e-5)
def MatMul(device="llvm", lib_path="./",
           ndim_a=None, ndim_b=None, dtype=None,
           transpose_a=None, transpose_b=None):
    '''
    Declare a matmul over symbolic dimensions and pass it to Genlib.

    Args:
        device: forwarded unchanged to Genlib
        lib_path: forwarded unchanged to Genlib
        ndim_a: only used when formatting the op name (with %d)
        ndim_b: only used when formatting the op name (with %d)
        dtype: dtype of both placeholders; also embedded in the op name
        transpose_a: when truthy the first placeholder has shape (k, m)
            instead of (m, k); forwarded to topi.matmul and formatted
            with %d in the op name
        transpose_b: when truthy the second placeholder has shape
            (n_dim, k) instead of (k, n_dim); forwarded to topi.matmul
            and formatted with %d in the op name

    Returns:
        None
    '''
    dim_m = tvm.var("m")
    dim_k = tvm.var("k")
    dim_n = tvm.var("n_dim")

    # Operand shapes depend on whether each side is supplied transposed.
    if transpose_a:
        a_shape = (dim_k, dim_m)
    else:
        a_shape = (dim_m, dim_k)
    if transpose_b:
        b_shape = (dim_n, dim_k)
    else:
        b_shape = (dim_k, dim_n)

    name_fields = (ndim_a, ndim_b, dtype, transpose_a, transpose_b)
    opname = "MatMul_ndimA%d_ndimB%d_%s_%d_%d" % name_fields
    print(opname)

    # define compute
    lhs = tvm.placeholder(a_shape, dtype=dtype, name='in_tensor')
    rhs = tvm.placeholder(b_shape, dtype=dtype, name='b_tensor')
    product = topi.matmul(lhs, rhs, transpose_a, transpose_b)

    schedule = topi.generic.schedule_elemwise(product)
    Genlib(schedule, [lhs, rhs, product], device, opname, lib_path)
# float32 placeholders of shape (batch_size, num_hidden); they seed new_s and
# new_h below.
s = tvm.placeholder((batch_size, num_hidden), 'float32')
h = tvm.placeholder((batch_size, num_hidden), 'float32')

# Tensors and vars for training graph: one float32 placeholder per shape in
# `sizes`.
# NOTE(review): the comprehension variable `x` shadows the outer `x` used just
# below. This is harmless on Python 3, where comprehension variables are local
# to the comprehension.
weights = [tvm.placeholder(x, 'float32') for x in sizes]

# Construct model: reshape x to (batch_size, num_timesteps, num_input), split
# it into num_timesteps pieces along axis 1, then reshape every piece to
# (batch_size, num_input).
xs = topi.split(topi.reshape(x, (batch_size, num_timesteps, num_input)), num_timesteps, axis=1)
xs = [topi.reshape(x, (batch_size, num_input)) for x in xs]
new_s = s
new_h = h
for i in range(num_timesteps):
    # Input of step i joined with the previous new_h along axis 1.
    inp = topi.concatenate([xs[i], new_h], 1)
    # weights[0..7] are consumed as (matmul operand, addend) pairs.
    # Presumably these are LSTM gate weights and biases; confirm against `sizes`.
    g = topi.tanh(topi.matmul(inp, weights[0]) + weights[1])
    j = topi.sigmoid(topi.matmul(inp, weights[2]) + weights[3])
    f = topi.sigmoid(topi.matmul(inp, weights[4]) + weights[5])
    o = topi.sigmoid(topi.matmul(inp, weights[6]) + weights[7])
    new_s = new_s * f + g * j
    new_h = topi.tanh(new_s) * o

# NOTE(review): assumed to sit after the loop, using the final new_h; confirm.
logits = topi.matmul(new_h, weights[8]) + weights[9]

# compute accuracy: count the positions where the argmax of y and the argmax
# of softmax(logits) along axis 1 agree, then divide by batch_size.
pred = topi.nn.softmax(logits)
correct_pred = topi.equal(topi.argmax(y, 1), topi.argmax(pred, 1))
accuracy = topi.sum(correct_pred.astype('float32')) / batch_size

# Define loss and optimizer