import typing

import numpy as np

import tvm
import tvm.testing
import tvm.topi.testing
from tvm import te, topi
from tvm.topi.nn.utils import get_pad_tuple


def conv_bwd(N, CI, HI, WI, CO, HO, WO, KSIZE, stride, padding, dtype):
    strides = (stride, stride)
    shape_data = (N, CI, HI, WI)
    shape_weight = (CO, CI, KSIZE, KSIZE)
    shape_grad_output = (N, CO, HO, WO)

    # given tensors
    data = te.placeholder(shape_data, name="data", dtype=dtype)
    weight = te.placeholder(shape_weight, name="weight", dtype=dtype)
    grad_output = te.placeholder(shape_grad_output, name="grad_output", dtype=dtype)

    # grad_data: transposed convolution of grad_output with the weight
    out_h = (HO - 1) * strides[0] - 2 * padding + KSIZE
    out_w = (WO - 1) * strides[1] - 2 * padding + KSIZE
    output_padding = (HI - out_h, WI - out_w)
    grad_data = topi.nn.conv2d_transpose_nchw(grad_output, weight, strides,
                                              padding, dtype, output_padding)

    # grad_weight: grouped convolution of data with grad_output
    dilation_h, dilation_w = (1, 1)
    batch, in_channel, in_h, in_w = shape_data
    out_channel, _, filter_h, filter_w = shape_weight

    grad_output_tmp = topi.tile(grad_output, [1, in_channel, 1, 1])
    grad_output_tmp = topi.reshape(grad_output_tmp,
                                   [batch * in_channel * out_channel, 1, HO, WO])
    data_tmp = topi.reshape(data, [1, in_channel * batch, HI, WI])

    grad_weight = topi.nn.group_conv2d_nchw(data_tmp,
                                            grad_output_tmp,
                                            stride=(dilation_h, dilation_w),
                                            padding=padding,
                                            dilation=strides,
                                            groups=in_channel * batch,
                                            out_dtype=dtype)

    # infer shape of grad_weight
    _, _, grad_h, grad_w = shape_grad_output
    fpad_top, fpad_left, fpad_bottom, fpad_right = get_pad_tuple(
        padding, (filter_h, filter_w))
    padded_weight_grad_h = (in_h - (grad_h - 1) * strides[0] - 1 + fpad_top +
                            fpad_bottom) // dilation_h + 1
    padded_weight_grad_w = (in_w - (grad_w - 1) * strides[1] - 1 + fpad_left +
                            fpad_right) // dilation_w + 1

    grad_weight = topi.reshape(grad_weight, [
        batch, in_channel, out_channel, padded_weight_grad_h,
        padded_weight_grad_w
    ])
    grad_weight = topi.sum(grad_weight, axis=0)
    grad_weight = topi.transpose(grad_weight, [1, 0, 2, 3])

    if padded_weight_grad_h > filter_h or padded_weight_grad_w > filter_w:
        grad_weight = topi.strided_slice(grad_weight,
                                         begin=[0, 0, 0, 0],
                                         end=[out_channel, in_channel,
                                              filter_h, filter_w])

    return [data, weight, grad_output, grad_data, grad_weight]
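
# A minimal, hedged usage sketch for conv_bwd: build both gradients with a
# naive (untuned) te.create_schedule and run on LLVM. The shapes, target,
# and the `_example_conv_bwd` wrapper are illustrative assumptions, not part
# of the original snippet.
def _example_conv_bwd():
    tensors = conv_bwd(N=1, CI=3, HI=8, WI=8, CO=4, HO=8, WO=8,
                       KSIZE=3, stride=1, padding=1, dtype="float32")
    data, weight, grad_output, grad_data, grad_weight = tensors
    s = te.create_schedule([grad_data.op, grad_weight.op])
    f = tvm.build(s, tensors, "llvm")
    dev = tvm.cpu(0)
    nds = [tvm.nd.array(
        np.random.uniform(size=[int(d) for d in t.shape]).astype(t.dtype), dev)
        for t in tensors]
    f(*nds)  # nds[3] now holds grad_data, nds[4] holds grad_weight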
def verify_reshape(src_shape, dst_shape):
    A = te.placeholder(shape=src_shape, name="A")
    B = topi.reshape(A, dst_shape)

    def check_device(device, ctx):
        print("Running on target: %s" % device)
        with tvm.target.Target(device):
            s = tvm.topi.testing.get_injective_schedule(device)(B)
        foo = tvm.build(s, [A, B], device, name="reshape")
        data_npy = np.random.normal(size=src_shape).astype(A.dtype)
        out_npy = np.reshape(data_npy, newshape=dst_shape)
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.empty(dst_shape, ctx=ctx, dtype=B.dtype)
        foo(data_nd, out_nd)
        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device, ctx in tvm.testing.enabled_targets():
        check_device(device, ctx)
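
# Example invocations for verify_reshape (the shapes are arbitrary; the
# element counts of src and dst must match).
def test_reshape():
    verify_reshape((1, 2, 3, 4), (2, 3, 4))
    verify_reshape((4, 2, 3, 4), (2, 4, 12))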
def batch_norm_fwd(N, C, H, W, dtype="float32"):
    dshape = (N, C, H, W)
    oshape = (C, )
    bshape = (1, C, 1, 1)
    sshape = (1, )
    data = te.placeholder(dshape, name="data", dtype=dtype)
    scale = te.placeholder(oshape, name="scale", dtype=dtype)
    bias = te.placeholder(oshape, name="bias", dtype=dtype)
    running_mean = te.placeholder(oshape, name="running_mean", dtype=dtype)
    running_var = te.placeholder(oshape, name="running_var", dtype=dtype)
    eps = te.placeholder(sshape, name="eps", dtype=dtype)
    momentum = te.placeholder(sshape, name="momentum", dtype=dtype)

    axis = (0, 2, 3)
    num_ele = dshape[0] * dshape[2] * dshape[3]
    frac_num_ele = 1.0 / num_ele

    # compute batch mean
    mean_sum = topi.sum(data, axis, keepdims=True)
    saved_mean = topi.multiply(mean_sum, frac_num_ele)

    # compute batch standard deviation: sqrt(var + eps)
    var_sub = topi.subtract(data, saved_mean)
    var_mul = topi.multiply(var_sub, var_sub)
    var_sum = topi.sum(var_mul, axis, keepdims=True)
    var = topi.multiply(var_sum, frac_num_ele)
    output_add = topi.add(var, eps)
    saved_rvars = topi.sqrt(output_add)

    # compute output
    output_sub = topi.subtract(data, saved_mean)
    output_norm = topi.divide(output_sub, saved_rvars)
    scale_board = topi.reshape(scale, bshape)
    bias_board = topi.reshape(bias, bshape)
    output = topi.add(topi.multiply(output_norm, scale_board), bias_board)

    # reshape saved_rvars
    saved_rvars = topi.reshape(saved_rvars, oshape)

    # update running mean
    running_mean_mul1 = topi.multiply(running_mean,
                                      topi.subtract(1.0, momentum))
    running_mean_mul2 = topi.multiply(topi.reshape(saved_mean, oshape),
                                      momentum)
    running_mean_out = topi.add(running_mean_mul1, running_mean_mul2)

    # update running var
    saved_var_mul1 = topi.multiply(running_var, topi.subtract(1.0, momentum))
    saved_var_mul2 = topi.multiply(topi.reshape(var, oshape), momentum)
    running_var_out = topi.add(saved_var_mul1, saved_var_mul2)

    # reshape saved_mean
    saved_mean = topi.reshape(saved_mean, oshape)

    return [
        data, scale, bias, running_mean, running_var, momentum, eps, output,
        saved_mean, saved_rvars, running_mean_out, running_var_out
    ]
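
# A numpy reference for batch_norm_fwd (a hedged sketch for cross-checking
# the TE graph; the helper name and scalar eps/momentum are assumptions).
# Per channel: saved_mean = mean(x), saved_rvars = sqrt(var(x) + eps) with
# biased variance, output = scale * (x - mean) / saved_rvars + bias, and
# running_* = (1 - momentum) * running_* + momentum * batch statistic.
def batch_norm_fwd_ref(x, scale, bias, running_mean, running_var,
                       momentum, eps):
    axis = (0, 2, 3)
    mean = x.mean(axis=axis, keepdims=True)
    var = x.var(axis=axis, keepdims=True)  # biased: divide by N*H*W
    rstd = np.sqrt(var + eps)
    out = (scale.reshape(1, -1, 1, 1) * (x - mean) / rstd
           + bias.reshape(1, -1, 1, 1))
    new_mean = (1 - momentum) * running_mean + momentum * mean.ravel()
    new_var = (1 - momentum) * running_var + momentum * var.ravel()
    return out, mean.ravel(), rstd.ravel(), new_mean, new_var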
def batch_norm_bwd(N, C, H, W, dtype="float32"):
    dshape = (N, C, H, W)
    oshape = (C, )
    bshape = (1, C, 1, 1)
    sshape = (1, )
    data = te.placeholder(dshape, name="data", dtype=dtype)
    scale = te.placeholder(oshape, name="scale", dtype=dtype)
    saved_mean = te.placeholder(oshape, name="saved_mean", dtype=dtype)
    saved_var = te.placeholder(oshape, name="saved_var", dtype=dtype)
    eps = te.placeholder(sshape, name="eps", dtype=dtype)
    grad_output = te.placeholder(dshape, name="grad_output", dtype=dtype)

    axis = (0, 2, 3)
    num_ele = dshape[0] * dshape[2] * dshape[3]
    frac_num_ele = 1.0 / num_ele

    # recompute batch mean and variance, then normalize
    mean_sum = topi.sum(data, axis, True)
    mean = topi.multiply(mean_sum, frac_num_ele)
    var_sub = topi.subtract(data, mean)
    var_mul = topi.multiply(var_sub, var_sub)
    var_sum = topi.sum(var_mul, axis, True)
    var = topi.multiply(var_sum, frac_num_ele)
    var_eps = topi.add(var, eps)
    output_sqrt = topi.sqrt(var_eps)
    x_norm = topi.subtract(data, mean)
    x_hat = topi.divide(x_norm, output_sqrt)

    # compute grad_input:
    # grad_input = 1/(N*sqrt(var+eps)) *
    #              (N*dx_hat - sum(dx_hat) - x_hat*sum(dx_hat*x_hat))
    dx_hat = topi.multiply(grad_output, topi.reshape(scale, bshape))
    grad_input_sum1 = topi.sum(dx_hat * x_hat, axis, True)
    grad_input_sum2 = topi.sum(dx_hat, axis, True)
    grad_input_left = topi.divide(frac_num_ele, topi.sqrt(var_eps))
    grad_input_right1 = topi.subtract(topi.multiply(dx_hat, num_ele),
                                      grad_input_sum2)
    grad_input_right2 = topi.multiply(x_hat, grad_input_sum1)
    grad_input = topi.multiply(
        grad_input_left, topi.subtract(grad_input_right1, grad_input_right2))

    # compute grad_scale and grad_bias
    grad_scale = topi.sum(grad_output * x_hat, axis)
    grad_bias = topi.sum(grad_output, axis)

    return [
        data, scale, saved_mean, saved_var, eps, grad_output, grad_input,
        grad_scale, grad_bias
    ]
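
# A numpy reference for batch_norm_bwd (a hedged sketch; the name and
# signature are assumptions). With m = N*H*W per channel, it mirrors the
# grad_input formula in the comments above.
def batch_norm_bwd_ref(x, scale, grad_output, eps):
    axis = (0, 2, 3)
    m = x.shape[0] * x.shape[2] * x.shape[3]
    mean = x.mean(axis=axis, keepdims=True)
    var = x.var(axis=axis, keepdims=True)
    x_hat = (x - mean) / np.sqrt(var + eps)
    dx_hat = grad_output * scale.reshape(1, -1, 1, 1)
    s1 = (dx_hat * x_hat).sum(axis=axis, keepdims=True)
    s2 = dx_hat.sum(axis=axis, keepdims=True)
    grad_input = (m * dx_hat - s2 - x_hat * s1) / (m * np.sqrt(var + eps))
    grad_scale = (grad_output * x_hat).sum(axis=axis)
    grad_bias = grad_output.sum(axis=axis)
    return grad_input, grad_scale, grad_bias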
# Legacy-API variant of verify_reshape above (pre-0.7 TVM: tvm.context,
# tvm.target.create, and get_all_backend from the old topi test helpers).
def verify_reshape(src_shape, dst_shape):
    A = te.placeholder(shape=src_shape, name="A")
    B = topi.reshape(A, dst_shape)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = tvm.topi.testing.get_injective_schedule(device)(B)
        foo = tvm.build(s, [A, B], device, name="reshape")
        data_npy = np.random.normal(size=src_shape).astype(A.dtype)
        out_npy = np.reshape(data_npy, newshape=dst_shape)
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.empty(dst_shape, ctx=ctx, dtype=B.dtype)
        foo(data_nd, out_nd)
        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in get_all_backend():
        check_device(device)
def test_topi():
    X = te.placeholder((1, 2, 4, 4), name="X")
    W = te.placeholder((5, 2, 3, 3), name="W")
    W1 = te.placeholder((2, 5, 3, 3), name="W1")
    W2 = te.placeholder((1, ), name="W2")

    R = topi.nn.conv2d(X, W, 1, 1, 1)
    check_grad(R, [X, W])

    R1 = topi.nn.conv2d(topi.nn.relu(R), W1, 1, 0, 1)
    check_grad(R1, [X, W, W1])

    R = topi.broadcast_to(W2, (5, 2, 3, 3))
    check_grad(R, [W2])

    R = topi.nn.conv2d(X, topi.broadcast_to(W2, (5, 2, 3, 3)), 1, 1, 1)
    check_grad(R, [X, W2])

    R = topi.nn.pool(X, [2, 2], [2, 2], [0, 0, 0, 0], "avg")
    check_grad(R, X)

    R = topi.nn.pool(X, [2, 2], [2, 2], [0, 0, 0, 0], "max")
    check_grad(R, X)

    # (1, 2, 4, 4) has 32 elements, matching the reshape target
    X = te.placeholder((1, 2, 4, 4), name="X")
    R = topi.reshape(X, (1, 32))
    check_grad(R, [X])

    X = te.placeholder((1, 2, 5, 5), name="X")
    W = te.placeholder((2, 2, 3, 3), name="W")

    S = topi.reshape(X, (1, 50))
    check_grad(S, [X])

    R = X + topi.nn.conv2d(X + topi.nn.conv2d(X, W, 1, 1, 1), W, 1, 1, 1)
    check_grad(R, [X, W])

    S = topi.nn.softmax(topi.reshape(R, (1, 50)))
    check_grad(S, [X, W])

    S = topi.sigmoid(topi.reshape(R, (1, 50)))
    check_grad(S, [X, W])

    S = topi.tanh(topi.reshape(R, (1, 50)))
    check_grad(S, [X, W])

    S = topi.nn.log_softmax(topi.reshape(R, (1, 50)))
    check_grad(S, [X, W])
    check_grad(S, [W], [X])

    X = te.placeholder((1, 2, 3, 5), name="X")
    Y = te.placeholder((1, 2, 7, 5), name="Y")
    S = topi.concatenate((X, Y), 2)
    check_grad(S, [X, Y])

    X = te.placeholder((1, 2, 6, 5), name="X")
    (S, R) = topi.split(X, 2, 2)
    check_grad(S, [X])
    check_grad(R, [X])
    R1 = topi.concatenate((S, R), 2)
    check_grad(R1, [X])
    R2 = topi.concatenate((R, S), 2)
    check_grad(R2, [X])

    X = te.placeholder((4, 5), name="X")
    I = te.placeholder((100, ), name="I", dtype="int32")
    R = topi.take(X, topi.abs(I))
    check_grad(R, [X], [I])

    W = te.placeholder((5, 5), name="W")
    exps = topi.exp(topi.nn.dense(X, W))
    sumexps = topi.sum(exps, axis=-1, keepdims=True)
    R = exps / sumexps
    check_grad(R, [X, W], data_range=(-1, 1))
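
# `check_grad` is not defined in this snippet. Below is a minimal, hedged
# sketch of the helper the test above assumes, modeled on TVM's te autodiff
# tests: differentiate sum(out) w.r.t. `inputs` with te.gradient, build with
# naive schedules, and compare against numeric gradients. The signature and
# defaults here are assumptions, not the original helper.
def check_grad(out, inputs, args=None, data_range=(-10, 10)):
    inputs = inputs if isinstance(inputs, list) else [inputs]
    args = args or []
    shape_of = lambda t: [int(d) for d in t.shape]

    def rand(t):
        # integer placeholders (e.g. take() indices) get small values
        if "int" in t.dtype:
            return np.random.randint(0, 10, size=shape_of(t)).astype(t.dtype)
        return np.random.uniform(*data_range, size=shape_of(t)).astype(t.dtype)

    # reverse-mode AD over the TE graph; a head of ones gives d(sum(out))/d(input)
    head = topi.full_like(out, 1.0)
    grads = te.gradient(out, inputs, head=head)

    fwd = tvm.build(te.create_schedule(out.op), [out] + inputs + args, "llvm")
    bwd = tvm.build(te.create_schedule([g.op for g in grads]),
                    list(grads) + inputs + args, "llvm")

    dev = tvm.cpu(0)
    in_vals = [rand(t) for t in inputs + args]
    grad_nds = [tvm.nd.empty(shape_of(g), g.dtype, dev) for g in grads]
    bwd(*grad_nds, *[tvm.nd.array(v, dev) for v in in_vals])

    def forward(*vals):
        out_nd = tvm.nd.empty(shape_of(out), out.dtype, dev)
        fwd(out_nd, *[tvm.nd.array(v, dev) for v in vals])
        return out_nd.asnumpy().sum()

    tvm.testing.check_numerical_grads(forward, in_vals,
                                      [g.asnumpy() for g in grad_nds])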
def batch_norm(
    data: te.Tensor,
    gamma: te.Tensor,
    beta: te.Tensor,
    moving_mean: te.Tensor,
    moving_var: te.Tensor,
    axis: typing.Optional[int] = None,
    epsilon: typing.Optional[float] = None,
    center: typing.Optional[bool] = None,
    scale: typing.Optional[bool] = None,
) -> typing.List[te.Tensor]:
    """Batch normalization layer (Ioffe and Szegedy, 2014).

    Normalizes the input at each batch, i.e. applies a transformation
    that maintains the mean activation close to 0 and the activation
    standard deviation close to 1.

    Parameters
    ----------
    data : tvm.te.Tensor
        Input to be batch-normalized.

    gamma : tvm.te.Tensor
        Scale factor to be applied to the normalized tensor.

    beta : tvm.te.Tensor
        Offset to be applied to the normalized tensor.

    moving_mean : tvm.te.Tensor
        Running mean of input.

    moving_var : tvm.te.Tensor
        Running variance of input.

    axis : int, optional, default=1
        Specify along which shape axis the normalization should occur.

    epsilon : float, optional, default=1e-5
        Small float added to variance to avoid dividing by zero.

    center : bool, optional, default=True
        If True, add offset of beta to normalized tensor.
        If False, beta is ignored.

    scale : bool, optional, default=True
        If True, scale normalized tensor by gamma. If False, gamma
        is ignored.

    Returns
    -------
    output : list of tvm.te.Tensor
        Normalized data with same shape as input

    moving_mean : tvm.te.Tensor
        Running mean of input.

    moving_var : tvm.te.Tensor
        Running variance of input.
    """
    if axis is None:
        axis = 1

    if epsilon is None:
        epsilon = 1e-5

    if center is None:
        center = True

    if scale is None:
        scale = True

    shape = [1] * len(data.shape)
    shape[axis] = data.shape[axis]

    moving_mean_rs = topi.reshape(moving_mean, shape)
    moving_var_rs = topi.reshape(moving_var, shape)

    out = (data - moving_mean_rs) / topi.math.sqrt(moving_var_rs + epsilon)

    if scale:
        out = out * topi.reshape(gamma, shape)
    if center:
        out = out + topi.reshape(beta, shape)

    # Moving mean and var aren't updated during test. To avoid
    # placeholder reuse, we multiply by 1 and return them.
    return [out, moving_mean * 1, moving_var * 1]
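
# A hedged usage sketch for the inference-mode batch_norm above; the shapes,
# target, and `_example_batch_norm` wrapper are illustrative assumptions.
def _example_batch_norm():
    data = te.placeholder((2, 4, 8, 8), name="data")
    gamma = te.placeholder((4,), name="gamma")
    beta = te.placeholder((4,), name="beta")
    mean = te.placeholder((4,), name="moving_mean")
    var = te.placeholder((4,), name="moving_var")
    out, new_mean, new_var = batch_norm(data, gamma, beta, mean, var,
                                        axis=1, epsilon=1e-5)
    s = te.create_schedule([out.op, new_mean.op, new_var.op])
    f = tvm.build(s, [data, gamma, beta, mean, var, out, new_mean, new_var],
                  "llvm")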
def topi_cuda_calc_func(channel, x, y):
    # broadcast-add a per-channel tensor y onto x ((C, H, W) layout)
    return topi.add(x, topi.reshape(y, (channel, 1, 1)))
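
# Example: per-channel bias-add with topi_cuda_calc_func. Despite "cuda" in
# the name, this sketch builds for LLVM with a naive schedule; the wrapper
# and shapes are illustrative assumptions.
def _example_calc_func():
    x = te.placeholder((16, 8, 8), name="x")   # (C, H, W)
    y = te.placeholder((16,), name="y")        # per-channel bias
    out = topi_cuda_calc_func(16, x, y)        # y broadcast to (16, 1, 1)
    s = te.create_schedule(out.op)
    f = tvm.build(s, [x, y, out], "llvm")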