def ReLU6Grad(y_grad, x, target=utils.CUDA):
    """
    Computes gradients of Rectified Linear 6.

    Args:
        y_grad (tvm.tensor.Tensor): Tensor of type float16/float32, gradients
            backpropagated to the ReLU6 op.
        x (tvm.tensor.Tensor): Tensor of type float16/float32, inputs that were
            passed to the ReLU6 op, or its outputs.

    Returns:
        tvm.tensor.Tensor, has the same type and shape as x.

    Supported Platforms:
        'GPU'
    """
    if target != utils.CUDA:
        raise RuntimeError("the target %s is not supported!" % target)

    shape = x.shape
    dtype = x.dtype

    zero = tvm.const(0, dtype)
    six = tvm.const(6, dtype)

    # Clamp below at 0, then zero out everything at or above 6.
    res0 = tvm.compute(shape, lambda *i: tvm.if_then_else(x(*i) >= zero, x(*i), zero))
    res6 = tvm.compute(
        shape, lambda *i: tvm.if_then_else(x(*i) >= six, zero, res0(*i)))
    # Pass the incoming gradient through only where the clamped value is non-zero,
    # i.e. where 0 < x < 6.
    res = tvm.compute(
        shape, lambda *i: tvm.if_then_else(res6(*i) == zero, zero, y_grad(*i)))
    return res
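# A minimal NumPy reference of the same ReLU6 gradient, useful for sanity-checking
# the kernel above on small inputs; this helper is illustrative only and not part
# of the original module.
import numpy as np

def relu6_grad_np(y_grad, x):
    # The gradient passes through only where 0 < x < 6, i.e. where the clamped
    # value (res6 above) is non-zero.
    return np.where((x > 0) & (x < 6), y_grad, 0).astype(x.dtype)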
def topi_nn_hsigmoid(x):
    """
    topi hsigmoid: 0 for x <= -3, 1 for x >= 3, (x + 3) / 6 in between.

    Args:
        x (tvm.tensor.Tensor): input tensor.

    Returns:
        tvm.tensor.Tensor, has the same shape as x.
    """
    return tvm.compute(x.shape,
                       lambda *i: tvm.if_then_else(
                           x(*i) <= -3, 0,
                           tvm.if_then_else(x(*i) >= 3, 1, (x(*i) + 3) / 6)))
def HSigmoidGrad(y_grad, x):
    """
    Computes gradients of the HSigmoid op: y_grad / 6 where -3 < x < 3, 0 elsewhere.

    Args:
        y_grad (tvm.tensor.Tensor): gradients backpropagated to the HSigmoid op.
        x (tvm.tensor.Tensor): inputs that were passed to the HSigmoid op.

    Returns:
        tvm.tensor.Tensor, has the same shape as x.
    """
    return tvm.compute(
        x.shape,
        lambda *i: tvm.if_then_else(
            x(*i) <= -3, 0,
            tvm.if_then_else(x(*i) >= 3, 0, y_grad(*i) / 6)))
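# NumPy references for the HSigmoid forward and backward formulas above, handy
# for quick numerical checks; illustrative helpers, not part of the original module.
import numpy as np

def hsigmoid_np(x):
    # 0 for x <= -3, 1 for x >= 3, (x + 3) / 6 in between.
    return np.clip((x + 3.0) / 6.0, 0.0, 1.0)

def hsigmoid_grad_np(y_grad, x):
    # Slope is 1/6 on (-3, 3) and 0 outside, matching HSigmoidGrad above.
    return np.where((x > -3) & (x < 3), y_grad / 6.0, 0.0)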
def topi_nn_HSwish(x):
    """
    topi HSwish: 0 for x <= -3, x for x >= 3, x * (x + 3) / 6 in between.

    Args:
        x (tvm.tensor.Tensor): input tensor.

    Returns:
        tvm.tensor.Tensor, has the same shape as x.
    """
    return tvm.compute(
        x.shape,
        lambda *i: tvm.if_then_else(
            x(*i) <= -3, 0,
            tvm.if_then_else(x(*i) >= 3, x(*i), x(*i) * (x(*i) + 3) / 6)))
def HSwishGrad(y_grad, x, target=utils.CUDA):
    """
    Computes gradients of the HSwish op.

    Args:
        y_grad (tvm.tensor.Tensor): gradients backpropagated to the HSwish op.
        x (tvm.tensor.Tensor): inputs that were passed to the HSwish op.

    Returns:
        tvm.tensor.Tensor, has the same shape as x.

    Supported Platforms:
        'GPU'
    """
    if target != utils.CUDA:
        raise RuntimeError("the target %s is not supported!" % target)

    shape = x.shape
    # Gradient is 0 for x <= -3, y_grad for x >= 3, and y_grad * (2x + 3) / 6 in between.
    res0 = tvm.compute(shape, lambda *i: tvm.if_then_else(
        x(*i) <= -3, 0, y_grad(*i) * (2 * x(*i) + 3) / 6))
    res6 = tvm.compute(shape, lambda *i: tvm.if_then_else(
        x(*i) >= 3, y_grad(*i), res0(*i)))
    return res6
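# NumPy references for the HSwish forward and backward formulas above; again
# illustrative helpers for verification only.
import numpy as np

def hswish_np(x):
    # 0 for x <= -3, x for x >= 3, x * (x + 3) / 6 in between.
    return np.where(x <= -3, 0.0, np.where(x >= 3, x, x * (x + 3.0) / 6.0))

def hswish_grad_np(y_grad, x):
    # Gradient is 0 for x <= -3, y_grad for x >= 3, y_grad * (2x + 3) / 6 in between.
    return np.where(x <= -3, 0.0, np.where(x >= 3, y_grad, y_grad * (2.0 * x + 3.0) / 6.0))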
def TensorcoreConv(data, weight, stride=[1, 1], pad=[0, 0, 0, 0], dilation=[1, 1],
                   out_dtype="float32", name="out", target=utils.CUDA):
    """NHWC convolution with OHWI weights, intended for tensor-core execution on GPU."""
    batch, in_h, in_w, in_c = data.shape
    out_c, k_h, k_w, _ = weight.shape
    pad_top, pad_bottom, pad_left, pad_right = pad
    s_h, s_w = stride
    d_h, d_w = dilation
    k_h_d = (k_h - 1) * d_h + 1
    k_w_d = (k_w - 1) * d_w + 1
    o_h = (in_h + pad_top + pad_bottom - k_h_d) // s_h + 1
    o_w = (in_w + pad_left + pad_right - k_w_d) // s_w + 1

    has_pad = not (pad_left == 0 and pad_right == 0 and pad_top == 0 and pad_bottom == 0)

    if has_pad:
        # Zero-pad the input; the valid region is pad_top <= h < in_h + pad_top
        # and pad_left <= w < in_w + pad_left.
        data_pad = tvm.compute(
            (batch, in_h + pad_top + pad_bottom, in_w + pad_left + pad_right, in_c),
            lambda n, h, w, i: tvm.if_then_else(
                tvm.all(h >= pad_top, h - pad_top < in_h, w >= pad_left, w - pad_left < in_w),
                data[n, h - pad_top, w - pad_left, i],
                tvm.const(0.0, "float16"),
            ),
            name="Pad",
        )
    else:
        data_pad = data

    rc = tvm.reduce_axis((0, in_c), name="rc")
    rh = tvm.reduce_axis((0, k_h), name="rh")
    rw = tvm.reduce_axis((0, k_w), name="rw")

    if out_dtype == "float32":
        out = tvm.compute(
            (batch, o_h, o_w, out_c),
            lambda n, h, w, o: tvm.sum(
                data_pad[n, (h * s_h + rh * d_h), (w * s_w + rw * d_w), rc].astype("float32") *
                weight[o, rh, rw, rc].astype("float32"),
                axis=[rc, rh, rw]),
            name=name)
    else:
        out = tvm.compute(
            (batch, o_h, o_w, out_c),
            lambda n, h, w, o: tvm.sum(
                data_pad[n, (h * s_h + rh * d_h), (w * s_w + rw * d_w), rc] *
                weight[o, rh, rw, rc],
                axis=[rc, rh, rw]),
            name=name)

    return out
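# A plain NumPy reference convolution with the same NHWC data / OHWI weight layout,
# stride, padding and dilation handling as TensorcoreConv above. It is slow and
# meant only for checking small cases; the helper name and defaults are illustrative.
import numpy as np

def conv2d_nhwc_ohwi_np(data, weight, stride=(1, 1), pad=(0, 0, 0, 0), dilation=(1, 1)):
    n, in_h, in_w, in_c = data.shape
    out_c, k_h, k_w, _ = weight.shape
    pad_top, pad_bottom, pad_left, pad_right = pad
    s_h, s_w = stride
    d_h, d_w = dilation
    k_h_d = (k_h - 1) * d_h + 1
    k_w_d = (k_w - 1) * d_w + 1
    o_h = (in_h + pad_top + pad_bottom - k_h_d) // s_h + 1
    o_w = (in_w + pad_left + pad_right - k_w_d) // s_w + 1
    x = np.pad(data, ((0, 0), (pad_top, pad_bottom), (pad_left, pad_right), (0, 0)))
    out = np.zeros((n, o_h, o_w, out_c), dtype=np.float32)
    for b in range(n):
        for h in range(o_h):
            for w in range(o_w):
                for rh in range(k_h):
                    for rw in range(k_w):
                        # Accumulate over the input-channel axis for all output channels.
                        patch = x[b, h * s_h + rh * d_h, w * s_w + rw * d_w, :].astype(np.float32)
                        out[b, h, w, :] += weight[:, rh, rw, :].astype(np.float32) @ patch
    return out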
def relu_grad(head, in_data):
    """Gradient of ReLU: pass `head` through where `in_data` >= 0, zero elsewhere."""
    shape = head.shape
    dtype = head.dtype
    zero = tvm.const(0, dtype)
    relugrad = tvm.compute(
        shape,
        lambda *i: tvm.if_then_else(in_data(*i) >= zero, head(*i), zero),
        tag=tag.INJECTIVE)
    return relugrad
def HSwishGrad(y_grad, x):
    """
    Computes gradients of the HSwish op.

    Args:
        y_grad (tvm.tensor.Tensor): gradients backpropagated to the HSwish op.
        x (tvm.tensor.Tensor): inputs that were passed to the HSwish op.

    Returns:
        tvm.tensor.Tensor, has the same shape as x.
    """
    shape = x.shape
    res0 = tvm.compute(
        shape,
        lambda *i: tvm.if_then_else(
            x(*i) <= -3, 0, y_grad(*i) * (2 * x(*i) + 3) / 6))
    res6 = tvm.compute(
        shape, lambda *i: tvm.if_then_else(x(*i) >= 3, y_grad(*i), res0(*i)))
    return res6
def fused_relu_grad_bn_double_reduce_grad(data0, data1, data2, data3, data4, data5, data6,
                                          data7, data8, data9, data10, data11, data12, data13,
                                          data14, data15, layout="NHWC", out_dtype="float16",
                                          target=utils.CUDA):
    """Fused ReLU gradient followed by two batch-norm reduce-grad branches."""
    if layout == 'NCHW':
        data5 = topi.transpose(data5, (0, 2, 3, 1))
        data9 = topi.transpose(data9, (0, 2, 3, 1))
        data13 = topi.transpose(data13, (0, 2, 3, 1))
        data14 = topi.transpose(data14, (0, 2, 3, 1))
        data15 = topi.transpose(data15, (0, 2, 3, 1))
    elif layout != 'NHWC':
        raise NotImplementedError('Layout not supported {} '.format(layout))

    inter_dtype = "float32"
    n, h, w, c = data5.shape
    scale = n * h * w

    mul = topi.multiply(data2, data3)
    mul1221 = topi.divide(mul, scale)

    # ReluGrad
    zero = tvm.const(0, data15.dtype)
    add = topi.add(data13, data14)
    addgrad = tvm.compute(add.shape,
                          lambda *i: tvm.if_then_else(data15(*i) >= zero, add(*i), zero),
                          tag=tag.INJECTIVE)
    addgrad = topi.cast(addgrad, inter_dtype)
    mul3283 = topi.multiply(scale, addgrad)
    sub1159 = topi.subtract(mul3283, data6)

    # First reduce-grad branch
    data5_cast = topi.cast(data5, inter_dtype)
    mul2372 = topi.divide(data4, scale)
    sub631 = topi.subtract(data5_cast, mul2372)
    mul1220 = topi.multiply(sub631, data1)
    div = topi.divide(mul1220, data0)
    sub271 = topi.subtract(sub1159, div)
    mul1218 = topi.multiply(mul1221, sub271)
    mul1218_cast = topi.cast(mul1218, out_dtype)

    # Second reduce-grad branch
    mul1231 = topi.multiply(data11, data12)
    mul1230 = topi.divide(mul1231, scale)
    data9_cast = topi.cast(data9, inter_dtype)
    mul2364 = topi.divide(data8, scale)
    sub625 = topi.subtract(data9_cast, mul2364)
    mul1229 = topi.multiply(data10, sub625)
    div272 = topi.divide(mul1229, data7)
    sub272 = topi.subtract(sub1159, div272)
    mul1228 = topi.multiply(mul1230, sub272)
    mul1228_cast = topi.cast(mul1228, out_dtype)

    if layout == "NCHW":
        mul1218_cast = topi.transpose(mul1218_cast, (0, 3, 1, 2))
        mul1228_cast = topi.transpose(mul1228_cast, (0, 3, 1, 2))
    return [mul1218_cast, mul1228_cast]
def ReLU6Grad(y_grad, x):
    """
    Computes gradients of Rectified Linear 6.

    Args:
        y_grad (tvm.tensor.Tensor): Tensor of type float16/float32, gradients
            backpropagated to the ReLU6 op.
        x (tvm.tensor.Tensor): Tensor of type float16/float32, inputs that were
            passed to the ReLU6 op, or its outputs.

    Returns:
        tvm.tensor.Tensor, has the same type and shape as x.
    """
    shape = x.shape
    dtype = x.dtype

    zero = tvm.const(0, dtype)
    six = tvm.const(6, dtype)

    # Clamp below at 0, then zero out everything at or above 6; the gradient
    # passes through only where the result is non-zero (0 < x < 6).
    res0 = tvm.compute(shape, lambda *i: tvm.if_then_else(x(*i) >= zero, x(*i), zero))
    res6 = tvm.compute(
        shape, lambda *i: tvm.if_then_else(x(*i) >= six, zero, res0(*i)))
    res = tvm.compute(
        shape, lambda *i: tvm.if_then_else(res6(*i) == zero, zero, y_grad(*i)))
    return res
def fcompute(*output_indices):
    """Maps fractal-layout output indices (..., n1, m1, m0, n0) back to the flat
    (..., m, n) input, padding out-of-range positions with zero. `cs`, `m`, `n`,
    `dtype` and `data` come from the enclosing scope."""
    input_indices = []
    batch_len = len(output_indices) - 4
    n1_indice = output_indices[batch_len]
    m1_indice = output_indices[batch_len + 1]
    m0_indice = output_indices[batch_len + 2]
    n0_indice = output_indices[batch_len + 3]

    m_indice = m1_indice * cs + m0_indice
    n_indice = n1_indice * cs + n0_indice
    for i in range(0, batch_len):
        input_indices.append(output_indices[i])
    input_indices.append(m_indice)
    input_indices.append(n_indice)

    res = tvm.if_then_else(tvm.any(m_indice >= m, n_indice >= n),
                           tvm.const(0, dtype), data(*input_indices))
    return res
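# A NumPy sketch of the zero-padded fractal repacking that fcompute describes,
# assuming `cs` is the tile (cube) size and a (..., m, n) input is repacked into
# (..., n1, m1, m0, n0) blocks; this helper and its name are illustrative only.
import numpy as np

def pack_fractal_np(data, cs):
    *batch, m, n = data.shape
    m1 = (m + cs - 1) // cs
    n1 = (n + cs - 1) // cs
    # Positions beyond m or n stay zero, mirroring the tvm.if_then_else branch above.
    padded = np.zeros((*batch, m1 * cs, n1 * cs), dtype=data.dtype)
    padded[..., :m, :n] = data
    out = np.zeros((*batch, n1, m1, cs, cs), dtype=data.dtype)
    for i1 in range(n1):
        for j1 in range(m1):
            out[..., i1, j1, :, :] = padded[..., j1 * cs:(j1 + 1) * cs,
                                            i1 * cs:(i1 + 1) * cs]
    return out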