Example #1
def ReLU6Grad(y_grad, x, target=utils.CUDA):
    """
    Computes Gradients of Rectified Linear 6.

    Args:
        y_grad (tvm.tensor.Tensor): Tensor of type float16/float32, gradients backpropagated to the ReLU6 op.
        x (tvm.tensor.Tensor): Tensor of type float16/float32, inputs that were passed to the ReLU6 op, or its outputs.

    Returns:
        tvm.tensor.Tensor, has same type and shape as x.
    
    Supported Platforms:
        'GPU'
    """
    if target != utils.CUDA:
        raise RuntimeError("the target %s is not supported!" % target)
    shape = x.shape
    dtype = x.dtype

    zero = tvm.const(0, dtype)
    six = tvm.const(6, dtype)

    res0 = tvm.compute(shape,
                       lambda *i: tvm.if_then_else(x(*i) >= zero, x(*i), zero))
    res6 = tvm.compute(
        shape, lambda *i: tvm.if_then_else(x(*i) >= six, zero, res0(*i)))
    res = tvm.compute(
        shape, lambda *i: tvm.if_then_else(res6(*i) == zero, zero, y_grad(*i)))
    return res
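A minimal usage sketch, assuming tvm is importable as in the snippet and exposes the classic tvm.placeholder / tvm.create_schedule / tvm.lower API; the shape and dtype are illustrative, not taken from the original:

import tvm

shape, dtype = (32, 128), "float16"
x = tvm.placeholder(shape, dtype=dtype, name="x")        # input that was fed to ReLU6
y_grad = tvm.placeholder(shape, dtype=dtype, name="dy")  # incoming gradient
dx = ReLU6Grad(y_grad, x)                                 # y_grad where 0 < x < 6, zero elsewhere

s = tvm.create_schedule(dx.op)
print(tvm.lower(s, [x, y_grad, dx], simple_mode=True))    # inspect the generated IR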
Example #2
def topi_nn_hsigmoid(x):
    """
    topi hsigmoid
    Args:
        x:

    Returns:

    """
    return tvm.compute(x.shape, lambda *i: tvm.if_then_else(x(*i) <= -3, 0,
                                                            tvm.if_then_else(x(*i) >= 3, 1,
                                                                             (x(*i) + 3) / 6)))
Example #3
def HSigmoidGrad(y_grad, x):
    """
    HSigmoidGrad
    Args:
        y_grad:
        x:

    Returns:

    """
    return tvm.compute(
        x.shape, lambda *i: tvm.if_then_else(
            x(*i) <= -3, 0, tvm.if_then_else(x(*i) >= 3, 0,
                                             y_grad(*i) / 6)))
Example #4
def topi_nn_HSwish(x):
    """
    topi HSwish
    Args:
        x:

    Returns:

    """
    return tvm.compute(
        x.shape, lambda *i: tvm.if_then_else(
            x(*i) <= -3, 0,
            tvm.if_then_else(x(*i) >= 3, x(*i),
                             x(*i) * (x(*i) + 3) / 6)))
Example #5
def HSwishGrad(y_grad, x, target=utils.CUDA):
    """
    HSwishGrad
    Args:
        y_grad:
        x:

    Returns:

    """
    if target != utils.CUDA:
        raise RuntimeError("the target %s is not supported!" % target)
    shape = x.shape
    res0 = tvm.compute(shape, lambda *i: tvm.if_then_else(x(*i) <= -3, 0, y_grad(*i) * (2 * x(*i) + 3) / 6))
    res6 = tvm.compute(shape, lambda *i: tvm.if_then_else(x(*i) >= 3, y_grad(*i), res0(*i)))
    return res6
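A small sketch pairing the forward HSwish from Example #4 with this gradient; it assumes the same TVM-style API as the snippets (tvm.placeholder), and the shape and dtype are illustrative:

import tvm

x = tvm.placeholder((8, 1024), dtype="float32", name="x")
dy = tvm.placeholder((8, 1024), dtype="float32", name="dy")

y = topi_nn_HSwish(x)   # forward from Example #4: x * (x + 3) / 6, clipped at the ends
dx = HSwishGrad(dy, x)  # backward: 0 below -3, dy above 3, dy * (2x + 3) / 6 in between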
Example #6
def TensorcoreConv(data,
                   weight,
                   stride=[1, 1],
                   pad=[0, 0, 0, 0],
                   dilation=[1, 1],
                   out_dtype="float32",
                   name="out",
                   target=utils.CUDA):
    """
    Convolution intended for Tensor Cores: NHWC data with OHWI weights, typically float16 inputs.

    Args:
        data (tvm.tensor.Tensor): Input of shape (batch, in_h, in_w, in_c).
        weight (tvm.tensor.Tensor): Filter of shape (out_c, k_h, k_w, in_c).
        stride, pad, dilation (list): Convolution parameters; pad is [top, bottom, left, right].
        out_dtype (str): "float32" to accumulate in float32; any other value keeps the input dtype.

    Returns:
        tvm.tensor.Tensor of shape (batch, o_h, o_w, out_c).
    """
    batch, in_h, in_w, in_c = data.shape
    out_c, k_h, k_w, _ = weight.shape
    pad_top, pad_bottom, pad_left, pad_right = pad
    s_h, s_w = stride
    d_h, d_w = dilation
    k_h_d = (k_h - 1) * d_h + 1
    k_w_d = (k_w - 1) * d_w + 1
    o_h = (in_h + pad_top + pad_bottom - k_h_d) // s_h + 1
    o_w = (in_w + pad_left + pad_right - k_w_d) // s_w + 1

    has_pad = not (pad_left == 0 and pad_right == 0 and pad_top == 0
                   and pad_bottom == 0)

    if has_pad:
        data_pad = tvm.compute(
            (batch, in_h + pad_top + pad_bottom, in_w + pad_left + pad_right,
             in_c),
            lambda n, h, w, i: tvm.if_then_else(
                tvm.all(h >= pad_top, h - pad_top < in_h,
                        w >= pad_left, w - pad_left < in_w),
                data[n, h - pad_top, w - pad_left, i],
                tvm.const(0.0, "float16"),
            ),
            name="Pad",
        )
    else:
        data_pad = data

    rc = tvm.reduce_axis((0, in_c), name="rc")
    rh = tvm.reduce_axis((0, k_h), name="rh")
    rw = tvm.reduce_axis((0, k_w), name="rw")

    if out_dtype == "float32":
        out = tvm.compute(
            (batch, o_h, o_w, out_c),
            lambda n, h, w, o: tvm.sum(data_pad[n, (h * s_h + rh * d_h), (
                w * s_w + rw * d_w), rc].astype("float32") * weight[
                    o, rh, rw, rc].astype("float32"),
                                       axis=[rc, rh, rw]),
            name=name)
    else:
        out = tvm.compute(
            (batch, o_h, o_w, out_c),
            lambda n, h, w, o: tvm.sum(data_pad[n, (h * s_h + rh * d_h), (
                w * s_w + rw * d_w), rc] * weight[o, rh, rw, rc],
                                       axis=[rc, rh, rw]),
            name=name)

    return out
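A minimal usage sketch, assuming tvm is importable as in the snippet; the shapes are assumptions chosen only to illustrate the NHWC/OHWI layouts:

import tvm

# Illustrative NHWC activations and OHWI weights in float16 (the dtype the
# float16 padding constant implies).
data = tvm.placeholder((16, 56, 56, 64), dtype="float16", name="data")     # N, H, W, C
weight = tvm.placeholder((128, 3, 3, 64), dtype="float16", name="weight")  # O, Kh, Kw, I

out = TensorcoreConv(data, weight, stride=[1, 1], pad=[1, 1, 1, 1],
                     dilation=[1, 1], out_dtype="float32")
# Expected output shape: (16, 56, 56, 128), since o_h = (56 + 1 + 1 - 3) // 1 + 1 = 56.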
Example #7
def relu_grad(head, in_data):
    """Computes the gradient of ReLU: passes head through where in_data >= 0 and zeroes it elsewhere."""
    shape = head.shape
    dtype = head.dtype

    zero = tvm.const(0, dtype)
    relugrad = tvm.compute(
        shape,
        lambda *i: tvm.if_then_else(in_data(*i) >= zero, head(*i), zero),
        tag=tag.INJECTIVE)
    return relugrad
Example #8
def HSwishGrad(y_grad, x):
    """
    HSwishGrad
    Args:
        y_grad:
        x:

    Returns:

    """
    shape = x.shape

    res0 = tvm.compute(
        shape, lambda *i: tvm.if_then_else(
            x(*i) <= -3, 0,
            y_grad(*i) * (2 * x(*i) + 3) / 6))
    res6 = tvm.compute(
        shape, lambda *i: tvm.if_then_else(x(*i) >= 3, y_grad(*i), res0(*i)))
    return res6
Example #9
def fused_relu_grad_bn_double_reduce_grad(data0, data1, data2, data3, data4, data5, data6, data7, data8,
                                          data9, data10, data11, data12, data13, data14, data15,
                                          layout="NHWC", out_dtype="float16", target=utils.CUDA):
    """Fused backward kernel: a ReLU gradient feeding the input-gradient stage of two BatchNorm branches."""
    if layout == 'NCHW':
        data5 = topi.transpose(data5, (0, 2, 3, 1))
        data9 = topi.transpose(data9, (0, 2, 3, 1))
        data13 = topi.transpose(data13, (0, 2, 3, 1))
        data14 = topi.transpose(data14, (0, 2, 3, 1))
        data15 = topi.transpose(data15, (0, 2, 3, 1))
    elif layout != 'NHWC':
        raise NotImplementedError(
            'Layout not supported {} '.format(layout))
    
    inter_dtype = "float32"
    n, h, w, c = data5.shape
    scale = n * h * w

    mul = topi.multiply(data2, data3)
    mul1221 = topi.divide(mul, scale)

    # ReluGrad
    zero = tvm.const(0, data15.dtype)
    add = topi.add(data13, data14)
    addgrad = tvm.compute(add.shape, lambda *i: tvm.if_then_else(data15(*i) >= zero, add(*i), zero), tag=tag.INJECTIVE)
    addgrad = topi.cast(addgrad, inter_dtype)
    mul3283 = topi.multiply(scale, addgrad)
    sub1159 = topi.subtract(mul3283, data6)

    data5_cast = topi.cast(data5, inter_dtype)
    mul2372 = topi.divide(data4, scale)
    sub631 = topi.subtract(data5_cast, mul2372)
    mul1220 = topi.multiply(sub631, data1)
    div = topi.divide(mul1220, data0)
    sub271 = topi.subtract(sub1159, div)
    mul1218 = topi.multiply(mul1221, sub271)
    mul1218_cast = topi.cast(mul1218, out_dtype)

    mul1231 = topi.multiply(data11, data12)
    mul1230 = topi.divide(mul1231, scale)
    data9_cast = topi.cast(data9, inter_dtype)
    mul2364 = topi.divide(data8, scale)
    sub625 = topi.subtract(data9_cast, mul2364)
    mul1229 = topi.multiply(data10, sub625)

    div272 = topi.divide(mul1229, data7)
    sub272 = topi.subtract(sub1159, div272)
    mul1228 = topi.multiply(mul1230, sub272)
    mul1228_cast = topi.cast(mul1228, out_dtype)

    if layout == "NCHW":
        mul1218_cast = topi.transpose(mul1218_cast, (0, 3, 1, 2))
        mul1228_cast = topi.transpose(mul1228_cast, (0, 3, 1, 2))
    
    return [mul1218_cast, mul1228_cast]
Example #10
def ReLU6Grad(y_grad, x):
    """
    Computes Gradients of Rectified Linear 6.

    Args:
        y_grad (tvm.tensor.Tensor): Tensor of type float16/float32, gradients backpropagated to the ReLU6 op.
        x (tvm.tensor.Tensor): Tensor of type float16/float32, inputs that were passed to the ReLU6 op, or its outputs.

    Returns:
        tvm.tensor.Tensor, has same type and shape as x.
    """
    shape = x.shape
    dtype = x.dtype

    zero = tvm.const(0, dtype)
    six = tvm.const(6, dtype)

    res0 = tvm.compute(shape,
                       lambda *i: tvm.if_then_else(x(*i) >= zero, x(*i), zero))
    res6 = tvm.compute(
        shape, lambda *i: tvm.if_then_else(x(*i) >= six, zero, res0(*i)))
    res = tvm.compute(
        shape, lambda *i: tvm.if_then_else(res6(*i) == zero, zero, y_grad(*i)))
    return res
Example #11
def fcompute(*output_indices):
    # Free variables (cs, m, n, dtype, data) come from the enclosing scope;
    # cs is presumably the tile (cube) size and (m, n) the original extents.
    input_indices = []
    batch_len = len(output_indices) - 4
    n1_indice = output_indices[batch_len]
    m1_indice = output_indices[batch_len + 1]
    m0_indice = output_indices[batch_len + 2]
    n0_indice = output_indices[batch_len + 3]
    m_indice = m1_indice * cs + m0_indice
    n_indice = n1_indice * cs + n0_indice
    for i in range(0, batch_len):
        input_indices.append(output_indices[i])
    input_indices.append(m_indice)
    input_indices.append(n_indice)
    # Zero-pad reads that fall outside the original (m, n) extent.
    res = tvm.if_then_else(tvm.any(m_indice >= m, n_indice >= n),
                           tvm.const(0, dtype), data(*input_indices))
    return res
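A minimal sketch of how an fcompute like the one above is typically consumed, assuming it is defined at module level as shown; the values for cs, m, n, dtype and data are assumptions standing in for the enclosing scope of the original snippet:

import tvm

m, n, cs = 100, 60, 16                            # original extents and tile (cube) size (assumed)
m1, n1 = (m + cs - 1) // cs, (n + cs - 1) // cs   # number of tiles per dimension
dtype = "float16"
data = tvm.placeholder((m, n), dtype=dtype, name="data")

# With no batch dimensions, the packed tensor iterates (n1, m1, m0, n0) and
# reads data[m1*cs + m0, n1*cs + n0], substituting zero outside the (m, n) bounds.
packed = tvm.compute((n1, m1, cs, cs), fcompute, name="packed")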