示例#1
0
def _compute_log(data_input, target=utils.CCE):
    """
    Build atanh(x) from the identity atanh(x) = 0.5 * log((1 + x) / (1 - x)).

    Args:
        data_input: Input tensor x.
        target: Forwarded unchanged as the second argument of the `log` helper.

    Returns:
        Tensor holding 0.5 * log((1 + x) / (1 - x)).
    """
    dtype = data_input.dtype

    # Numerator: 1 + x.
    numerator = topi.add(data_input, dc.one_const(dtype))

    # Denominator: 1 - x, assembled as (-1 * x) + 1.
    negated_input = topi.multiply(data_input, dc.neg_one_const(dtype))
    denominator = topi.add(negated_input, dc.one_const(dtype))

    log_of_ratio = log(numerator / denominator, target)
    return topi.multiply(log_of_ratio, dc.half_const(dtype))
示例#2
0
def _compute_log(data_input):
    """
    Build atanh(x) from the identity atanh(x) = 0.5 * log((1 + x) / (1 - x)).

    Args:
        data_input: Input tensor x.

    Returns:
        Tensor holding 0.5 * log((1 + x) / (1 - x)).
    """
    dtype = data_input.dtype

    # Numerator: 1 + x.
    numerator = topi.add(data_input, dc.one_const(dtype))

    # Denominator: 1 - x, assembled as (-1 * x) + 1.
    negated_input = topi.multiply(data_input, dc.neg_one_const(dtype))
    denominator = topi.add(negated_input, dc.one_const(dtype))

    log_of_ratio = log.log(numerator / denominator)
    return topi.multiply(log_of_ratio, dc.half_const(dtype))
示例#3
0
def _apply_ada_max_compute(var, m, v, grad, lr, beta1, beta1_power, beta2,
                           epsilon):
    """
    Build the AdaMax update expressions.

    The update that is assembled is:
        m   <- m + (grad - m) * (1 - beta1)
        v   <- max(beta2 * v, |grad|)
        var <- var - (lr * m) / ((1 - beta1_power) * (v + epsilon))

    Only element 0 of lr, beta1, beta1_power and beta2 is read; epsilon is a
    Python scalar that is wrapped in a float32 constant.

    Returns:
        Tuple (var, m, v) of updated tensors, cast back to float16 when var
        came in as float16.
    """
    inp_dtype = var.dtype
    is_fp16 = inp_dtype == 'float16'

    # Promote float16 tensors to float32 so the arithmetic is more accurate.
    if is_fp16:
        var, m, v, lr, beta1_power, beta1, beta2, grad = (
            topi.cast(tensor, 'float32')
            for tensor in (var, m, v, lr, beta1_power, beta1, beta2, grad))
    epsilon = tvm.const(epsilon, 'float32')

    # --- first moment: m += (grad - m) * (1 - beta1) ---
    neg_beta1 = tvm.compute(
        beta1.shape, lambda *idx: beta1(*idx) * neg_one_const("float32"))
    one_minus_beta1 = tvm.compute(
        neg_beta1.shape, lambda *idx: neg_beta1(*idx) + one_const("float32"))
    grad_minus_m = topi.subtract(grad, m)
    m_increment = tvm.compute(
        grad_minus_m.shape,
        lambda *idx: grad_minus_m(*idx) * one_minus_beta1[0])
    m = topi.add(m, m_increment)

    # --- infinity norm: v = max(beta2 * v, |grad|) ---
    decayed_v = tvm.compute(v.shape, lambda *idx: v(*idx) * beta2[0])
    v = topi.maximum(decayed_v, topi.abs(grad))

    # --- parameter step: var -= lr * m / ((1 - beta1_power) * (v + eps)) ---
    v_plus_eps = tvm.compute(v.shape, lambda *idx: v(*idx) + epsilon)
    neg_power = tvm.compute(
        beta1_power.shape,
        lambda *idx: beta1_power(*idx) * neg_one_const("float32"))
    one_minus_power = tvm.compute(
        neg_power.shape, lambda *idx: neg_power(*idx) + one_const("float32"))
    denominator = tvm.compute(
        v_plus_eps.shape,
        lambda *idx: v_plus_eps(*idx) * one_minus_power[0])
    lr_times_m = tvm.compute(m.shape, lambda *idx: m(*idx) * lr[0])
    # The division is expressed as a multiplication by the reciprocal.
    step = topi.multiply(lr_times_m, reciprocal(denominator))
    var = topi.subtract(var, step)

    if is_fp16:
        var, m, v = (topi.cast(tensor, inp_dtype) for tensor in (var, m, v))

    return var, m, v
示例#4
0
文件: asin.py 项目: zhuyawen/akg
def _asin_compute(data_input):
    """
    Build the asin expression from two branches blended by a 0/1 mask.

    With a = |x| and b = BOUNDARY:
        a <  b : asin(a) = taylor(a)
        a >= b : asin(a) = HALF_PI - taylor(sqrt(1 - a^2), 1 - a^2)
    The sign of x is multiplied back in at the end. float16 input is computed
    in float32 and cast back.
    """
    dtype = data_input.dtype
    boundary = tvm.const(BOUNDARY, "float32")
    is_fp16 = dtype == "float16"

    if is_fp16:
        data_input = topi.cast(data_input, "float32")

    # Work on |x| = x * sign(x); the sign is re-applied to the result.
    data_sign = sign(data_input)
    abs_x = topi.multiply(data_input, data_sign)

    # Low-range mask: min(|x|, b) - b lies in [-b, 0], so its floor is -1
    # below the boundary and 0 otherwise; negating gives a 1/0 mask.
    shifted = topi.subtract(topi.minimum(abs_x, boundary), boundary)
    floored = akg.lang.cce.floor(shifted)
    # floor yields int32; on mini the cast back to fp32 goes through fp16.
    if utils.product_is_mini():
        floored = topi.cast(floored, "float16")
    floored = topi.cast(floored, "float32")
    low_mask = topi.multiply(floored, neg_one_const("float32"))

    low_branch = topi.multiply(_taylor_compute(abs_x), low_mask)

    # High-range branch, selected by the complementary mask (1 - low_mask).
    high_mask = topi.subtract(one_const("float32"), low_mask)
    one_minus_sq = topi.subtract(one_const("float32"),
                                 topi.multiply(abs_x, abs_x))
    root = _sqrt(one_minus_sq)
    high_taylor = _taylor_compute(root, one_minus_sq)

    high_branch = topi.multiply(high_taylor, neg_one_const("float32"))
    high_branch = topi.add(high_branch, tvm.const(HALF_PI, "float32"))
    high_branch = topi.multiply(high_branch, high_mask)

    # Merge both branches and restore the sign of the input.
    result = topi.multiply(topi.add(low_branch, high_branch), data_sign)

    if is_fp16:
        result = topi.cast(result, "float16")

    return result
示例#5
0
def atan_grad(head, input_x):
    """
    Compute the gradient of atan with respect to its input.

    .. math::
        dx = \\frac{1}{1 + x^2} \\cdot dy

    Args:
        head (tvm.tensor.Tensor): Incoming gradient dy; its shape and dtype
                                  are checked against input_x.
        input_x (tvm.tensor.Tensor): Input x of the forward atan; the dtype
                                     is checked against
                                     utils.DtypeForDavinci.ALL_FLOAT.

    Returns:
        A tvm.tensor.Tensor named "out" with the shape of input_x.

    Supported Platforms:
        'Ascend'
    """
    # Both operands must agree in shape and in (floating point) dtype.
    utils.elemwise_shape_check(head.shape, input_x.shape)
    utils.elemwise_dtype_check(head.dtype, input_x.dtype,
                               utils.DtypeForDavinci.ALL_FLOAT)

    one = dc.one_const(input_x.dtype)

    return tvm.compute(
        input_x.shape,
        lambda *idx: one / (one + input_x(*idx) * input_x(*idx)) * head(*idx),
        name="out")
示例#6
0
def _atan_compute(data):
    """
    Build the atan expression.

    Two candidates are evaluated on a = |x| and the smaller one is kept:
        _do_atan_taylor(a)
        _do_atan_taylor(|(a - 1) / (a + 1)|) + CONST_PI_BY_FOUR
    The result is then multiplied by sign(x). float16 input is computed in
    float32 and cast back at the end.
    """
    dtype = data.dtype
    is_fp16 = dtype == "float16"

    if is_fp16:
        data = topi.cast(data, "float32")

    magnitude = topi.abs(data)
    one = dc.one_const(magnitude.dtype)

    # Argument of the shifted candidate: |(a - 1) / (a + 1)|.
    minus_one = topi.subtract(magnitude, one)
    plus_one = topi.add(magnitude, one)
    shifted_arg = topi.abs(topi.divide(minus_one, plus_one))

    # Candidate evaluated directly on |x|.
    direct = _do_atan_taylor(magnitude)
    # Candidate evaluated on the shifted argument, offset by pi/4.
    shifted = topi.add(_do_atan_taylor(shifted_arg),
                       tvm.const(CONST_PI_BY_FOUR, shifted_arg.dtype))
    result = topi.minimum(direct, shifted)

    # On mini with float32 data the sign is taken in float16 and cast back.
    if utils.product_is_mini() and data.dtype == "float32":
        half_sign = topi.sign(topi.cast(data, "float16"))
        sign_mask = topi.cast(half_sign, "float32")
    else:
        sign_mask = topi.sign(data)

    result = topi.multiply(result, sign_mask)

    if is_fp16:
        result = topi.cast(result, "float16")

    return result
示例#7
0
def _less_equal_compare_float32(data_x, data_y):
    """
    Elementwise indicator of x <= y.

    Returns:
        Tensor shaped like data_x holding one (in data_x's dtype) where
        data_x <= data_y and zero elsewhere.
    """
    dtype = data_x.dtype

    def _indicator(*index):
        is_le = data_x(*index) <= data_y(*index)
        return tvm.expr.Select(is_le, dc.one_const(dtype),
                               dc.zero_const(dtype))

    return tvm.compute(data_x.shape, _indicator)
示例#8
0
def reduce_all(data, axis=None, keepdims=False):
    """
    Compute the logical AND of a boolean tensor along the given axes.

    The reduction is carried out in float16: each element is mapped to
    (1 - x), the mapped values are summed over the axes, and the output is
    True exactly where that sum converts to False.

    Args:
        data (tvm.tensor.Tensor): Tensor of type Boolean.
        axis (Union[None, int, list, tuple]): Axes to reduce; passed to
            ft_util.refine_reduce_axis together with data.
        keepdims (Union[None, bool]): Forwarded to the sum; if true, reduced
            dimensions are kept with length 1.

    Returns:
        tvm.tensor.Tensor of type bool.

    Raises:
        ValueError: If axis is None while keepdims is False.
    """
    shape = [dim.value for dim in data.shape]

    vc_util.ops_dtype_check(data.dtype, vc_util.DtypeForDavinci.BOOL)
    vc_util.check_shape(shape)

    if axis is None and keepdims is False:
        raise ValueError("keepdims must be True when axis is None!")

    reduce_axes = ft_util.refine_reduce_axis(data, axis)

    # bool -> float16, then invert so that False elements contribute 1.
    as_half = akg.tvm.compute(
        shape, lambda *indice: data(*indice).astype("float16"), name='xx1')
    inverted = (-as_half + dc.one_const("float16"))
    false_total = akg.topi.sum(inverted, axis=reduce_axes, keepdims=keepdims)

    out_shape = list(false_total.shape)

    # Converts to True wherever the summed value is non-zero.
    any_false = akg.tvm.compute(
        out_shape,
        lambda *indice: false_total(*indice).astype("bool"),
        name='zz')

    # Flip: 0 where a False was seen, 1 otherwise (still float16).
    all_true_half = akg.tvm.compute(
        out_shape,
        lambda *indice: akg.tvm.expr.Select(
            any_false(*indice), dc.zero_const("float16"),
            dc.one_const("float16")),
        name="y1")

    return akg.tvm.compute(
        out_shape,
        lambda *indice: all_true_half(*indice).astype("bool"),
        name='y')
示例#9
0
def _compute_m_t(m, beta, grad):
    """
    Build the moment update m_t = beta * m + (1 - beta) * grad.

    Only element 0 of beta is used as the scalar factor.
    """
    # beta * m
    decayed_m = tvm.compute(m.shape, lambda *idx: m(*idx) * beta[0])

    # 1 - beta, assembled as (-1 * beta) + 1
    neg_beta = tvm.compute(
        beta.shape, lambda *idx: beta(*idx) * neg_one_const("float32"))
    one_minus_beta = tvm.compute(
        neg_beta.shape, lambda *idx: neg_beta(*idx) + one_const("float32"))

    # (1 - beta) * grad
    scaled_grad = tvm.compute(
        grad.shape, lambda *idx: grad(*idx) * one_minus_beta[0])

    return topi.add(decayed_m, scaled_grad)
示例#10
0
文件: atan2.py 项目: zhuyawen/akg
def _init_atan2_mask(data_y_, data_x_):
    """
    Build the sign masks used by atan2.

    Args:
        data_y_ (tvm.tensor.Tensor): The y of atan2(y, x).
        data_x_ (tvm.tensor.Tensor): The x of atan2(y, x).

    Returns:
        Tuple (x_lt_zero_y_mask, y_ge_zero):
            y_ge_zero is 1 where y >= 0 and -1 elsewhere;
            x_lt_zero_y_mask equals y_ge_zero where x < 0 and 0 elsewhere.
    """
    # On mini the selects are done in float16 (select only supports float16
    # there), so float32 inputs are narrowed first and the masks widened back.
    is_cast_for_mini = utils.product_is_mini() and data_y_.dtype == "float32"

    data_x, data_y = data_x_, data_y_
    if is_cast_for_mini:
        data_x = topi.cast(data_x_, "float16")
        data_y = topi.cast(data_y_, "float16")

    mask_dtype = data_y.dtype
    one = dc.one_const(mask_dtype)
    zero = dc.zero_const(mask_dtype)
    neg_one = dc.neg_one_const(mask_dtype)

    def _y_sign(*i):
        return tvm.expr.Select(data_y(*i) >= zero, one, neg_one)

    y_ge_zero = tvm.compute(data_y.shape, _y_sign, name="y_ge_zero")

    def _y_sign_where_x_negative(*i):
        return tvm.expr.Select(data_x(*i) < zero, y_ge_zero(*i), zero)

    x_lt_zero_y_mask = tvm.compute(
        data_y.shape, _y_sign_where_x_negative, name="xlt0_y_mask")

    if is_cast_for_mini:
        x_lt_zero_y_mask = topi.cast(x_lt_zero_y_mask, "float32")
        y_ge_zero = topi.cast(y_ge_zero, "float32")

    return (x_lt_zero_y_mask, y_ge_zero)
示例#11
0
def _do_atan_taylor(data):
    """
    Taylor-series evaluation of atan.

        if x > 0 and x < tan(pi/8):
            atan(x) = x - x^3/3 + x^5/5 - x^7/7 ...
        elif x > tan(pi/8) and x < tan(pi/4):
            atan(x) = atan(y) + atan((x-y)/(1+xy))

    Both forms are evaluated for every element (the second with
    y = TAN_PI_BY_EIGHT and the CONST_PI_BY_EIGHT offset) and the elementwise
    minimum of the two is returned.

    Args:
        data (tvm.tensor.Tensor): Input data.

    Returns:
        A tvm.tensor.Tensor of atan(x).
    """
    dtype = data.dtype

    def _even_poly(squared, order):
        """Horner evaluation of ATAN_TAYLOR_COEF[0..order] in `squared`."""
        acc = tvm.const(ATAN_TAYLOR_COEF[order], dtype)
        for k in reversed(range(order)):
            acc = topi.multiply(acc, squared)
            acc = topi.add(acc, tvm.const(ATAN_TAYLOR_COEF[k], dtype))
        return acc

    # Shifted argument |(x - y) / (1 + x * y)| with y = TAN_PI_BY_EIGHT.
    offset = tvm.const(TAN_PI_BY_EIGHT, dtype)
    denominator = topi.add(
        topi.multiply(data, tvm.const(TAN_PI_BY_EIGHT, dtype)),
        dc.one_const(dtype))
    numerator = topi.subtract(data, offset)
    shifted = topi.abs(topi.divide(numerator, denominator))

    # Series on the shifted argument, plus the CONST_PI_BY_EIGHT offset.
    shifted_series = _even_poly(topi.multiply(shifted, shifted),
                                CONST_ITERTOR)
    shifted_series = topi.multiply(shifted_series, shifted)
    shifted_series = topi.add(shifted_series,
                              tvm.const(CONST_PI_BY_EIGHT, dtype))

    # Series directly on the input.
    direct_series = _even_poly(topi.multiply(data, data), CONST_ITERTOR2)

    return topi.minimum(shifted_series, topi.multiply(direct_series, data))
示例#12
0
def erfc(input_x):
    r"""
    Compute the complementary error function of input_x.

    .. math::
        \operatorname{erfc} (x) = 1 - \operatorname{erf} (x).

    Args:
        input_x (tvm.tensor.Tensor): Input tensor; its dtype is checked
            against vc_util.DtypeForDavinci.ALL_FLOAT.

    Returns:
        tvm.tensor.Tensor holding 1 - erf(input_x).
    """
    dtype = input_x.dtype

    # Validate before building any expression.
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.check_shape(input_x.shape)

    # 1 - erf(x), written as 1 + (-1 * erf(x)).
    negated_erf = topi.multiply(dc.neg_one_const(dtype), erf(input_x))
    return topi.add(dc.one_const(dtype), negated_erf)
示例#13
0
def _bool_negate(input_bool):
    """Return 1 - input_bool elementwise, with the one taken in the input's dtype."""
    one = dc.one_const(input_bool.dtype)
    negated = topi.subtract(one, input_bool)
    return negated
示例#14
0
def nudged_min_max_compute(min_broadcast, max_broadcast, num_bits,
                           narrow_range):
    """
    Calculate the nudged minimum/maximum and the scale of the quantization.

    Notes:
        With quant_min = 1 if narrow_range else 0 and
        quant_max = 2^num_bits - 1, each channel's scale is
                scale = (max_broadcast - min_broadcast) / (quant_max - quant_min).
        The un-nudged zero point is
                zero_point_from_min = quant_min - min_broadcast / scale.
        It is then nudged into [quant_min, quant_max]:
                nudged_zero_point = floor(between_min_max_float + 0.5)
                                    + less_quant_min_float + more_quant_max_float,
        where between_min_max_float is zero_point_from_min when
        quant_min <= zero_point_from_min <= quant_max and 0 otherwise,
        less_quant_min_float is quant_min when zero_point_from_min < quant_min
        and 0 otherwise, and more_quant_max_float is quant_max when
        zero_point_from_min > quant_max and 0 otherwise.
        Finally:
                 nudged_min = (quant_min - nudged_zero_point) * scale
                 nudged_max = (quant_max - nudged_zero_point) * scale

    Args:
        min_broadcast (tvm.tensor.Tensor): minimum value to be quantified for each channel.
        max_broadcast (tvm.tensor.Tensor): maximum value to be quantified for each channel.
        num_bits (int): bitwidth of the quantization (documented range [2,16];
                      not validated here).
        narrow_range (bool): if True, quantize into [1, 2^num_bits - 1],
                      otherwise into [0, 2^num_bits - 1].

    Returns:
        List [nudged_min, nudged_max, scale] of tvm.tensor.Tensor, each with
        the shape of min_broadcast.
        NOTE(review): the rounded zero point is always cast to float32, so the
        inputs are presumably float32 - confirm against callers.
    """

    dtype = min_broadcast.dtype
    quant_min = 1 if narrow_range else 0
    quant_max = (2**num_bits) - 1

    # quant_min and quant_max are broadcast because every channel is
    # computed separately.
    quant_min_float = topi.full(min_broadcast.shape, dtype,
                                tvm.const(quant_min, dtype))
    quant_max_float = topi.full(min_broadcast.shape, dtype,
                                tvm.const(quant_max, dtype))

    # per-channel difference between max and min.
    max_sub_min = topi.subtract(max_broadcast, min_broadcast)
    quant_max_sub_quant_min = topi.subtract(quant_max_float, quant_min_float)
    # compute scale = (max_broadcast - min_broadcast) / (quant_max - quant_min)
    # and min_div_scale = min_broadcast / scale
    if product_is_mini():
        # On mini the division is expressed as multiply-by-reciprocal.
        # Both products go through the same `Mul` op so that the block uses a
        # single multiply API (the first call used to be spelled `mul`).
        scale = Mul(max_sub_min,
                    reciprocal(quant_max_sub_quant_min),
                    target=utils.CCE)
        min_div_scale = Mul(min_broadcast, reciprocal(scale), target=utils.CCE)
    else:
        scale = Divide(max_sub_min, quant_max_sub_quant_min, target=utils.CCE)
        min_div_scale = Divide(min_broadcast, scale, target=utils.CCE)

    # zero_point_from_min = quant_min_float - min_broadcast / scale
    zero_point_from_min = topi.subtract(quant_min_float, min_div_scale)
    # Masks from less_compare_float32 (presumably 1 where the first argument
    # is smaller than the second, else 0 - verify against the helper):
    # bool_less_quant_min_float marks zero_point_from_min < quant_min_float
    bool_less_quant_min_float = less_compare_float32(zero_point_from_min,
                                                     quant_min_float)
    # bool_more_quant_max_float marks quant_max_float < zero_point_from_min
    bool_more_quant_max_float = less_compare_float32(quant_max_float,
                                                     zero_point_from_min)

    # use the masks to pick the clamped value for out-of-range channels
    less_quant_min_float = topi.multiply(quant_min_float,
                                         bool_less_quant_min_float)
    more_quant_max_float = topi.multiply(quant_max_float,
                                         bool_more_quant_max_float)

    # mask of channels whose zero point is neither below quant_min nor above
    # quant_max, built as (1 - below) * (1 - above)
    tensor_one = topi.full(min_broadcast.shape, dtype, dc.one_const(dtype))
    bool_not_less_quant_min_float = topi.subtract(tensor_one,
                                                  bool_less_quant_min_float)
    bool_not_more_quant_max_float = topi.subtract(tensor_one,
                                                  bool_more_quant_max_float)
    bool_between_min_max = topi.multiply(bool_not_less_quant_min_float,
                                         bool_not_more_quant_max_float)
    between_min_max_float = topi.multiply(zero_point_from_min,
                                          bool_between_min_max)
    # round the in-range zero points: add 0.5, then floor.
    between_min_max_add_half_one = topi.add(between_min_max_float,
                                            dc.half_const(dtype))
    between_min_max_round = akg.lang.ascend.floor(between_min_max_add_half_one)
    # on mini the floor result goes through float16 before float32.
    if product_is_mini():
        between_min_max_round = topi.cast(between_min_max_round, "float16")

    between_min_max_round = topi.cast(between_min_max_round, "float32")

    # calculate the maximum and minimum values of the quantization
    nudged_zero_point_tmp = topi.add(less_quant_min_float,
                                     more_quant_max_float)
    nudged_zero_point = topi.add(nudged_zero_point_tmp, between_min_max_round)

    nudged_min_tmp = topi.subtract(quant_min_float, nudged_zero_point)
    nudged_max_tmp = topi.subtract(quant_max_float, nudged_zero_point)
    nudged_min = topi.multiply(nudged_min_tmp, scale)
    nudged_max = topi.multiply(nudged_max_tmp, scale)
    res = [nudged_min, nudged_max, scale]

    return res
示例#15
0
def _erf_compute(input_x):
    r"""
    Build the rational approximation of erf.

    .. math::
        \operatorname{erf}(x) = sign(x) \left(
            1 - (a_1t+a_2t^2+a_3t^3+a_4t^4+a_5t^5) e^{-x^2} + \epsilon(|x|)
            \right), \\
        t = \dfrac{1}{1+p|x|} \\
        \left|\epsilon(|x|)\right| \le 1.5 \times 10^{-7} \\
        where \; p=.3275911 \quad a_1=.254829592 \quad a_2=-.284496736 \\
        a_3=1.421413741 \quad a_4=-1.453152027 \quad a_5=1.061405429

    The computation runs in float32; float16 input is cast up on entry and
    the result is cast back on exit.

    Args:
        input_x (tvm.tensor.Tensor): Input tensor.

    Returns:
        tvm.tensor.Tensor as rational approximation.
    """
    dtype = input_x.dtype
    shape = get_shape(input_x)
    is_fp16 = dtype == "float16"

    cst_one = dc.one_const("float32")
    cst_neg_one = dc.neg_one_const("float32")
    cst_p = tvm.const(SCALER_P, "float32")
    cst_a1 = tvm.const(SCALER_A1, "float32")
    cst_a2 = tvm.const(SCALER_A2, "float32")
    cst_a3 = tvm.const(SCALER_A3, "float32")
    cst_a4 = tvm.const(SCALER_A4, "float32")
    cst_a5 = tvm.const(SCALER_A5, "float32")
    fp16_max = tvm.const(SCALER_FP16_MAX, "float32")
    fp16_min = tvm.const(SCALER_FP16_MIN, "float32")

    if is_fp16:
        input_x = topi.cast(input_x, "float32")

    # sign(x) = round[(x * fp16_max) / (|x * fp16_max| + fp16_min)]
    scaled = topi.multiply(input_x, fp16_max)
    scaled_abs_plus_min = topi.add(topi.abs(scaled), fp16_min)
    rounded = round_value(div(scaled, scaled_abs_plus_min))
    # On mini the rounded value is cast to fp16 before going to fp32.
    if utils.product_is_mini():
        rounded = topi.cast(rounded, "float16")
    tensor_sign = topi.cast(rounded, "float32")

    # t = 1 / (1 + p * |x|)
    abs_x = topi.abs(input_x)
    one_plus_p_abs = topi.add(cst_one, topi.multiply(abs_x, cst_p))
    data_t = div(topi.full(shape, "float32", 1.0), one_plus_p_abs)

    # e^{-x^2}
    neg_x_squared = topi.multiply(topi.multiply(abs_x, abs_x), cst_neg_one)
    exp_term = exp(neg_x_squared)

    # Horner form: a1t + ... + a5t^5 = ((((a5t + a4)t + a3)t + a2)t + a1)t
    poly = topi.multiply(cst_a5, data_t)
    for coef in (cst_a4, cst_a3, cst_a2, cst_a1):
        poly = topi.multiply(topi.add(poly, coef), data_t)

    # erf = sign(x) * (1 - poly * e^{-x^2})
    poly_times_exp = topi.multiply(poly, exp_term)
    one_minus_tail = topi.add(cst_one,
                              topi.multiply(cst_neg_one, poly_times_exp))
    erf_res = topi.multiply(tensor_sign, one_minus_tail)

    if is_fp16:
        erf_res = topi.cast(erf_res, dtype)

    return erf_res