Example #1
def my_dsl(dtype, kernel_name, attrs, insn, insnType):
    m = tvm.var("M")
    n = tvm.var("N")
    A = tvm.placeholder((m, ), name="A", dtype=dtype)
    B = tvm.placeholder((m, ), name="B", dtype=dtype)

    if insn == "add":
        C = topi.add(A, B)
    elif insn == "sub":
        C = topi.subtract(A, B)
    if insn == "mul":
        C = topi.multiply(A, B)
    elif insn == "div":
        C = topi.divide(A, B)
    elif insn == "max":
        C = topi.maximum(A, B)
    elif insn == "min":
        C = topi.minimum(A, B)

    elif insn == "abs":
        C = tvm.compute(A.shape, lambda *index: tvm.abs(A(*index)), name='C')
    elif insn == "exp":
        C = topi.exp(A)
    elif insn == "log":
        C = topi.log(A)
    elif insn == "sqrt":
        C = topi.sqrt(A)

    elif insn == "adds":
        C = A + tvm.const(2, dtype)
    elif insn == "muls":
        C = A * tvm.const(2, dtype)

    # C = tvm.compute((m, ), lambda i: A[i] + B[i], name="C")
    s = tvm.create_schedule([C.op])
    with akg.build_config(add_lower_pass=cce.debug_mode(0), dump_pass_ir=True):
        if insnType == "binary":
            mod = akg.build(s, [A, B, C],
                            "cce",
                            name=kernel_name,
                            attrs=attrs,
                            polyhedral=True)
        else:
            mod = akg.build(s, [A, C],
                            "cce",
                            name=kernel_name,
                            attrs=attrs,
                            polyhedral=True)
    return mod
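A hypothetical call of the builder above, assuming the explicit insn/insnType parameters shown in the signature; the dtype, kernel name, attrs and instruction choice are illustrative only:

# Illustrative invocation; values are placeholders, not from the original test.
mod = my_dsl("float16", "add_fp16", attrs={}, insn="add", insnType="binary")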
Example #2
def _asin_grad_compute(x, dy):
    """Compute asin_grad."""

    dtype = x.dtype
    if dtype == "float16":
        x = topi.cast(x, "float32")
        dy = topi.cast(dy, "float32")

    # step 1: calculate num_to_vrsqrt = 1 - x^2
    data = topi.multiply(x, x)
    data = topi.multiply(data, tvm.const(-1, "float32"))
    num_to_vrsqrt = topi.add(data, tvm.const(1, "float32"))

    # step 2: calculate dy * (1 / sqrt(1 - x^2))
    if utils.product_is_mini():
        # mini: use newton's method for high accuracy result
        res = _vrsqrt_newton(num_to_vrsqrt)
        res = topi.multiply(res, dy)
    else:
        # cloud: use vdiv for high efficiency computation
        vsqrt_res = topi.sqrt(num_to_vrsqrt)
        res = topi.divide(dy, vsqrt_res)

    if dtype == "float16":
        res = topi.cast(res, "float16")

    return res
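`_vrsqrt_newton` is a helper defined elsewhere in the source file. A minimal sketch of what such a refinement could look like, assuming the seed comes from `topi.rsqrt` and two Newton steps; the name, signature and iteration count here are illustrative, not the actual akg implementation:

def _vrsqrt_newton(x, iterations=2):
    """Refine an initial 1/sqrt(x) estimate with Newton's method:
    y <- y * (1.5 - 0.5 * x * y * y)."""
    y = topi.rsqrt(x)  # coarse hardware estimate used as the seed
    for _ in range(iterations):
        x_y_y = topi.multiply(x, topi.multiply(y, y))
        half_term = topi.multiply(x_y_y, tvm.const(-0.5, "float32"))
        y = topi.multiply(y, topi.add(half_term, tvm.const(1.5, "float32")))
    return y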
Example #3
def asinh(x, target=utils.CCE):
    r"""
    Compute asinh function.

    .. math:: asinh(x) = log(x+\sqrt{x*x+1})

    Args:
        x (tvm.tensor.Tensor): Tensor of type float16, float32. 

    Returns:
       tvm.tensor.Tensor, has the same type and shape as x.
    
    Supported Platforms:
        'Ascend'
    """
    # check shape
    utils.check_shape(x)

    # check input tensor data_type
    utils.ops_dtype_check(x.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    dtype = x.dtype

    # We know asinh(x) = log(x + sqrt(x*x + 1)) and asinh(-x) = -asinh(x).
    # When x is a large negative number, (x + sqrt(x*x + 1)) is close to zero
    # and taking its log loses precision, so instead compute
    # asinh(x) = sign(x) * log(|x| + sqrt(|x|*|x| + 1))
    compute_dtype = dtype
    if dtype == "float16":
        # Cast x to float32 to avoid overflow and improve accuracy
        compute_dtype = "float32"
        x = topi.cast(x, compute_dtype)

    x_abs = topi.abs(x)

    if product_is_mini():
        # sqrt(|x|*|x| + 1) = |x| * sqrt(1 + 1/(|x|*|x|))
        vsquare_add_one = topi.add(1,
                                   topi.divide(1, topi.multiply(x_abs, x_abs)))
        sqrt_compute_value = sqrt_mini_newton_iter_impl(vsquare_add_one)
        sqrt_value = topi.multiply(x_abs, sqrt_compute_value)
    else:
        x_abs_square_add_one = topi.add(topi.multiply(x_abs, x_abs), 1)
        sqrt_value = topi.sqrt(x_abs_square_add_one)

    x_add_sqrt = topi.add(x_abs, sqrt_value)

    if product_is_mini():
        log_value = log_compute_mini_impl(x_add_sqrt, target)
    else:
        log_value = topi.log(x_add_sqrt)

    res = topi.multiply(Sign(x, target), log_value)

    if res.dtype != dtype:
        res = topi.cast(res, dtype)

    if product_is_mini():
        attrs = {"enable_auto_inline": False}
        return res, attrs
    return res
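A hypothetical driver for this op, mirroring the build pattern from Example #1; the shape, dtype and kernel name are illustrative only:

x = akg.tvm.placeholder((1024,), name="x", dtype="float32")
res = asinh(x)  # on mini targets asinh returns (res, attrs); unpack accordingly
s = akg.tvm.create_schedule([res.op])
mod = akg.build(s, [x, res], "cce", name="asinh_fp32", polyhedral=True)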
Example #4
def _sinh_2x(sinh_x):
    """sinh(2x) = 2*sinh(x)*sqrt(sinh(x)^2+1)"""
    sinh_x_square = topi.multiply(sinh_x, sinh_x)
    sinh_x_square_add_one = topi.add(sinh_x_square, 1)
    sqrt_value = topi.sqrt(sinh_x_square_add_one)
    sinh_x_mul_sqrt_value = topi.multiply(sinh_x, sqrt_value)
    sinh_2x = topi.multiply(2, sinh_x_mul_sqrt_value)
    return sinh_2x
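The identity in the docstring follows from the double-angle formula sinh(2x) = 2*sinh(x)*cosh(x) together with cosh(x) = sqrt(sinh(x)^2 + 1), which holds for all real x.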
Example #5
def _sqrt(data):
    """Calculate sqrt by using three times newton iteration(Mini) or vsqrt(Cloud)."""
    if utils.product_is_mini():
        data_sqrt = topi.rsqrt(data)
        data_sqrt = _newton_iter(data, data_sqrt)
        data_sqrt = _newton_iter(data, data_sqrt)
        data_sqrt = _newton_iter(data, data_sqrt)
        return topi.multiply(data, data_sqrt)
    else:
        return topi.sqrt(data)
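Here `_newton_iter`, defined elsewhere in the source file, is assumed to apply one Newton refinement step to the reciprocal-square-root estimate, in the same style as the sketch at the end of Example #2; the final multiply uses the fact that sqrt(x) = x * (1/sqrt(x)).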
Example #6
def LambApplyOptimizerAssign(grad, input_v, input_m, input_param, beta_1,
                             one_minus_beta_1, beta_2, one_minus_beta_2,
                             epsilon, steps, do_use_weight, weight_decay_rate):

    # compute next_v
    square_grad = topi.multiply(grad, grad)

    # mul_3
    mul_3_result = topi.multiply(square_grad, one_minus_beta_2)

    # mul_2
    mul_2_result = topi.multiply(input_v, beta_2)

    # compute: next_v = (multiply(self.beta_2, v) + multiply(1.0 - self.beta_2, square(grad)))
    next_v = topi.add(mul_2_result, mul_3_result)

    # compute next_m
    mul_0_result = topi.multiply(input_m, beta_1)

    # mul_1
    mul_1_result = topi.multiply(grad, one_minus_beta_1)

    # compute: next_m = (multiply(self.beta_1, m) + multiply(1.0 - self.beta_1, grad))
    next_m = topi.add(mul_0_result, mul_1_result)

    const_one = akg.tvm.const(1.0, input_v.dtype)

    # compute: beta1_correction = (1 - self.beta_1 ** steps)
    beta_1_steps = pow_compute(beta_1, steps, grad)
    neg_beta_1_step = neg(beta_1_steps, utils.CCE)
    beta1_correction = topi.add(neg_beta_1_step, const_one)

    # compute: beta2_correction = (1 - self.beta_2 ** steps)
    beta_2_steps = pow_compute(beta_2, steps, grad)
    neg_beta_2_step = neg(beta_2_steps, utils.CCE)
    beta2_correction = topi.add(neg_beta_2_step, const_one)

    # compute: next_m_unbiased = next_m / beta1_correction
    next_m_unbiased = Divide(next_m, beta1_correction, utils.CCE)
    # compute: next_v_unbiased = next_v / beta2_correction
    next_v_unbiased = Divide(next_v, beta2_correction, utils.CCE)

    # compute update
    sqrt_next_v = topi.sqrt(next_v_unbiased)
    # add_2
    add_2_result = topi.add(sqrt_next_v, epsilon)
    # compute: update = next_m / (sqrt(next_v) + self.epsilon)
    update = Divide(next_m_unbiased, add_2_result, utils.CCE)

    # compute do_use_weight_decay
    do_use_weight_mul = topi.multiply(input_param, weight_decay_rate)
    do_use_weight_decay = topi.multiply(do_use_weight_mul, do_use_weight)
    update = topi.add(do_use_weight_decay, update)

    attrs = {'enable_auto_inline': False}

    dim_info, _ = lamb_apply_optimizer_assign_set_dim_func(grad)
    if dim_info != "":
        attrs["dim"] = dim_info

    return update, next_v, next_m, attrs
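For reference, the same arithmetic written as a plain NumPy sketch can be handy when checking the kernel numerically; the function name and the treatment of the scalar inputs (beta_1, steps, etc. as Python floats) are assumptions for illustration, not part of the original op:

import numpy as np

def lamb_apply_optimizer_assign_ref(grad, v, m, param, beta_1, beta_2,
                                    epsilon, steps, do_use_weight,
                                    weight_decay_rate):
    """NumPy mirror of the update computed by LambApplyOptimizerAssign."""
    next_v = beta_2 * v + (1.0 - beta_2) * grad * grad
    next_m = beta_1 * m + (1.0 - beta_1) * grad
    beta1_correction = 1.0 - beta_1 ** steps
    beta2_correction = 1.0 - beta_2 ** steps
    update = (next_m / beta1_correction) / (np.sqrt(next_v / beta2_correction) + epsilon)
    update = update + do_use_weight * weight_decay_rate * param
    return update, next_v, next_m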