Example #1
def _compute_mini(data_input, shape):
    """
    Use log and taylor to compute
    arctanh has the feature: arctanh(-abs(x)) = -arctanh(abs(x))
    """

    data_abs = topi.abs(data_input)
    result_ln = _compute_log(data_abs)
    result_taylor = _compute_taylor(data_abs)

    data_abs = topi.cast(data_abs, "float16")
    data_input = topi.cast(data_input, "float16")
    result_taylor = topi.cast(result_taylor, "float16")
    result_ln = topi.cast(result_ln, "float16")
    # use the Taylor expansion when |x| < 0.5 and the log-based formula when 0.5 <= |x| < 1
    data_res = tvm.compute(shape,
                           lambda *i: akg.tvm.expr.Select(data_abs(*i) < dc.half_const("float16"),
                                                          result_taylor(*i),
                                                          result_ln(*i)),
                           name="le")

    # arctanh is odd: negate the result where the input is negative
    data_res_neg = topi.multiply(data_res, dc.neg_one_const("float16"))
    data_res = tvm.compute(shape,
                           lambda *i: akg.tvm.expr.Select(data_input(*i) < dc.zero_const("float16"),
                                                          data_res_neg(*i),
                                                          data_res(*i)),
                           name="neg")
    return data_res
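
The piecewise strategy is easier to see in plain NumPy. The sketch below is a minimal reference, assuming an 8-term Taylor expansion (the real `_compute_taylor` and `_compute_log` helpers are not shown in this example):

import numpy as np

def arctanh_piecewise(x, terms=8):
    """Reference arctanh: Taylor series for |x| < 0.5, log formula otherwise."""
    ax = np.abs(x)
    # Taylor expansion: arctanh(a) = a + a^3/3 + a^5/5 + ...
    taylor = sum(ax ** (2 * k + 1) / (2 * k + 1) for k in range(terms))
    # closed form: arctanh(a) = 0.5 * ln((1 + a) / (1 - a))
    log_form = 0.5 * np.log((1 + ax) / (1 - ax))
    res = np.where(ax < 0.5, taylor, log_form)
    # arctanh is odd: restore the sign of the input
    return np.where(x < 0, -res, res)

x = np.array([-0.9, -0.3, 0.0, 0.4, 0.8])
print(np.max(np.abs(arctanh_piecewise(x) - np.arctanh(x))))  # small (~1e-8)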
Example #2
def matrix_diag(data, out_shape):
    """
    Generate a batched tensor whose values on the diagonals are taken from `data`.

    Args:
        data (tvm.tensor.Tensor): A tensor of type float16, float32 or int32. Rank is L.
        out_shape (Union[list, tuple]): Output shape of length L + 1.
            The value of `out_shape[:-2]` should be equal to `data.shape[:-1]`, i.e. the batch dimensions must match.

    Returns:
        tvm.tensor.Tensor, has same type as "data", shape is "out_shape".
    """
    dtype = data.dtype
    utils.ops_dtype_check(dtype, [utils.DtypeForDavinci.ALL_FLOAT,
                                  utils.DtypeForDavinci.INT32])

    shape = get_shape(data)
    utils.check_shape(data)
    utils.check_shape(out_shape, length=len(shape) + 1)
    if tuple(shape[:-1]) != tuple(out_shape[:-2]):
        raise RuntimeError("The value of out_shape[:-2] should be equal to data.shape[:-1]")

    res = akg.tvm.compute(out_shape,
                          lambda *i: akg.tvm.if_then_else(akg.tvm.all(i[-1] == i[-2], i[-1] < shape[-1]),
                                                          data(*i[:-1]),
                                                          zero_const(dtype)),
                          name="diag")

    return res
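
As a usage sketch, the NumPy equivalent below (a hypothetical helper, not part of akg) shows the shape contract: the batch dims of `data` match `out_shape[:-2]`, and the diagonal is filled up to `data.shape[-1]`:

import numpy as np

def matrix_diag_np(data, out_shape):
    """NumPy reference: place `data` on the main diagonal, zeros elsewhere."""
    res = np.zeros(out_shape, dtype=data.dtype)
    k = min(data.shape[-1], out_shape[-2], out_shape[-1])
    idx = np.arange(k)
    res[..., idx, idx] = data[..., :k]
    return res

data = np.arange(1, 7, dtype=np.float32).reshape(2, 3)  # batch of 2
print(matrix_diag_np(data, (2, 4, 4))[0])
# [[1. 0. 0. 0.]
#  [0. 2. 0. 0.]
#  [0. 0. 3. 0.]
#  [0. 0. 0. 0.]]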
Example #3
def _less_equal_compare_float32(data_x, data_y):
    """if x <= y, then return 1, else 0"""
    data_out = tvm.compute(
        data_x.shape, lambda *index: tvm.expr.Select(
            data_x(*index) <= data_y(*index), dc.one_const(data_x.dtype),
            dc.zero_const(data_x.dtype)))
    return data_out
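
In NumPy terms this whole compute is just a cast of the comparison result; a one-line check:

import numpy as np

x = np.array([1.0, 2.0, 3.0], dtype=np.float32)
y = np.array([2.0, 2.0, 2.0], dtype=np.float32)
print((x <= y).astype(x.dtype))  # [1. 1. 0.]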
Example #4
def _atan2_compute(y, x):
    """compute for atan2"""
    const_pi_by_two = 1.5707963267948966192313216916398
    dtype = y.dtype
    if dtype == "float16":
        y = topi.cast(y, "float32")
        x = topi.cast(x, "float32")

    x_lt_zero_y_mask, y_ge_zero_mask = _init_atan2_mask(y, x)
    y_cmp_zero = topi.multiply(y_ge_zero_mask,
                               tvm.const(const_pi_by_two, "float32"))
    res_x_lt_zero = topi.multiply(x_lt_zero_y_mask, dc.pi_const("float32"))

    # calculate atan(y/x) for the x > 0 case
    if product_is_mini():
        x_rec = reciprocal(x, target=utils.CCE)
        res = topi.multiply(y, x_rec)
    else:
        res = topi.divide(y, x)
    res, _ = atan(res)

    if product_is_mini():
        tensor_zero = dc.zero_const("float16")
        x = topi.cast(x, "float16")
        y_cmp_zero = topi.cast(y_cmp_zero, "float16")
        res = topi.cast(res, "float16")
    else:
        tensor_zero = dc.zero_const("float32")

    res = tvm.compute(res.shape,
                      lambda *i: tvm.expr.Select(
                          x(*i) == tensor_zero, y_cmp_zero(*i), res(*i)),
                      name="res")

    if product_is_mini():
        res = topi.cast(res, "float32")

    res = topi.add(res, res_x_lt_zero)
    return topi.cast(res, dtype)
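
The mask arithmetic reconstructs atan2 from atan in three cases: x > 0 uses atan(y/x) directly, x == 0 yields +-pi/2 from the sign of y, and x < 0 shifts atan(y/x) by +-pi. A NumPy sketch of that case analysis (the function name is mine, and the y == 0, x == 0 corner is left aside):

import numpy as np

def atan2_from_atan(y, x):
    """Rebuild atan2 from atan, mirroring the mask logic above."""
    y_sign = np.where(y >= 0, 1.0, -1.0)
    base = np.arctan(np.divide(y, x, out=np.zeros_like(y), where=x != 0))
    res = np.where(x == 0, y_sign * np.pi / 2, base)    # x == 0: +-pi/2
    return res + np.where(x < 0, y_sign * np.pi, 0.0)  # x < 0: shift by +-pi

y = np.array([1.0, -1.0, 1.0, -1.0, 1.0])
x = np.array([1.0, 1.0, -1.0, -1.0, 0.0])
print(np.allclose(atan2_from_atan(y, x), np.arctan2(y, x)))  # True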
Example #5
def bool_both_zero_compute(judged_min, judged_max):
    """Return a judgment tensor: 0 where input min and max are both zero, 1 elsewhere."""
    dtype = judged_min.dtype
    tensor_zero = topi.full(judged_min.shape, dtype, dc.zero_const(dtype))
    min_abs = topi.abs(judged_min)
    max_abs = topi.abs(judged_max)
    min_max_replace = topi.add(min_abs, max_abs)
    # |min| + |max| is zero only when both are zero; map that to 0, anything else to 1
    bool_min_max_product_less_zero = less_compare_float32(
        min_max_replace, tensor_zero)
    bool_min_max_product_more_zero = less_compare_float32(
        tensor_zero, min_max_replace)
    bool_both_zero = topi.add(bool_min_max_product_less_zero,
                              bool_min_max_product_more_zero)

    return bool_both_zero
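
The judgment tensor is easiest to read elementwise: |min| + |max| is compared against zero from both sides, which collapses to a nonzero test. In NumPy:

import numpy as np

jmin = np.array([0.0, 0.0, -1.5], dtype=np.float32)
jmax = np.array([0.0, 2.0, 0.0], dtype=np.float32)
# 0 only where both inputs are zero, 1 everywhere else
print((np.abs(jmin) + np.abs(jmax) > 0).astype(np.float32))  # [0. 1. 1.]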
Example #6
def reduce_all(data, axis=None, keepdims=False):
    """
    Computes logical and of the input tensor.

    Args:
        data(tvm.tensor.Tensor): Tensor of type Boolean.
        axis(Union[None, int, list, tuple]): Specifies which axes to reduce; if None, all dimensions of
            the input tensor are reduced and the output shape is (1,).
        keepdims(Union[None, bool]): If True, retains reduced dimensions with length 1.

    Returns:
        tvm.tensor.Tensor of same type as input tensor data.
    """

    shape = [x.value for x in data.shape]

    vc_util.ops_dtype_check(data.dtype, vc_util.DtypeForDavinci.BOOL)
    vc_util.check_shape(shape)

    if axis is None and keepdims is False:
        raise ValueError("keepdims must be True when axis is None!")

    axis_new = ft_util.refine_reduce_axis(data, axis)

    # cast bool to float16 and invert: (1 - x) is 0 iff the element was True
    xx1 = akg.tvm.compute(shape,
                          lambda *indice: data(*indice).astype("float16"),
                          name='xx1')
    xx = (-xx1 + dc.one_const("float16"))
    # the sum over the reduce axes is 0 iff every element was True
    yy = akg.topi.sum(xx, axis=axis_new, keepdims=keepdims)

    o_shape = list(yy.shape)

    # zz is True where the sum is nonzero, i.e. some element was False
    zz = akg.tvm.compute(o_shape,
                         lambda *indice: yy(*indice).astype("bool"),
                         name='zz')

    # invert back: 1 where all elements were True, 0 otherwise
    y1 = akg.tvm.compute(
        o_shape,
        lambda *indice: akg.tvm.expr.Select(zz(
            *indice), dc.zero_const("float16"), dc.one_const("float16")),
        name="y1")

    y = akg.tvm.compute(o_shape,
                        lambda *indice: y1(*indice).astype("bool"),
                        name='y')

    return y
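
The float16 round-trip implements `all` as "no element is False": sum(1 - x) over the reduce axes is zero exactly when every x is True. A NumPy sketch of the same trick (ignoring float16 range limits for very large reductions):

import numpy as np

def reduce_all_np(data, axis=None, keepdims=False):
    """all(x) as a sum: sum(1 - x) == 0 iff every element is True."""
    xx = 1.0 - data.astype(np.float16)
    yy = np.sum(xx, axis=axis, keepdims=keepdims)
    return ~yy.astype(bool)  # nonzero sum means some element was False

data = np.array([[True, True], [True, False]])
print(reduce_all_np(data, axis=1, keepdims=True))  # [[ True] [False]]
print(np.all(data, axis=1, keepdims=True))         # matches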
Example #7
def truncate_div_compute(input_x1, input_x2):
    """compute for truncate_div"""
    int_list = ("int32", "int8", "uint8")

    if input_x1.dtype in int_list:
        # truncate toward zero: ceil the negative part of the quotient,
        # floor the positive part, then add (exactly one is nonzero)
        data_zero = dc.zero_const("float32")
        data_x_broad = cast(input_x1, "float32")
        data_y_broad = cast(input_x2, "float32")
        res_div = topi.divide(data_x_broad, data_y_broad)
        res_min_int = ceil(topi.minimum(res_div, data_zero))
        res_max_int = floor(topi.maximum(res_div, data_zero))
        res_trunc = topi.add(res_min_int, res_max_int)
        res_trunc = cast(res_trunc, "float32")
    else:
        res_trunc = topi.divide(input_x1, input_x2)

    return cast(res_trunc, input_x1.dtype)
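
For integer inputs the float detour implements truncation toward zero: the negative part of the quotient is ceiled, the positive part is floored, and exactly one of the two is nonzero. A quick NumPy check:

import numpy as np

q = np.array([7.0, -7.0, 7.0, -7.0]) / np.array([2.0, 2.0, -2.0, -2.0])
trunc = np.ceil(np.minimum(q, 0.0)) + np.floor(np.maximum(q, 0.0))
print(trunc)        # [ 3. -3. -3.  3.]
print(np.trunc(q))  # identical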
Example #8
File: atan2.py, Project: zhuyawen/akg
def _init_atan2_mask(data_y_, data_x_):
    """
    Compute mask for atan2.

    Args:
        data_y_ (tvm.tensor.Tensor): The y of atan2(y, x).
        data_x_ (tvm.tensor.Tensor): The x of atan2(y, x).

    Returns:
        tuple of tvm.tensor.Tensor: the mask carrying the sign of y where x < 0
        (zero elsewhere), and the sign mask of y (+1 where y >= 0, -1 otherwise).
    """
    is_cast_for_mini = utils.product_is_mini() and data_y_.dtype == "float32"

    # on mini, Select only supports float16
    if is_cast_for_mini:
        data_x = topi.cast(data_x_, "float16")
        data_y = topi.cast(data_y_, "float16")
    else:
        data_x = data_x_
        data_y = data_y_

    dtype_input = data_y.dtype

    tensor_one = dc.one_const(dtype_input)
    tensor_zero = dc.zero_const(dtype_input)
    tensor_neg_one = dc.neg_one_const(dtype_input)

    y_ge_zero = tvm.compute(
        data_y.shape,
        lambda *i: tvm.expr.Select(
            data_y(*i) >= tensor_zero, tensor_one, tensor_neg_one),
        name="y_ge_zero")

    x_lt_zero_y_mask = tvm.compute(
        data_y.shape,
        lambda *i: tvm.expr.Select(
            data_x(*i) < tensor_zero, y_ge_zero(*i), tensor_zero),
        name="xlt0_y_mask")

    if is_cast_for_mini:
        x_lt_zero_y_mask = topi.cast(x_lt_zero_y_mask, "float32")
        y_ge_zero = topi.cast(y_ge_zero, "float32")

    return (x_lt_zero_y_mask, y_ge_zero)
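
Evaluating the two masks on the four sign quadrants makes their roles concrete; in NumPy:

import numpy as np

y = np.array([1.0, -1.0, 1.0, -1.0])
x = np.array([1.0, 1.0, -1.0, -1.0])
y_ge_zero = np.where(y >= 0, 1.0, -1.0)             # sign of y as +-1
x_lt_zero_y_mask = np.where(x < 0, y_ge_zero, 0.0)  # that sign, only where x < 0
print(y_ge_zero)         # [ 1. -1.  1. -1.]
print(x_lt_zero_y_mask)  # [ 0.  0.  1. -1.]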
Example #9
def less_compare_float32(data_x, data_y):
    """if x is less than y, then return 1, else return 0"""
    shape_inputs = get_shape(data_x)
    # the minimum positive normal float32 is 2**(-126)
    data_min = akg.lang.ascend.broadcast(tvm.const(2**(-126), dtype="float32"),
                                         shape_inputs, "float32")
    data_zero = akg.lang.ascend.broadcast(dc.zero_const("float32"),
                                          shape_inputs, "float32")
    res_sub = topi.subtract(data_y, data_x)
    res_min = topi.minimum(res_sub, data_min)
    res_max = topi.maximum(res_min, data_zero)
    # scaling by 2**126 maps the clamped range [0, 2**(-126)] onto [0, 1],
    # but cce only supports constants up to 2**62, so the factor is split
    # as 2**62 * 2**62 * 2**2 (62 + 62 + 2 = 126)
    res_mul_first = topi.multiply(res_max, tvm.const(2**62, dtype="float32"))
    res_mul_second = topi.multiply(res_mul_first,
                                   tvm.const(2**62, dtype="float32"))
    res = topi.multiply(res_mul_second, tvm.const(2**2, dtype="float32"))

    return res
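
The scaling trick in numbers: after the clamp, the difference lies in [0, 2**(-126)], and multiplying by 2**126 maps the top of that range to exactly 1.0. A NumPy check of the arithmetic:

import numpy as np

x, y = np.float32(1.0), np.float32(1.0 + 1e-6)
r = np.maximum(np.minimum(y - x, np.float32(2.0 ** -126)), np.float32(0.0))
r = r * np.float32(2.0 ** 62) * np.float32(2.0 ** 62) * np.float32(2.0 ** 2)
print(r)  # 1.0, since x < y; it would be 0.0 otherwise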
Example #10
def fused_minimum_or_maximum_grad(dz, x, y, grad_x, grad_y, op_type):
    """
    Gradient for minimum or maximum operation between two input tensors `x` and `y`.

    Args:
        dz (tvm.tensor.Tensor): Type float16, float32, int32.
        x (tvm.tensor.Tensor): Type float16, float32, int32.
        y (tvm.tensor.Tensor): Type float16, float32, int32.
        grad_x (bool): Whether calculate dx.
        grad_y (bool): Whether calculate dy.
        op_type (str): The type of the op, "GE" for MaximumGrad or "LE" for MinimumGrad.

    Note:
        At least one of grad_x and grad_y is True.

    Returns:
        dx, tvm.tensor.Tensor of the same type as inputs, it will be returned if grad_x is True.
        dy, tvm.tensor.Tensor of the same type as inputs, it will be returned if grad_y is True.
    """
    vc_util.check_shape(x)
    vc_util.check_shape(y)
    vc_util.check_shape(dz)
    vc_util.ops_dtype_check([x.dtype, y.dtype, dz.dtype],
                            [vc_util.DtypeForDavinci.ALL_FLOAT, vc_util.DtypeForDavinci.INT32])

    vc_util.broadcast_check(x, dz)
    vc_util.broadcast_check(y, dz)

    # check op types
    check_list = ["GE", "LE"]
    if op_type not in check_list:
        raise ValueError("FusedMinimumOrMaximumGrad only support %s while op type is %s" %
                         (",".join(check_list), op_type))

    if not grad_x and not grad_y:
        raise ValueError("At least one of grad_x and grad_y is True.")

    x_shape = get_shape(x)
    y_shape = get_shape(y)
    dz_shape = get_shape(dz)
    ori_dtype = dz.dtype

    # get greater compute
    x = akg.lang.cce.broadcast(x, dz_shape)
    y = akg.lang.cce.broadcast(y, dz_shape)

    if utils.product_is_mini() and ori_dtype != "float16":
        x = cast(x, "float16")
        y = cast(y, "float16")
        dz = cast(dz, "float16")
    elif ori_dtype == "int32":
        x = cast(x, "float32")
        y = cast(y, "float32")
        dz = cast(dz, "float32")
    zero = zero_const(dz.dtype)

    if op_type == "LE":
        dx = tvm.compute(dz_shape, lambda *i: tvm.expr.Select((x(*i) <= y(*i)), dz(*i), zero), name='dx')
        dy = topi.subtract(dz, dx)
    elif op_type == "GE":
        dx = tvm.compute(dz_shape, lambda *i: tvm.expr.Select((x(*i) >= y(*i)), dz(*i), zero), name='dx')
        dy = topi.subtract(dz, dx)

    if dx.dtype == "float16":
        # cast to fp32 for higher precision of reduce_sum.
        if get_shape(dx) != x_shape:
            dx = cast(dx, "float32")
        if get_shape(dy) != y_shape:
            dy = cast(dy, "float32")

    dx = sum.sum_by_shape(dx, x_shape)
    dy = sum.sum_by_shape(dy, y_shape)

    if ori_dtype != dx.dtype:
        dx = cast(dx, ori_dtype)
    if ori_dtype != dy.dtype:
        dy = cast(dy, ori_dtype)

    attrs = get_default_attrs()
    if grad_x and grad_y:
        return dx, dy, attrs
    if grad_x:
        return dx, attrs
    return dy, attrs
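
The gradient routing itself is two lines once broadcasting is out of the way: dz flows to whichever input wins the comparison, and the other input gets the remainder (so ties route everything to x). A NumPy reference for the "GE" (MaximumGrad) case, with the broadcast reduction via sum_by_shape omitted:

import numpy as np

def maximum_grad_np(dz, x, y):
    """MaximumGrad ("GE"): dz flows to the larger input, ties go to x."""
    dx = np.where(x >= y, dz, 0.0)
    dy = dz - dx  # the other input gets the remainder
    return dx, dy

dz = np.array([1.0, 1.0, 1.0])
x = np.array([3.0, 2.0, 5.0])
y = np.array([2.0, 4.0, 5.0])
print(maximum_grad_np(dz, x, y))  # (array([1., 0., 1.]), array([0., 1., 0.]))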
Example #11
def avgpool_with_img2col(data, kernel, stride, strategy):
    """
    Performs the avgpool with img2col.

    Note:
        Only support 5D format(NC1HWC0), and pooling will work on H and W.

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16.
        kernel (Union[list, tuple]): two int numbers for pooling window's size.
        stride (Union[list, tuple]): two int numbers for window's stride.
        strategy (Union[str, list, tuple]): Padding strategy, should be 'VALID', 'SAME', or
            a list of four ints (the 'CONSTANTS' strategy). The supported strategies
            are the same as for avgpool.

    Returns:
        tvm.tensor.Tensor, result of average pooling.
    """
    shape = get_shape(data)
    dtype = data.dtype

    utils.davinci_format_check(shape, "NC1HWC0", dim=5)
    utils.ops_dtype_check(dtype, utils.DtypeForDavinci.FLOAT16)
    utils.check_shape(kernel, 2, "Kernel")
    utils.check_shape(stride, 2, "Stride")

    kernel_h, kernel_w = kernel
    in_n, in_c1, _, _, in_c0 = shape

    [ph_h, ph_t, pw_h, pw_t], [out_h, out_w] = \
        cal_pad_shapes_by_strategy(shape, kernel, stride, strategy)

    pad = [ph_h, ph_t, pw_h, pw_t]
    pad_value = zero_const(dtype)

    # fmap img2col l1 -> ub in zZ format by fractal
    fmap_img2col_shp_ub = (in_n, in_c1, kernel_h, kernel_w, out_h, out_w,
                           in_c0)
    fmap_img2col_ub = img2col(data,
                              fmap_img2col_shp_ub,
                              kernel_h,
                              kernel_w,
                              pad,
                              stride,
                              pad_value,
                              tag="")

    out_shape = (in_n, in_c1, out_h, out_w, in_c0)
    reduce_axis_h = akg.tvm.reduce_axis((0, kernel_h), name="reduce_h")
    reduce_axis_w = akg.tvm.reduce_axis((0, kernel_w), name="reduce_w")
    res_sum = akg.tvm.compute(
        out_shape,
        lambda n, c1, oh, ow, c0: akg.tvm.sum(
            fmap_img2col_ub[n, c1, reduce_axis_h, reduce_axis_w, oh, ow, c0],
            axis=[reduce_axis_h, reduce_axis_w]),
        name="pooling_avg")

    divisor = akg.tvm.const(kernel_h * kernel_w, dtype)
    output = akg.tvm.compute(out_shape,
                             lambda *i: res_sum(*i) / divisor,
                             name="res_value")
    return output
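
Stripped of the NC1HWC0 blocking and padding strategies, the img2col-then-reduce pattern looks like this in NumPy (VALID padding, a single channel axis, names are mine):

import numpy as np

def avgpool_np(data, kernel, stride):
    """VALID average pooling on (N, H, W) via an explicit img2col buffer."""
    kh, kw = kernel
    sh, sw = stride
    n, h, w = data.shape
    oh, ow = (h - kh) // sh + 1, (w - kw) // sw + 1
    # img2col: one strided slice per in-window offset, shaped (N, kh, kw, oh, ow)
    col = np.empty((n, kh, kw, oh, ow), dtype=data.dtype)
    for i in range(kh):
        for j in range(kw):
            col[:, i, j] = data[:, i:i + sh * oh:sh, j:j + sw * ow:sw]
    # reduce over the window axes, then divide by the window size
    return col.sum(axis=(1, 2)) / (kh * kw)

x = np.arange(16, dtype=np.float32).reshape(1, 4, 4)
print(avgpool_np(x, (2, 2), (2, 2)))  # [[[ 2.5  4.5] [10.5 12.5]]]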