Example #1
def l2loss(data):
    dtype = data.dtype

    check_list = ["float16", "float32"]
    if not (dtype.lower() in check_list):
        raise RuntimeError("tile_cce only support %s while dtype is %s" % (",".join(check_list), dtype))

    vc_util.check_shape(data.shape)

    orig_dtype = dtype
    if dtype.lower() == "float16":
        dtype = "float32"
        data = akg.topi.cast(data, dtype)

    # simplify_axis_shape has a known bug, so this simplification stays disabled:
    # shape, axis = simplify_axis_shape(shape, range(len(shape)))

    coeff_sqrt = akg.tvm.const(1.0 / (2 ** (0.5)), dtype=dtype)

    res = akg.lang.cce.vmuls(data, coeff_sqrt)
    res = akg.lang.cce.vmul(res, res)
    res, _ = sum.sum_value(res)

    if dtype != orig_dtype:
        res = akg.topi.cast(res, orig_dtype)

    return res
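
The kernel computes sum((x * 1/sqrt(2))^2) = sum(x^2) / 2, the standard L2 loss. As a sanity check, a minimal NumPy reference sketch (NumPy and the helper name l2loss_ref are illustrative assumptions, not part of akg):

import numpy as np

def l2loss_ref(x):
    # (x / sqrt(2))**2 summed over all elements == sum(x**2) / 2
    x = x.astype(np.float32)  # mirrors the float16 -> float32 promotion above
    return np.sum((x * (1.0 / np.sqrt(2.0))) ** 2)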
Example #2
def gather(params_shape,
           indices_shape,
           params_dtype,
           indices_dtype,
           axis,
           kernel_name,
           cce_path="./"):
    """Gather data by indices"""
    vc_util.check_shape(params_shape, length=2)
    vc_util.check_shape(indices_shape, length=1)
    vc_util.ops_dtype_check(params_dtype, vc_util.DtypeForDavinci.ALL_TYPES)
    vc_util.ops_dtype_check(indices_dtype, vc_util.DtypeForDavinci.INT32)
    vc_util.check_equal("axis", "zero", axis, 0)

    # construct compute
    o_shape = (indices_shape[0], params_shape[1])
    xx = akg.tvm.placeholder(params_shape, dtype=params_dtype, name="X")
    yy = akg.tvm.placeholder(indices_shape, dtype=indices_dtype, name="Y")
    res = akg.tvm.extern(o_shape, [xx, yy],
                         lambda ins, outs: kernel_ir(outs[0], ins[0], ins[1]),
                         name="res",
                         dtype=params_dtype)
    s = akg.tvm.create_schedule(res.op)

    # create cce
    attrs = {"enable_multicore": False}
    with akg.build_config(add_lower_pass=cce.debug_mode(0), dump_pass_ir=True):
        mod = akg.build(s, [xx, yy, res], "cce", name=kernel_name, attrs=attrs)

    source_code = mod.imported_modules[0].get_source()
    utils.create_code(kernel_name, cce_path, source_code)

    return mod
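
With axis fixed at 0, 2-D params and 1-D indices, the extern kernel behaves like NumPy row gathering. A reference sketch (NumPy and the name gather_ref are illustrative assumptions):

import numpy as np

def gather_ref(params, indices):
    # select whole rows of params; output shape is (indices.shape[0], params.shape[1])
    return params[indices, :]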
Example #3
def dropout_do_mask(data_tensor, data_mask, keep_prob):
    dtype = data_tensor.dtype
    shape_tensor = [x.value for x in data_tensor.shape]
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.check_shape(shape_tensor)

    strides = [1]
    for x in reversed(shape_tensor):
        strides.append(strides[-1] * x)

    if keep_prob <= 0 or keep_prob > 1:
        raise RuntimeError("keep_prob must be in the range (0, 1]")

    keep_prob_const = akg.tvm.const(1.0 / keep_prob, dtype=dtype)
    data_scale_ub = akg.tvm.compute(
        shape_tensor,
        lambda *indices: data_tensor(*indices) * keep_prob_const,
        name='data_scale_ub')

    def get_index(indices):
        idx = 0
        for i in range(len(indices)):
            idx += indices[len(indices) - i - 1] * strides[i]
        return idx // 8

    if dtype == "float32":
        data_scale_ub_16 = akg.topi.cast(data_scale_ub, "float16")
        res_ub_16 = akg.tvm.compute(
            shape_tensor,
            lambda *indice: dav.dropout(data_mask[get_index(indice)], data_scale_ub_16(*indice)))
        res = akg.topi.cast(res_ub_16, "float32")
    else:
        res = akg.tvm.compute(
            shape_tensor,
            lambda *indice: dav.dropout(data_mask[get_index(indice)], data_scale_ub(*indice)))

    return res
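
get_index flattens a multi-dimensional index into a row-major linear offset (strides[0] is 1, and strides[i] pairs with the i-th index counted from the last dimension), then divides by 8 because the dropout mask packs eight elements per byte. A plain-Python sketch of the same mapping (the helper name is hypothetical):

def mask_byte_index(indices, shape):
    # row-major linear index, then byte offset into the bit-packed mask
    strides = [1]
    for x in reversed(shape):
        strides.append(strides[-1] * x)
    idx = sum(indices[len(indices) - 1 - i] * strides[i] for i in range(len(indices)))
    return idx // 8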
Example #4
def logical_not(inputs):
    vc_util.ops_dtype_check(inputs.dtype, vc_util.DtypeForDavinci.BOOL)
    vc_util.check_shape(inputs.shape)

    res = akg.topi.logical_not(inputs)

    return res
Example #5
def discontinous_mov(data, out_shape):
    """
    Extract the element with the odd index from the original data and copy it into a tensor with a dimension of
    2 * original dimension/2.

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32.
        out_shape (list): a list of output's shape.

    Returns:
           tvm.tensor.Tensor, has the same type as data, but it's shape changes to out_shape not data's shape.

    Example:
           if data = [1,2,3,4,5,6,7,8,9,10] then the output = [[1,3,5,7,9],[1,3,5,7,9]].
    """

    # check types
    vc_util.ops_dtype_check(data.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    shape = [x.value for x in data.shape]
    vc_util.check_shape(shape)

    output = akg.tvm.compute(out_shape,
                             lambda j, i: data[i * 2],
                             name="output")

    return output
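
Because the compute reads data[i * 2] and ignores the row index j, every output row holds the even-indexed elements of data. An equivalent NumPy one-liner (sketch; NumPy and the helper name are assumptions):

import numpy as np

def discontinous_mov_ref(data, out_shape):
    # every second element, duplicated across the first output dimension
    return np.tile(data[::2], (out_shape[0], 1))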
Example #6
def invert_permutation_run(shape, dtype, attrs):
    # check shapes
    vc_util.check_shape(shape)

    if not (dtype.lower() in "int32"):
        raise RuntimeError(
            "indices_dtype only support int32 while dtype is %s" % dtype)

    A = akg.tvm.placeholder(shape, dtype, name="A")
    op = invert_permutation.invert_permutation(A)
    s = akg.tvm.create_schedule(op.op)

    kernel_name = utils.gen_name_kernel("invert_permutation", dtype, shape)
    with akg.build_config(add_lower_pass=cce.debug_mode(0), dump_pass_ir=True):
        mod = akg.build(s, [A, op],
                        "cce",
                        name=kernel_name,
                        attrs=attrs,
                        polyhedral=True)

    input_data = np.random.permutation(np.arange(shape[0])).astype(np.int32)
    expect = np.full([shape[0]], 0, np.int32)
    for i, e in enumerate(input_data):
        expect[e] = i

    output = np.full([shape[0]], 0, np.int32)
    output = utils.mod_launch(mod, (input_data, output), expect=expect)

    return (input_data, ), output, expect, compare_tensor(output,
                                                          expect,
                                                          rtol=5e-03,
                                                          equal_nan=True)
Example #7
def pow(data1, data2):
    """
    Computes power(data1,data2) elementwise, broadcast is supported.

    Args:
        data1 (tvm.tensor.Tensor): Tensor.
        data2 (tvm.tensor.Tensor): Tensor of same type as data1, if shape(data2) != shape(data1), broadcast will happen.

    Returns:
        tvm.tensor.Tensor, powered result, with same type as input tensors and broadcasted shape of data1 and data2.
    """
    vc_util.elemwise_dtype_check(data1.dtype, data2.dtype)
    vc_util.check_shape(data1.shape)
    vc_util.check_shape(data2.shape)
    vc_util.auto_broadcast_check(data1.shape, data2.shape)

    in_dtype = data1.dtype
    if in_dtype == 'float16':
        data1 = akg.topi.cast(data1, 'float32')
        data2 = akg.topi.cast(data2, 'float32')
    res = akg.topi.power(data1, data2)
    if in_dtype == 'float16':
        res = akg.topi.cast(res, 'float16')

    return res
Example #8
File: tanh_grad.py  Project: zhuyawen/akg
def tanh_grad(data_y, data_dy):
    """
    Compute the backpropagation gradient of tanh.

    Args:
        data_y: Tensor, which equals the output of tanh.
        data_dy: Tensor, the initial gradients.

    Returns:
        Tensor, overall gradients.
    """
    dtype = data_y.dtype
    vc_util.ops_dtype_check(data_y.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    shape = [x.value for x in data_y.shape]
    vc_util.check_shape(shape)

    # dx = dy * (1 - y*y)
    tmp1 = akg.tvm.const(-1, dtype=dtype)
    tmp2 = akg.tvm.const(1, dtype=dtype)
    data1_square = akg.lang.cce.vmul(data_y, data_y)
    data_tmp = akg.lang.cce.vmuls(data1_square, tmp1)
    one_minus_square = akg.lang.cce.vadds(data_tmp, tmp2)
    res = akg.lang.cce.vmul(one_minus_square, data_dy)

    return res
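
The code applies the identity tanh'(x) = 1 - tanh(x)^2, so dx = dy * (1 - y*y), assembled as ((y*y) * -1 + 1) * dy to match the vmul/vmuls/vadds primitives. A NumPy check of the same algebra (sketch; the helper name is assumed):

import numpy as np

def tanh_grad_ref(y, dy):
    # dx = dy * (1 - y*y), spelled exactly like the vector ops above
    return (y * y * -1.0 + 1.0) * dy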
Example #9
def reduce_prod(data, axis=None, keepdims=False):
    """
    Computes the product of elements along specific axis

    Args:
        data (tvm.tensor.Tensor): indicating the input tensor.
        axis (Union[list, tuple, int, None]): indicating the dimensions to reduce at. if it's None, all dimensions
                                               will be reduced.
        keepdims (Union[bool, None]): if true, keep the dimensions with length 1.

    Returns:
        Tensor, the product of elements of input tensor.
    """
    shape = [x.value for x in data.shape]
    ops_dtype_check(data.dtype, [
        DtypeForDavinci.ALL_FLOAT, DtypeForDavinci.INT8, DtypeForDavinci.UINT8
    ])

    if axis is None and keepdims is False:
        raise ValueError("keepdims must be True when axis is None!")

    axis_new = ft_util.refine_reduce_axis(data, axis)

    check_shape(shape)
    dtype = data.dtype
    if dtype in ["int8", "uint8"]:
        data = akg.topi.cast(data, "float16")

    vlog_t = akg_log(data)
    res = akg.topi.sum(vlog_t, axis=axis_new, keepdims=keepdims)
    res = akg_exp(res)

    if dtype in ["int8", "uint8"]:
        res = akg.topi.cast(res, dtype)
    return res
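
The product is computed through the identity prod(x) = exp(sum(log(x))), which maps the reduction onto the available vector log/sum/exp primitives and therefore assumes the (cast) inputs are positive. A NumPy sketch of the identity (helper name assumed):

import numpy as np

def reduce_prod_ref(x, axis=None, keepdims=False):
    # prod(x) == exp(sum(log(x))) for x > 0
    return np.exp(np.sum(np.log(x), axis=axis, keepdims=keepdims))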
Example #10
def reciprocal(data, high_precision=True):
    """
    Computes the reciprocal of data element-wise.

    Args:
        data (tvm.tensor.Tensor): tvm.tensor.Tensor of type float16, float32.
        high_precision (bool): a bool value, whether to use high-precision version.

    Returns:
        tvm.tensor.Tensor of same type and shape as data.
    """

    vc_util.ops_dtype_check(data.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    shape = [x.value for x in data.shape]
    vc_util.check_shape(shape)

    res = akg.tvm.compute(shape, lambda *indice: akg.tvm.const(1, data.dtype) / (data(*indice)), name="res")

    # On the mini product, use Newton's iteration to achieve higher precision.
    if utils.product_is_mini() and high_precision:
        steps = 1
        for _ in range(steps):
            temp1 = data * res
            temp2 = temp1 * akg.tvm.const(-1, data.dtype)
            temp3 = temp2 + akg.tvm.const(2, data.dtype)
            res = temp3 * res

    return res
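
The refinement loop is one step of Newton's method for f(r) = 1/r - a, whose update is r_{n+1} = r_n * (2 - a * r_n); temp1 through temp3 spell out exactly that expression. A plain-Python sketch of a single step (helper name assumed):

def newton_reciprocal_step(a, r):
    # r_next = (2 - a * r) * r; converges quadratically toward 1/a
    return (2.0 - a * r) * r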
Example #11
def bias_add_ad(head, input_shape, data_format):
    """
    Compute gradient for bias_add operator using automatic differentiate.

    Args:
        head (tvm.tensor.Tensor): Input tensor.
        input_shape (Union[list, tuple]): Input shape of head.
        data_format (str): Data format of input tensors.

    Returns:
        tvm.tensor.Tensor of same shape and type as head.
    """

    check_list = ["NHWC", "NC1HWC0", "DefaultFormat"]
    if data_format not in check_list:
        raise RuntimeError("bias_add_grad only support %s while dataformat is %s" % (",".join(check_list), data_format))
    vc_util.check_shape(head.shape)
    shape1 = [x.value for x in head.shape]
    vc_util.davinci_format_check(shape1, data_format)
    a = akg.tvm.placeholder(head.shape, head.dtype, "A")
    if data_format == "NC1HWC0":
        bias_shape = (1, head.shape[1], 1, 1, head.shape[4])
        b = akg.tvm.placeholder(bias_shape, head.dtype, "B")
    elif data_format == "NHWC":
        bias_shape = (input_shape[-1],)
        b = akg.tvm.placeholder(bias_shape, head.dtype, "B")
    else:
        bias_shape = (input_shape[1],)
        b = akg.tvm.placeholder(bias_shape, head.dtype, "B")
    c = bias_add.bias_add(a, b, data_format)

    jacs = list(akg.differentiate(c, [b], head))
    attrs = {}
    return jacs[0], attrs
Example #12
def reduce_any_d(x, axis=None, keepdims=False):
    """
    Reduce a tensor on a certain axis based on max.

    Args:
        x (tvm.tensor.Tensor): The input tensor to reduce. Should be of type int8.
        axis (Union[list, tuple, int, None]): The dimensions to reduce. If None, all dimensions will be reduced.
                                              each dim must be in the range [-len(data.shape), len(data.shape) - 1].
        keepdims (Union[bool, None]): If True, retains reduced dimensions with length 1, defaults to False.

    Returns:
        tvm.tensor.Tensor of same type as input tensor x.
    """
    # check type
    vc_util.ops_dtype_check(x.dtype, vc_util.DtypeForDavinci.INT8)
    vc_util.check_shape(x.shape)
    # check axis
    vc_util.reduce_axis_check(x.shape, axis)
    refined_axis = refine_reduce_axis(x, axis)
    if len(set(refined_axis)) == len(x.shape) and not keepdims:
        keepdims = True
    res = _reduce_any_d_compute(x, refined_axis, keepdims)
    if len(set(refined_axis)) == len(x.shape):
        res = topi.reshape(res, (1, ))
    return res
Example #13
File: less.py  Project: zhuyawen/akg
def less(data1, data2):
    """
    compute tensor with smaller value in data1 and data2 elementwisely.

    Args:
        data1 (tvm.tensor.Tensor): Tensor of type float16, float32 and int32.
        data2 (tvm.tensor.Tensor): Tensor of type float16, float32 and int32.

    Returns:
        tvm.tensor.Tensor. If data1 less than data2, return True, else return False.
    """

    vc_util.check_shape(data1.shape)
    vc_util.check_shape(data2.shape)

    # check types
    vc_util.elemwise_dtype_check(
        data1.dtype, data2.dtype,
        [vc_util.DtypeForDavinci.ALL_FLOAT, vc_util.DtypeForDavinci.INT32])

    # check runtime mode, and change dtype
    if utils.product_is_mini() and data1.dtype != "float16":
        data1 = akg.topi.cast(data1, "float16")
        data2 = akg.topi.cast(data2, "float16")
    if (not utils.product_is_mini()) and data1.dtype == "int32":
        data1 = akg.topi.cast(data1, "float32")
        data2 = akg.topi.cast(data2, "float32")

    res = akg.topi.less(data1, data2)
    return res
Example #14
def truncate_div(input_x1, input_x2):
    """
    Calculate truncate_div of the inputs: res = floor(x1/x2) if x1/x2 > 0 else ceil(x1/x2), i.e. division truncated toward zero.

    Args:
        input_x1 (tvm.tensor.Tensor): Input tensor, support float16,
                                      float32 on mini device, while support
                                      int32, int8, uint8, float16, float32 on
                                      cloud ones.
        input_x2 (tvm.tensor.Tensor): Input tensor, with same dtype as input_x1.
    Returns:
        A tvm.tensor.Tensor as result of truncate_div.
    """
    vc_util.check_shape(get_shape(input_x1))
    vc_util.check_shape(get_shape(input_x2))
    vc_util.elemwise_dtype_check(input_x1.dtype, input_x2.dtype)
    vc_util.ops_dtype_check(
        input_x1.dtype,
        (vc_util.DtypeForDavinci.ALL_FLOAT) if utils.product_is_mini() \
            else (vc_util.DtypeForDavinci.ALL_FLOAT,
                  vc_util.DtypeForDavinci.INT32,
                  vc_util.DtypeForDavinci.INT8,
                  vc_util.DtypeForDavinci.UINT8))

    return truncate_div_compute(input_x1, input_x2)
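
Taking floor for positive quotients and ceil for negative ones is exactly truncation toward zero, so a NumPy reference is a one-liner (sketch; the helper name is assumed):

import numpy as np

def truncate_div_ref(x1, x2):
    # floor(x1/x2) when the quotient is positive, ceil otherwise == trunc toward zero
    return np.trunc(x1 / x2)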
Example #15
def broadcast_to(x, shape):
    """
    Broadcast a tensor to a compatible shape.

    Args:
        x (tvm.tensor.Tensor): Tensor of type float32, float16, int8, uint8, int32
        shape (list, tuple): The shape of output tensor.

    Returns:
        A tvm.tensor.Tensor with the same type as x.

    """
    # check shape
    vc_util.check_shape(x)
    vc_util.check_shape(shape)

    # check dtype
    dtype = x.dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_TYPES)

    # the vector_dup instruction doesn't support int8 and uint8;
    # this could be simplified by other means, such as "auto cast"
    x_shape = get_shape(x)
    if len(x_shape) == 1 and x_shape[0] == 1 and dtype in ["int8", "uint8"]:
        x = cast(x, "float16")

    res = topi.broadcast_to(x, shape)
    if res.dtype != dtype:
        res = cast(res, dtype)
    return res
Example #16
File: floordiv.py  Project: zhuyawen/akg
def floordiv(data1, data2):
    """
    Calculate x/y, and always returns an integer which is floored.

    Args:
        data1 (tvm.tensor.Tensor): Tensor of type float16, float32.
        data2 (tvm.tensor.Tensor): Tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor, has type of int32.
    """
    vc_util.ops_dtype_check([data1.dtype, data2.dtype],
                            vc_util.DtypeForDavinci.ALL_FLOAT)
    shape1 = [x.value for x in data1.shape]
    vc_util.check_shape(shape1)
    shape2 = [x.value for x in data2.shape]
    vc_util.check_shape(shape2)

    if utils.product_is_mini():
        rec = reciprocal(data2, high_precision=True)
        res = data1 * rec
    else:
        res = akg.topi.divide(data1, data2)
    res = akg.lang.cce.floor(res)
    return res
Example #17
File: xdivy.py  Project: x200510iong/akg
def xdivy(data_x1, data_x2):
    """
    Calculate data_x1 divided by data_x2.

    .. math::
        y = \\left\\{
            \\begin{aligned}
                0, && \\text{if } x1 = 0 \\\\
                \\dfrac{x1}{x2}, && \\text{otherwise}
            \\end{aligned}
        \\right.

    Args:
        data_x1 (tvm.tensor.Tensor): Tensor of dtype "float16" or "float32"
        data_x2 (tvm.tensor.Tensor): Tensor of dtype "float16" or "float32"

    Returns:
        tvm.tensor.Tensor
    """
    shape_x1 = get_shape(data_x1)
    shape_x2 = get_shape(data_x2)

    vc_util.check_shape(shape_x1)
    vc_util.check_shape(shape_x2)

    vc_util.elemwise_dtype_check(data_x1.dtype, data_x2.dtype)
    dtype = data_x1.dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    return xdivy_compute(data_x1, data_x2)
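
xdivy returns 0 wherever x1 is 0 (sidestepping 0/0) and x1/x2 everywhere else. A NumPy sketch of the same piecewise definition (helper name assumed; float inputs):

import numpy as np

def xdivy_ref(x1, x2):
    # 0 where x1 == 0, x1 / x2 elsewhere; where= keeps 0/0 from being evaluated
    return np.divide(x1, x2, out=np.zeros_like(x1), where=(x1 != 0))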
Example #18
File: clip.py  Project: zhuyawen/akg
def clip(data, min_val, max_val):
    """
    Clip the data to the range [min_val, max_val].

    Change values less than min_val in data to min_val, and change values greater than max_val to max_val.

    Note:
        min_val should be smaller or equal to max_val.

    Args:
        data: Tensor.
        min_val: Float. When data < min_val, set data to min_val.
        max_val: Float. When data > max_val, set data to max_val.

    Returns:
        Tensor, has the same type and shape as data.
    """

    dtype = data.dtype
    check_list = ["float16", "float32"]
    if dtype.lower() not in check_list:
        raise RuntimeError("clip only supports %s while dtype is %s" %
                           (",".join(check_list), dtype))

    shape = data.shape
    vc_util.check_shape(shape)

    res = akg.topi.clip(data, min_val, max_val)

    return res
Example #19
def reduce_sum(inputs, axis=None, keepdims=False):
    """
    Compute the sum of elements across dimensions of a tensor.

    Args:
        inputs (tvm.tensor.Tensor): Tensor.
        axis (Union[list, tuple, int, None]): If the list or tuple is empty, axis is treated as None.
        keepdims (bool): If keepdims equal to True, the result shape length is same to input shape length.

    Returns:
        tvm.tensor.Tensor, has same type as input. If keepdims is True, all reduced dimensions are retained
        with length 1; otherwise the reduced dimensions are eliminated.
    """
    axis = ft_util.refine_reduce_axis(inputs, axis)
    vc_util.check_shape(inputs.shape)

    in_dtype = inputs.dtype
    if in_dtype == 'float16':
        inputs = akg.topi.cast(inputs, 'float32')

    output = akg.topi.sum(inputs, axis=axis, keepdims=keepdims)

    if in_dtype == 'float16':
        output = akg.topi.cast(output, 'float16')

    return output
Example #20
def less_equal(input1, input2):
    """
    Check elementwise whether input1 is less than or equal to input2.

    Args:
        input1 (tvm.tensor.Tensor): Tensor.
        input2 (tvm.tensor.Tensor): Tensor.

    Returns:
        tvm.tensor.Tensor of type bool: True where input1 <= input2, False elsewhere.
    """
    shape1 = [x.value for x in input1.shape]
    shape2 = [x.value for x in input2.shape]
    vc_util.check_shape(shape1)
    vc_util.check_shape(shape2)

    shape1, shape2, shape = produce_shapes(shape1, shape2)

    vc_util.elemwise_dtype_check(input1.dtype, input2.dtype)
    dtype = input1.dtype

    # get lessequal compute
    t_value = akg.tvm.compute(shape, lambda *indice: akg.tvm.const(1, dtype), "T")
    f_value = akg.tvm.compute(shape, lambda *indice: akg.tvm.const(0, dtype), "F")

    input1_bro = akg.topi.broadcast_to(input1, shape)
    input2_bro = akg.topi.broadcast_to(input2, shape)
    c_out = akg.tvm.compute(shape,
                            lambda *indice: akg.tvm.expr.Select(input1_bro[indice] <= input2_bro[indice],
                                                                t_value[indice], f_value[indice]),
                            name="C")
    res = akg.tvm.compute(shape, lambda *indice: c_out(*indice).astype("bool"), name="res")

    return res
Example #21
def logsoftmax_grad(Y, dY, axis):
    """
    Computes the back propagation gradients by chain rule.

    Args:
        Y: Tensor, holds the logsoftmax activation output.
        dY: Tensor, holds the initial gradients.
        axis: Integer, on which dimension the softmax is applied.

    Returns:
        Tensor, the overall gradients.
    """
    shape = [x.value for x in Y.shape]
    vc_util.check_shape(shape)
    dtype = Y.dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    if axis == -1:
        axis = len(shape) + axis
    if axis >= len(shape):
        raise RuntimeError("axis should be less than dimension")
    if axis < -1:
        raise RuntimeError(
            "negative axis only supports -1; please specify the axis as a positive value"
        )

    softmax = akg.topi.exp(Y)
    dy_sum = akg.lang.cce.sum(dY, axis=axis)
    dy_sum_broadcast = akg.lang.cce.broadcast(dy_sum, shape)
    mul_result = akg.lang.cce.vmul(softmax, dy_sum_broadcast)
    res = akg.lang.cce.vsub(dY, mul_result)
    attrs = {"pragma_reschedule": 1, "pragma_modshift": 1}
    return res, attrs
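
Since Y = log_softmax(x), softmax(x) = exp(Y), and the chain rule gives dx = dY - exp(Y) * sum(dY) along the softmax axis, which is exactly the exp/sum/broadcast/vmul/vsub sequence above. A NumPy reference (sketch; helper name assumed):

import numpy as np

def logsoftmax_grad_ref(y, dy, axis):
    # dx = dy - exp(y) * sum(dy) along the softmax axis
    return dy - np.exp(y) * np.sum(dy, axis=axis, keepdims=True)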
Example #22
File: concat.py  Project: zhuyawen/akg
def concat(data, axis):
    """
    Concatenates data along the dimension set by axis.

    Args:
        data (Union[list, tuple]): list or tuple of tvm.tensor.Tensor of type float16, float32, int32, int8, uint8
        axis (int): Specifies the axis along which to concatenate. Must be in the range [-rank(data), rank(data))

    Returns:
        tvm.tensor.Tensor of same type as data.
    """

    data_size = len(data)
    if data_size < 1:
        raise RuntimeError("The size of data must be greater than or equal to 1")

    dtype = data[0].dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_TYPES)

    shape_0 = data[0].shape
    vc_util.check_shape(shape_0)
    if axis < 0:
        axis += len(shape_0)

    for i in range(1, data_size):
        shape_i = data[i].shape
        vc_util.check_shape(shape_i)
        if len(shape_i) != len(shape_0):
            raise ValueError("Input tensors must have same dimensions.")

    res = akg.lang.cce.concat(data, axis)
    return res
Example #23
File: acos_grad.py  Project: zhuyawen/akg
def acos_grad(x, dy):
    """
    Gradient for acos.

    .. math::
        dx = -\\frac{1}{\\sqrt{1 - x^2}} \\cdot dy

    Args:
        x (tvm.tensor.Tensor): tensor of type float16, float32.
        dy (tvm.tensor.Tensor): tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor, same type and shape as x.
    """
    dtype = x.dtype
    vc_util.ops_dtype_check(x.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.ops_dtype_check(dy.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.check_shape(x.shape)
    vc_util.check_shape(dy.shape)

    one = akg.tvm.const(1.0, dtype=dtype)
    mid_square = akg.tvm.compute(x.shape,
                                 lambda *i: (one - x(*i) * x(*i)),
                                 name="mid_square")
    rsq = rsqrt.rsqrt(mid_square)
    dx = akg.tvm.compute(x.shape, lambda *i: -rsq(*i) * dy(*i), name="dx")

    return dx
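
This implements d(acos x)/dx = -1/sqrt(1 - x^2): mid_square holds 1 - x^2, rsqrt supplies 1/sqrt(1 - x^2), and dx = -rsq * dy. A NumPy check (sketch; helper name assumed):

import numpy as np

def acos_grad_ref(x, dy):
    # dx = -dy / sqrt(1 - x^2)
    return -dy / np.sqrt(1.0 - x * x)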
Example #24
def blas_axby(x, y, alpha, beta):
    r"""
    Blas axby.

    :math:`\alpha x + \beta y`

    Args:
        x (tvm.tensor.Tensor): Input `x` of type float16 or float32.
        y (tvm.tensor.Tensor): Input `y` of type float16 or float32.
        alpha (Union[int, float]): Scale of `x`.
        beta (Union[int, float]): Scale of `y`.

    Returns:
        tvm.tensor.Tensor, has the same shape and type as inputs.
    """
    vc_util.ops_dtype_check([x.dtype, y.dtype],
                            vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.check_shape(x.shape)
    vc_util.check_shape(y.shape)

    ax = akg.lang.cce.vmuls(x, alpha)
    by = akg.lang.cce.vmuls(y, beta)
    res = akg.lang.cce.vadd(ax, by)

    return res
Example #25
File: relu_grad.py  Project: zhuyawen/akg
def relu_grad(inputs, head):
    """
    Computes gradient of inputs for the relu op

    Args:
        inputs: It is the same with the relu op.
        head: Tensor, has the same type and shape as inputs. Back propagation value.

    Returns:
        Tensor, has the same type and shape as inputs.
    """

    check_list = ["float16", "float32"]
    dtype = inputs.dtype
    if dtype.lower() not in check_list:
        raise RuntimeError("relu_grad only supports %s while dtype is %s" % (",".join(check_list), dtype))
    shape = [x.value for x in inputs.shape]
    vc_util.check_shape(shape)

    res = akg.tvm.compute(shape,
                          lambda *i: akg.tvm.if_then_else(
                              inputs(*i) > akg.tvm.const(0, dtype),
                              head(*i), akg.tvm.const(0, dtype)
                          ))
    return res
Example #26
File: pad.py  Project: zhuyawen/akg
def pad(data, paddings, padtype):
    """add paddings to the tensor
    :shape: The shape of the tensor, now only support two dimension Tensor
    :paddings: The shape of the paddings, shape [N,2], N is the dimension of the tensor,
     For each dimension D of input, paddings[D, 0] indicates how many values to add before
     the contents of tensor in that dimension, and paddings[D, 1] indicates how many values to
     add after the contents of tensor in that dimension.
    :dtype: The type of the input, float16, float32
    :padtype: One of "CONSTANT", "REFLECT", or "SYMMETRIC".
    """
    # check shape
    vc_util.check_shape(data.shape)
    # check types
    vc_util.ops_dtype_check(data.dtype, vc_util.DtypeForDavinci.ALL_TYPES)
    # check padding types
    ptype_checklist = ['constant']
    if padtype not in ptype_checklist:
        raise RuntimeError("pad_cce only supports %s while padtype is %s" % (",".join(ptype_checklist), padtype))

    dtype = data.dtype
    if dtype == 'int8' or dtype == 'uint8':
        data = cast(data, "float16")

    rank = len(data.shape)
    pad_before = []
    pad_after = []
    for i in range(rank):
        pad_before.append(paddings[i][0])
        pad_after.append(paddings[i][1])
    B = tvm_pad(data, pad_before, pad_after=pad_after, name='B')

    if dtype == 'int8' or dtype == 'uint8':
        B = cast(B, dtype)
    return B
Example #27
def leaky_relu(data, negative_slop=0):
    """
    leaky_relu op for input tensor (N,C,H,W) OR (N,C1,H,W,C0).

    :math:`max(x, negative_slop * x)`

    Args:
        data (tvm.tensor.Tensor): tensor with type float16 or float32.
        negative_slop (float): slope for negative inputs, in the range [0, 1).

    Returns:
        tvm.tensor.Tensor.
    """
    dtype = data.dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    vc_util.check_shape(data.shape)

    if negative_slop >= 1 or negative_slop < 0:
        raise RuntimeError(
            "leaky_relu only supports negative_slop in the range [0, 1)")

    slop_tmp = akg.tvm.const(negative_slop, dtype=dtype)
    tmp = akg.lang.cce.vmuls(data, slop_tmp)
    res = akg.lang.cce.vmax(tmp, data)

    return res
Example #28
File: round.py  Project: zhuyawen/akg
def round_value(input):
    """
    Rounds the values of an akg.tvm.tensor to the nearest integer (ties round to even), element-wise.

    Args:
        input: akg.tvm.Tensor of type float16, float32

    Returns:
        akg.tvm.Tensor of same shape as input, of type int32

    Raises:
        ValueError: If the type of input is invalid.
    """
    dtype = input.dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    shape = input.shape
    vc_util.check_shape(shape)

    if dtype == "float16":
        data_f16 = input
    else:
        data_f16 = akg.tvm.compute(shape,
                                   lambda *i: input(*i).astype("float16"),
                                   name="data_f16")

    res = akg.lang.cce.round(data_f16)

    return res
Example #29
File: minimum.py  Project: zhuyawen/akg
def minimum(input1, input2):
    """
    Return the min value of two tensors element-wise.

    Note:
        minimum supports broadcasting.

    Args:
        input1: Tensor.
        input2: Tensor. Has the same type as input1.

    Returns:
        Tensor, has the same type as inputs.
    """

    vc_util.ops_dtype_check([input1.dtype, input2.dtype], vc_util.DtypeForDavinci.ALL_TYPES)
    vc_util.elemwise_dtype_check(input1.dtype, input2.dtype)
    dtype = input1.dtype

    shape1 = [x.value for x in input1.shape]
    shape2 = [x.value for x in input2.shape]
    vc_util.check_shape(shape1)
    vc_util.check_shape(shape2)

    vc_util.auto_broadcast_check(shape1, shape2)

    if dtype in ("int8", "uint8"):
        input1 = cast(input1, "float16")
        input2 = cast(input2, "float16")
    res = akg.topi.minimum(input1, input2)
    if dtype in ("int8", "uint8"):
        res = cast(res, dtype)

    return res
Example #30
File: reverse.py  Project: zhuyawen/akg
def reverse(input_data, axis):
    """
    Reverse a tensor on some dimension.
    Args:
        input_data (tvm.tensor.Tensor): Tensor of float16, float32 and int32.
        axis (Union[list, tuple, int]): Because of don't support reverse which contain last dim, so can't equal None.
    Returns:
        tvm.tensor.Tensor,has the same type and shape as input_data
    """
    shape = get_shape(input_data)
    dtype = input_data.dtype
    # check dtype and shape
    vc_util.check_shape(shape)
    vc_util.ops_dtype_check(
        dtype,
        [vc_util.DtypeForDavinci.ALL_FLOAT, vc_util.DtypeForDavinci.INT32])
    # check axis
    shape_len = len(shape)
    if hasattr(axis, 'index'):
        axis = list(axis)
    if isinstance(axis, int):
        axis = [axis]
    vc_util.axis_check(shape_len, axis)
    _check_axis(axis, shape)
    # compute res
    res = reverse_compute(input_data, axis)
    return res